|
@@ -0,0 +1,52 @@
|
|
|
+from sklearn.cluster import DBSCAN
|
|
|
+from sklearn import metrics
|
|
|
+import numpy as np
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+
|
|
|
+import csv_parser
|
|
|
+import recog
|
|
|
+import metric
|
|
|
+
|
|
|
# Neighbourhood radius handed to DBSCAN, expressed in whatever unit
# metric.spherical_distance returns.
# NOTE(review): presumably 10 m divided by an Earth radius of 6400 km, i.e. an
# angle in radians -- confirm against metric.spherical_distance.
# The float literal keeps the division from truncating to 0 under Python 2.
DBSCAN_EPS = 10.0 / 6400000
DBSCAN_MIN_SAMPLES = 3


def summarize_labels(labels):
    """Count clusters and noise points in a DBSCAN label sequence.

    Args:
        labels: Sequence of integer cluster labels; -1 marks noise.

    Returns:
        Tuple ``(n_clusters, n_noise)``.
    """
    values = np.asarray(labels).tolist()
    n_noise = values.count(-1)
    # The noise label is not a cluster, so it is excluded from the count.
    n_clusters = len(set(values)) - (1 if n_noise else 0)
    return n_clusters, n_noise


def silhouette_is_defined(labels):
    """Tell whether a silhouette score can be computed for ``labels``.

    ``sklearn.metrics.silhouette_score`` raises ``ValueError`` unless the
    number of distinct labels lies between 2 and ``n_samples - 1``; the noise
    label counts as a label here, exactly as it does in that call.

    Args:
        labels: Sequence of integer cluster labels.

    Returns:
        True when the score is defined, False otherwise.
    """
    values = np.asarray(labels).tolist()
    return 2 <= len(set(values)) <= len(values) - 1


def plot_clusters(points, labels, core_samples_mask):
    """Draw every cluster in its own colour on the current matplotlib figure.

    Core samples are drawn with large markers, the remaining members of a
    cluster with small ones, and noise (label -1) in black.

    Args:
        points: 2-D array; column 0 is plotted on the x axis, column 1 on
            the y axis.
        labels: Array of cluster labels, one per row of ``points``.
        core_samples_mask: Boolean array marking the DBSCAN core samples.
    """
    unique_labels = set(labels)
    colors = [plt.cm.Spectral(each)
              for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]

        class_member_mask = (labels == k)

        core = points[class_member_mask & core_samples_mask]
        plt.plot(core[:, 0], core[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=14)

        border = points[class_member_mask & ~core_samples_mask]
        plt.plot(border[:, 0], border[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=6)


def main():
    """Cluster the entries recognised in ``test.csv`` with DBSCAN and plot them.

    Prints the number of clusters and noise points and, when it is defined,
    the silhouette coefficient, then shows a scatter plot of the result.
    """
    data = csv_parser.parse_data_from_csv('test.csv')
    entries = recog.recognize_entries(data)
    # One row per entry, columns in (lon, lat) order.
    x = np.array([[e.lon, e.lat] for e in entries])
    if x.size == 0:
        # An empty sample array can be neither clustered nor plotted.
        print('no entries to cluster')
        return

    # The same distance function drives both the clustering and its
    # evaluation, so the silhouette is measured in the clustering's metric.
    distance = metric.spherical_distance
    db = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES,
                metric=distance).fit(x)
    labels = db.labels_
    core_samples_mask = np.zeros_like(labels, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    n_clusters_, n_noise_ = summarize_labels(labels)

    print('Estimated number of clusters: %d' % n_clusters_)
    print('Estimated number of noise points: %d' % n_noise_)
    if n_clusters_ == 0:
        print('can not get any clusters')
        plt.plot(x[:, 0], x[:, 1], 'o')
        plt.show()
        return

    if silhouette_is_defined(labels):
        print("Silhouette Coefficient: %0.3f"
              % metrics.silhouette_score(x, labels, metric=distance))
    else:
        # silhouette_score would raise ValueError for this label set.
        print("Silhouette Coefficient: n/a")

    plot_clusters(x, labels, core_samples_mask)
    plt.show()


if __name__ == '__main__':
    main()
|