main.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. from sklearn.cluster import DBSCAN
  2. from sklearn import metrics
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. import csv_parser
  6. import recog
  7. import metric
  8. if __name__ == '__main__':
  9. data = csv_parser.parse_data_from_csv('test.csv')
  10. entries = recog.recognize_entries(data)
  11. x = []
  12. for e in entries:
  13. x.append([e.lon, e.lat])
  14. x = np.array(x)
  15. db = DBSCAN(eps = 10/6400000, min_samples = 3,
  16. metric = lambda x,y:metric.spherical_distance(x,y)).fit(x)
  17. labels = db.labels_
  18. core_samples_mask = np.zeros_like(db.labels_, dtype = bool)
  19. core_samples_mask[db.core_sample_indices_] = True
  20. n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
  21. n_noise_ = list(labels).count(-1)
  22. print('Estimated number of clusters: %d' % n_clusters_)
  23. print('Estimated number of noise points: %d' % n_noise_)
  24. if n_clusters_ == 0:
  25. print('can not get any clusters')
  26. plt.plot(x[:,0], x[:,1], 'o')
  27. plt.show()
  28. exit(0)
  29. print("Silhouette Coefficient: %0.3f"
  30. % metrics.silhouette_score(x, labels))
  31. unique_labels = set(labels)
  32. colors = [plt.cm.Spectral(each)
  33. for each in np.linspace(0, 1, len(unique_labels))]
  34. for k, col in zip(unique_labels, colors):
  35. if k == -1:
  36. # Black used for noise.
  37. col = [0, 0, 0, 1]
  38. class_member_mask = (labels == k)
  39. xy = x[class_member_mask & core_samples_mask]
  40. plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
  41. markeredgecolor='k', markersize=14)
  42. xy = x[class_member_mask & ~core_samples_mask]
  43. plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
  44. markeredgecolor='k', markersize=6)
  45. plt.show()