6 years ago · b4f00f89b2
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 
				+*.csv
			
 
				+__pycache__
			
--- a/csv_parser.py
+++ b/csv_parser.py
@@ -0,0 +1,33 @@
 
				+from data import LocationData
			
 
				+
			
 
				+field_col_name = {
			
 
				+    'lat' : '纬度', 'lon' : '经度', 'eig' : '精度估算', 'prec' : "误差", 
			
 
				+}
			
 
				+
			
 
				+def data_from_row(d, yaw_col):
			
 
				+    d1 = {}
			
 
				+    for field in field_col_name:
			
 
				+        d1[field] = float(d[field_col_name[field]])
			
 
				+    if yaw_col is None:
			
 
				+        d1['yaw'] = 0
			
 
				+    else:
			
 
				+        d1['yaw'] = d[yaw_col]
			
 
				+    if d1['lat'] == 0:
			
 
				+        d1['lat'] = None
			
 
				+    if d1['lon'] == 0:
			
 
				+        d1['lon'] = None
			
 
				+    return LocationData(**d1)
			
 
				+
			
 
				+def parse_data_from_csv(f, yaw_col = None):
			
 
				+    if type(f) == str:
			
 
				+        with open(f, encoding='gb2312') as fd:
			
 
				+            return parse_data_from_csv(fd)
			
 
				+    from csv import DictReader
			
 
				+    rd = DictReader(f)
			
 
				+    ret = []
			
 
				+    for row in rd:
			
 
				+        ret.append(data_from_row(row, yaw_col))
			
 
				+    return ret
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    print(parse_data_from_csv("test.csv"))
			
--- a/data.py
+++ b/data.py
@@ -0,0 +1,12 @@
 
				+from collections import namedtuple
			
 
				+
			
 
				+LocationData = namedtuple('LocationData', 'lat lon eig prec yaw')
			
 
				+
			
 
				+def have_location(x):
			
 
				+    return x.lat != None and x.lon != None
			
 
				+
			
 
				+def maybe_entry(x):
			
 
				+    return x.eig < 5 and x.prec > 40
			
 
				+
			
 
				+def maybe_indoor(x):
			
 
				+    return x.eig < 1 and x.prec > 400
			
--- a/main.py
+++ b/main.py
@@ -0,0 +1,52 @@
 
				+from sklearn.cluster import DBSCAN
			
 
				+from sklearn import metrics
			
 
				+import numpy as np
			
 
				+import matplotlib.pyplot as plt
			
 
				+
			
 
				+import csv_parser
			
 
				+import recog
			
 
				+import metric
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    data = csv_parser.parse_data_from_csv('test.csv')
			
 
				+    entries = recog.recognize_entries(data)
			
 
				+    x = []
			
 
				+    for e in entries:
			
 
				+        x.append([e.lon, e.lat])
			
 
				+    x = np.array(x)
			
 
				+    db = DBSCAN(eps = 10/6400000, min_samples = 3,
			
 
				+            metric = lambda x,y:metric.spherical_distance(x,y)).fit(x)
			
 
				+    labels = db.labels_
			
 
				+    core_samples_mask = np.zeros_like(db.labels_, dtype = bool)
			
 
				+    core_samples_mask[db.core_sample_indices_] = True
			
 
				+    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
			
 
				+    n_noise_ = list(labels).count(-1)
			
 
				+
			
 
				+    print('Estimated number of clusters: %d' % n_clusters_)
			
 
				+    print('Estimated number of noise points: %d' % n_noise_)
			
 
				+    if n_clusters_ == 0:
			
 
				+        print('can not get any clusters')
			
 
				+        plt.plot(x[:,0], x[:,1], 'o')
			
 
				+        plt.show()
			
 
				+        exit(0)
			
 
				+    print("Silhouette Coefficient: %0.3f"
			
 
				+              % metrics.silhouette_score(x, labels))
			
 
				+
			
 
				+    unique_labels = set(labels)
			
 
				+    colors = [plt.cm.Spectral(each)
			
 
				+                      for each in np.linspace(0, 1, len(unique_labels))]
			
 
				+    for k, col in zip(unique_labels, colors):
			
 
				+            if k == -1:
			
 
				+                    # Black used for noise.
			
 
				+                    col = [0, 0, 0, 1]
			
 
				+
			
 
				+            class_member_mask = (labels == k)
			
 
				+
			
 
				+            xy = x[class_member_mask & core_samples_mask]
			
 
				+            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
			
 
				+                             markeredgecolor='k', markersize=14)
			
 
				+
			
 
				+            xy = x[class_member_mask & ~core_samples_mask]
			
 
				+            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
			
 
				+                             markeredgecolor='k', markersize=6)
			
 
				+    plt.show()
			
--- a/metric.py
+++ b/metric.py
@@ -0,0 +1,15 @@
 
				+from math import pi, asin, cos, sin, sqrt
			
 
				+
			
 
				+def spherical_distance(a, b):
			
 
				+    coef = pi / 180
			
 
				+    radlat1 = a[1] * coef
			
 
				+    radlat2 = b[1] * coef
			
 
				+    radlng1 = a[0] * coef
			
 
				+    radlng2 = b[0] * coef
			
 
				+    a = sin((radlat1 - radlat2)/2)**2
			
 
				+    b = cos(radlat1) * cos(radlat2) * sin((radlng1 - radlng2)/2) ** 2
			
 
				+    return asin(sqrt(a+b))
			
 
				+
			
 
				+def ang_distance(a, b):
			
 
				+    x = abs(a-b)
			
 
				+    return min(x, 360-x)
			
--- a/recog.py
+++ b/recog.py
@@ -0,0 +1,37 @@
 
				+import data
			
 
				+
			
 
				+def list_have_location(l):
			
 
				+    for d in l:
			
 
				+        if not data.have_location(d):
			
 
				+            return False
			
 
				+    return True
			
 
				+
			
 
				+def list_have_continue_maybe_indoor(l):
			
 
				+    last_indoor = False
			
 
				+    for d in l:
			
 
				+        indoor = data.maybe_indoor(d)
			
 
				+        if indoor and last_indoor:
			
 
				+            return True
			
 
				+        last_indoor = indoor
			
 
				+    return False
			
 
				+
			
 
				+def check_entry(before, curr, after):
			
 
				+    if not list_have_location(before):
			
 
				+        return None
			
 
				+    if not data.have_location(curr):
			
 
				+        return None
			
 
				+    if not data.maybe_entry(curr):
			
 
				+        return None
			
 
				+    if data.maybe_entry(before[-1]):
			
 
				+        return None
			
 
				+    if not list_have_continue_maybe_indoor(after):
			
 
				+        return None
			
 
				+    return before[-1]
			
 
				+
			
 
				+def recognize_entries(l):
			
 
				+    ret = []
			
 
				+    for i in range(60, len(l) - 12):
			
 
				+        x = check_entry(l[i-60:i], l[i], l[i+1:i+13])
			
 
				+        if not x is None:
			
 
				+            ret.append(x)
			
 
				+    return ret