Browse Source

first commit

Xi Ruoyao 4 years ago
commit
b4f00f89b2
6 changed files with 151 additions and 0 deletions
  1. 2 0
      .gitignore
  2. 33 0
      csv_parser.py
  3. 12 0
      data.py
  4. 52 0
      main.py
  5. 15 0
      metric.py
  6. 37 0
      recog.py

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+*.csv
+__pycache__

+ 33 - 0
csv_parser.py

@@ -0,0 +1,33 @@
+from data import LocationData
+
+# Maps each numeric record field to the header of the CSV column it is read
+# from.  The headers are Chinese: latitude, longitude, precision estimate
+# and error, in that order.
+field_col_name = {
+    'lat': '纬度',
+    'lon': '经度',
+    'eig': '精度估算',
+    'prec': '误差',
+}
+
+def data_from_row(d, yaw_col):
+    """Build a LocationData record from one CSV row.
+
+    d maps column headers to cell values (for example a csv.DictReader
+    row).  yaw_col is the header of the column holding the yaw value, or
+    None when there is no such column, in which case yaw is set to 0.
+
+    A latitude or longitude that parses to exactly 0 is stored as None.
+
+    Raises KeyError when a required column is missing and ValueError when
+    a cell cannot be converted to float.
+    """
+    values = {field: float(d[col]) for field, col in field_col_name.items()}
+    # Convert yaw like every other field: CSV cells arrive as strings, and
+    # the default below is numeric, so leaving it raw would mix types.
+    values['yaw'] = 0 if yaw_col is None else float(d[yaw_col])
+    # NOTE(review): a coordinate of exactly 0 appears to stand for "no
+    # position available" -- confirm against the data source.
+    for axis in ('lat', 'lon'):
+        if values[axis] == 0:
+            values[axis] = None
+    return LocationData(**values)
+
+def parse_data_from_csv(f, yaw_col=None):
+    """Parse a CSV source into a list of LocationData records.
+
+    f is either a path string, opened here as GB2312 text, or an already
+    open iterable of CSV lines whose first line is the header.  yaw_col is
+    the header of the yaw column, or None to record yaw as 0 for each row.
+
+    Returns one record per data row, in file order.
+    """
+    if isinstance(f, str):
+        # newline='' lets the csv module do its own newline handling, as
+        # its documentation requires for file objects.
+        with open(f, encoding='gb2312', newline='') as fd:
+            # Forward yaw_col: dropping it here silently forced yaw to 0
+            # whenever the caller passed a path instead of a file object.
+            return parse_data_from_csv(fd, yaw_col)
+    from csv import DictReader
+    return [data_from_row(row, yaw_col) for row in DictReader(f)]
+
+if __name__ == '__main__':
+    # Manual smoke run: parse the sample file and dump the records.
+    records = parse_data_from_csv("test.csv")
+    print(records)

+ 12 - 0
data.py

@@ -0,0 +1,12 @@
+from collections import namedtuple
+
+# Immutable record with the five fields lat, lon, eig, prec and yaw.
+LocationData = namedtuple(
+    'LocationData',
+    ['lat', 'lon', 'eig', 'prec', 'yaw'],
+)
+
+def have_location(x):
+    """Return True when both x.lat and x.lon are set (not None)."""
+    # Identity comparison: None is a singleton, and `!= None` can be
+    # overridden by the operand's type.
+    return x.lat is not None and x.lon is not None
+
+def maybe_entry(x):
+    """Return True when x.eig is below 5 and x.prec is above 40."""
+    low_eig = x.eig < 5
+    if not low_eig:
+        return False
+    return x.prec > 40
+
+def maybe_indoor(x):
+    """Return True when x.eig is below 1 and x.prec is above 400."""
+    low_eig = x.eig < 1
+    if not low_eig:
+        return False
+    return x.prec > 400

+ 52 - 0
main.py

@@ -0,0 +1,52 @@
+from sklearn.cluster import DBSCAN
+from sklearn import metrics
+import numpy as np
+import matplotlib.pyplot as plt
+
+import csv_parser
+import recog
+import metric
+
+def main():
+    """Cluster the entries recognized in test.csv with DBSCAN and plot them.
+
+    Prints the number of clusters and noise points, and the silhouette
+    coefficient when it is defined, then shows a scatter plot in which
+    core samples are drawn large, other samples small and noise in black.
+    """
+    data = csv_parser.parse_data_from_csv('test.csv')
+    entries = recog.recognize_entries(data)
+    if not entries:
+        # DBSCAN cannot be fitted on an empty sample array.
+        print('can not recognize any entries')
+        return
+    x = np.array([[e.lon, e.lat] for e in entries])
+    # eps is compared against the values returned by
+    # metric.spherical_distance, so it is expressed in that function's
+    # unit.  The function is passed directly; no lambda wrapper is needed.
+    db = DBSCAN(eps=10/6400000, min_samples=3,
+                metric=metric.spherical_distance).fit(x)
+    labels = db.labels_
+    core_samples_mask = np.zeros_like(labels, dtype=bool)
+    core_samples_mask[db.core_sample_indices_] = True
+    unique_labels = set(labels)
+    # Label -1 marks noise and is not a cluster.
+    n_clusters_ = len(unique_labels) - (1 if -1 in labels else 0)
+    n_noise_ = list(labels).count(-1)
+
+    print('Estimated number of clusters: %d' % n_clusters_)
+    print('Estimated number of noise points: %d' % n_noise_)
+    if n_clusters_ == 0:
+        print('can not get any clusters')
+        plt.plot(x[:, 0], x[:, 1], 'o')
+        plt.show()
+        return
+    # silhouette_score raises unless 2 <= n_labels <= n_samples - 1, which
+    # fails e.g. for a single cluster without noise.
+    # NOTE(review): the score uses its default metric on the raw lon/lat
+    # pairs, not the spherical metric given to DBSCAN -- confirm intended.
+    if 2 <= len(unique_labels) <= len(x) - 1:
+        print("Silhouette Coefficient: %0.3f"
+              % metrics.silhouette_score(x, labels))
+    else:
+        print("Silhouette Coefficient: undefined for this labelling")
+
+    colors = [plt.cm.Spectral(each)
+              for each in np.linspace(0, 1, len(unique_labels))]
+    for k, col in zip(unique_labels, colors):
+        if k == -1:
+            # Black used for noise.
+            col = [0, 0, 0, 1]
+
+        class_member_mask = (labels == k)
+
+        # Core samples of this label: large markers.
+        xy = x[class_member_mask & core_samples_mask]
+        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+                 markeredgecolor='k', markersize=14)
+
+        # Remaining samples of this label: small markers.
+        xy = x[class_member_mask & ~core_samples_mask]
+        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
+                 markeredgecolor='k', markersize=6)
+    plt.show()
+
+
+if __name__ == '__main__':
+    main()

+ 15 - 0
metric.py

@@ -0,0 +1,15 @@
+from math import pi, asin, cos, sin, sqrt
+
+def spherical_distance(a, b):
+    """Return half the central angle, in radians, between two points.
+
+    a and b are indexable as (longitude, latitude), both in degrees.
+    The value is asin(sqrt(h)) with h the haversine term, i.e. half of
+    the great-circle angle; multiply by twice the sphere radius to get an
+    arc length.
+    """
+    coef = pi / 180
+    lat1 = a[1] * coef
+    lat2 = b[1] * coef
+    lng1 = a[0] * coef
+    lng2 = b[0] * coef
+    # Separate names for the intermediates so the parameters are not
+    # overwritten halfway through the computation.
+    lat_term = sin((lat1 - lat2) / 2) ** 2
+    lng_term = cos(lat1) * cos(lat2) * sin((lng1 - lng2) / 2) ** 2
+    # Rounding can push the sum marginally above 1 for near-antipodal
+    # points, which would make asin raise ValueError; clamp it.
+    return asin(sqrt(min(1.0, lat_term + lng_term)))
+
+def ang_distance(a, b):
+    """Return the smallest separation between two angles given in degrees.
+
+    The result lies in [0, 180].
+    """
+    # Reduce modulo a full turn first so inputs more than 360 apart do
+    # not make `360 - x` negative.
+    x = abs(a - b) % 360
+    return min(x, 360 - x)

+ 37 - 0
recog.py

@@ -0,0 +1,37 @@
+import data
+
+def list_have_location(l):
+    """Return True when every item of l passes data.have_location.
+
+    An empty l yields True.
+    """
+    return all(data.have_location(item) for item in l)
+
+def list_have_continue_maybe_indoor(l):
+    """Return True when two adjacent items of l both pass data.maybe_indoor.
+
+    Scanning stops at the first such pair.
+    """
+    streak = 0  # length of the current run of maybe-indoor items
+    for item in l:
+        if data.maybe_indoor(item):
+            streak += 1
+        else:
+            streak = 0
+        if streak >= 2:
+            return True
+    return False
+
+def check_entry(before, curr, after):
+    """Return the sample just before curr when curr looks like an entry.
+
+    before and after are the sequences of samples preceding and following
+    curr.  The result is before[-1] when all of the following hold, and
+    None otherwise:
+
+    - before is non-empty and every sample in it has a location;
+    - curr has a location and passes data.maybe_entry;
+    - before[-1] does not pass data.maybe_entry, so only the first sample
+      of a run of maybe-entry samples is reported;
+    - after contains two adjacent samples passing data.maybe_indoor.
+    """
+    if not before:
+        # Without a preceding sample there is nothing to return, and
+        # before[-1] below would raise IndexError.
+        return None
+    if not list_have_location(before):
+        return None
+    if not data.have_location(curr):
+        return None
+    if not data.maybe_entry(curr):
+        return None
+    last_before = before[-1]
+    if data.maybe_entry(last_before):
+        return None
+    if not list_have_continue_maybe_indoor(after):
+        return None
+    return last_before
+
+def recognize_entries(l, before_len=60, after_len=12):
+    """Scan a sequence of samples and collect the recognized entries.
+
+    Every index i that has before_len samples in front of it and
+    after_len samples behind it is passed to check_entry together with
+    those two windows; each non-None result is collected.
+
+    before_len and after_len default to the window sizes 60 and 12.
+    Returns the collected results in scan order; the list is empty when l
+    is too short to hold both windows around any sample.
+    """
+    ret = []
+    for i in range(before_len, len(l) - after_len):
+        x = check_entry(l[i - before_len:i], l[i], l[i + 1:i + 1 + after_len])
+        if x is not None:
+            ret.append(x)
+    return ret