|
@@ -0,0 +1,165 @@
|
|
|
+# Copyright (C) 2019 Xiaotiancai Science & Technology Co. Ltd.
|
|
|
+# All rights reserved.
|
|
|
+
|
|
|
+# Parse MAC address to int
|
|
|
def parse_macaddr(s):
    """Convert a colon-separated hexadecimal MAC address string to an int.

    The colons are dropped and the remaining characters are read as one
    base-16 number, e.g. ``"00:00:00:00:01:0a"`` becomes ``266``.
    """
    hex_digits = s.replace(':', '')
    return int(hex_digits, 16)
|
|
|
+
|
|
|
+# Parse timestamp
|
|
|
def timestamp(s):
    """Parse a ``YYYY-mm-dd HH:MM:SS`` string into seconds since the epoch.

    The broken-down time is handed to ``time.mktime``, so the string is
    interpreted as local time and the result is a float.
    """
    import time

    broken_down = time.strptime(s, '%Y-%m-%d %H:%M:%S')
    return time.mktime(broken_down)
|
|
|
+
|
|
|
+# Read input data from file, and parse it
|
|
|
def read_data(fd):
    """Read location samples from a CSV file and parse them into records.

    Args:
        fd: Either a path (``str``) to the CSV file, or an already opened
            text file object / iterable of lines accepted by
            ``csv.DictReader``.

    Returns:
        A list of ``Data`` namedtuples with the fields ``watch_id`` (int,
        parsed from a hexadecimal column), ``latitude`` and ``longitude``
        (floats), ``wifi_snr`` (dict mapping integer MAC address to float
        signal value) and ``timestamp`` (result of ``timestamp()`` applied
        to the ``create_time`` column).

    Rows whose JSON ``params`` column has no ``macs`` key, or an empty
    ``macs`` string, are skipped.  ``macs`` is expected to look like
    ``"mac,snr|mac,snr|..."``.
    """
    import csv
    import json
    from collections import namedtuple

    if isinstance(fd, str):
        # The csv module requires files to be opened with newline='' so that
        # newlines embedded in quoted fields are interpreted correctly.
        with open(fd, newline='') as f:
            return read_data(f)

    Data = namedtuple('Data',
            ['watch_id', 'latitude', 'longitude', 'wifi_snr', 'timestamp'])

    data = []
    for row in csv.DictReader(fd):
        params = json.loads(row['params'])
        if 'macs' not in params:
            continue
        macs = params['macs']
        if macs == '':
            continue

        wifi_snr = {}
        for entry in macs.split('|'):
            if not entry:
                # A leading, trailing or doubled '|' yields an empty segment;
                # skip it instead of crashing while parsing an empty MAC.
                continue
            fields = entry.split(',')
            wifi_snr[parse_macaddr(fields[0])] = float(fields[1])

        data.append(Data(int(row['watch_id'], 16),
                         float(row['latitude']),
                         float(row['longitude']),
                         wifi_snr,
                         timestamp(row['create_time'])))
    return data
|
|
|
+
|
|
|
def dist(p, q):
    """Return the great-circle distance between two points, in metres.

    Uses the haversine formula on a sphere with the mean Earth radius.

    Args:
        p, q: Objects exposing ``latitude`` and ``longitude`` attributes in
            decimal degrees.

    Returns:
        The distance along the sphere's surface as a float (metres).
    """
    from math import sin, cos, sqrt, asin, radians

    # Mean Earth radius is about 6371 km, i.e. 6 371 000 m.  The previous
    # constant (63 710 000) was ten times too large and inflated every
    # distance by a factor of ten.
    earth_radius_m = 6371000

    lat0 = radians(p.latitude)
    lat1 = radians(q.latitude)
    dlat = lat1 - lat0
    dlng = radians(q.longitude) - radians(p.longitude)

    def hav(angle):
        # Haversine function: sin^2(angle / 2).  It is even, so the sign of
        # the coordinate differences does not matter.
        half_sin = sin(angle / 2)
        return half_sin * half_sin

    h = hav(dlat) + cos(lat0) * cos(lat1) * hav(dlng)
    # Rounding can push h marginally outside [0, 1] for (nearly) antipodal
    # points, which would make asin() raise a domain error.
    h = min(1.0, max(0.0, h))
    return 2 * earth_radius_m * asin(sqrt(h))
|
|
|
+
|
|
|
+# A trivial function, like how we detect cheating in programming contests.
|
|
|
def best_match(known_points, new_point, similarity):
    """Find the known point that is most similar to ``new_point``.

    Args:
        known_points: Iterable of points, each with a ``wifi_snr`` container.
        new_point: The point to compare against.
        similarity: Callable ``similarity(known, new_point)`` returning a
            comparable score, or ``None`` when no score is available.

    Returns:
        A ``(point, score)`` pair for the highest-scoring known point; on a
        tie the earliest one wins.  ``(None, None)`` is returned when no
        known point produced a score.
    """
    winner, top_score = None, None
    for candidate in known_points:
        # Points without any Wi-Fi readings cannot be compared.
        if not len(candidate.wifi_snr):
            continue
        score = similarity(candidate, new_point)
        if score is None:
            continue
        # Keep the current winner unless this score is strictly better.
        if top_score is not None and not (score > top_score):
            continue
        winner, top_score = candidate, score
    return winner, top_score
|
|
|
+
|
|
|
def remove_bad_wifi(pts, crit = 500):
    """Drop Wi-Fi access points whose sightings are too spread out.

    For every MAC address seen in ``pts`` the arithmetic mean of the
    latitudes/longitudes where it was observed is computed.  If any single
    observation lies more than ``crit`` (in the units returned by ``dist``)
    from that mean position, the MAC is considered unreliable and removed
    from the ``wifi_snr`` mapping of every point.

    Args:
        pts: Sequence of points with ``latitude``, ``longitude`` and a
            mutable ``wifi_snr`` dict; the dicts are modified in place.
        crit: Maximum allowed distance between an observation and the mean
            position of its MAC.

    Returns:
        None.
    """
    from collections import namedtuple
    Loc = namedtuple('Loc', ['latitude', 'longitude'])

    # MAC address -> list of locations at which it was observed.
    sightings = {}
    for pt in pts:
        for mac in pt.wifi_snr:
            sightings.setdefault(mac, []).append(
                Loc(pt.latitude, pt.longitude))

    bad_mac = set()
    for mac, locs in sightings.items():
        lat_total = 0.0
        lng_total = 0.0
        for loc in locs:
            lat_total += loc.latitude
            lng_total += loc.longitude
        centre = Loc(lat_total / len(locs), lng_total / len(locs))
        # One outlier is enough to distrust the access point.
        if any(dist(centre, loc) > crit for loc in locs):
            bad_mac.add(mac)

    for pt in pts:
        for mac in bad_mac.intersection(pt.wifi_snr):
            pt.wifi_snr.pop(mac)
|
|
|
+
|
|
|
def toplev(infile, sim, bad_wifi_crit, sim_crit):
    """Evaluate Wi-Fi fingerprint matching on a data file and print a report.

    The samples are grouped by ``watch_id``.  Within each group the samples
    are ordered by timestamp; the earlier half is used as the known
    ("training") set after unreliable access points were removed from it,
    and the later half is used for verification.  Every verification point
    with Wi-Fi readings is matched against the training set and the
    distance between the point and its best match is put into one of ten
    buckets of width 100 (in the units returned by ``dist``).

    Args:
        infile: Path or file object understood by ``read_data``.
        sim: Similarity function passed on to ``best_match``.
        bad_wifi_crit: Threshold passed on to ``remove_bad_wifi``.
        sim_crit: Minimum similarity for a best match to be counted.

    Returns:
        None.  The statistics are written to standard output.
    """
    from itertools import groupby

    def ratio(part, whole):
        # With no verification points or no matches the share is reported
        # as 0.0 instead of raising ZeroDivisionError.
        return part / whole if whole else 0.0

    data = read_data(infile)
    key = lambda x: x.watch_id
    # groupby() only merges adjacent items, so sort by the same key first.
    data.sort(key=key)

    tot = 0
    matched = 0
    cnt = [0] * 10
    for _, group in groupby(data, key):
        points = list(group)
        if len(points) < 2:
            # Cannot split a single sample into training and verification.
            continue
        points.sort(key=lambda x: x.timestamp)
        half = len(points) // 2
        train = points[:half]
        remove_bad_wifi(train, bad_wifi_crit)
        verify = points[half:]
        tot += len(verify)
        for pt in verify:
            if len(pt.wifi_snr) == 0:
                continue
            pt1, sim1 = best_match(train, pt, sim)
            if sim1 is None or sim1 < sim_crit:
                continue
            matched += 1
            bucket = int(dist(pt1, pt) / 100)
            if bucket < 10:
                cnt[bucket] += 1

    # "total number of test points"
    print('测试点总数 =', tot)
    # "number of matches"
    print('匹配数 =', matched)
    acc = 0
    for i in range(10):
        acc += cnt[i]
        # "matches within (i+1) hundred metres", "share of all test points",
        # "share of all matched results"
        print(i+1, '百米内匹配数 =', acc, ', 占测试点总数比例 =',
                ratio(acc, tot), ', 占所有匹配结果比例 =',
                ratio(acc, matched))
|
|
|
+
|
|
|
def simple_sim(a, b):
    """Cosine similarity between the Wi-Fi signal vectors of two points.

    Each point's ``wifi_snr`` mapping is treated as a sparse vector keyed by
    MAC address.  The dot product only involves MACs present in both
    points.

    Returns:
        ``-1`` when the dot product is below 1.9 (which includes the case of
        no overlap at all), otherwise the dot product divided by the
        Euclidean norms of both vectors.
    """
    from math import sqrt

    snr_a, snr_b = a.wifi_snr, b.wifi_snr

    dot = 0.0
    norm_a_sq = 0.0
    for mac, level in snr_a.items():
        norm_a_sq += level * level
        if mac in snr_b:
            dot += level * snr_b[mac]

    norm_b_sq = 0.0
    for level in snr_b.values():
        norm_b_sq += level * level

    # A dot product of exactly zero is already covered by this comparison.
    if dot < 1.9:
        return -1
    return dot / sqrt(norm_a_sq) / sqrt(norm_b_sq)
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: evaluate simple_sim on the bundled data file.
    settings = {
        "infile": "basicdata.csv",
        "sim": simple_sim,
        "bad_wifi_crit": 5000,
        "sim_crit": 0.0,
    }
    toplev(**settings)
|