Browse Source

First commit

Xi Ruoyao 5 years ago
commit
5c0d9cc600
3 changed files with 186 additions and 0 deletions
  1. 2 0
      .gitignore
  2. 19 0
      cosine.py
  3. 165 0
      wifiloc.py

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+__pycache__
+*.csv

+ 19 - 0
cosine.py

@@ -0,0 +1,19 @@
+def cosine_sim(known_point, new_point):
+    """Return the cosine similarity of two points' ``wifi_snr`` mappings.
+
+    Both arguments must expose a ``wifi_snr`` mapping of key -> number.
+    Keys present on only one side add nothing to the dot product but still
+    count towards that side's norm.
+
+    Returns ``None`` when the dot product is zero (which covers the case of
+    no shared key at all); otherwise the dot product divided by the product
+    of the two Euclidean norms.
+    """
+    from math import sqrt
+    known_snr = known_point.wifi_snr
+    new_snr = new_point.wifi_snr
+    dot = 0.0
+    known_sq = 0.0
+    for ap, level in known_snr.items():
+        known_sq += level * level
+        if ap in new_snr:
+            dot += level * new_snr[ap]
+    new_sq = 0.0
+    for level in new_snr.values():
+        new_sq += level * level
+    # A zero dot product is reported as "not comparable" rather than 0.0.
+    if dot == 0:
+        return None
+    return dot / sqrt(known_sq) / sqrt(new_sq)
+
+if __name__ == '__main__':
+    # Script entry point: run the evaluation in wifiloc.toplev on
+    # basicdata.csv, using cosine_sim as the similarity function.
+    from wifiloc import toplev
+    toplev(
+        infile="basicdata.csv",
+        sim=cosine_sim,
+        bad_wifi_crit=5000,
+        sim_crit=0.0,
+    )

+ 165 - 0
wifiloc.py

@@ -0,0 +1,165 @@
+# Copyright (C) 2019  Xiaotiancai Science & Technology Co. Ltd.
+# All rights reserved.
+
+def parse_macaddr(s):
+    """Convert a colon-separated hexadecimal string to an integer.
+
+    The colons are dropped and the remaining characters are parsed as a
+    single base-16 number, e.g. ``"00:1a:2b"`` -> ``0x001a2b``.  ``int``
+    raises ``ValueError`` if what is left is not valid hexadecimal.
+    """
+    hex_digits = s.replace(':', '')
+    return int(hex_digits, 16)
+
+def timestamp(s):
+    """Parse a ``YYYY-MM-DD HH:MM:SS`` string into seconds since the epoch.
+
+    The broken-down time is converted with ``time.mktime``, so the string
+    is interpreted as local time.  Returns a float.
+    """
+    import time
+    parsed = time.strptime(s, '%Y-%m-%d %H:%M:%S')
+    return time.mktime(parsed)
+
+def read_data(fd):
+    """Read CSV records and return them as a list of ``Data`` tuples.
+
+    ``fd`` is either a path (``str``), which is opened and closed here, or
+    an already open text file / iterable of lines accepted by
+    ``csv.DictReader``.
+
+    The columns read from each row are ``params`` (a JSON document),
+    ``watch_id`` (hexadecimal), ``latitude``, ``longitude`` and
+    ``create_time`` (``YYYY-MM-DD HH:MM:SS``).  The JSON value under the
+    ``macs`` key is a ``|``-separated list of ``mac,value`` pairs; it is
+    turned into the ``wifi_snr`` dict mapping the integer MAC address to
+    the value as a float.  Rows whose ``params`` has no ``macs`` key, or an
+    empty one, are skipped.
+
+    Each returned tuple has the fields ``watch_id``, ``latitude``,
+    ``longitude``, ``wifi_snr`` and ``timestamp``.
+    """
+    if isinstance(fd, str):
+        # The csv module does its own newline handling and asks for files
+        # to be opened with newline=''; otherwise newlines embedded in
+        # quoted fields are not interpreted correctly.
+        with open(fd, newline='') as f:
+            return read_data(f)
+    from collections import namedtuple
+    import csv
+    import json
+    Data = namedtuple('Data',
+            ['watch_id', 'latitude', 'longitude', 'wifi_snr', 'timestamp'])
+    data = []
+    for row in csv.DictReader(fd):
+        params = json.loads(row['params'])
+        if 'macs' not in params:
+            continue
+        macs = params['macs']
+        if macs == '':
+            continue
+        wifi_snr = {}
+        for entry in macs.split('|'):
+            if not entry:
+                # A leading, trailing or doubled '|' yields an empty
+                # segment; skip it instead of failing in int('', 16).
+                continue
+            fields = entry.split(',')
+            wifi_snr[parse_macaddr(fields[0])] = float(fields[1])
+        data.append(Data(int(row['watch_id'], 16), float(row['latitude']),
+            float(row['longitude']), wifi_snr, timestamp(row['create_time'])))
+    return data
+
+def dist(p, q, radius = 6371000.0):
+    """Return the great-circle distance between ``p`` and ``q``.
+
+    ``p`` and ``q`` must expose ``latitude`` and ``longitude`` attributes
+    in degrees.  The distance is computed with the haversine formula on a
+    sphere of the given ``radius`` and is expressed in the same unit as
+    ``radius``; the default is the Earth's mean radius in metres, so the
+    default result is in metres.
+    """
+    from math import sin, cos, sqrt, asin, radians
+    lat0 = radians(p.latitude)
+    lat1 = radians(q.latitude)
+    lng0 = radians(p.longitude)
+    lng1 = radians(q.longitude)
+
+    # hav(x) = sin^2(x / 2); it is even, so the sign of the difference
+    # does not matter.
+    hav = lambda x:sin(x/2)*sin(x/2)
+    h = hav(lat0 - lat1) + cos(lat0) * cos(lat1) * hav(lng0 - lng1)
+    # Rounding can push h marginally above 1 for near-antipodal points,
+    # which would make asin raise ValueError; clamp it.
+    return 2 * radius * asin(sqrt(min(1.0, h)))
+
+def best_match(known_points, new_point, similarity):
+    """Pick the known point that is most similar to ``new_point``.
+
+    ``similarity(known, new_point)`` is called for every known point whose
+    ``wifi_snr`` is non-empty.  A result of ``None`` means "not comparable"
+    and is ignored.  Among the remaining points the one with the highest
+    score wins; on a tie the earliest one is kept.
+
+    Returns ``(point, score)``, or ``(None, None)`` when no point produced
+    a score.
+    """
+    top_score = None
+    winner = None
+    for candidate in known_points:
+        if not len(candidate.wifi_snr):
+            continue
+        score = similarity(candidate, new_point)
+        if score is not None and (top_score is None or score > top_score):
+            winner, top_score = candidate, score
+    return winner, top_score
+
+def remove_bad_wifi(pts, crit = 500):
+    """Remove, in place, Wi-Fi keys whose sightings are too spread out.
+
+    For every key occurring in some point's ``wifi_snr``, the locations of
+    all points that contain it are collected and their centroid (the plain
+    arithmetic mean of latitude and longitude) is computed.  If any of
+    those locations lies farther than ``crit`` from the centroid, as
+    measured by ``dist``, the key is considered unreliable and is deleted
+    from the ``wifi_snr`` dict of every point in ``pts``.
+
+    ``crit`` is in the unit returned by ``dist``.  The points are modified
+    in place; nothing is returned.
+    """
+    from collections import defaultdict, namedtuple
+    Loc = namedtuple('Loc', ['latitude', 'longitude'])
+
+    # Pass 1: gather every location at which each key was seen.
+    mac_loc = defaultdict(list)
+    for pt in pts:
+        loc = Loc(pt.latitude, pt.longitude)
+        for mac in pt.wifi_snr:
+            mac_loc[mac].append(loc)
+
+    # Pass 2: flag keys with a sighting too far from the centroid.  This
+    # runs once over the complete data, so the outcome does not depend on
+    # the order of pts and no work is repeated per point.
+    bad_mac = set()
+    for mac, locs in mac_loc.items():
+        cent = Loc(sum(loc.latitude for loc in locs) / len(locs),
+                sum(loc.longitude for loc in locs) / len(locs))
+        if any(dist(cent, loc) > crit for loc in locs):
+            bad_mac.add(mac)
+
+    # Pass 3: strip the flagged keys from every point.
+    for pt in pts:
+        for mac in bad_mac:
+            pt.wifi_snr.pop(mac, None)
+
+def toplev(infile, sim, bad_wifi_crit, sim_crit):
+    """Evaluate a similarity function on recorded data and print statistics.
+
+    The records read from ``infile`` are grouped by ``watch_id``.  Within
+    each group having at least two points, the points are ordered by
+    timestamp; the earlier half is the training set (cleaned with
+    ``remove_bad_wifi(train, bad_wifi_crit)``) and the later half is the
+    verification set.  Every verification point with a non-empty
+    ``wifi_snr`` is matched against the training set with
+    ``best_match(train, pt, sim)``; a match counts only if its score is
+    not ``None`` and not below ``sim_crit``.
+
+    Printed output: the total number of verification points, the number
+    of matches, and for N = 1..10 the cumulative number of matches whose
+    ``dist`` to the matched point is below N * 100, together with its
+    ratio to the total and to the number of matches.  A ratio whose
+    denominator is zero is printed as 0.0.
+    """
+    from itertools import groupby
+    data = read_data(infile)
+    key = lambda x:x.watch_id
+    # groupby only merges adjacent items, so sort by the same key first.
+    data.sort(key = key)
+    tot = 0
+    matched = 0
+    cnt = [0] * 10
+    for _, g in groupby(data, key):
+        points = list(g)
+        if len(points) < 2:
+            continue
+        points.sort(key = lambda x:x.timestamp)
+        half = len(points) // 2
+        train = points[:half]
+        verify = points[half:]
+        remove_bad_wifi(train, bad_wifi_crit)
+        # Verification points without Wi-Fi data are still counted here.
+        tot += len(verify)
+        for pt in verify:
+            if len(pt.wifi_snr) == 0:
+                continue
+            pt1, sim1 = best_match(train, pt, sim)
+            if sim1 is None or sim1 < sim_crit:
+                continue
+            matched += 1
+            bucket = int(dist(pt1, pt) / 100)
+            if bucket < 10:
+                cnt[bucket] += 1
+    print('测试点总数 =', tot)
+    print('匹配数 =', matched)
+    # With no verification points or no matches the denominators are zero;
+    # report 0.0 instead of raising ZeroDivisionError.
+    ratio = lambda part, whole: part / whole if whole else 0.0
+    acc = 0
+    for i in range(10):
+        acc += cnt[i]
+        print(i+1, '百米内匹配数 =', acc, ', 占测试点总数比例 =',
+                ratio(acc, tot), ', 占所有匹配结果比例 =',
+                ratio(acc, matched))
+
+def simple_sim(a, b):
+    """Return a thresholded cosine similarity of two ``wifi_snr`` mappings.
+
+    Works like a cosine similarity over the two mappings, except that a
+    dot product below 1.9 (including zero, i.e. no shared key) yields the
+    sentinel ``-1`` instead of a ratio.
+    NOTE(review): the origin of the 1.9 cut-off is not visible here.
+    """
+    from math import sqrt
+    snr_a = a.wifi_snr
+    snr_b = b.wifi_snr
+    dot = 0.0
+    sq_a = 0.0
+    for ap, level in snr_a.items():
+        sq_a += level * level
+        if ap in snr_b:
+            dot += level * snr_b[ap]
+    sq_b = 0.0
+    for level in snr_b.values():
+        sq_b += level * level
+    # "dot == 0" is already covered by "dot < 1.9".
+    if dot < 1.9:
+        return -1
+    return dot / sqrt(sq_a) / sqrt(sq_b)
+
+if __name__ == "__main__":
+    # Script entry point: run the evaluation on basicdata.csv with
+    # simple_sim as the similarity function.
+    toplev(
+        infile="basicdata.csv",
+        sim=simple_sim,
+        bad_wifi_crit=5000,
+        sim_crit=0.0,
+    )