wifiloc.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. # Copyright (C) 2019 Xiaotiancai Science & Technology Co. Ltd.
  2. # All rights reserved.
  3. # Parse MAC address to int
  4. def parse_macaddr(s):
  5. return int("".join(s.split(':')), 16)
  6. # Parse timestamp
  7. def timestamp(s):
  8. from time import strptime, mktime
  9. return mktime(strptime(s, '%Y-%m-%d %H:%M:%S'))
  10. # Read input data from file, and parse it
  11. def read_data(fd):
  12. if isinstance(fd, str):
  13. with open(fd) as f:
  14. return read_data(f)
  15. from collections import namedtuple
  16. Data = namedtuple('Data',
  17. ['watch_id', 'latitude', 'longitude', 'wifi_snr', 'timestamp'])
  18. import json
  19. import csv
  20. reader = csv.DictReader(fd)
  21. data = []
  22. for row in reader:
  23. params = json.loads(row['params'])
  24. if not 'macs' in params:
  25. continue
  26. macs = params['macs']
  27. if macs == '':
  28. continue
  29. wifi_snr = {}
  30. for i in macs.split('|'):
  31. fields = i.split(',')
  32. macaddr = parse_macaddr(fields[0])
  33. wifi_snr[macaddr] = float(fields[1])
  34. point = (Data(int(row['watch_id'], 16), float(row['latitude']),
  35. float(row['longitude']), wifi_snr, timestamp(row['create_time'])))
  36. data.append(point)
  37. return data
  38. def dist(p, q):
  39. from math import sin, cos, sqrt, asin, radians
  40. lat0 = radians(p.latitude)
  41. lat1 = radians(q.latitude)
  42. lng0 = radians(p.longitude)
  43. lng1 = radians(q.longitude)
  44. dlng = abs(lng0 - lng1)
  45. dlat = abs(lat0 - lat1)
  46. hav = lambda x:sin(x/2)*sin(x/2)
  47. h = hav(dlat) + cos(lat0) * cos(lat1) * hav(dlng)
  48. return 2 * 63710000 * asin(sqrt(h))
  49. # A trivial function, like how we detect cheating in programming contests.
  50. def best_match(known_points, new_point, similarity):
  51. max_ = None
  52. best = None
  53. for pt in known_points:
  54. if len(pt.wifi_snr) == 0:
  55. continue
  56. sim = similarity(pt, new_point)
  57. if sim is None:
  58. continue
  59. if max_ is None or sim > max_:
  60. max_ = sim
  61. best = pt
  62. return best, max_
  63. def remove_bad_wifi(pts, crit = 500):
  64. from collections import namedtuple
  65. Loc = namedtuple('Loc', ['latitude', 'longitude'])
  66. mac_loc = {}
  67. bad_mac = set()
  68. for i in range(len(pts)):
  69. for mac in pts[i].wifi_snr:
  70. if not mac in mac_loc:
  71. mac_loc[mac] = []
  72. loc = Loc(pts[i].latitude, pts[i].longitude)
  73. """
  74. for loc1 in mac_loc[mac]:
  75. if dist(loc1, loc) > crit:
  76. print(loc1, loc, dist(loc1, loc))
  77. bad_mac.add(mac)
  78. break
  79. """
  80. mac_loc[mac].append(loc)
  81. for mac in mac_loc:
  82. sum_la = 0.0
  83. sum_lo = 0.0
  84. for loc in mac_loc[mac]:
  85. sum_la += loc.latitude
  86. sum_lo += loc.longitude
  87. cent = Loc(sum_la / len(mac_loc[mac]),
  88. sum_lo / len(mac_loc[mac]))
  89. for loc in mac_loc[mac]:
  90. if dist(cent, loc) > crit:
  91. bad_mac.add(mac)
  92. break
  93. for i in range(len(pts)):
  94. for mac in bad_mac:
  95. if mac in pts[i].wifi_snr:
  96. pts[i].wifi_snr.pop(mac)
  97. def toplev(infile, sim, bad_wifi_crit, sim_crit, stat_cnt = 10,
  98. stat_delta = 50):
  99. from itertools import groupby
  100. from random import shuffle
  101. data = read_data(infile)
  102. # print(data)
  103. key = lambda x:x.watch_id
  104. data.sort(key = key)
  105. groups = groupby(data, key)
  106. tot = 0
  107. matched = 0
  108. cnt = [0] * stat_cnt
  109. for k, g in groups:
  110. points = list(g)
  111. if len(points) < 2:
  112. # print(k, "is skipped because the number of points < 2")
  113. continue
  114. # shuffle(points)
  115. points.sort(key = lambda x:x.timestamp)
  116. train = points[:len(points)//2]
  117. remove_bad_wifi(train, bad_wifi_crit)
  118. # print(len(train))
  119. verify = points[len(points)//2:]
  120. tot += len(verify)
  121. for pt in verify:
  122. if len(pt.wifi_snr) == 0:
  123. continue
  124. pt1, sim1 = best_match(train, pt, sim)
  125. if sim1 is None or sim1 < sim_crit:
  126. # print('no match, sim =', sim(pt1, pt))
  127. continue
  128. matched += 1
  129. d = dist(pt1, pt)
  130. # print('d =', d)
  131. if int(d / stat_delta) < stat_cnt:
  132. cnt[int(d / stat_delta)] += 1
  133. print('测试点总数 =', tot)
  134. print('匹配数 =', matched)
  135. acc = 0
  136. for i in range(10):
  137. acc += cnt[i]
  138. print((i+1) * stat_delta, '米内匹配数 =', acc,
  139. ', 占测试点总数比例 =', acc / tot, ', 占所有匹配结果比例 =',
  140. acc / matched)
  141. def simple_sim(a, b):
  142. from math import sqrt
  143. up, d1, d2 = 0.0, 0.0, 0.0
  144. for key in a.wifi_snr:
  145. d1 += a.wifi_snr[key] * a.wifi_snr[key]
  146. if key in b.wifi_snr:
  147. up += a.wifi_snr[key] * b.wifi_snr[key]
  148. for key in b.wifi_snr:
  149. d2 += b.wifi_snr[key] * b.wifi_snr[key]
  150. if up == 0 or up < 1.9:
  151. return -1
  152. else:
  153. return up / sqrt(d1) / sqrt(d2)
  154. if __name__ == "__main__":
  155. toplev(infile = "basicdata.csv", sim = simple_sim, bad_wifi_crit = 5000,
  156. sim_crit = 0.0)