# wifiloc.py
# Copyright (C) 2019 Xiaotiancai Science & Technology Co. Ltd.
# All rights reserved.
# Parse a colon-separated MAC address string into an int.
  4. def parse_macaddr(s):
  5. return int("".join(s.split(':')), 16)
# Parse a "YYYY-MM-DD HH:MM:SS" timestamp string into seconds since the epoch.
  7. def timestamp(s):
  8. from time import strptime, mktime
  9. return mktime(strptime(s, '%Y-%m-%d %H:%M:%S'))
# Read the input records from a CSV file (or path) and parse them.
  11. def read_data(fd):
  12. if isinstance(fd, str):
  13. with open(fd) as f:
  14. return read_data(f)
  15. from collections import namedtuple
  16. Data = namedtuple('Data',
  17. ['watch_id', 'latitude', 'longitude', 'wifi_snr', 'timestamp'])
  18. import json
  19. import csv
  20. reader = csv.DictReader(fd)
  21. data = []
  22. for row in reader:
  23. params = json.loads(row['params'])
  24. if not 'macs' in params:
  25. continue
  26. macs = params['macs']
  27. if macs == '':
  28. continue
  29. wifi_snr = {}
  30. for i in macs.split('|'):
  31. fields = i.split(',')
  32. macaddr = parse_macaddr(fields[0])
  33. wifi_snr[macaddr] = float(fields[1])
  34. point = (Data(int(row['watch_id'], 16), float(row['latitude']),
  35. float(row['longitude']), wifi_snr, timestamp(row['create_time'])))
  36. data.append(point)
  37. return data
  38. def dist(p, q):
  39. from math import sin, cos, sqrt, asin, radians
  40. lat0 = radians(p.latitude)
  41. lat1 = radians(q.latitude)
  42. lng0 = radians(p.longitude)
  43. lng1 = radians(q.longitude)
  44. dlng = abs(lng0 - lng1)
  45. dlat = abs(lat0 - lat1)
  46. hav = lambda x:sin(x/2)*sin(x/2)
  47. h = hav(dlat) + cos(lat0) * cos(lat1) * hav(dlng)
  48. return 2 * 63710000 * asin(sqrt(h))
# Pick the known point most similar to the new one -- the same simple
# "highest pairwise similarity" idea used to detect cheating in programming
# contests.
  50. def best_match(known_points, new_point, similarity):
  51. max_ = None
  52. best = None
  53. for pt in known_points:
  54. if len(pt.wifi_snr) == 0:
  55. continue
  56. sim = similarity(pt, new_point)
  57. if sim is None:
  58. continue
  59. if max_ is None or sim > max_:
  60. max_ = sim
  61. best = pt
  62. return best, max_
  63. def select_bad_wifi(pts, crit):
  64. from collections import namedtuple
  65. Loc = namedtuple('Loc', ['latitude', 'longitude'])
  66. mac_loc = {}
  67. bad_mac = set()
  68. for i in range(len(pts)):
  69. for mac in pts[i].wifi_snr:
  70. if not mac in mac_loc:
  71. mac_loc[mac] = []
  72. loc = Loc(pts[i].latitude, pts[i].longitude)
  73. mac_loc[mac].append(loc)
  74. for mac in mac_loc:
  75. sum_la = 0.0
  76. sum_lo = 0.0
  77. for loc in mac_loc[mac]:
  78. sum_la += loc.latitude
  79. sum_lo += loc.longitude
  80. cent = Loc(sum_la / len(mac_loc[mac]),
  81. sum_lo / len(mac_loc[mac]))
  82. for loc in mac_loc[mac]:
  83. if dist(cent, loc) > crit:
  84. bad_mac.add(mac)
  85. break
  86. return bad_mac
  87. def remove_bad_wifi(pts, bad_mac):
  88. for i in range(len(pts)):
  89. for mac in bad_mac:
  90. if mac in pts[i].wifi_snr:
  91. pts[i].wifi_snr.pop(mac)
  92. return pts
  93. def remove_bad_wifi2(pts, bad_mac):
  94. good_index = []
  95. j = 0
  96. for i in range(len(pts)):
  97. bad = False
  98. for mac in pts[i].wifi_snr:
  99. if mac in bad_mac:
  100. bad = True
  101. break
  102. if not bad:
  103. pts[j] = pts[i]
  104. j += 1
  105. return pts[:j]
  106. def toplev(infile, sim, sim_crit, bad_wifi_crit = 500, stat_cnt = 10,
  107. stat_delta = 50, remove_bad_wifi_policy = 1,
  108. skip_tests_with_bad_wifi = False):
  109. from itertools import groupby
  110. from random import shuffle
  111. data = read_data(infile)
  112. # print(data)
  113. key = lambda x:x.watch_id
  114. data.sort(key = key)
  115. groups = groupby(data, key)
  116. tot = 0
  117. matched = 0
  118. cnt = [0] * stat_cnt
  119. for k, g in groups:
  120. points = list(g)
  121. if len(points) < 2:
  122. # print(k, "is skipped because the number of points < 2")
  123. continue
  124. # shuffle(points)
  125. points.sort(key = lambda x:x.timestamp)
  126. train = points[:len(points)//2]
  127. if not remove_bad_wifi_policy in {0, 1, 2}:
  128. raise Exception("unknown remove_bad_wifi_policy")
  129. if remove_bad_wifi_policy > 0:
  130. bad_wifi = select_bad_wifi(train, bad_wifi_crit)
  131. else:
  132. bad_wifi = {}
  133. if remove_bad_wifi_policy == 1:
  134. train = remove_bad_wifi(train, bad_wifi)
  135. if remove_bad_wifi_policy == 2:
  136. train = remove_bad_wifi2(train, bad_wifi)
  137. # print(len(train))
  138. verify = points[len(points)//2:]
  139. tot += len(verify)
  140. for pt in verify:
  141. if len(pt.wifi_snr) == 0:
  142. continue
  143. if skip_tests_with_bad_wifi:
  144. have_bad_wifi = False
  145. for mac in pt.wifi_snr:
  146. if mac in bad_wifi:
  147. have_bad_wifi = True
  148. break
  149. if have_bad_wifi:
  150. continue
  151. pt1, sim1 = best_match(train, pt, sim)
  152. if sim1 is None or sim1 < sim_crit:
  153. # print('no match, sim =', sim(pt1, pt))
  154. continue
  155. matched += 1
  156. d = dist(pt1, pt)
  157. # print('d =', d)
  158. if int(d / stat_delta) < stat_cnt:
  159. cnt[int(d / stat_delta)] += 1
  160. print('测试点总数 =', tot)
  161. print('匹配数 =', matched)
  162. acc = 0
  163. for i in range(10):
  164. acc += cnt[i]
  165. print((i+1) * stat_delta, '米内匹配数 =', acc,
  166. ', 占测试点总数比例 =', acc / tot, ', 占所有匹配结果比例 =',
  167. acc / matched)
  168. def simple_sim(a, b):
  169. from math import sqrt
  170. up, d1, d2 = 0.0, 0.0, 0.0
  171. for key in a.wifi_snr:
  172. d1 += a.wifi_snr[key] * a.wifi_snr[key]
  173. if key in b.wifi_snr:
  174. up += a.wifi_snr[key] * b.wifi_snr[key]
  175. for key in b.wifi_snr:
  176. d2 += b.wifi_snr[key] * b.wifi_snr[key]
  177. if up == 0 or up < 1.9:
  178. return -1
  179. else:
  180. return up / sqrt(d1) / sqrt(d2)
  181. if __name__ == "__main__":
  182. toplev(infile = "basicdata.csv", sim = simple_sim, bad_wifi_crit = 5000,
  183. sim_crit = 0.0)