# wifiloc.py
# Copyright (C) 2019 Xiaotiancai Science & Technology Co. Ltd.
# All rights reserved.
  3. # Parse MAC address to int
  4. def parse_macaddr(s):
  5. return int("".join(s.split(':')), 16)
  6. # Parse timestamp
  7. def timestamp(s):
  8. from time import strptime, mktime
  9. return mktime(strptime(s, '%Y-%m-%d %H:%M:%S'))
  10. # Read input data from file, and parse it
  11. def read_data(fd):
  12. if isinstance(fd, str):
  13. with open(fd) as f:
  14. return read_data(f)
  15. from collections import namedtuple
  16. Data = namedtuple('Data',
  17. ['watch_id', 'latitude', 'longitude', 'wifi_snr', 'timestamp'])
  18. import json
  19. import csv
  20. reader = csv.DictReader(fd)
  21. data = []
  22. for row in reader:
  23. params = json.loads(row['params'])
  24. if not 'macs' in params:
  25. continue
  26. macs = params['macs']
  27. if macs == '':
  28. continue
  29. wifi_snr = {}
  30. for i in macs.split('|'):
  31. fields = i.split(',')
  32. macaddr = parse_macaddr(fields[0])
  33. wifi_snr[macaddr] = float(fields[1])
  34. point = (Data(int(row['watch_id'], 16), float(row['latitude']),
  35. float(row['longitude']), wifi_snr, timestamp(row['create_time'])))
  36. data.append(point)
  37. return data
  38. def dist(p, q):
  39. from math import sin, cos, sqrt, asin, radians
  40. lat0 = radians(p.latitude)
  41. lat1 = radians(q.latitude)
  42. lng0 = radians(p.longitude)
  43. lng1 = radians(q.longitude)
  44. dlng = abs(lng0 - lng1)
  45. dlat = abs(lat0 - lat1)
  46. hav = lambda x:sin(x/2)*sin(x/2)
  47. h = hav(dlat) + cos(lat0) * cos(lat1) * hav(dlng)
  48. return 2 * 63710000 * asin(sqrt(h))
  49. # A trivial function, like how we detect cheating in programming contests.
  50. def best_match(known_points, new_point, similarity):
  51. max_ = None
  52. best = None
  53. for pt in known_points:
  54. if len(pt.wifi_snr) == 0:
  55. continue
  56. sim = similarity(pt, new_point)
  57. if sim is None:
  58. continue
  59. if max_ is None or sim > max_:
  60. max_ = sim
  61. best = pt
  62. return best, max_
  63. def get_wifi_coverage(pts):
  64. from collections import namedtuple
  65. Loc = namedtuple('Loc', ['latitude', 'longitude'])
  66. mac_loc = {}
  67. mac_cov = {}
  68. for i in range(len(pts)):
  69. for mac in pts[i].wifi_snr:
  70. if not mac in mac_loc:
  71. mac_loc[mac] = []
  72. loc = Loc(pts[i].latitude, pts[i].longitude)
  73. mac_loc[mac].append(loc)
  74. for mac in mac_loc:
  75. sum_la = 0.0
  76. sum_lo = 0.0
  77. for loc in mac_loc[mac]:
  78. sum_la += loc.latitude
  79. sum_lo += loc.longitude
  80. cent = Loc(sum_la / len(mac_loc[mac]),
  81. sum_lo / len(mac_loc[mac]))
  82. cov = 0
  83. for loc in mac_loc[mac]:
  84. cov = max(cov, dist(cent, loc))
  85. mac_cov[mac] = cov
  86. return mac_cov
  87. def select_bad_wifi(pts, crit):
  88. bad_mac = set()
  89. cov = get_wifi_coverage(pts)
  90. for mac in cov:
  91. if cov[mac] > crit:
  92. bad_mac.add(mac)
  93. return bad_mac
  94. def remove_bad_wifi(pts, bad_mac):
  95. for i in range(len(pts)):
  96. for mac in bad_mac:
  97. if mac in pts[i].wifi_snr:
  98. pts[i].wifi_snr.pop(mac)
  99. return pts
  100. def remove_bad_wifi2(pts, bad_mac):
  101. good_index = []
  102. j = 0
  103. for i in range(len(pts)):
  104. bad = False
  105. for mac in pts[i].wifi_snr:
  106. if mac in bad_mac:
  107. bad = True
  108. break
  109. if not bad:
  110. pts[j] = pts[i]
  111. j += 1
  112. return pts[:j]
  113. def toplev(infile, sim, sim_crit, bad_wifi_crit = 500, stat_cnt = 10,
  114. stat_delta = 50, remove_bad_wifi_policy = 1,
  115. skip_tests_with_bad_wifi = False):
  116. from itertools import groupby
  117. from random import shuffle
  118. data = read_data(infile)
  119. # print(data)
  120. key = lambda x:x.watch_id
  121. data.sort(key = key)
  122. groups = groupby(data, key)
  123. tot = 0
  124. matched = 0
  125. cnt = [0] * stat_cnt
  126. for k, g in groups:
  127. points = list(g)
  128. if len(points) < 2:
  129. # print(k, "is skipped because the number of points < 2")
  130. continue
  131. # shuffle(points)
  132. points.sort(key = lambda x:x.timestamp)
  133. train = points[:len(points)//2]
  134. if not remove_bad_wifi_policy in {0, 1, 2}:
  135. raise Exception("unknown remove_bad_wifi_policy")
  136. if remove_bad_wifi_policy > 0:
  137. bad_wifi = select_bad_wifi(train, bad_wifi_crit)
  138. else:
  139. bad_wifi = {}
  140. if remove_bad_wifi_policy == 1:
  141. train = remove_bad_wifi(train, bad_wifi)
  142. if remove_bad_wifi_policy == 2:
  143. train = remove_bad_wifi2(train, bad_wifi)
  144. # print(len(train))
  145. verify = points[len(points)//2:]
  146. tot += len(verify)
  147. for pt in verify:
  148. if len(pt.wifi_snr) == 0:
  149. continue
  150. if skip_tests_with_bad_wifi:
  151. have_bad_wifi = False
  152. for mac in pt.wifi_snr:
  153. if mac in bad_wifi:
  154. have_bad_wifi = True
  155. break
  156. if have_bad_wifi:
  157. continue
  158. pt1, sim1 = best_match(train, pt, sim)
  159. if sim1 is None or sim1 < sim_crit:
  160. # print('no match, sim =', sim(pt1, pt))
  161. continue
  162. matched += 1
  163. d = dist(pt1, pt)
  164. # print('d =', d)
  165. if int(d / stat_delta) < stat_cnt:
  166. cnt[int(d / stat_delta)] += 1
  167. print('测试点总数 =', tot)
  168. print('匹配数 =', matched)
  169. acc = 0
  170. for i in range(10):
  171. acc += cnt[i]
  172. print((i+1) * stat_delta, '米内匹配数 =', acc,
  173. ', 占测试点总数比例 =', acc / tot, ', 占所有匹配结果比例 =',
  174. acc / matched)
  175. def simple_sim(a, b):
  176. from math import sqrt
  177. up, d1, d2 = 0.0, 0.0, 0.0
  178. for key in a.wifi_snr:
  179. d1 += a.wifi_snr[key] * a.wifi_snr[key]
  180. if key in b.wifi_snr:
  181. up += a.wifi_snr[key] * b.wifi_snr[key]
  182. for key in b.wifi_snr:
  183. d2 += b.wifi_snr[key] * b.wifi_snr[key]
  184. if up == 0 or up < 1.9:
  185. return -1
  186. else:
  187. return up / sqrt(d1) / sqrt(d2)
  188. if __name__ == "__main__":
  189. toplev(infile = "basicdata.csv", sim = simple_sim, bad_wifi_crit = 5000,
  190. sim_crit = 0.0)