# xry111/wifi-location

# Copyright (C) 2019  Xiaotiancai Science & Technology Co. Ltd.
# All rights reserved.

# Parse MAC address to int
def parse_macaddr(s):
    """Convert a colon-separated hexadecimal MAC address string to an int."""
    hex_digits = s.replace(':', '')
    return int(hex_digits, 16)

# Parse timestamp
def timestamp(s):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into seconds since the epoch.

    The string is interpreted as local time, since the conversion goes
    through ``time.mktime``. The result is a float.
    """
    import time
    broken_down = time.strptime(s, '%Y-%m-%d %H:%M:%S')
    return time.mktime(broken_down)

# Read input data from file, and parse it
def read_data(fd):
    """Read scan records from a CSV source and parse them into Data tuples.

    fd is either a path (str), which is opened and read, or an iterable of
    CSV lines such as an already opened text file. The first line must be a
    header; the columns used are 'watch_id', 'latitude', 'longitude',
    'params' and 'create_time'.

    'params' is a JSON document. Rows whose JSON has no 'macs' entry, or an
    empty one, are skipped. Otherwise 'macs' is a '|'-separated list of
    'mac,value' entries, which becomes the wifi_snr dict mapping the MAC
    address (as an int) to the value (as a float).

    Returns a list of Data namedtuples with the fields watch_id (parsed as
    a hexadecimal int), latitude, longitude, wifi_snr and timestamp.
    """
    import csv
    import json
    from collections import namedtuple

    if isinstance(fd, str):
        # The csv module does its own newline handling; newline='' is
        # required so that quoted fields containing line breaks (possible
        # in the JSON column) are not split or altered by the text layer.
        with open(fd, newline='') as f:
            return read_data(f)

    Data = namedtuple('Data',
            ['watch_id', 'latitude', 'longitude', 'wifi_snr', 'timestamp'])
    data = []
    for row in csv.DictReader(fd):
        params = json.loads(row['params'])
        if 'macs' not in params:
            continue
        macs = params['macs']
        if macs == '':
            continue
        wifi_snr = {}
        for entry in macs.split('|'):
            fields = entry.split(',')
            wifi_snr[parse_macaddr(fields[0])] = float(fields[1])
        data.append(Data(int(row['watch_id'], 16), float(row['latitude']),
            float(row['longitude']), wifi_snr, timestamp(row['create_time'])))
    return data

def dist(p, q, radius=6371000.0):
    """Return the great-circle distance between p and q (haversine formula).

    p, q: objects with ``latitude`` and ``longitude`` attributes in degrees.
    radius: sphere radius; the result is in the same unit. The default is
        the Earth's mean radius in meters, so the result is in meters.

    NOTE(review): the constant used here previously was 63710000, ten
    times the Earth's radius in meters. Thresholds that were tuned against
    that value are ten times too large when interpreted as meters.
    """
    from math import asin, cos, radians, sin, sqrt

    lat0 = radians(p.latitude)
    lat1 = radians(q.latitude)
    dlat = lat1 - lat0
    dlng = radians(q.longitude) - radians(p.longitude)

    def hav(x):
        half = sin(x / 2)
        return half * half

    h = hav(dlat) + cos(lat0) * cos(lat1) * hav(dlng)
    # Rounding can push h marginally above 1 for (near-)antipodal points,
    # which would put sqrt(h) outside the domain of asin.
    return 2 * radius * asin(sqrt(min(1.0, h)))

# A trivial function, like how we detect cheating in programming contests.
def best_match(known_points, new_point, similarity):
    """Find the known point that is most similar to new_point.

    Known points with an empty wifi_snr are ignored, as are points for
    which ``similarity(point, new_point)`` returns None. On equal scores
    the earliest point wins.

    Returns a (point, score) pair, or (None, None) if nothing qualified.
    """
    winner, top_score = None, None
    for candidate in known_points:
        if len(candidate.wifi_snr) > 0:
            score = similarity(candidate, new_point)
            if score is not None and (top_score is None or score > top_score):
                winner, top_score = candidate, score
    return winner, top_score

def remove_bad_wifi(pts, crit = 500):
    """Remove access points whose recorded positions are too spread out.

    Points are processed in order. Each time a point reports a MAC, the
    point's position is added to that MAC's list, and the MAC is checked:
    the centroid (plain mean of latitudes and longitudes) of all positions
    recorded for it so far is computed, and the MAC is marked bad if any of
    those positions is farther than ``crit`` from the centroid, as measured
    by ``dist``. A MAC that has been marked bad stays bad.

    Finally every bad MAC is deleted from the wifi_snr dict of every point,
    so ``pts`` is modified in place. Returns None.

    NOTE(review): because the check runs after each point rather than once
    at the end, the outcome depends on the order of ``pts`` -- confirm that
    this is intended.
    """
    from collections import namedtuple
    Loc = namedtuple('Loc', ['latitude', 'longitude'])
    mac_loc = {}
    bad_mac = set()
    for pt in pts:
        loc = Loc(pt.latitude, pt.longitude)
        for mac in pt.wifi_snr:
            seen = mac_loc.setdefault(mac, [])
            seen.append(loc)
            # Only MACs reported by this point have a changed position
            # list, so only they can change status; flagged MACs never
            # become good again.
            if mac in bad_mac:
                continue
            sum_la = 0.0
            sum_lo = 0.0
            for other in seen:
                sum_la += other.latitude
                sum_lo += other.longitude
            cent = Loc(sum_la / len(seen), sum_lo / len(seen))
            if any(dist(cent, other) > crit for other in seen):
                bad_mac.add(mac)
    for pt in pts:
        for mac in bad_mac:
            pt.wifi_snr.pop(mac, None)

def toplev(infile, sim, bad_wifi_crit, sim_crit, stat_cnt = 10,
        stat_delta = 50):
    """Evaluate Wi-Fi based position matching on recorded data and print stats.

    The records read from ``infile`` are grouped by watch_id. Within each
    group with at least two points, the points are ordered by timestamp;
    the earlier half is the training set (cleaned with remove_bad_wifi)
    and the later half is the verification set. Each verification point
    that has Wi-Fi data is matched against the training set with
    best_match, and a match is counted when its similarity is at least
    ``sim_crit``. The distance between a matched pair is put into one of
    ``stat_cnt`` buckets of width ``stat_delta``; larger distances are
    counted as matched but fall outside the buckets.

    infile: path or file object accepted by read_data
    sim: similarity function passed to best_match
    bad_wifi_crit: distance threshold passed to remove_bad_wifi
    sim_crit: minimum similarity for a match to be accepted
    stat_cnt: number of distance buckets reported
    stat_delta: width of one distance bucket

    Prints the total number of verification points, the number of matches
    and, per bucket, the cumulative match count with its share of all
    verification points and of all matches. A share is reported as 0.0
    when its denominator is zero. Returns None.
    """
    from itertools import groupby
    data = read_data(infile)
    key = lambda x:x.watch_id
    # groupby only merges adjacent items, so sort by the same key first.
    data.sort(key = key)
    tot = 0
    matched = 0
    cnt = [0] * stat_cnt
    for _, g in groupby(data, key):
        points = sorted(g, key = lambda x:x.timestamp)
        if len(points) < 2:
            continue
        half = len(points) // 2
        train = points[:half]
        verify = points[half:]
        remove_bad_wifi(train, bad_wifi_crit)
        tot += len(verify)
        for pt in verify:
            if len(pt.wifi_snr) == 0:
                continue
            pt1, sim1 = best_match(train, pt, sim)
            if sim1 is None or sim1 < sim_crit:
                continue
            matched += 1
            bucket = int(dist(pt1, pt) / stat_delta)
            if bucket < stat_cnt:
                cnt[bucket] += 1
    # "total number of test points"
    print('测试点总数 =', tot)
    # "number of matches"
    print('匹配数 =', matched)
    acc = 0
    for i in range(stat_cnt):
        acc += cnt[i]
        # Avoid ZeroDivisionError when nothing was verified or matched.
        tot_ratio = acc / tot if tot else 0.0
        matched_ratio = acc / matched if matched else 0.0
        # "matches within N meters", "share of all test points",
        # "share of all matches"
        print((i+1) * stat_delta, '米内匹配数 =', acc,
                ', 占测试点总数比例 =', tot_ratio, ', 占所有匹配结果比例 =',
                matched_ratio)

def simple_sim(a, b):
    """Cosine similarity between the wifi_snr vectors of a and b.

    The dot product is taken over the MACs present in both points and is
    divided by the Euclidean norms of both full vectors. Returns -1 when
    the dot product is zero or below 1.9.
    """
    from math import sqrt
    snr_a = a.wifi_snr
    snr_b = b.wifi_snr

    dot = 0.0
    norm_a_sq = 0.0
    for mac, level in snr_a.items():
        norm_a_sq += level * level
        if mac in snr_b:
            dot += level * snr_b[mac]

    norm_b_sq = 0.0
    for level in snr_b.values():
        norm_b_sq += level * level

    if dot == 0 or dot < 1.9:
        return -1
    return dot / sqrt(norm_a_sq) / sqrt(norm_b_sq)

if __name__ == "__main__":
    # Script entry point: evaluate simple_sim on basicdata.csv, accepting
    # any match whose similarity is at least 0.0.
    toplev(
        infile="basicdata.csv",
        sim=simple_sim,
        bad_wifi_crit=5000,
        sim_crit=0.0,
    )