Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ There are many potential ways to interact with the notebooks here. At one extrem
1. **Interactive, UI-only:** Launch the repo in binder and clone one of the example notebooks
1. **Interactive, CLI only:**
1. Fork + clone the repo
1. Run `setup/setup_conda.sh <platform>` to download the correct conda version if it's not already installed
1. Run `~/miniconda-23.1.0/bin/conda init <shell-name>`
1. Run `setup/setup.sh` to set up the local environment
1. you may need to install the correct version of miniconda per instructions
1. Run `setup/activate_conda.sh` in each terminal
1. Start a local notebook server (`juypter notebook`)

## Contributing ##
Expand Down
Empty file modified setup/setup_conda.sh
100644 → 100755
Empty file.
99 changes: 99 additions & 0 deletions transit_matching/analysis_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import json
from transit_matching.motion_activity import get_sensed_mode
from transit_matching.overpass_parser import build_maps

class AnalysisTaks:
def __init__(self, datastore_loc, locations, view, os, phone_label, trip_name, section_name, evaluation_range):
self.locations = locations
self.view = view
self.trip_name = trip_name
self.section_name = section_name
self.os = os
self.phone_label = phone_label
self.evaluation_range = evaluation_range

self.stripped_trip_name = "_".join(self.trip_name.split("_")[:-1])
self.stripped_section_name = "_".join(self.section_name.split("_")[:-1])

self.initial_time = locations.iloc[0]['ts']
self.final_time = locations.iloc[-1]['ts']

self.phone_number = int(phone_label.split("-")[-1])

self.sensed_mode = get_sensed_mode(datastore_loc, view, os, phone_label, self.initial_time, self.final_time)
self.actual_mode = self.get_actual_mode()

self.error = None
self.predicted_mode = None
self.path = None
self.modern = None

def __str__(self):
return f'VIEW: {self.view.ljust(30)}PHONE: {(self.os + "-" + str(self.phone_number)).ljust(12)}TRIP: {self.stripped_trip_name.ljust(40)}SECTION: {self.stripped_section_name.ljust(40)}EVAL: {str(self.evaluation_range).ljust(4)}MODE: {self.predicted_mode.ljust(11)}| {self.actual_mode.ljust(11)}'

def get_actual_mode(self):
if 'walk' in self.section_name:
return 'WALKING'

f = open(f"spec_creation/final_sfbayarea/{self.view}.json", 'r')
trips = json.loads(f.read())['evaluation_trips']

trips_with_id = [n for n in trips if n['id'] == self.stripped_trip_name]
if len(trips_with_id) == 0:
return None
trip = trips_with_id[0]

if 'legs' in trip:
sections_with_id = [n for n in trip['legs'] if n['id'] == self.stripped_section_name]
if len(sections_with_id) == 0:
return None
section = sections_with_id[0]
else:
section = trip

f.close()

return section['mode']


def map_match(self, map_data, error_function):
if self.stripped_trip_name not in map_data:
map_data[self.stripped_trip_name] = {}
if self.stripped_section_name not in map_data[self.stripped_trip_name]:
map_data[self.stripped_trip_name][self.stripped_section_name] = build_maps(self.locations)

error, path, mode, modern, distances = error_function(self.locations, map_data[self.stripped_trip_name][self.stripped_section_name], self.sensed_mode)

self.error = error
self.path = path
self.predicted_mode = mode
self.modern = modern
self.distances = distances


def create_tasks(pvs, datastore_loc):

tasks = []

for view, pv in pvs.items():
for phone_os, phone_map in pv.map().items():
for phone_label, phone_detail_map in phone_map.items():
for evaluation_num in range(len(phone_detail_map["evaluation_ranges"])):
evaluation = phone_detail_map["evaluation_ranges"][evaluation_num]
for trip in evaluation["evaluation_trip_ranges"]:
for section in trip["evaluation_section_ranges"]:
locations = section['location_df']

if len(locations) <= 1:
continue

task = AnalysisTaks(datastore_loc, locations, view, phone_os, phone_label, trip['trip_id'], section['trip_id'], evaluation_num)

# If this trip is not in the timeline, skip this task
if task.actual_mode is None:
continue

tasks.append(task)
print(f'created all tasks for {phone_label} on {view}')

return tasks
116 changes: 116 additions & 0 deletions transit_matching/error_calculations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import numpy as np
from pyproj import Geod
from rtree import index

import utm
import pyproj
import geopandas as gpd
import shapely as shp
import pandas as pd
import math

g = pyproj.Geod(ellps='clrk66')

def calc_error(path, locations):
locations_gdf = gpd.GeoDataFrame(
locations,
geometry=gpd.points_from_xy(locations.longitude, locations.latitude)
)

path_linestring = shp.LineString([(lon, lat) for lat, lon in path])

return dist_using_projection_adjusted(locations_gdf, path_linestring)


def _point_to_segment_distance(loc, p1, p2, geod):
loc_lat, loc_lon = loc
p1_lat, p1_lon = p1
p2_lat, p2_lon = p2

if np.allclose(p1, p2):
_, _, dist = geod.inv(p1_lon, p1_lat, loc_lon, loc_lat)
return dist

az13, _, dist13 = geod.inv(p1_lon, p1_lat, loc_lon, loc_lat)
az12, _, dist12 = geod.inv(p1_lon, p1_lat, p2_lon, p2_lat)

if dist12 < 1e-9:
return dist13

angle_diff_rad = np.radians(az13 - az12)
along_track_dist = dist13 * np.cos(angle_diff_rad)

if along_track_dist < 0:
return dist13
elif along_track_dist > dist12:
_, _, dist23 = geod.inv(p2_lon, p2_lat, loc_lon, loc_lat)
return dist23
else:
return abs(dist13 * np.sin(angle_diff_rad))


def geodesic_squared_error(path, locations, k_neighbors=7):
if len(path) < 2 or len(locations) == 0:
return 0.0

geod = Geod(ellps='WGS84')
p1_pts = path[:-1]
p2_pts = path[1:]
num_segments = len(p1_pts)

idx = index.Index()
for i in range(num_segments):
p1 = p1_pts[i]
p2 = p2_pts[i]
bbox = (
min(p1[1], p2[1]),
min(p1[0], p2[0]),
max(p1[1], p2[1]),
max(p1[0], p2[0]),
)
idx.insert(i, bbox)

total_squared_error = 0.0

for loc in locations:
loc_lon, loc_lat = loc[1], loc[0]

num_candidates = min(k_neighbors, num_segments)
candidate_indices = list(idx.nearest((loc_lon, loc_lat, loc_lon, loc_lat), num_candidates))

min_dist_for_loc = float('inf')
for i in candidate_indices:
dist = _point_to_segment_distance(loc, p1_pts[i], p2_pts[i], geod)
if dist < min_dist_for_loc:
min_dist_for_loc = dist

total_squared_error += min_dist_for_loc ** 2

return total_squared_error

def dist_using_projection_adjusted(location_gpdf, gt_linestring):
lat = gt_linestring.centroid.y
adjusted_points = [shp.geometry.Point(coord[0] / math.cos(math.radians(lat)), coord[1]) for coord in list(gt_linestring.coords)]
unadjusted_points = [shp.geometry.Point(coord[0], coord[1]) for coord in list(gt_linestring.coords)]

adjusted_data = []
unadjusted_data = []
for p in zip(adjusted_points, unadjusted_points):
row = {}
row["geometry"] = p[0]
adjusted_data.append(row)
row2 = {}
row2["geometry"] = p[1]
unadjusted_data.append(row2)

adjusted_gt_gpdf = gpd.GeoDataFrame(adjusted_data, geometry="geometry")
unadjusted_gt_gpdf = gpd.GeoDataFrame(unadjusted_data, geometry="geometry")

adjusted_location_linestring = shp.geometry.LineString([shp.geometry.Point(coord.x / math.cos(math.radians(lat)), coord.y) for coord in list(location_gpdf.geometry)])
projections = adjusted_gt_gpdf.geometry.apply(lambda p: adjusted_location_linestring.project(p))
proj_points = projections.apply(lambda d:adjusted_location_linestring.interpolate(d))

project_x = proj_points.apply(lambda p: p.x * math.cos(math.radians(lat)))
project_y = proj_points.apply(lambda p: p.y)
return pd.Series(g.inv(list(unadjusted_gt_gpdf.geometry.x), list(unadjusted_gt_gpdf.geometry.y),
list(project_x), list(project_y))[2], index=unadjusted_gt_gpdf.index)
107 changes: 107 additions & 0 deletions transit_matching/matching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import numpy as np
from mappymatch.constructs.trace import Trace
from mappymatch.matchers.lcss.lcss import LCSSMatcher
from pyproj import Geod, Transformer

from transit_matching.error_calculations import calc_error
from transit_matching.overpass_parser import parse_relation

def get_graph_match(df, nx_map):
trace = Trace.from_dataframe(df, lat_column="latitude", lon_column="longitude", xy=True)

matcher = LCSSMatcher(nx_map)

match_result = matcher.match_trace(trace)

transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326")

if len(match_result.path) > 0:
path = []

for road in match_result.path:
for i in range(len(road.geom.coords.xy[0]) - 1):
path.append(transformer.transform(road.geom.coords.xy[0][i], road.geom.coords.xy[1][i]))

path.append(transformer.transform(match_result.path[-1].geom.coords.xy[0][1], match_result.path[-1].geom.coords.xy[1][1]))

path = np.array(path)

distances = calc_error(path, df)
error = distances.mean()

return (error, path, distances)
else:
return (float('inf'), [], [])



def get_transit_match(df, data):
locations = df[['latitude', 'longitude']].to_numpy()

relations = data['relations']
edges = data['edges']
stops = data['stops']
geometry = data['geometry']

geod = Geod(ellps='WGS84')

best_path = []
best_error = float('inf')
best_mode = ""
best_distances = []
# best_relation = -1

for relation in relations.keys():

stop_order = [n for n in relations[relation]['nodes'] if n in stops]

firstNode = [-1, float('inf'), -1]
lastNode = [-1, float('inf'), -1]

index = 0

for stop in stop_order:
node_loc = geometry[stop]
_, _, distance_first = geod.inv(locations[0][1], locations[0][0], node_loc[1], node_loc[0])
_, _, distance_last = geod.inv(locations[-1][1], locations[-1][0], node_loc[1], node_loc[0])
if distance_first < firstNode[1]:
firstNode = [stop, distance_first, index]
if distance_last < lastNode[1]:
lastNode = [stop, distance_last, index]

index += 1

# The start point has to be before the end point
if firstNode[2] >= lastNode[2]:
continue

passed_stops = stop_order[firstNode[2]:(lastNode[2]+1)]

path = []

stop_edges = parse_relation(relation, relations, edges, stops, geometry)

valid_route = True
for e in zip(passed_stops, passed_stops[1:]):
if e not in stop_edges:
valid_route = False
break
path.extend(stop_edges[e])

if not valid_route:
continue

path = np.array(path)

distances = calc_error(path, df)
error = distances.mean()
if error < best_error:
best_error = error
best_path = path
best_mode = relations[relation]['mode']
best_distances = distances
# best_relation = relation

return (best_error, best_path, best_mode, best_distances)


Loading