MobilityNet · ryan-tock · Aug 21, 2025 · Aug 21, 2025
diff --git a/README.md b/README.md
@@ -38,8 +38,10 @@ There are many potential ways to interact with the notebooks here. At one extrem
 1. **Interactive, UI-only:** Launch the repo in binder and clone one of the example notebooks
 1. **Interactive, CLI only:**
     1. Fork + clone the repo
+    1. Run `setup/setup_conda.sh <platform>` to download the correct conda version if it's not already installed
+    1. Run `~/miniconda-23.1.0/bin/conda init <shell-name>`
     1. Run `setup/setup.sh` to set up the local environment
-        1. you may need to install the correct version of miniconda per instructions
+    1. Run `setup/activate_conda.sh` in each terminal 
     1. Start a local notebook server (`juypter notebook`)
 
 ## Contributing ##

diff --git a/setup/setup_conda.sh b/setup/setup_conda.sh
diff --git a/transit_matching/analysis_helper.py b/transit_matching/analysis_helper.py
@@ -0,0 +1,99 @@
+import json
+from transit_matching.motion_activity import get_sensed_mode
+from transit_matching.overpass_parser import build_maps
+
+class AnalysisTaks:
+    def __init__(self, datastore_loc, locations, view, os, phone_label, trip_name, section_name, evaluation_range):
+        self.locations = locations
+        self.view = view
+        self.trip_name = trip_name
+        self.section_name = section_name
+        self.os = os
+        self.phone_label = phone_label
+        self.evaluation_range = evaluation_range
+
+        self.stripped_trip_name = "_".join(self.trip_name.split("_")[:-1])
+        self.stripped_section_name = "_".join(self.section_name.split("_")[:-1])
+
+        self.initial_time = locations.iloc[0]['ts']
+        self.final_time = locations.iloc[-1]['ts']
+
+        self.phone_number = int(phone_label.split("-")[-1])
+
+        self.sensed_mode = get_sensed_mode(datastore_loc, view, os, phone_label, self.initial_time, self.final_time)
+        self.actual_mode = self.get_actual_mode()
+
+        self.error = None
+        self.predicted_mode = None
+        self.path = None
+        self.modern = None
+
+    def __str__(self):
+        return f'VIEW: {self.view.ljust(30)}PHONE: {(self.os + "-" + str(self.phone_number)).ljust(12)}TRIP: {self.stripped_trip_name.ljust(40)}SECTION: {self.stripped_section_name.ljust(40)}EVAL: {str(self.evaluation_range).ljust(4)}MODE: {self.predicted_mode.ljust(11)}| {self.actual_mode.ljust(11)}'
+
+    def get_actual_mode(self):
+        if 'walk' in self.section_name:
+            return 'WALKING'
+
+        f = open(f"spec_creation/final_sfbayarea/{self.view}.json", 'r')
+        trips = json.loads(f.read())['evaluation_trips']
+
+        trips_with_id = [n for n in trips if n['id'] == self.stripped_trip_name]
+        if len(trips_with_id) == 0:
+            return None
+        trip = trips_with_id[0]
+
+        if 'legs' in trip:
+            sections_with_id = [n for n in trip['legs'] if n['id'] == self.stripped_section_name]
+            if len(sections_with_id) == 0:
+                return None
+            section = sections_with_id[0]
+        else:
+            section = trip
+
+        f.close()
+
+        return section['mode']
+
+
+    def map_match(self, map_data, error_function):
+        if self.stripped_trip_name not in map_data:
+            map_data[self.stripped_trip_name] = {}
+        if self.stripped_section_name not in map_data[self.stripped_trip_name]:
+            map_data[self.stripped_trip_name][self.stripped_section_name] = build_maps(self.locations)
+
+        error, path, mode, modern, distances = error_function(self.locations, map_data[self.stripped_trip_name][self.stripped_section_name], self.sensed_mode)
+
+        self.error = error
+        self.path = path
+        self.predicted_mode = mode
+        self.modern = modern
+        self.distances = distances
+
+
+def create_tasks(pvs, datastore_loc):
+
+    tasks = []
+
+    for view, pv in pvs.items():
+        for phone_os, phone_map in pv.map().items():
+            for phone_label, phone_detail_map in phone_map.items():
+                for evaluation_num in range(len(phone_detail_map["evaluation_ranges"])):
+                    evaluation = phone_detail_map["evaluation_ranges"][evaluation_num]
+                    for trip in evaluation["evaluation_trip_ranges"]:
+                        for section in trip["evaluation_section_ranges"]:
+                            locations = section['location_df']
+
+                            if len(locations) <= 1:
+                                continue
+
+                            task = AnalysisTaks(datastore_loc, locations, view, phone_os, phone_label, trip['trip_id'], section['trip_id'], evaluation_num)
+
+                            # If this trip is not in the timeline, skip this task
+                            if task.actual_mode is None:
+                                continue
+
+                            tasks.append(task)
+                print(f'created all tasks for {phone_label} on {view}')
+
+    return tasks
diff --git a/transit_matching/error_calculations.py b/transit_matching/error_calculations.py
@@ -0,0 +1,116 @@
+import numpy as np
+from pyproj import Geod
+from rtree import index
+
+import utm
+import pyproj
+import geopandas as gpd
+import shapely as shp
+import pandas as pd
+import math
+
+g = pyproj.Geod(ellps='clrk66')
+
+def calc_error(path, locations):
+    locations_gdf = gpd.GeoDataFrame(
+        locations,
+        geometry=gpd.points_from_xy(locations.longitude, locations.latitude)
+    )
+
+    path_linestring = shp.LineString([(lon, lat) for lat, lon in path])
+
+    return dist_using_projection_adjusted(locations_gdf, path_linestring)
+
+
+def _point_to_segment_distance(loc, p1, p2, geod):
+    loc_lat, loc_lon = loc
+    p1_lat, p1_lon = p1
+    p2_lat, p2_lon = p2
+
+    if np.allclose(p1, p2):
+        _, _, dist = geod.inv(p1_lon, p1_lat, loc_lon, loc_lat)
+        return dist
+
+    az13, _, dist13 = geod.inv(p1_lon, p1_lat, loc_lon, loc_lat)
+    az12, _, dist12 = geod.inv(p1_lon, p1_lat, p2_lon, p2_lat)
+
+    if dist12 < 1e-9:
+        return dist13
+
+    angle_diff_rad = np.radians(az13 - az12)
+    along_track_dist = dist13 * np.cos(angle_diff_rad)
+
+    if along_track_dist < 0:
+        return dist13
+    elif along_track_dist > dist12:
+        _, _, dist23 = geod.inv(p2_lon, p2_lat, loc_lon, loc_lat)
+        return dist23
+    else:
+        return abs(dist13 * np.sin(angle_diff_rad))
+
+
+def geodesic_squared_error(path, locations, k_neighbors=7):
+    if len(path) < 2 or len(locations) == 0:
+        return 0.0
+
+    geod = Geod(ellps='WGS84')
+    p1_pts = path[:-1]
+    p2_pts = path[1:]
+    num_segments = len(p1_pts)
+
+    idx = index.Index()
+    for i in range(num_segments):
+        p1 = p1_pts[i]
+        p2 = p2_pts[i]
+        bbox = (
+            min(p1[1], p2[1]),
+            min(p1[0], p2[0]),
+            max(p1[1], p2[1]),
+            max(p1[0], p2[0]),
+        )
+        idx.insert(i, bbox)
+
+    total_squared_error = 0.0
+
+    for loc in locations:
+        loc_lon, loc_lat = loc[1], loc[0]
+
+        num_candidates = min(k_neighbors, num_segments)
+        candidate_indices = list(idx.nearest((loc_lon, loc_lat, loc_lon, loc_lat), num_candidates))
+
+        min_dist_for_loc = float('inf')
+        for i in candidate_indices:
+            dist = _point_to_segment_distance(loc, p1_pts[i], p2_pts[i], geod)
+            if dist < min_dist_for_loc:
+                min_dist_for_loc = dist
+
+        total_squared_error += min_dist_for_loc ** 2
+
+    return total_squared_error
+
+def dist_using_projection_adjusted(location_gpdf, gt_linestring):
+    lat = gt_linestring.centroid.y
+    adjusted_points = [shp.geometry.Point(coord[0] / math.cos(math.radians(lat)), coord[1]) for coord in list(gt_linestring.coords)]
+    unadjusted_points = [shp.geometry.Point(coord[0], coord[1]) for coord in list(gt_linestring.coords)]
+
+    adjusted_data = []
+    unadjusted_data = []
+    for p in zip(adjusted_points, unadjusted_points):
+        row = {}
+        row["geometry"] = p[0]
+        adjusted_data.append(row)
+        row2 = {}
+        row2["geometry"] = p[1]
+        unadjusted_data.append(row2)
+
+    adjusted_gt_gpdf = gpd.GeoDataFrame(adjusted_data, geometry="geometry")
+    unadjusted_gt_gpdf = gpd.GeoDataFrame(unadjusted_data, geometry="geometry")
+
+    adjusted_location_linestring = shp.geometry.LineString([shp.geometry.Point(coord.x / math.cos(math.radians(lat)), coord.y) for coord in list(location_gpdf.geometry)])
+    projections = adjusted_gt_gpdf.geometry.apply(lambda p: adjusted_location_linestring.project(p))
+    proj_points = projections.apply(lambda d:adjusted_location_linestring.interpolate(d))
+
+    project_x = proj_points.apply(lambda p: p.x * math.cos(math.radians(lat)))
+    project_y = proj_points.apply(lambda p: p.y)
+    return pd.Series(g.inv(list(unadjusted_gt_gpdf.geometry.x), list(unadjusted_gt_gpdf.geometry.y),
+                 list(project_x), list(project_y))[2], index=unadjusted_gt_gpdf.index)
diff --git a/transit_matching/matching.py b/transit_matching/matching.py
@@ -0,0 +1,107 @@
+import numpy as np
+from mappymatch.constructs.trace import Trace
+from mappymatch.matchers.lcss.lcss import LCSSMatcher
+from pyproj import Geod, Transformer
+
+from transit_matching.error_calculations import calc_error
+from transit_matching.overpass_parser import parse_relation
+
+def get_graph_match(df, nx_map):
+    trace = Trace.from_dataframe(df, lat_column="latitude", lon_column="longitude", xy=True)
+
+    matcher = LCSSMatcher(nx_map)
+
+    match_result = matcher.match_trace(trace)
+
+    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326")
+
+    if len(match_result.path) > 0:
+        path = []
+
+        for road in match_result.path:
+            for i in range(len(road.geom.coords.xy[0]) - 1):
+                path.append(transformer.transform(road.geom.coords.xy[0][i], road.geom.coords.xy[1][i]))
+
+        path.append(transformer.transform(match_result.path[-1].geom.coords.xy[0][1], match_result.path[-1].geom.coords.xy[1][1]))
+
+        path = np.array(path)
+
+        distances = calc_error(path, df)
+        error = distances.mean()
+
+        return (error, path, distances)
+    else:
+        return (float('inf'), [], [])
+
+
+
+def get_transit_match(df, data):
+    locations = df[['latitude', 'longitude']].to_numpy()
+
+    relations = data['relations']
+    edges = data['edges']
+    stops = data['stops']
+    geometry = data['geometry']
+
+    geod = Geod(ellps='WGS84')
+
+    best_path = []
+    best_error = float('inf')
+    best_mode = ""
+    best_distances = []
+    # best_relation = -1
+
+    for relation in relations.keys():
+
+        stop_order = [n for n in relations[relation]['nodes'] if n in stops]
+
+        firstNode = [-1, float('inf'), -1]
+        lastNode = [-1, float('inf'), -1]
+
+        index = 0
+
+        for stop in stop_order:
+            node_loc = geometry[stop]
+            _, _, distance_first = geod.inv(locations[0][1], locations[0][0], node_loc[1], node_loc[0])
+            _, _, distance_last = geod.inv(locations[-1][1], locations[-1][0], node_loc[1], node_loc[0])
+            if distance_first < firstNode[1]:
+                firstNode = [stop, distance_first, index]
+            if distance_last < lastNode[1]:
+                lastNode = [stop, distance_last, index]
+
+            index += 1
+
+        # The start point has to be before the end point
+        if firstNode[2] >= lastNode[2]:
+            continue
+
+        passed_stops = stop_order[firstNode[2]:(lastNode[2]+1)]
+
+        path = []
+
+        stop_edges = parse_relation(relation, relations, edges, stops, geometry)
+
+        valid_route = True
+        for e in zip(passed_stops, passed_stops[1:]):
+            if e not in stop_edges:
+                valid_route = False
+                break
+            path.extend(stop_edges[e])
+
+        if not valid_route:
+            continue
+
+        path = np.array(path)
+
+        distances = calc_error(path, df)
+        error = distances.mean()
+        if error < best_error:
+            best_error = error
+            best_path = path
+            best_mode = relations[relation]['mode']
+            best_distances = distances
+            # best_relation = relation
+
+    return (best_error, best_path, best_mode, best_distances)
+
+