Skip to content

Commit c5f7844

Browse files
authored
feat: 1312 add missing data to the pmtiles (#1328)
1 parent a609176 commit c5f7844

File tree

8 files changed

+598
-241
lines changed

8 files changed

+598
-241
lines changed

functions-python/pmtiles_builder/function_config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"key": "FEEDS_DATABASE_URL"
1818
}
1919
],
20-
"ingress_settings": "ALLOW_ALL",
20+
"ingress_settings": "ALL",
2121
"max_instance_request_concurrency": 1,
2222
"max_instance_count": 5,
2323
"min_instance_count": 0,

functions-python/pmtiles_builder/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ google-cloud-storage
2424
# Configuration
2525
python-dotenv==1.0.0
2626
tippecanoe
27+
psutil
2728

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#
2+
#
3+
# MobilityData 2025
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
import csv
17+
import os
18+
from shared.helpers.logger import get_logger
19+
20+
21+
STOP_TIMES_FILE = "stop_times.txt"
22+
SHAPES_FILE = "shapes.txt"
23+
TRIPS_FILE = "trips.txt"
24+
ROUTES_FILE = "routes.txt"
25+
STOPS_FILE = "stops.txt"
26+
AGENCY_FILE = "agency.txt"
27+
28+
29+
class CsvCache:
30+
"""
31+
CsvCache provides cached access to GTFS CSV files in a specified working directory.
32+
It lazily loads and caches file contents as lists of dictionaries, and offers
33+
helper methods to retrieve relationships between routes, trips, stops, and shapes.
34+
It lazily loads because not all files are necessarily needed.
35+
"""
36+
37+
def __init__(
38+
self,
39+
workdir: str = "./workdir",
40+
logger=None,
41+
):
42+
if logger:
43+
self.logger = logger
44+
else:
45+
self.logger = get_logger(CsvCache.__name__)
46+
47+
self.workdir = workdir
48+
49+
self.file_data = {}
50+
self.trip_to_stops = None
51+
self.route_to_trip = None
52+
self.route_to_shape = None
53+
self.stop_to_route = None
54+
self.stop_to_coordinates = None
55+
56+
self.logger.info("Using work directory: %s", self.workdir)
57+
58+
def get_path(self, filename: str) -> str:
59+
return os.path.join(self.workdir, filename)
60+
61+
def get_file(self, filename) -> list[dict]:
62+
if self.file_data.get(filename) is None:
63+
self.file_data[filename] = self._read_csv(self.get_path(filename))
64+
return self.file_data[filename]
65+
66+
def add_data(self, filename: str, data: list[dict]):
67+
self.file_data[filename] = data
68+
69+
def _read_csv(self, filename) -> list[dict]:
70+
"""
71+
Reads the content of a CSV file and returns it as a list of dictionaries
72+
where each dictionary represents a row.
73+
74+
Parameters:
75+
filename (str): The file path of the CSV file to be read.
76+
77+
Raises:
78+
Exception: If there is an error during file opening or reading. The raised
79+
exception will include the original error message along with the file name.
80+
81+
Returns:
82+
list[dict]: A list of dictionaries, each representing a row in the CSV file.
83+
"""
84+
try:
85+
self.logger.debug("Loading %s", filename)
86+
with open(filename, newline="", encoding="utf-8") as f:
87+
return list(csv.DictReader(f))
88+
except Exception as e:
89+
raise Exception(f"Failed to read CSV file {filename}: {e}") from e
90+
91+
def get_trip_from_route(self, route_id):
92+
if self.route_to_trip is None:
93+
self.route_to_trip = {}
94+
for row in self.get_file(TRIPS_FILE):
95+
route_id = row["route_id"]
96+
trip_id = row["trip_id"]
97+
if trip_id:
98+
self.route_to_trip.setdefault(route_id, trip_id)
99+
return self.route_to_trip.get(route_id, "")
100+
101+
def get_shape_from_route(self, route_id) -> str:
102+
"""
103+
Returns the first shape_id associated with a given route_id from the trips file.
104+
The relationship from the route to the shape is via the trips file.
105+
Parameters:
106+
route_id (str): The route identifier to look up.
107+
108+
Returns:
109+
The corresponding shape id.
110+
"""
111+
if self.route_to_shape is None:
112+
self.route_to_shape = {}
113+
for row in self.get_file(TRIPS_FILE):
114+
route_id = row["route_id"]
115+
shape_id = row["shape_id"]
116+
if shape_id:
117+
self.route_to_shape.setdefault(route_id, shape_id)
118+
return self.route_to_shape.get(route_id, "")
119+
120+
def get_stops_from_trip(self, trip_id):
121+
# Lazy instantiation of the dictionary, because we may not need it al all if there is a shape.
122+
if self.trip_to_stops is None:
123+
self.trip_to_stops = {}
124+
for row in self.get_file(STOP_TIMES_FILE):
125+
self.trip_to_stops.setdefault(row["trip_id"], []).append(row["stop_id"])
126+
return self.trip_to_stops.get(trip_id, [])
127+
128+
def get_coordinates_for_stop(self, stop_id) -> tuple[float, float] | None:
129+
if self.stop_to_coordinates is None:
130+
self.stop_to_coordinates = {
131+
s["stop_id"]: (float(s["stop_lon"]), float(s["stop_lat"]))
132+
for s in self.get_file(STOPS_FILE)
133+
}
134+
return self.stop_to_coordinates.get(stop_id, None)
135+
136+
def set_workdir(self, workdir):
137+
self.workdir = workdir
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import json
2+
import sys
3+
import logging
4+
from collections import defaultdict
5+
6+
from csv_cache import CsvCache, ROUTES_FILE, TRIPS_FILE, STOP_TIMES_FILE, STOPS_FILE
7+
from shared.helpers.runtime_metrics import track_metrics
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def create_routes_map(routes_data):
    """
    Indexes route rows by their route_id.

    Rows without a (non-empty) route_id are ignored; when the same route_id
    appears more than once, the last row wins.
    """
    return {
        route_id: route
        for route in routes_data
        if (route_id := route.get("route_id"))
    }
20+
21+
22+
def build_stop_to_routes(stop_times_data, trips_data):
    """
    Maps each stop_id to the set of route_ids serving it.

    The link goes through trips: stop_times rows give (trip_id, stop_id) and
    trips rows give (trip_id, route_id). Rows missing any of the needed ids,
    or referring to a trip without a route, contribute nothing.
    """
    # trip_id -> route_id, keeping only rows where both ids are present.
    route_by_trip = {
        trip_id: route_id
        for trip in trips_data
        if (trip_id := trip.get("trip_id")) and (route_id := trip.get("route_id"))
    }

    # stop_id -> set of route_ids
    routes_by_stop = defaultdict(set)
    for stop_time in stop_times_data:
        trip_id = stop_time.get("trip_id")
        stop_id = stop_time.get("stop_id")
        if not (trip_id and stop_id):
            continue
        route_id = route_by_trip.get(trip_id)
        if route_id:
            routes_by_stop[stop_id].add(route_id)

    return routes_by_stop
43+
44+
45+
@track_metrics(metrics=("time", "memory", "cpu"))
def convert_stops_to_geojson(csv_cache: CsvCache, output_file):
    """
    Writes the stops of a GTFS feed to a GeoJSON FeatureCollection file.

    Each stop with a stop_id and parsable coordinates becomes a Point feature
    carrying its descriptive fields plus the sorted ids of the routes serving
    it and the colors of those routes that are present in the routes file.
    Stops with missing or invalid coordinates are logged and skipped.

    Parameters:
        csv_cache (CsvCache): Source of the routes, stop_times, trips and stops rows.
        output_file: Path of the GeoJSON file to write.
    """
    routes_by_id = create_routes_map(csv_cache.get_file(ROUTES_FILE))
    routes_by_stop = build_stop_to_routes(
        csv_cache.get_file(STOP_TIMES_FILE), csv_cache.get_file(TRIPS_FILE)
    )

    # Stop columns copied verbatim into the feature properties ("" when absent).
    copied_fields = (
        "stop_code",
        "stop_name",
        "stop_desc",
        "zone_id",
        "stop_url",
        "wheelchair_boarding",
        "location_type",
    )

    features = []
    for stop in csv_cache.get_file(STOPS_FILE):
        stop_id = stop.get("stop_id")
        if not stop_id:
            continue

        raw_lon = stop.get("stop_lon")
        raw_lat = stop.get("stop_lat")
        if not raw_lat or not raw_lon:
            logger.warning(f"Missing coordinates for stop_id {stop_id}, skipping.")
            continue

        try:
            longitude = float(raw_lon)
            latitude = float(raw_lat)
        except (ValueError, TypeError):
            logger.warning(f"Invalid coordinates for stop_id {stop_id}, skipping.")
            continue

        # Routes serving this stop
        serving_route_ids = sorted(routes_by_stop.get(stop_id, []))
        serving_route_colors = []
        for route_id in serving_route_ids:
            if route_id in routes_by_id:
                serving_route_colors.append(
                    routes_by_id[route_id].get("route_color", "")
                )

        properties = {"stop_id": stop_id}
        for field in copied_fields:
            properties[field] = stop.get(field, "")
        properties["route_ids"] = serving_route_ids
        properties["route_colors"] = serving_route_colors

        features.append(
            {
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [longitude, latitude],
                },
                "properties": properties,
            }
        )

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(
            {"type": "FeatureCollection", "features": features},
            f,
            indent=2,
            ensure_ascii=False,
        )

    logger.info(f"✅ GeoJSON file saved to {output_file} with {len(features)} stops")
109+
110+
111+
if __name__ == "__main__":
    # Command-line entry point:
    #   python script.py stops stop_times trips routes output.geojson
    # convert_stops_to_geojson() takes a CsvCache and an output path, so the
    # four input files are loaded into a cache under their GTFS names first.
    import csv  # local import: only the CLI path reads CSV files directly

    # Without a configured handler, INFO messages (including the final
    # "saved" message) would not be shown when run as a script.
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) != 6:
        logger.error(
            "Usage: python script.py stops stop_times trips routes output.geojson"
        )
        sys.exit(1)

    stops_path, stop_times_path, trips_path, routes_path, output_path = sys.argv[1:]

    cli_cache = CsvCache(logger=logger)
    for gtfs_name, csv_path in (
        (STOPS_FILE, stops_path),
        (STOP_TIMES_FILE, stop_times_path),
        (TRIPS_FILE, trips_path),
        (ROUTES_FILE, routes_path),
    ):
        with open(csv_path, newline="", encoding="utf-8") as csv_file:
            cli_cache.add_data(gtfs_name, list(csv.DictReader(csv_file)))

    convert_stops_to_geojson(cli_cache, output_path)

0 commit comments

Comments
 (0)