MobilityData
diff --git a/‎functions-python/pmtiles_builder/function_config.json‎
Lines changed: 1 addition & 1 deletion b/‎functions-python/pmtiles_builder/function_config.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎functions-python/pmtiles_builder/requirements.txt‎
Lines changed: 1 addition & 0 deletions b/‎functions-python/pmtiles_builder/requirements.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎functions-python/pmtiles_builder/src/csv_cache.py‎
Lines changed: 137 additions & 0 deletions b/‎functions-python/pmtiles_builder/src/csv_cache.py‎
Lines changed: 137 additions & 0 deletions
diff --git a/‎functions-python/pmtiles_builder/src/gtfs_stops_to_geojson.py‎
Lines changed: 120 additions & 0 deletions b/‎functions-python/pmtiles_builder/src/gtfs_stops_to_geojson.py‎
Lines changed: 120 additions & 0 deletions
@@ -17,7 +17,7 @@
       "key": "FEEDS_DATABASE_URL"
     }
   ],
-  "ingress_settings": "ALLOW_ALL",
+  "ingress_settings": "ALL",
   "max_instance_request_concurrency": 1,
   "max_instance_count": 5,
   "min_instance_count": 0,
 
@@ -24,4 +24,5 @@ google-cloud-storage
 # Configuration
 python-dotenv==1.0.0
 tippecanoe
+psutil
 
@@ -0,0 +1,137 @@
+#
+#
+#   MobilityData 2025
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+import csv
+import os
+from shared.helpers.logger import get_logger
+
+
+# Names of the GTFS files, relative to the work directory, that can be cached.
+STOP_TIMES_FILE = "stop_times.txt"
+SHAPES_FILE = "shapes.txt"
+TRIPS_FILE = "trips.txt"
+ROUTES_FILE = "routes.txt"
+STOPS_FILE = "stops.txt"
+AGENCY_FILE = "agency.txt"
+
+
+class CsvCache:
+    """
+    CsvCache provides cached access to GTFS CSV files in a specified working directory.
+    It lazily loads and caches file contents as lists of dictionaries, and offers
+    helper methods to retrieve relationships between routes, trips, stops, and shapes.
+    It lazily loads because not all files are necessarily needed.
+
+    The derived lookup tables (route -> trip, route -> shape, trip -> stops,
+    stop -> coordinates) are built on first use and then reused for every
+    subsequent call.
+    """
+
+    def __init__(
+        self,
+        workdir: str = "./workdir",
+        logger=None,
+    ):
+        """
+        Parameters:
+            workdir (str): Directory in which the GTFS files are looked up.
+            logger: Logger to use. When falsy, a logger named after the class
+                is obtained from get_logger.
+        """
+        if logger:
+            self.logger = logger
+        else:
+            self.logger = get_logger(CsvCache.__name__)
+
+        self.workdir = workdir
+
+        # Raw file contents, keyed by file name.
+        self.file_data = {}
+        # Derived lookup tables; None means "not built yet".
+        self.trip_to_stops = None
+        self.route_to_trip = None
+        self.route_to_shape = None
+        self.stop_to_route = None
+        self.stop_to_coordinates = None
+
+        self.logger.info("Using work directory: %s", self.workdir)
+
+    def get_path(self, filename: str) -> str:
+        """Returns the path of filename inside the work directory."""
+        return os.path.join(self.workdir, filename)
+
+    def get_file(self, filename: str) -> list[dict]:
+        """
+        Returns the rows of the given file, reading it from the work directory
+        the first time it is requested and serving it from the cache afterwards.
+        """
+        if self.file_data.get(filename) is None:
+            self.file_data[filename] = self._read_csv(self.get_path(filename))
+        return self.file_data[filename]
+
+    def add_data(self, filename: str, data: list[dict]) -> None:
+        """Stores data as the content of filename, replacing any cached content."""
+        self.file_data[filename] = data
+
+    def _read_csv(self, filename: str) -> list[dict]:
+        """
+        Reads the content of a CSV file and returns it as a list of dictionaries
+        where each dictionary represents a row.
+
+        Parameters:
+        filename (str): The file path of the CSV file to be read.
+
+        Raises:
+        Exception: If there is an error during file opening or reading. The raised
+        exception will include the original error message along with the file name.
+
+        Returns:
+        list[dict]: A list of dictionaries, each representing a row in the CSV file.
+        """
+        try:
+            self.logger.debug("Loading %s", filename)
+            with open(filename, newline="", encoding="utf-8") as f:
+                return list(csv.DictReader(f))
+        except Exception as e:
+            raise Exception(f"Failed to read CSV file {filename}: {e}") from e
+
+    def get_trip_from_route(self, route_id) -> str:
+        """
+        Returns the first trip_id associated with a given route_id in the trips file.
+
+        Parameters:
+            route_id (str): The route identifier to look up.
+
+        Returns:
+            The corresponding trip id, or an empty string if the route has no trip.
+        """
+        if self.route_to_trip is None:
+            # Build into a local and publish only once complete, so a failure
+            # half-way through never leaves a partial map behind.
+            route_to_trip = {}
+            for row in self.get_file(TRIPS_FILE):
+                trip_id = row["trip_id"]
+                if trip_id:
+                    # The row's route id must not reuse the name `route_id`:
+                    # that would overwrite the argument being looked up.
+                    route_to_trip.setdefault(row["route_id"], trip_id)
+            self.route_to_trip = route_to_trip
+        return self.route_to_trip.get(route_id, "")
+
+    def get_shape_from_route(self, route_id) -> str:
+        """
+        Returns the first shape_id associated with a given route_id from the trips file.
+        The relationship from the route to the shape is via the trips file.
+        Parameters:
+            route_id (str): The route identifier to look up.
+
+        Returns:
+            The corresponding shape id, or an empty string if the route has no
+            shape (including when the trips file has no shape_id column).
+        """
+        if self.route_to_shape is None:
+            route_to_shape = {}
+            for row in self.get_file(TRIPS_FILE):
+                # shape_id is read with .get() so that a trips file without
+                # that column yields "no shape" instead of a KeyError.
+                shape_id = row.get("shape_id")
+                if shape_id:
+                    route_to_shape.setdefault(row["route_id"], shape_id)
+            self.route_to_shape = route_to_shape
+        return self.route_to_shape.get(route_id, "")
+
+    def get_stops_from_trip(self, trip_id) -> list:
+        """
+        Returns the stop_ids listed for trip_id in the stop times file, in file
+        order, or an empty list if the trip has no stop times.
+        """
+        # Lazy instantiation of the dictionary, because we may not need it at all if there is a shape.
+        if self.trip_to_stops is None:
+            trip_to_stops = {}
+            for row in self.get_file(STOP_TIMES_FILE):
+                trip_to_stops.setdefault(row["trip_id"], []).append(row["stop_id"])
+            self.trip_to_stops = trip_to_stops
+        return self.trip_to_stops.get(trip_id, [])
+
+    def get_coordinates_for_stop(self, stop_id) -> tuple[float, float] | None:
+        """
+        Returns the (longitude, latitude) of a stop, or None when the stop is
+        unknown or its coordinates are missing or not numeric.
+        """
+        if self.stop_to_coordinates is None:
+            stop_to_coordinates = {}
+            for row in self.get_file(STOPS_FILE):
+                row_stop_id = row["stop_id"]
+                try:
+                    coordinates = (float(row["stop_lon"]), float(row["stop_lat"]))
+                except (KeyError, TypeError, ValueError):
+                    # One stop without usable coordinates must not prevent
+                    # looking up every other stop.
+                    self.logger.debug(
+                        "Skipping stop %s: missing or invalid coordinates",
+                        row_stop_id,
+                    )
+                    continue
+                stop_to_coordinates[row_stop_id] = coordinates
+            self.stop_to_coordinates = stop_to_coordinates
+        return self.stop_to_coordinates.get(stop_id, None)
+
+    def set_workdir(self, workdir: str) -> None:
+        """
+        Changes the directory used to locate files that are not cached yet.
+        Content that is already cached is left untouched.
+        """
+        self.workdir = workdir
@@ -0,0 +1,120 @@
+import json
+import sys
+import logging
+from collections import defaultdict
+
+from csv_cache import CsvCache, ROUTES_FILE, TRIPS_FILE, STOP_TIMES_FILE, STOPS_FILE
+from shared.helpers.runtime_metrics import track_metrics
+
+# Module-level logger used by the conversion function and the command-line entry point below.
+logger = logging.getLogger(__name__)
+
+
+def create_routes_map(routes_data):
+    """
+    Indexes route rows by their route_id.
+
+    Rows without a route_id (missing or empty) are left out. When several
+    rows share a route_id, the last one is kept.
+    """
+    return {
+        key: route
+        for route in routes_data
+        if (key := route.get("route_id"))
+    }
+
+
+def build_stop_to_routes(stop_times_data, trips_data):
+    """
+    Maps every stop_id to the set of route_ids that serve it.
+
+    Stop times are linked to routes through their trip: a stop time counts
+    only if it has a stop_id and its trip_id belongs to a trip that has a
+    route_id. Returns a defaultdict(set) keyed by stop_id.
+    """
+    # trip_id -> route_id, keeping only trips where both values are present.
+    route_by_trip = {
+        trip["trip_id"]: trip["route_id"]
+        for trip in trips_data
+        if trip.get("trip_id") and trip.get("route_id")
+    }
+
+    routes_by_stop = defaultdict(set)
+    for stop_time in stop_times_data:
+        stop_id = stop_time.get("stop_id")
+        # An unknown, empty or missing trip_id simply resolves to no route.
+        route_id = route_by_trip.get(stop_time.get("trip_id"))
+        if stop_id and route_id:
+            routes_by_stop[stop_id].add(route_id)
+
+    return routes_by_stop
+
+
+@track_metrics(metrics=("time", "memory", "cpu"))
+def convert_stops_to_geojson(csv_cache: CsvCache, output_file):
+    """
+    Writes the stops of a GTFS feed to output_file as a GeoJSON FeatureCollection.
+
+    Each stop with a stop_id and parseable coordinates becomes a Point feature
+    carrying a selection of its stops.txt columns plus the sorted ids of the
+    routes serving it and the colors of those routes that are present in
+    routes.txt. Stops without an id are skipped silently; stops with missing
+    or invalid coordinates are skipped with a warning.
+    """
+    route_by_id = create_routes_map(csv_cache.get_file(ROUTES_FILE))
+    routes_by_stop = build_stop_to_routes(
+        csv_cache.get_file(STOP_TIMES_FILE), csv_cache.get_file(TRIPS_FILE)
+    )
+
+    # stops.txt columns copied as-is into the feature properties
+    # (empty string when the column is absent).
+    copied_columns = (
+        "stop_code",
+        "stop_name",
+        "stop_desc",
+        "zone_id",
+        "stop_url",
+        "wheelchair_boarding",
+        "location_type",
+    )
+
+    features = []
+
+    for stop in csv_cache.get_file(STOPS_FILE):
+        stop_id = stop.get("stop_id")
+        if not stop_id:
+            continue
+
+        raw_lat = stop.get("stop_lat")
+        raw_lon = stop.get("stop_lon")
+        if not (raw_lat and raw_lon):
+            logger.warning(f"Missing coordinates for stop_id {stop_id}, skipping.")
+            continue
+
+        try:
+            longitude = float(raw_lon)
+            latitude = float(raw_lat)
+        except (ValueError, TypeError):
+            logger.warning(f"Invalid coordinates for stop_id {stop_id}, skipping.")
+            continue
+
+        # Routes serving this stop; colors only for routes known to routes.txt.
+        served_routes = sorted(routes_by_stop.get(stop_id, []))
+        served_colors = [
+            route_by_id[route_id].get("route_color", "")
+            for route_id in served_routes
+            if route_id in route_by_id
+        ]
+
+        properties = {"stop_id": stop_id}
+        properties.update((column, stop.get(column, "")) for column in copied_columns)
+        properties["route_ids"] = served_routes
+        properties["route_colors"] = served_colors
+
+        features.append(
+            {
+                "type": "Feature",
+                # GeoJSON positions are ordered longitude, latitude.
+                "geometry": {"type": "Point", "coordinates": [longitude, latitude]},
+                "properties": properties,
+            }
+        )
+
+    collection = {"type": "FeatureCollection", "features": features}
+
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(collection, f, indent=2, ensure_ascii=False)
+
+    logger.info(f"✅ GeoJSON file saved to {output_file} with {len(features)} stops")
+
+
+if __name__ == "__main__":
+    # Command-line entry point:
+    #   python script.py stops stop_times trips routes output.geojson
+    # The four input paths are loaded into a CsvCache under the GTFS file names
+    # that convert_stops_to_geojson asks for, since that function takes a
+    # cache and an output path rather than individual file paths.
+    import csv
+
+    # Nothing configures logging when this module is run directly; without
+    # this the usage and completion messages would never be displayed.
+    logging.basicConfig(level=logging.INFO)
+
+    if len(sys.argv) != 6:
+        logger.error(
+            "Usage: python script.py stops stop_times trips routes output.geojson"
+        )
+        sys.exit(1)
+
+    stops_path, stop_times_path, trips_path, routes_path, output_path = sys.argv[1:]
+
+    cache = CsvCache()
+    for gtfs_name, input_path in (
+        (STOPS_FILE, stops_path),
+        (STOP_TIMES_FILE, stop_times_path),
+        (TRIPS_FILE, trips_path),
+        (ROUTES_FILE, routes_path),
+    ):
+        with open(input_path, newline="", encoding="utf-8") as f:
+            cache.add_data(gtfs_name, list(csv.DictReader(f)))
+
+    convert_stops_to_geojson(cache, output_path)
Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@`
`17`	`17`	`"key": "FEEDS_DATABASE_URL"`
`18`	`18`	`}`
`19`	`19`	`],`
`20`		`- "ingress_settings": "ALLOW_ALL",`
	`20`	`+ "ingress_settings": "ALL",`
`21`	`21`	`"max_instance_request_concurrency": 1,`
`22`	`22`	`"max_instance_count": 5,`
`23`	`23`	`"min_instance_count": 0,`