Skip to content

Commit 115ba31

Browse files
authored
Strip column names before using them. Added a bit of robustness. (#1444)
1 parent 9f8d57d commit 115ba31

File tree

10 files changed

+292
-165
lines changed

10 files changed

+292
-165
lines changed

functions-python/pmtiles_builder/src/agencies_processor.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import csv
2-
31
from base_processor import BaseProcessor
42
from csv_cache import AGENCY_FILE
53

@@ -16,9 +14,9 @@ def __init__(
1614
def process_file(self):
1715
with open(self.filepath, "r", encoding=self.encoding, newline="") as f:
1816
header = f.readline()
19-
if not header:
17+
columns = self.csv_parser.parse_header(header)
18+
if not columns:
2019
return
21-
columns = next(csv.reader([header]))
2220

2321
agency_id_index = self.csv_cache.get_index(columns, "agency_id")
2422
agency_name_index = self.csv_cache.get_index(columns, "agency_name")

functions-python/pmtiles_builder/src/fast_csv_parser.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,21 @@ def parse(
1919
) -> List[str]: # pragma: no cover (behavior tested indirectly)
2020
if '"' in line:
2121
self._lines_with_quotes += 1
22-
return next(csv.reader([line]))
23-
return line.rstrip("\r\n").split(",")
22+
row = next(
23+
csv.reader([line]), []
24+
) # default to empty list if iterator is exhausted
25+
else:
26+
row = line.rstrip("\r\n").split(",")
27+
28+
return [c.strip() for c in row]
29+
30+
@staticmethod
31+
def parse_header(header: str) -> List[str]:
32+
"""Parse a CSV header line into a list of column names.
33+
Ignore leading/trailing whitespace around column names.
34+
35+
"""
36+
if not header:
37+
return []
38+
columns = next(csv.reader([header]))
39+
return [c.strip() for c in columns]

functions-python/pmtiles_builder/src/routes_processor.py

Lines changed: 86 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import csv
21
import json
32
from typing import TextIO, Dict, List
43

@@ -53,79 +52,99 @@ def process_file(self):
5352
) as routes_json_file:
5453
geojson_file.write('{"type": "FeatureCollection", "features": [\n')
5554
routes_json_file.write("[\n")
56-
with open(self.filepath, "r", encoding=self.encoding, newline="") as f:
57-
header = f.readline()
58-
if not header:
59-
return
60-
columns = next(csv.reader([header]))
61-
62-
route_id_index = csv_cache.get_index(columns, "route_id")
63-
agency_id_index = csv_cache.get_index(columns, "agency_id")
64-
route_short_name_index = csv_cache.get_index(
65-
columns, "route_short_name"
66-
)
67-
route_long_name_index = csv_cache.get_index(columns, "route_long_name")
68-
route_type_index = csv_cache.get_index(columns, "route_type")
69-
route_text_color_index = csv_cache.get_index(
70-
columns, "route_text_color"
71-
)
72-
route_color_index = csv_cache.get_index(columns, "route_color")
73-
74-
line_number = 1
75-
for line in f:
76-
if not line.strip():
77-
continue
78-
79-
row = self.csv_parser.parse(line)
80-
route_id = csv_cache.get_safe_value_from_index(row, route_id_index)
81-
agency_id = csv_cache.get_safe_value_from_index(
82-
row, agency_id_index, "default"
83-
)
84-
route_short_name = csv_cache.get_safe_value_from_index(
85-
row, route_short_name_index
55+
try:
56+
with open(self.filepath, "r", encoding=self.encoding, newline="") as f:
57+
header = f.readline()
58+
columns = self.csv_parser.parse_header(header)
59+
if not columns:
60+
return
61+
62+
route_id_index = csv_cache.get_index(columns, "route_id")
63+
agency_id_index = csv_cache.get_index(columns, "agency_id")
64+
route_short_name_index = csv_cache.get_index(
65+
columns, "route_short_name"
8666
)
87-
route_long_name = csv_cache.get_safe_value_from_index(
88-
row, route_long_name_index
67+
route_long_name_index = csv_cache.get_index(
68+
columns, "route_long_name"
8969
)
90-
route_type = csv_cache.get_safe_value_from_index(
91-
row, route_type_index
92-
)
93-
route_color = csv_cache.get_safe_value_from_index(
94-
row, route_color_index
95-
)
96-
route_text_color = csv_cache.get_safe_value_from_index(
97-
row, route_text_color_index
70+
route_type_index = csv_cache.get_index(columns, "route_type")
71+
route_text_color_index = csv_cache.get_index(
72+
columns, "route_text_color"
9873
)
74+
route_color_index = csv_cache.get_index(columns, "route_color")
9975

100-
# Pass all parsed values to add_to_routes_geojson
101-
self.add_to_routes_geojson(
102-
geojson_file=geojson_file,
103-
route_id=route_id,
104-
agency_id=agency_id,
105-
route_short_name=route_short_name,
106-
route_long_name=route_long_name,
107-
route_type=route_type,
108-
route_color=route_color,
109-
route_text_color=route_text_color,
110-
)
76+
if route_id_index is None:
77+
self.logger.warning(
78+
"Missing required route_id column in routes header; skipping routes processing"
79+
)
80+
return
11181

112-
self.add_to_routes_json(
113-
routes_json_file=routes_json_file,
114-
route_id=route_id,
115-
route_short_name=route_short_name,
116-
route_long_name=route_long_name,
117-
route_type=route_type,
118-
route_color=route_color,
119-
route_text_color=route_text_color,
120-
)
82+
line_number = 0
83+
for line in f:
84+
line_number += 1
85+
if not line.strip():
86+
continue
12187

122-
if line_number % 100 == 0 or line_number == 1:
123-
self.logger.debug(
124-
"Processed route %d (route_id: %s)", line_number, route_id
125-
)
88+
row = self.csv_parser.parse(line)
89+
route_id = csv_cache.get_safe_value_from_index(
90+
row, route_id_index
91+
)
92+
agency_id = csv_cache.get_safe_value_from_index(
93+
row, agency_id_index, "default"
94+
)
95+
route_short_name = csv_cache.get_safe_value_from_index(
96+
row, route_short_name_index
97+
)
98+
route_long_name = csv_cache.get_safe_value_from_index(
99+
row, route_long_name_index
100+
)
101+
route_type = csv_cache.get_safe_value_from_index(
102+
row, route_type_index
103+
)
104+
route_color = csv_cache.get_safe_value_from_index(
105+
row, route_color_index
106+
)
107+
route_text_color = csv_cache.get_safe_value_from_index(
108+
row, route_text_color_index
109+
)
110+
111+
# Pass all parsed values to add_to_routes_geojson
112+
self.add_to_routes_geojson(
113+
geojson_file=geojson_file,
114+
route_id=route_id,
115+
agency_id=agency_id,
116+
route_short_name=route_short_name,
117+
route_long_name=route_long_name,
118+
route_type=route_type,
119+
route_color=route_color,
120+
route_text_color=route_text_color,
121+
)
126122

127-
geojson_file.write("\n]}")
128-
routes_json_file.write("\n]")
123+
self.add_to_routes_json(
124+
routes_json_file=routes_json_file,
125+
route_id=route_id,
126+
route_short_name=route_short_name,
127+
route_long_name=route_long_name,
128+
route_type=route_type,
129+
route_color=route_color,
130+
route_text_color=route_text_color,
131+
)
132+
133+
if line_number % 100 == 0 or line_number == 1:
134+
self.logger.debug(
135+
"Processed route %d (route_id: %s)", line_number, route_id
136+
)
137+
finally:
138+
# Ensure we always close the JSON arrays even on early return or exceptions.
139+
try:
140+
geojson_file.write("\n]}")
141+
except Exception:
142+
# best-effort: don't let closing failures mask the original error
143+
pass
144+
try:
145+
routes_json_file.write("\n]")
146+
except Exception:
147+
pass
129148

130149
if self.missing_coordinates_routes:
131150
self.logger.info(

functions-python/pmtiles_builder/src/routes_processor_for_colors.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,36 @@
1-
import csv
2-
31
from base_processor import BaseProcessor
42
from csv_cache import ROUTES_FILE
53

64

75
class RoutesProcessorForColors(BaseProcessor):
86
"""Read routes.txt to map route_id → route_color for later use.
9-
Routes processing is split in two to avoid circular dependencies: StopsProcessor can rely on route colors
10-
without requiring the full routes build to have run.
11-
The input file is retained for the next pass over routes.txt in RoutesProcessor (no_delete=True).
7+
8+
Routes processing is split in two to avoid circular dependencies: StopsProcessor can
9+
rely on route colors without requiring the full routes build to have run. The
10+
input file is retained for the next pass over routes.txt in RoutesProcessor
11+
(no_delete=True).
1212
"""
1313

14-
def __init__(
15-
self,
16-
csv_cache,
17-
logger=None,
18-
):
14+
def __init__(self, csv_cache, logger=None):
1915
super().__init__(ROUTES_FILE, csv_cache, logger, no_delete=True)
2016
self.route_colors_map = {}
2117

2218
def process_file(self):
2319
with open(self.filepath, "r", encoding=self.encoding, newline="") as f:
2420
header = f.readline()
25-
if not header:
21+
columns = self.csv_parser.parse_header(header)
22+
if not columns:
2623
return
27-
columns = next(csv.reader([header]))
2824

2925
route_id_index = self.csv_cache.get_index(columns, "route_id")
3026
route_color_index = self.csv_cache.get_index(columns, "route_color")
3127

28+
if route_id_index is None:
29+
self.logger.warning(
30+
"Missing required route_id column in routes header; skipping routes processing for colors"
31+
)
32+
return
33+
3234
for line in f:
3335
if not line.strip():
3436
continue

functions-python/pmtiles_builder/src/shapes_processor.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import collections
2-
import csv
32
import os
43
import psutil
54

@@ -8,7 +7,6 @@
87
from base_processor import BaseProcessor
98
from csv_cache import SHAPES_FILE
109
from shared.helpers.runtime_metrics import track_metrics
11-
from shared.helpers.utils import detect_encoding
1210

1311

1412
class ShapesProcessor(BaseProcessor):
@@ -29,18 +27,27 @@ def process_file(self):
2927
process = psutil.Process(os.getpid())
3028

3129
try:
32-
encoding = detect_encoding(filename=self.filepath, logger=self.logger)
33-
with open(self.filepath, "r", encoding=encoding, newline="") as f:
30+
with open(self.filepath, "r", encoding=self.encoding, newline="") as f:
3431
header = f.readline()
35-
if not header:
32+
columns = self.csv_parser.parse_header(header)
33+
if not columns:
3634
return
37-
columns = next(csv.reader([header]))
35+
3836
shape_id_index = csv_cache.get_index(columns, "shape_id")
3937
lon_idx = csv_cache.get_index(columns, "shape_pt_lon")
4038
lat_idx = csv_cache.get_index(columns, "shape_pt_lat")
4139
seq_idx = csv_cache.get_index(columns, "shape_pt_sequence")
4240

41+
# If any required column index is None, warn and skip processing.
42+
if None in (shape_id_index, lon_idx, lat_idx, seq_idx):
43+
self.logger.warning(
44+
"Missing required columns in shapes header; skipping shapes processing"
45+
)
46+
return
47+
4348
for line in f:
49+
line_count += 1
50+
4451
try:
4552
if not line.strip():
4653
continue
@@ -52,7 +59,6 @@ def process_file(self):
5259
)
5360

5461
self.unique_shape_id_counts[shape_id] += 1
55-
line_count += 1
5662
if line_count % 1_000_000 == 0:
5763
mem_mb = process.memory_info().rss / (
5864
1024 * 1024
@@ -89,6 +95,8 @@ def process_file(self):
8995
f.readline() # Skip header
9096
needs_sorting = False
9197
for line in f:
98+
line_count += 1
99+
92100
try:
93101
if not line.strip():
94102
continue
@@ -120,7 +128,6 @@ def process_file(self):
120128

121129
positions_in_coordinates_arrays[shape_id] = position + 1
122130

123-
line_count += 1
124131
if line_count % 1_000_000 == 0:
125132
mem_mb = process.memory_info().rss / (
126133
1024 * 1024

functions-python/pmtiles_builder/src/stop_times_processor.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import csv
21
from collections import defaultdict
32
from typing import Dict, List
43

@@ -20,13 +19,19 @@ def process_file(self):
2019
trip_to_stops: Dict[str, List[tuple]] = {}
2120
with open(self.filepath, "r", encoding=self.encoding, newline="") as f:
2221
header = f.readline()
23-
if not header:
22+
columns = self.csv_parser.parse_header(header)
23+
if not columns:
2424
return
25-
columns = next(csv.reader([header]))
2625
stop_id_index = self.csv_cache.get_index(columns, "stop_id")
2726
trip_id_index = self.csv_cache.get_index(columns, "trip_id")
2827
seq_index = self.csv_cache.get_index(columns, "stop_sequence")
2928

29+
if trip_id_index is None:
30+
self.logger.warning(
31+
"Missing required trip_id column in stop_times header; skipping stop_times processing"
32+
)
33+
return
34+
3035
# Collect unique trips without shapes across all routes (for parsing only)
3136
trips_without_shape_set = set()
3237
for trip_list in self.trips_processor.trips_no_shapes_per_route.values():
@@ -37,13 +42,13 @@ def process_file(self):
3742
seq_fallback_counter = defaultdict(int)
3843

3944
for line in f:
45+
line_count += 1
4046
if not line.strip():
4147
continue
4248
row = self.csv_parser.parse(line)
4349
stop_id = self.csv_cache.get_safe_value_from_index(row, stop_id_index)
4450
trip_id = self.csv_cache.get_safe_value_from_index(row, trip_id_index)
4551

46-
line_count += 1
4752
if line_count % 1_000_000 == 0:
4853
self.logger.debug(
4954
"Processed %d lines of %s", line_count, self.filename

0 commit comments

Comments (0)