Skip to content

Commit c671504

Browse files
authored
fix: Corrected pmtiles code so shape lines are sorted. (#1394)
1 parent 00b6ce4 commit c671504

File tree

6 files changed

+52
-18
lines changed

6 files changed

+52
-18
lines changed

functions-python/pmtiles_builder/function_config.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "The PMTiles Builder function creates PMTiles from dataset files",
44
"entry_point": "build_pmtiles_handler",
55
"timeout": 1680,
6-
"memory": "8Gi",
6+
"memory": "16Gi",
77
"trigger_http": true,
88
"include_folders": ["helpers"],
99
"include_api_folders": ["database_gen", "database", "common"],
@@ -22,7 +22,7 @@
2222
],
2323
"ingress_settings": "ALL",
2424
"max_instance_request_concurrency": 1,
25-
"max_instance_count": 5,
25+
"max_instance_count": 50,
2626
"min_instance_count": 0,
27-
"available_cpu": 2
27+
"available_cpu": 4
2828
}

functions-python/pmtiles_builder/src/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def __init__(
8787
self.name: str
8888

8989
os.makedirs(self._root, exist_ok=True)
90-
91-
self._ttl_seconds = int(os.getenv("WORKDIR_MAX_AGE_SECONDS", "3600"))
90+
# 0 means just delete everything without looking at the file dates.
91+
self._ttl_seconds = int(os.getenv("WORKDIR_MAX_AGE_SECONDS", "0"))
9292

9393
if self._debug_dir:
9494
os.makedirs(self._debug_dir, exist_ok=True)
@@ -135,7 +135,7 @@ def _cleanup_old(self):
135135
if age > self._ttl_seconds:
136136
shutil.rmtree(entry.path, ignore_errors=True)
137137
deleted_count += 1
138-
self._logger.debug(
138+
self._logger.warning(
139139
"Removed expired workdir: %s age=%.0fs", entry.path, age
140140
)
141141
except OSError as e:

functions-python/pmtiles_builder/src/shapes_index.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
from shared.helpers.transform import get_safe_value, get_safe_float, get_safe_int
3030
from shared.helpers.utils import detect_encoding
31+
from shared.helpers.runtime_metrics import track_metrics
3132

3233

3334
class ShapesIndex:
@@ -119,6 +120,7 @@ def build_index(self) -> None:
119120
f.seek(0)
120121
line_count = 0
121122
f.readline() # Skip header
123+
needs_sorting = False
122124
for line in f:
123125
try:
124126
if not line.strip():
@@ -130,7 +132,17 @@ def build_index(self) -> None:
130132
lon_arr, lat_arr, seq_arr = self.coordinates_arrays[shape_id]
131133
lon_arr[position] = get_safe_float(row[lon_idx])
132134
lat_arr[position] = get_safe_float(row[lat_idx])
133-
seq_arr[position] = get_safe_int(row[seq_idx])
135+
seq_value = get_safe_int(row[seq_idx])
136+
seq_arr[position] = seq_value
137+
138+
# If any sequence number goes down, set needs_sorting True.
139+
# Just a little low hanging optimization so we don't sort for nothing.
140+
if (
141+
not needs_sorting
142+
and position > 0
143+
and seq_value < seq_arr[position - 1]
144+
):
145+
needs_sorting = True
134146

135147
positions_in_coordinates_arrays[shape_id] = position + 1
136148

@@ -148,13 +160,27 @@ def build_index(self) -> None:
148160
self.logger.warning(
149161
f"Skipping line {line_count} of shapes.txt because of error: {e}"
150162
)
163+
if needs_sorting:
164+
self.sort_coordinate_arrays()
165+
151166
if self.lines_with_quotes > 0:
152167
self.logger.debug(
153168
f"Found {self.lines_with_quotes} lines with quotes while creating shapes index"
154169
)
155170
except Exception as e:
156171
self.logger.warning("Cannot read shapes file: %s", e)
157172

173+
@track_metrics(metrics=("time", "memory", "cpu"))
174+
def sort_coordinate_arrays(self):
175+
"""
176+
Sorts the coordinate arrays in-place for each shape_id according to the sequence number.
177+
"""
178+
for key, (lon_arr, lat_arr, seq_arr) in self.coordinates_arrays.items():
179+
sort_idx = np.argsort(seq_arr)
180+
lon_arr[:] = lon_arr[sort_idx]
181+
lat_arr[:] = lat_arr[sort_idx]
182+
seq_arr[:] = seq_arr[sort_idx]
183+
158184
def get_objects(self, key: str):
159185
"""Return a list of objects for rows matching key, using row_fn or raw row dicts."""
160186
return self.get_shape_points(key)

functions-python/pmtiles_builder/tests/test_build_pmtiles.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,11 @@ def test_build_pmtiles_creates_correct_shapes_index(self):
212212
shapes_path = os.path.join(temp_dir, "shapes.txt")
213213
with open(shapes_path, "w", encoding="utf-8") as f:
214214
f.write("shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence\n")
215-
f.write("s1,45.0,-73.0,1\n")
216-
f.write("s1,45.1,-73.1,2\n")
215+
# Also test the reordering of shapes lines. In the test file we put sequence number 10 before 8
216+
# It also tests the reordering works fine if there are gaps in the sequence numbers
217+
f.write("s1,45.1,-73.1,10\n")
218+
f.write("s1,45.0,-73.0,8\n")
219+
# f.write("s1,45.1,-73.1,2\n")
217220
f.write("s2,46.0,-74.0,1\n")
218221

219222
index = self.builder._create_shapes_index()
@@ -225,7 +228,12 @@ def test_build_pmtiles_creates_correct_shapes_index(self):
225228
self.assertIn("s2", index.coordinates_arrays)
226229
self.assertEqual(len(index.coordinates_arrays["s1"]), 3)
227230
self.assertEqual(len(index.coordinates_arrays["s1"][0]), 2)
228-
231+
self.assertEqual(len(index.coordinates_arrays["s2"][0]), 1)
232+
# Assert that sequence numbers for s1 are sorted
233+
self.assertListEqual(list(index.coordinates_arrays["s1"][2]), [8, 10])
234+
# Assert that lon and lat for s1 are reordered to match sequence
235+
self.assertListEqual(list(index.coordinates_arrays["s1"][0]), [-73.0, -73.1])
236+
self.assertListEqual(list(index.coordinates_arrays["s1"][1]), [45.0, 45.1])
229237
self.assertEqual(len(index.coordinates_arrays["s2"][0]), 1)
230238

231239
def test_create_routes_geojson(self):

infra/batch/main.tf

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,16 +257,16 @@ resource "google_cloud_tasks_queue" "pmtiles_builder_task_queue" {
257257
name = "pmtiles-builder-queue-${var.environment}-${local.deployment_timestamp}"
258258

259259
rate_limits {
260-
max_concurrent_dispatches = 1
260+
max_concurrent_dispatches = 20
261261
max_dispatches_per_second = 1
262262
}
263263

264264
retry_config {
265-
# This will make the cloud task retry for ~1 hour
266-
max_attempts = 31
265+
# Retries span ~4 minutes: initial try + 2 retries, each 120s apart; total wait ≈ 240s plus processing time
266+
max_attempts = 3
267267
min_backoff = "120s"
268268
max_backoff = "120s"
269-
max_doublings = 2
269+
max_doublings = 2 # Moot here since min_backoff == max_backoff
270270
}
271271
}
272272

infra/functions-python/main.tf

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,16 +1153,16 @@ resource "google_cloud_tasks_queue" "pmtiles_builder_task_queue" {
11531153
name = "pmtiles-builder-queue-${var.environment}-${local.deployment_timestamp}"
11541154

11551155
rate_limits {
1156-
max_concurrent_dispatches = 1
1156+
max_concurrent_dispatches = 20
11571157
max_dispatches_per_second = 1
11581158
}
11591159

11601160
retry_config {
1161-
# This will make the cloud task retry for ~1 hour
1162-
max_attempts = 31
1161+
# Retries span ~4 minutes: initial try + 2 retries, each 120s apart; total wait ≈ 240s plus processing time
1162+
max_attempts = 3
11631163
min_backoff = "120s"
11641164
max_backoff = "120s"
1165-
max_doublings = 2
1165+
max_doublings = 2 # Moot here since min_backoff == max_backoff
11661166
}
11671167
}
11681168

0 commit comments

Comments
 (0)