Skip to content

Commit 5071e85

Browse files
Building centroid PMTiles (#325)
* adds a pipeline step to create a building centroid pmtile layer
* snapshot
* fix subprocess spacing issue
* remove extra vars
* in building centroids, round two decimals
* change rounding from 2 to 3
* cast to double
* select only risk>0
* lint
* update rounding and remove gen-ids
* retrigger ci
* change tippecanoe tile size syntax
* cli order
* switch to zg
* change building tippecanoe settings
1 parent 8a6bc2d commit 5071e85

File tree

5 files changed

+192
-13
lines changed

5 files changed

+192
-13
lines changed

ocr/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,12 +778,14 @@ def wipe(self):
778778
f'Wiping vector data storage at these locations:\n'
779779
f'- {self.building_geoparquet_uri.parent}\n'
780780
f'- {self.buildings_pmtiles_uri.parent}\n'
781+
f'- {self.building_centroids_pmtiles_uri.parent}\n'
781782
f'- {self.region_geoparquet_uri}\n'
782783
f'- {self.aggregated_region_analysis_uri}\n'
783784
f'- {self.tracts_summary_stats_uri.parent}\n'
784785
)
785786
self.upath_delete(self.building_geoparquet_uri.parent)
786787
self.upath_delete(self.buildings_pmtiles_uri.parent)
788+
self.upath_delete(self.building_centroids_pmtiles_uri.parent)
787789
self.upath_delete(self.region_geoparquet_uri)
788790
self.upath_delete(self.aggregated_region_analysis_uri)
789791
self.upath_delete(self.tracts_summary_stats_uri.parent)
@@ -802,6 +804,12 @@ def buildings_pmtiles_uri(self) -> UPath:
802804
path.parent.mkdir(parents=True, exist_ok=True)
803805
return path
804806

807+
@functools.cached_property
def building_centroids_pmtiles_uri(self) -> UPath:
    """Return the location of the building-centroid PMTiles archive.

    The path is ``<storage_root>/<pmtiles_prefix>/building_centroids.pmtiles``.
    Its parent directory is created on first access (``parents=True``,
    ``exist_ok=True``), and the resulting path is cached on the instance by
    ``functools.cached_property``.
    """
    centroids_uri = UPath(f'{self.storage_root}/{self.pmtiles_prefix}/building_centroids.pmtiles')
    # Make sure the target directory exists before anything writes to it.
    centroids_uri.parent.mkdir(parents=True, exist_ok=True)
    return centroids_uri
812+
805813
@functools.cached_property
806814
def region_pmtiles_uri(self) -> UPath:
807815
path = UPath(f'{self.storage_root}/{self.pmtiles_prefix}/regions.pmtiles')
@@ -929,6 +937,7 @@ def nv(name: str, value: str | None):
929937
nv('Tracts summary stats', str(self.tracts_summary_stats_uri)),
930938
nv('Counties summary stats', str(self.counties_summary_stats_uri)),
931939
nv('Buildings PMTiles', str(self.buildings_pmtiles_uri)),
940+
# Distinct label so this row is not confused with the 'Buildings PMTiles' row.
nv('Building centroids PMTiles', str(self.building_centroids_pmtiles_uri)),
932941
nv('Region PMTiles', str(self.region_pmtiles_uri)),
933942
]
934943
)

ocr/deploy/cli.py

Lines changed: 85 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -299,9 +299,10 @@ def run(
299299
'software': COILED_SOFTWARE,
300300
},
301301
)
302-
manager.wait_for_completion(exit_on_failure=True)
303302

304303
if write_regional_stats:
304+
manager.wait_for_completion(exit_on_failure=True)
305+
305306
manager = _get_manager(Platform.COILED, config.debug)
306307

307308
manager.submit_job(
@@ -330,6 +331,19 @@ def run(
330331

331332
# ------------- 03 Tiles ---------------
332333

334+
manager = _get_manager(Platform.COILED, config.debug)
335+
manager.submit_job(
336+
command='ocr create-building-centroid-pmtiles',
337+
name=f'create-building-centroid-pmtiles-{config.environment.value}',
338+
kwargs={
339+
**_coiled_kwargs(config, env_file),
340+
'vm_type': 'c8g.12xlarge',
341+
'scheduler_vm_type': 'c8g.12xlarge',
342+
'disk_size': 250,
343+
'software': COILED_SOFTWARE,
344+
}, # PMTiles creation needs more disk space
345+
)
346+
333347
manager = _get_manager(Platform.COILED, config.debug)
334348
manager.submit_job(
335349
command='ocr create-building-pmtiles',
@@ -412,6 +426,15 @@ def run(
412426
)
413427
manager.wait_for_completion(exit_on_failure=True)
414428

429+
# Create building centroid PMTiles from the consolidated geoparquet file
430+
manager = _get_manager(Platform.LOCAL, config.debug)
431+
manager.submit_job(
432+
command='ocr create-building-centroid-pmtiles',
433+
name=f'create-building-centroid-pmtiles-{config.environment.value}',
434+
kwargs={
435+
**_local_kwargs(),
436+
},
437+
)
415438
# Create PMTiles from the consolidated geoparquet file
416439
manager = _get_manager(Platform.LOCAL, config.debug)
417440
manager.submit_job(
@@ -423,18 +446,6 @@ def run(
423446
)
424447
manager.wait_for_completion(exit_on_failure=True)
425448

426-
# TODO: Should we run this locally? It will most likely break due to lack of resources
427-
# # Create pyramid
428-
# manager = _get_manager(Platform.LOCAL, config.debug)
429-
# manager.submit_job(
430-
# command='ocr create-pyramid',
431-
# name=f'create-pyramid-{config.environment.value}',
432-
# kwargs={
433-
# **_local_kwargs(),
434-
# },
435-
# )
436-
# manager.wait_for_completion(exit_on_failure=True)
437-
438449
if config.debug:
439450
# Print out the pretty paths
440451
console.log('Run complete. Current configuration paths:')
@@ -814,6 +825,67 @@ def create_building_pmtiles(
814825
create_building_pmtiles(config=config)
815826

816827

828+
@app.command()
def create_building_centroid_pmtiles(
    env_file: Path | None = typer.Option(
        None,
        '-e',
        '--env-file',
        help='Path to the environment variables file. These will be used to set up the OCRConfiguration',
        show_default=True,
        exists=True,
        file_okay=True,
        resolve_path=True,
    ),
    platform: Platform | None = typer.Option(
        None,
        '-p',
        '--platform',
        help='If set, schedule this command on the specified platform instead of running inline.',
        show_default=True,
    ),
    vm_type: str | None = typer.Option(
        'c8g.8xlarge', '--vm-type', help='Coiled VM type override (Coiled only).'
    ),
    disk_size: int | None = typer.Option(250, '--disk-size', help='Disk size in GB (Coiled only).'),
):
    """
    Create building centroid PMTiles from the consolidated geoparquet file.
    """
    # Two modes:
    #   * `--platform` given and we are not already inside a batch task:
    #     re-submit this same CLI command as a job on that platform, wait for
    #     it, and return.
    #   * otherwise: run the pipeline step inline in this process.

    # Schedule if requested and not already inside a batch task
    if platform is not None and not _in_batch():
        config = load_config(env_file)
        manager = _get_manager(platform, config.debug)
        # The scheduled job must invoke *this* command. Using the
        # `create-building-pmtiles` command/name here would build the polygon
        # layer instead and never produce the centroid layer.
        command = 'ocr create-building-centroid-pmtiles'
        name = f'create-building-centroid-pmtiles-{config.environment.value}'

        if platform == Platform.COILED:
            coiled_software = os.environ.get('COILED_SOFTWARE_ENV_NAME')
            if coiled_software is None or not coiled_software.strip():
                # Not fatal: the job is still submitted with the unset/blank
                # value, as the message below states.
                console.log(
                    '[red]Error: COILED_SOFTWARE_ENV_NAME environment variable is not set. '
                    'This must be set to the name of a Coiled software environment with OCR installed. Proceeding with package sync...[/red]'
                )
            kwargs = {
                **_coiled_kwargs(config, env_file),
                # The scheduler uses the same VM type as the worker.
                'vm_type': vm_type,
                'scheduler_vm_type': vm_type,
                'disk_size': disk_size,
                'software': coiled_software,
            }
        else:
            kwargs = {**_local_kwargs()}

        manager.submit_job(command=command, name=name, kwargs=kwargs)
        manager.wait_for_completion(exit_on_failure=True)
        return

    # Imported lazily so the pipeline module is only loaded when the step
    # actually runs inline. Note that this local import shadows the command's
    # own name for the remainder of the function.
    from ocr.pipeline.create_building_centroid_pmtiles import create_building_centroid_pmtiles

    config = load_config(env_file)

    create_building_centroid_pmtiles(config=config)
887+
888+
817889
@app.command()
818890
def create_pyramid(
819891
env_file: Path | None = typer.Option(
ocr/pipeline/create_building_centroid_pmtiles.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import subprocess
2+
import tempfile
3+
from pathlib import Path
4+
5+
import duckdb
6+
from upath import UPath
7+
8+
from ocr.config import OCRConfig
9+
from ocr.console import console
10+
from ocr.utils import apply_s3_creds, copy_or_upload, get_temp_dir, install_load_extensions
11+
12+
13+
def create_building_centroid_pmtiles(
    config: OCRConfig,
):
    """Nearly identical to create_building_pmtiles.py, but creates centroid only layer for higher zoom levels.

    Steps:
      1. DuckDB reads every ``*.parquet`` file under
         ``config.vector.region_geoparquet_uri`` and writes one JSON feature per
         row to a temporary NDJSON file. Each feature carries the centroid of
         the row geometry and two properties: ``'0'`` (``wind_risk_2011``) and
         ``'1'`` (``wind_risk_2047``), both cast to DOUBLE and rounded to three
         decimals. Only rows where *both* risk values are > 0 are exported.
      2. ``tippecanoe`` turns the NDJSON file into a local PMTiles archive.
      3. The archive is copied/uploaded to
         ``config.vector.building_centroids_pmtiles_uri``.

    Args:
        config: Pipeline configuration; ``config.vector`` supplies the input and
            output locations and ``config.debug`` enables progress logging.

    Raises:
        subprocess.CalledProcessError: If ``tippecanoe`` exits with a non-zero
            status (``check=True``).
    """

    input_path = f'{config.vector.region_geoparquet_uri}/*.parquet'  # type: ignore[attr-defined]

    output_path = config.vector.building_centroids_pmtiles_uri  # type: ignore[attr-defined]

    # S3 credentials/extensions are only needed when either end lives on S3.
    needs_s3 = any(str(p).startswith('s3://') for p in [input_path, output_path])

    connection = duckdb.connect(database=':memory:')

    try:
        install_load_extensions(aws=needs_s3, spatial=True, httpfs=True, con=connection)
        if needs_s3:
            apply_s3_creds(region='us-west-2', con=connection)

        # Both intermediate files live in a temp dir that is removed on exit.
        with tempfile.TemporaryDirectory(dir=get_temp_dir()) as tmpdir:
            tmp_path = UPath(tmpdir)
            local_pmtiles = tmp_path / 'aggregated.pmtiles'
            ndjson_path = Path(tmpdir) / 'buildings.ndjson'

            if config.debug:
                console.log(f'Exporting features from {input_path} to NDJSON in {ndjson_path}')

            # Short property keys ('0', '1') and 3-decimal rounding are used in
            # the exported features; the paths interpolated below come from
            # configuration, not from user input.
            copy_sql = f"""
            COPY (
                SELECT
                    'Feature' AS type,
                    json_object(
                        '0', ROUND(CAST(wind_risk_2011 AS DOUBLE), 3),
                        '1', ROUND(CAST(wind_risk_2047 AS DOUBLE), 3)
                    ) AS properties,
                    json(ST_AsGeoJson(ST_Centroid(geometry))) AS geometry
                FROM read_parquet('{input_path}')
                WHERE
                    wind_risk_2011 > 0
                    AND
                    wind_risk_2047 > 0
            ) TO '{ndjson_path.as_posix()}' (FORMAT json);
            """
            connection.execute(copy_sql)

            if config.debug:
                console.log('NDJSON export complete')
                console.log(f'Generating PMTiles at {local_pmtiles}')

            # Passed as an argv list (no shell); every flag is its own token.
            tippecanoe_cmd = [
                'tippecanoe',
                '-o',  # output archive
                str(local_pmtiles),
                '-l',  # layer name
                'risk',
                '-n',  # tileset name
                'centroid',
                '-f',  # overwrite an existing output file
                '-P',  # read the newline-delimited input in parallel
                '--drop-fraction-as-needed',
                '--no-feature-limit',
                '--extend-zooms-if-still-dropping',
                '-zg',  # let tippecanoe guess the maximum zoom
                '-q',  # quiet
                str(ndjson_path),
            ]
            subprocess.run(tippecanoe_cmd, check=True)

            if config.debug:
                console.log('Tippecanoe tiles generation complete')
                console.log(f'Uploading PMTiles to {output_path}')

            copy_or_upload(local_pmtiles, output_path)

            if config.debug:
                console.log('PMTiles upload completed successfully')
    finally:
        try:
            connection.close()
        except Exception as exc:
            # Closing is best-effort and must not mask an error raised above,
            # but a failure is reported instead of being silently discarded.
            # The exception is passed as an object so its text is not parsed
            # as console markup.
            console.log('Warning: failed to close DuckDB connection:', exc)

ocr/pipeline/create_building_pmtiles.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def create_building_pmtiles(
7979
'-q',
8080
'--extend-zooms-if-still-dropping',
8181
'-zg',
82+
'-Z6',  # minimum zoom 6; flag and value fused without an embedded space
8283
'--generate-ids',
8384
str(ndjson_path),
8485
]

tests/test_config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,11 @@ def test_cached_properties(self, temp_dir):
438438
str(config.buildings_pmtiles_uri)
439439
== f'{temp_dir}/output/fire-risk/vector/qa/pmtiles/buildings.pmtiles'
440440
)
441+
# Test building centroid pmtiles uri
442+
assert (
443+
str(config.building_centroids_pmtiles_uri)
444+
== f'{temp_dir}/output/fire-risk/vector/qa/pmtiles/building_centroids.pmtiles'
445+
)
441446

442447
def test_summary_stats_uris(self, temp_dir):
443448
"""Test summary statistics URI properties."""

0 commit comments

Comments
 (0)