     write_production_tiles_file,
 )
 from src.config.psql import conn, local_engine
+from src.new_etl.classes.file_manager import FileManager, FileType, LoadType

 log.basicConfig(level=log_level)

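+# Module-level FileManager shared by the helpers below; it resolves and writes
+# temp files so callers no longer hard-code tmp/ paths.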
+file_manager = FileManager()
+

 def google_cloud_bucket(require_write_access: bool = False) -> storage.Bucket | None:
     """
@@ -290,27 +293,29 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
         """
         zoom_threshold: int = 13

-        # Export the GeoDataFrame to a temporary GeoJSON file
-        temp_geojson_points: str = f"tmp/temp_{tiles_file_id_prefix}_points.geojson"
-        temp_geojson_polygons: str = f"tmp/temp_{tiles_file_id_prefix}_polygons.geojson"
-        temp_pmtiles_points: str = f"tmp/temp_{tiles_file_id_prefix}_points.pmtiles"
-        temp_pmtiles_polygons: str = f"tmp/temp_{tiles_file_id_prefix}_polygons.pmtiles"
-        temp_merged_pmtiles: str = f"tmp/temp_{tiles_file_id_prefix}_merged.pmtiles"
-        temp_parquet: str = f"tmp/{tiles_file_id_prefix}.parquet"
+        # Export the GeoDataFrames to temporary GeoJSON files
+        temp_geojson_points_file_name: str = f"temp_{tiles_file_id_prefix}_points"
+        temp_geojson_polygons_file_name: str = f"temp_{tiles_file_id_prefix}_polygons"
+        temp_parquet_file_name: str = f"{tiles_file_id_prefix}"

         # Reproject
         gdf_wm = self.gdf.to_crs(epsg=4326)
-        gdf_wm.to_file(temp_geojson_polygons, driver="GeoJSON")
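+        # Write the reprojected polygons to the managed temp area as GeoJSON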
+        file_manager.save_gdf(
+            gdf_wm, temp_geojson_polygons_file_name, LoadType.TEMP, FileType.GEOJSON
+        )

         # Create points dataset
-        self.centroid_gdf = self.gdf.copy()
-        self.centroid_gdf["geometry"] = self.centroid_gdf["geometry"].centroid
-        self.centroid_gdf = self.centroid_gdf.to_crs(epsg=4326)
-        self.centroid_gdf.to_file(temp_geojson_points, driver="GeoJSON")
+        centroid_gdf = self.gdf.copy()
+        centroid_gdf["geometry"] = centroid_gdf["geometry"].centroid
+        centroid_gdf = centroid_gdf.to_crs(epsg=4326)
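+        # Save the centroid layer as GeoJSON in the managed temp area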
+        file_manager.save_gdf(
+            centroid_gdf, temp_geojson_points_file_name, LoadType.TEMP, FileType.GEOJSON
+        )

-        # Load the GeoJSON from the polygons, drop geometry, and save as Parquet
-        gdf_polygons = gpd.read_file(temp_geojson_polygons)
-        df_no_geom = gdf_polygons.drop(columns=["geometry"])
+        # Drop geometry and save as Parquet
+        df_no_geom = gdf_wm.drop(columns=["geometry"])

         # Check if the DataFrame has fewer than 25,000 rows
         num_rows, num_cols = df_no_geom.shape
@@ -321,9 +326,14 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
             return

         # Save the DataFrame as Parquet
-        df_no_geom.to_parquet(temp_parquet)
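+        # df_no_geom is a plain DataFrame here (geometry already dropped); this
+        # assumes save_gdf also accepts non-geospatial frames for Parquet output.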
+        file_manager.save_gdf(
+            df_no_geom, temp_parquet_file_name, LoadType.TEMP, FileType.PARQUET
+        )

         # Upload Parquet to Google Cloud Storage
+        temp_parquet_file_path = file_manager.get_file_path(
+            temp_parquet_file_name, LoadType.TEMP, FileType.PARQUET
+        )
         bucket = google_cloud_bucket(require_write_access=True)
         if bucket is None:
             print(
@@ -332,8 +342,8 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
             return
         blob_parquet = bucket.blob(f"{tiles_file_id_prefix}.parquet")
         try:
-            blob_parquet.upload_from_filename(temp_parquet)
-            parquet_size = os.stat(temp_parquet).st_size
+            blob_parquet.upload_from_filename(temp_parquet_file_path)
+            parquet_size = os.stat(temp_parquet_file_path).st_size
             parquet_size_mb = parquet_size / (1024 * 1024)
             print(
                 f"Parquet upload successful! Size: {parquet_size} bytes ({parquet_size_mb:.2f} MB), Dimensions: {num_rows} rows, {num_cols} columns."
@@ -342,16 +352,37 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
             print(f"Parquet upload failed: {e}")
             return

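+        # Derive the tippecanoe/tile-join output paths from the FileManager
+        # rather than hard-coding tmp/ file names.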
+        temp_pmtiles_points_file_name: str = f"temp_{tiles_file_id_prefix}_points"
+        temp_pmtiles_polygons_file_name: str = f"temp_{tiles_file_id_prefix}_polygons"
+        temp_merged_pmtiles_file_name: str = f"temp_{tiles_file_id_prefix}_merged"
+
+        temp_pmtiles_points_file_path = file_manager.get_file_path(
+            temp_pmtiles_points_file_name, LoadType.TEMP, FileType.PMTILES
+        )
+        temp_pmtiles_polygons_file_path = file_manager.get_file_path(
+            temp_pmtiles_polygons_file_name, LoadType.TEMP, FileType.PMTILES
+        )
+        temp_merged_pmtiles_file_path = file_manager.get_file_path(
+            temp_merged_pmtiles_file_name, LoadType.TEMP, FileType.PMTILES
+        )
+
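+        # Re-derive the GeoJSON input paths that save_gdf wrote earlier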
+        temp_geojson_points_file_path = file_manager.get_file_path(
+            temp_geojson_points_file_name, LoadType.TEMP, FileType.GEOJSON
+        )
+        temp_geojson_polygons_file_path = file_manager.get_file_path(
+            temp_geojson_polygons_file_name, LoadType.TEMP, FileType.GEOJSON
+        )
+
         # Command for generating PMTiles for points up to zoom level zoom_threshold
         points_command: list[str] = [
             "tippecanoe",
-            f"--output={temp_pmtiles_points}",
+            f"--output={temp_pmtiles_points_file_path}",
             f"--maximum-zoom={zoom_threshold}",
             "--minimum-zoom=10",
             "-zg",
             "-aC",
             "-r0",
-            temp_geojson_points,
+            temp_geojson_points_file_path,
             "-l",
             "vacant_properties_tiles_points",
             "--force",
@@ -360,12 +391,12 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
         # Command for generating PMTiles for polygons from zoom level zoom_threshold
         polygons_command: list[str] = [
             "tippecanoe",
-            f"--output={temp_pmtiles_polygons}",
+            f"--output={temp_pmtiles_polygons_file_path}",
             f"--minimum-zoom={zoom_threshold}",
             "--maximum-zoom=16",
             "-zg",
             "--no-tile-size-limit",
-            temp_geojson_polygons,
+            temp_geojson_polygons_file_path,
             "-l",
             "vacant_properties_tiles_polygons",
             "--force",
@@ -374,10 +405,10 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
         # Command for merging the two PMTiles files into a single output file
         merge_command: list[str] = [
             "tile-join",
-            f"--output={temp_merged_pmtiles}",
+            f"--output={temp_merged_pmtiles_file_path}",
             "--no-tile-size-limit",
-            temp_pmtiles_polygons,
-            temp_pmtiles_points,
+            temp_pmtiles_polygons_file_path,
+            temp_pmtiles_points_file_path,
             "--force",
         ]

@@ -391,17 +422,17 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
         write_files.append(f"{tiles_file_id_prefix}.pmtiles")

         # Check whether the saved temp tiles file is big enough.
-        file_size: int = os.stat(temp_merged_pmtiles).st_size
+        file_size: int = os.stat(temp_merged_pmtiles_file_path).st_size
         if file_size < min_tiles_file_size_in_bytes:
             raise ValueError(
-                f"{temp_merged_pmtiles} is {file_size} bytes in size but should be at least {min_tiles_file_size_in_bytes}. Therefore, we are not uploading any files to the GCP bucket. The file may be corrupt or incomplete."
+                f"{temp_merged_pmtiles_file_path} is {file_size} bytes in size but should be at least {min_tiles_file_size_in_bytes}. Therefore, we are not uploading any files to the GCP bucket. The file may be corrupt or incomplete."
             )

         # Upload PMTiles to Google Cloud Storage
         for file in write_files:
             blob = bucket.blob(file)
             try:
-                blob.upload_from_filename(temp_merged_pmtiles)
+                blob.upload_from_filename(temp_merged_pmtiles_file_path)
                 print(f"PMTiles upload successful for {file}!")
             except Exception as e:
                 print(f"PMTiles upload failed for {file}: {e}")
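For readers without the repo checked out, the sketch below shows one plausible shape for the FileManager interface this diff leans on (save_gdf and get_file_path). It is an assumption for illustration only; the real class lives in src/new_etl/classes/file_manager.py and may differ.

# Hypothetical sketch of the FileManager API assumed by the diff above.
import os
from enum import Enum


class LoadType(Enum):
    TEMP = "tmp"  # assumed temp directory; the real mapping may differ


class FileType(Enum):
    GEOJSON = ".geojson"
    PARQUET = ".parquet"
    PMTILES = ".pmtiles"


class FileManager:
    def get_file_path(self, name: str, load_type: LoadType, file_type: FileType) -> str:
        # Resolve "<dir>/<name><ext>", creating the directory if needed.
        os.makedirs(load_type.value, exist_ok=True)
        return os.path.join(load_type.value, f"{name}{file_type.value}")

    def save_gdf(self, frame, name: str, load_type: LoadType, file_type: FileType) -> None:
        # Write a GeoDataFrame (or plain DataFrame, for Parquet) to the resolved path.
        path = self.get_file_path(name, load_type, file_type)
        if file_type is FileType.GEOJSON:
            frame.to_file(path, driver="GeoJSON")
        elif file_type is FileType.PARQUET:
            frame.to_parquet(path)
        else:
            raise ValueError(f"save_gdf does not support {file_type}")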