 #
 
 import logging
-import os
 from typing import List, Final, Optional
 
-from google.cloud import storage
 from sqlalchemy import func, distinct
 from sqlalchemy.orm import Session, selectinload
 
3230 "trips.txt" ,
3331 "stop_times.txt" ,
3432]
35- PMTILES_FILES : Final [List [str ]] = [
36- "pmtiles/stops.pmtiles" ,
37- "pmtiles/routes.pmtiles" ,
38- "pmtiles/routes.json" ,
39- ]
4033
4134
4235def rebuild_missing_visualization_files_handler (payload ) -> dict :
@@ -49,6 +42,8 @@ def rebuild_missing_visualization_files_handler(payload) -> dict:
4942 "check_existing": bool, # [optional] If True, check if visualization files already exist before creating tasks
5043 "latest_only": bool, # [optional] If True, include only latest datasets
5144 "include_deprecated_feeds": bool, # [optional] If True, include datasets from deprecated feeds
45+ "include_feed_op_status": list[str], # [optional] List of feed operational statuses to include
46+ # e.g., ["published", "wip"]. Default is ["published"].
5247 "limit": int, # [optional] Limit the number of datasets to process
5348 }
5449 Args:
@@ -58,41 +53,41 @@ def rebuild_missing_visualization_files_handler(payload) -> dict:
5853 """
5954 (
6055 dry_run ,
61- bucket_name ,
6256 check_existing ,
6357 latest_only ,
6458 include_deprecated_feeds ,
59+ include_feed_op_status ,
6560 limit ,
6661 ) = get_parameters (payload )
6762
6863 return rebuild_missing_visualization_files (
6964 dry_run = dry_run ,
70- bucket_name = bucket_name ,
7165 check_existing = check_existing ,
7266 latest_only = latest_only ,
67+ include_feed_op_status = include_feed_op_status ,
7368 include_deprecated_feeds = include_deprecated_feeds ,
7469 limit = limit ,
7570 )
7671
 
 
 @with_db_session
 def rebuild_missing_visualization_files(
-    bucket_name: str,
     dry_run: bool = True,
     check_existing: bool = True,
     latest_only: bool = True,
     include_deprecated_feeds: bool = False,
+    include_feed_op_status: list[str] = ["published"],
     limit: Optional[int] = None,
     db_session: Session | None = None,
 ) -> dict:
     """
     Rebuilds missing visualization files for GTFS datasets.
     Args:
-        bucket_name (str): The name of the bucket containing the GTFS data.
         dry_run (bool): dry run flag. If True, do not execute the workflow. Default: True
         check_existing (bool): If True, check if visualization files already exist before creating tasks. Default: True
         latest_only (bool): If True, include only latest datasets. Default: True
         include_deprecated_feeds (bool): If True, include datasets from deprecated feeds. Default: False
+        include_feed_op_status (list[str]): List of feed operational statuses to include. Default: ['published']
         limit (Optional[int]): Limit the number of datasets to process. Default: None (no limit)
         db_session: DB session
 
@@ -107,7 +102,16 @@ def rebuild_missing_visualization_files(
         datasets_query = datasets_query.filter(
             Gtfsdataset.feed.has(Gtfsfeed.status != "deprecated")
         )
-
+    if include_feed_op_status:
+        datasets_query = datasets_query.filter(
+            Gtfsdataset.feed.has(
+                Gtfsfeed.operational_status.in_(include_feed_op_status)
+            )
+        )
+    if check_existing:
+        datasets_query = datasets_query.join(
+            Gtfsfeed, Gtfsdataset.feed_id == Gtfsfeed.id
+        ).filter(Gtfsfeed.visualization_dataset_id.is_(None))
     datasets_query = (
         datasets_query.join(Gtfsdataset.gtfsfiles)
         .filter(Gtfsfile.file_name.in_(REQUIRED_FILES))
@@ -122,41 +126,14 @@ def rebuild_missing_visualization_files(
     datasets = datasets_query.all()
     logging.info(f"Found {len(datasets)} latest datasets with all required files.")
 
-    # Validate visualization files existence in the storage bucket
-    client = storage.Client()
-    bucket = client.get_bucket(bucket_name)
     tasks_to_create = []
     for dataset in datasets:
-        if not check_existing:
-            tasks_to_create.append(
-                {
-                    "feed_stable_id": dataset.feed.stable_id,
-                    "dataset_stable_id": dataset.stable_id,
-                }
-            )
-        else:
-            # Check if visualization files already exist
-            all_files_exist = True
-            for file_suffix in PMTILES_FILES:
-                file_path = (
-                    f"{dataset.feed.stable_id}/{dataset.stable_id}/{file_suffix}"
-                )
-                blob = bucket.blob(file_path)
-                if not blob.exists():
-                    all_files_exist = False
-                    logging.info(f"Missing visualization file: {file_path}")
-                    break
-            if not all_files_exist:
-                tasks_to_create.append(
-                    {
-                        "feed_stable_id": dataset.feed.stable_id,
-                        "dataset_stable_id": dataset.stable_id,
-                    }
-                )
-            else:
-                logging.info(
-                    f"All visualization files exist for dataset {dataset.stable_id}. Skipping."
-                )
+        tasks_to_create.append(
+            {
+                "feed_stable_id": dataset.feed.stable_id,
+                "dataset_stable_id": dataset.stable_id,
+            }
+        )
     total_processed = len(tasks_to_create)
     logging.info(f"Total datasets to process: {total_processed}")
 
@@ -177,7 +154,6 @@ def rebuild_missing_visualization_files(
177154 "total_processed" : total_processed ,
178155 "params" : {
179156 "dry_run" : dry_run ,
180- "bucket_name" : bucket_name ,
181157 "check_existing" : check_existing ,
182158 "latest_only" : latest_only ,
183159 "include_deprecated_feeds" : include_deprecated_feeds ,
@@ -199,9 +175,6 @@ def get_parameters(payload):
199175 """
200176 dry_run = payload .get ("dry_run" , True )
201177 dry_run = dry_run if isinstance (dry_run , bool ) else str (dry_run ).lower () == "true"
202- bucket_name = os .getenv ("DATASETS_BUCKET_NAME" )
203- if not bucket_name :
204- raise EnvironmentError ("DATASETS_BUCKET_NAME environment variable is not set." )
205178 check_existing = payload .get ("check_existing" , True )
206179 check_existing = (
207180 check_existing
@@ -220,13 +193,14 @@ def get_parameters(payload):
         if isinstance(include_deprecated_feeds, bool)
         else str(include_deprecated_feeds).lower() == "true"
     )
+    include_feed_op_status = payload.get("include_feed_op_status", ["published"])
     limit = payload.get("limit", None)
     limit = limit if isinstance(limit, int) and limit > 0 else None
     return (
         dry_run,
-        bucket_name,
         check_existing,
         latest_only,
         include_deprecated_feeds,
+        include_feed_op_status,
         limit,
     )
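For reference, a payload that exercises the new parameter might look like the sketch below. The values are illustrative, the status strings follow the docstring example in this diff, and the accepted values ultimately depend on what Gtfsfeed.operational_status stores.

# Illustrative payload for rebuild_missing_visualization_files_handler (a sketch, not part of the diff).
payload = {
    "dry_run": True,                                 # preview only; no tasks are created
    "check_existing": True,                          # skip feeds whose visualization_dataset_id is already set
    "latest_only": True,
    "include_deprecated_feeds": False,
    "include_feed_op_status": ["published", "wip"],  # new parameter introduced by this change
    "limit": 100,                                    # illustrative cap on the number of datasets
}
result = rebuild_missing_visualization_files_handler(payload)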