 
 from shared.common.gcp_utils import create_refresh_materialized_view_task
 from shared.database.database import with_db_session
-from shared.database_gen.sqlacodegen_models import Gtfsdataset, Gtfsfile
+from shared.database_gen.sqlacodegen_models import Gtfsdataset, Gtfsfile, Gtfsfeed
 from shared.dataset_service.main import DatasetTraceService, DatasetTrace, Status
 from shared.helpers.logger import init_logger, get_logger
 from shared.helpers.utils import (
@@ -84,7 +84,9 @@ def __init__(
         self.api_key_parameter_name = api_key_parameter_name
         self.date = datetime.now().strftime("%Y%m%d%H%M")
         if self.authentication_type != 0:
-            self.logger.info(f"Getting feed credentials for feed {self.feed_stable_id}")
+            self.logger.info(
+                "Getting feed credentials for feed %s", self.feed_stable_id
+            )
             self.feed_credentials = self.get_feed_credentials(self.feed_stable_id)
             if self.feed_credentials is None:
                 raise Exception(
@@ -135,7 +137,7 @@ def download_content(self, temporary_file_path, feed_id):
             credentials=self.feed_credentials,
             logger=self.logger,
         )
-        self.logger.info(f"hash is: {file_hash}")
+        self.logger.info("hash is: %s", file_hash)
         is_zip = zipfile.is_zipfile(temporary_file_path)
         return file_hash, is_zip
 
@@ -168,7 +170,7 @@ def upload_files_to_storage(
         extracted_files: List[Gtfsfile] = []
         if not extracted_files_path or not os.path.exists(extracted_files_path):
             self.logger.warning(
-                f"Extracted files path {extracted_files_path} does not exist."
+                "Extracted files path %s does not exist.", extracted_files_path
             )
             return blob, extracted_files
         self.logger.info("Processing extracted files from %s", extracted_files_path)
@@ -182,7 +184,7 @@ def upload_files_to_storage(
             if public:
                 file_blob.make_public()
             self.logger.info(
-                f"Uploaded extracted file {file_name} to {file_blob.public_url}"
+                "Uploaded extracted file %s to %s", file_name, file_blob.public_url
             )
             extracted_files.append(
                 Gtfsfile(
@@ -209,7 +211,8 @@ def upload_dataset(self, feed_id, public=True) -> DatasetFile or None:
         file_sha256_hash, is_zip = self.download_content(temp_file_path, feed_id)
         if not is_zip:
             self.logger.error(
-                f"[{self.feed_stable_id}] The downloaded file from {self.producer_url} is not a valid ZIP file."
+                "The downloaded file from %s is not a valid ZIP file.",
+                self.producer_url,
             )
             return None
 
@@ -299,17 +302,18 @@ def process_from_bucket(self, db_session, public=True) -> Optional[DatasetFile]:
                 else None
             ),
         )
-        dataset = self.create_dataset_entities(
+        dataset, latest = self.create_dataset_entities(
             dataset_file, skip_dataset_creation=True, db_session=db_session
         )
-        if dataset and dataset.latest:
+        if dataset and latest:
             self.logger.info(
-                f"Creating pipeline tasks for latest dataset {dataset.stable_id}"
+                "Creating pipeline tasks for latest dataset %s", dataset.stable_id
             )
             create_pipeline_tasks(dataset)
         elif dataset:
             self.logger.info(
-                f"Dataset {dataset.stable_id} is not the latest, skipping pipeline tasks creation."
+                "Dataset %s is not the latest, skipping pipeline tasks creation.",
+                dataset.stable_id,
             )
         else:
             raise ValueError("Dataset update failed, dataset is None.")
@@ -352,26 +356,24 @@ def create_dataset_entities(
352356 """
353357 try :
354358 # Check latest version of the dataset
355- latest_dataset = (
356- db_session .query (Gtfsdataset )
357- .filter_by (latest = True , feed_id = self .feed_id )
358- .one_or_none ()
359+ gtfs_feed : Gtfsfeed | None = (
360+ db_session .query (Gtfsfeed ).filter_by (id = self .feed_id ).one_or_none ()
359361 )
362+ latest_dataset = gtfs_feed .latest_dataset
360363 if not latest_dataset :
361- self .logger .info (
362- f"[{ self .feed_stable_id } ] No latest dataset found for feed."
363- )
364+ self .logger .info ("No latest dataset found for feed." )
364365
365366 dataset = None
367+ latest = True if latest_dataset is not None else False
366368 if not skip_dataset_creation :
367369 self .logger .info (
368- f"[{ self .feed_stable_id } ] Creating new dataset for feed with stable id { dataset_file .stable_id } ."
370+ "Creating new dataset for feed with stable id %s." ,
371+ dataset_file .stable_id ,
369372 )
370373 dataset = Gtfsdataset (
371374 id = str (uuid .uuid4 ()),
372375 feed_id = self .feed_id ,
373376 stable_id = dataset_file .stable_id ,
374- latest = True ,
375377 bounding_box = None ,
376378 note = None ,
377379 hash = dataset_file .file_sha256_hash ,
@@ -386,10 +388,14 @@ def create_dataset_entities(
                     unzipped_size_bytes=self._get_unzipped_size(dataset_file),
                 )
                 db_session.add(dataset)
+                # update the latest dataset relationship in the feed
+                db_session.flush()
+                gtfs_feed.latest_dataset = dataset
+                latest = True
             elif skip_dataset_creation and latest_dataset:
                 self.logger.info(
-                    f"[{self.feed_stable_id}] Updating latest dataset for feed with stable id "
-                    f"{latest_dataset.stable_id}."
+                    "Updating latest dataset for feed with stable id %s",
+                    latest_dataset.stable_id,
                 )
                 latest_dataset.gtfsfiles = (
                     dataset_file.extracted_files if dataset_file.extracted_files else []
@@ -400,13 +406,12 @@ def create_dataset_entities(
                 )
 
             if latest_dataset and not skip_dataset_creation:
-                latest_dataset.latest = False
                 db_session.add(latest_dataset)
             db_session.commit()
-            self.logger.info(f"[{self.feed_stable_id}] Dataset created successfully.")
+            self.logger.info("Dataset created successfully.")
 
             create_refresh_materialized_view_task()
-            return latest_dataset if skip_dataset_creation else dataset
+            return latest_dataset if skip_dataset_creation else dataset, latest
         except Exception as e:
             raise Exception(f"Error creating dataset: {e}")
 
@@ -431,7 +436,7 @@ def process_from_producer_url(
         if dataset_file is None:
             self.logger.info(f"[{self.feed_stable_id}] No database update required.")
             return None
-        dataset = self.create_dataset_entities(dataset_file, db_session=db_session)
+        dataset, _ = self.create_dataset_entities(dataset_file, db_session=db_session)
         create_pipeline_tasks(dataset)
         return dataset_file
 
@@ -577,7 +582,8 @@ def process_dataset(cloud_event: CloudEvent):
         )
         return f"Function completed with errors, missing stable={stable_id} or execution_id={execution_id}"
     logger.info(
-        f"Function %s in execution: [{execution_id}]",
+        "Function %s in execution: %s",
+        execution_id,
         "successfully completed" if not error_message else "Failed",
     )
     return "Completed." if error_message is None else error_message
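
Note on the changed contract: create_dataset_entities now returns a (dataset, latest) pair instead of a dataset carrying a latest flag. A minimal usage sketch, mirroring the call sites in this diff (the downloader, dataset_file, and db_session names are illustrative placeholders, not part of the PR):

    # Sketch only: unpack both values; "latest" replaces the old dataset.latest check.
    dataset, latest = downloader.create_dataset_entities(
        dataset_file, skip_dataset_creation=True, db_session=db_session
    )
    if dataset and latest:
        create_pipeline_tasks(dataset)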