@@ -122,13 +122,14 @@ def create_dataset_stable_id(feed_stable_id, timestamp):
122122 """
123123 return f"{ feed_stable_id } -{ timestamp } "
124124
125- def download_content (self , temporary_file_path ):
125+ def download_content (self , temporary_file_path , stable_id ):
126126 """
127127 Downloads the content of a URL and return the hash of the file
128128 """
129129 file_hash = download_and_get_hash (
130130 self .producer_url ,
131131 temporary_file_path ,
132+ stable_id ,
132133 authentication_type = self .authentication_type ,
133134 api_key_parameter_name = self .api_key_parameter_name ,
134135 credentials = self .feed_credentials ,
@@ -193,7 +194,7 @@ def upload_files_to_storage(
193194 )
194195 return blob , extracted_files
195196
196- def upload_dataset (self , public = True ) -> DatasetFile or None :
197+ def upload_dataset (self , stable_id , public = True ) -> DatasetFile or None :
197198 """
198199 Uploads a dataset to a GCP bucket as <feed_stable_id>/latest.zip and
199200 <feed_stable_id>/<feed_stable_id>-<upload_datetime>.zip
@@ -204,7 +205,7 @@ def upload_dataset(self, public=True) -> DatasetFile or None:
204205 try :
205206 self .logger .info ("Accessing URL %s" , self .producer_url )
206207 temp_file_path = self .generate_temp_filename ()
207- file_sha256_hash , is_zip = self .download_content (temp_file_path )
208+ file_sha256_hash , is_zip = self .download_content (temp_file_path , stable_id )
208209 if not is_zip :
209210 self .logger .error (
210211 f"[{ self .feed_stable_id } ] The downloaded file from { self .producer_url } is not a valid ZIP file."
@@ -416,12 +417,12 @@ def _get_unzipped_size(dataset_file):
416417 )
417418
418419 @with_db_session
419- def process_from_producer_url (self , db_session ) -> Optional [DatasetFile ]:
420+ def process_from_producer_url (self , db_session , stable_id ) -> Optional [DatasetFile ]:
420421 """
421422 Process the dataset and store new version in GCP bucket if any changes are detected
422423 :return: the DatasetFile object created
423424 """
424- dataset_file = self .upload_dataset ()
425+ dataset_file = self .upload_dataset (stable_id )
425426
426427 if dataset_file is None :
427428 self .logger .info (f"[{ self .feed_stable_id } ] No database update required." )
@@ -542,7 +543,7 @@ def process_dataset(cloud_event: CloudEvent):
542543 if json_payload .get ("use_bucket_latest" , False ):
543544 dataset_file = processor .process_from_bucket ()
544545 else :
545- dataset_file = processor .process_from_producer_url ()
546+ dataset_file = processor .process_from_producer_url (stable_id )
546547 except Exception as e :
547548 # This makes sure the logger is initialized
548549 logger = get_logger ("process_dataset" , stable_id if stable_id else "UNKNOWN" )
0 commit comments