@@ -128,19 +128,7 @@ def download_content(self, temporary_file_path):
128128 logger = self .logger ,
129129 )
130130 is_zip = zipfile .is_zipfile (temporary_file_path )
131- extracted_files_path = None
132- if is_zip :
133- extracted_files_path = os .path .join (
134- temporary_file_path .split ("." )[0 ], "extracted"
135- )
136- # Create the directory for extracted files if it does not exist
137- os .makedirs (extracted_files_path , exist_ok = True )
138- with zipfile .ZipFile (temporary_file_path , "r" ) as zip_ref :
139- zip_ref .extractall (path = extracted_files_path )
140- # List all files in the extracted directory
141- extracted_files = os .listdir (extracted_files_path )
142- self .logger .info (f"Extracted files: { extracted_files } " )
143- return file_hash , is_zip , extracted_files_path
131+ return file_hash , is_zip
144132
145133 def upload_file_to_storage (
146134 self , source_file_path , dataset_stable_id , extracted_files_path
@@ -209,12 +197,14 @@ def upload_dataset(self) -> DatasetFile or None:
209197 self .logger .info (
210198 f"[{ self .feed_stable_id } ] File hash is { file_sha256_hash } ."
211199 )
212-
213200 if self .latest_hash != file_sha256_hash :
214201 self .logger .info (
215202 f"[{ self .feed_stable_id } ] Dataset has changed (hash { self .latest_hash } "
216203 f"-> { file_sha256_hash } ). Uploading new version."
217204 )
205+ extracted_files_path = self .unzip_files (
206+ extracted_files_path , temp_file_path
207+ )
218208 self .logger .info (
219209 f"Creating file { self .feed_stable_id } /latest.zip in bucket { self .bucket_name } "
220210 )
@@ -252,6 +242,18 @@ def upload_dataset(self) -> DatasetFile or None:
252242 os .remove (temp_file_path )
253243 return None
254244
def unzip_files(self, extracted_files_path, temp_file_path):
    """
    Extract the zip archive at ``temp_file_path`` into a sibling directory.

    The destination is ``<temp_file_path without its extension>/extracted``.
    It is created if it does not already exist.

    :param extracted_files_path: ignored; the destination is always derived
        from ``temp_file_path``. Kept so existing callers keep working.
    :param temp_file_path: path to the downloaded zip archive
    :return: path of the directory the archive was extracted into
    :raises zipfile.BadZipFile: if ``temp_file_path`` is not a valid zip file
    :raises OSError: if the destination directory cannot be created
    """
    # splitext only strips the final extension of the last path component.
    # Splitting on the first "." would truncate paths that contain a dot in
    # a directory name (e.g. "/tmp/data.v1/feed.zip") or that start with "./".
    archive_root, _ = os.path.splitext(temp_file_path)
    target_dir = os.path.join(archive_root, "extracted")
    self.logger.info(f"Unzipping files to {target_dir}")
    # Create the directory for extracted files if it does not exist
    os.makedirs(target_dir, exist_ok=True)
    with zipfile.ZipFile(temp_file_path, "r") as zip_ref:
        zip_ref.extractall(path=target_dir)
    # List all files in the extracted directory
    extracted_files = os.listdir(target_dir)
    self.logger.info(f"Extracted files: {extracted_files}")
    return target_dir
256+
255257 def generate_temp_filename (self ):
256258 """
257259 Generates a temporary filename
0 commit comments