66import os
77import warnings
88from collections import defaultdict
9- from functools import wraps
109from pathlib import Path
1110from zipfile import ZipFile
1211
@@ -58,6 +57,10 @@ def _get_data_from_bucket(object_key, bucket, client):
5857 return response ['Body' ].read ()
5958
6059
60+ def _get_dataset_name_from_prefix (prefix ):
61+ return prefix .split ('/' )[1 ]
62+
63+
6164def _list_objects (prefix , bucket , client ):
6265 """List all objects under a given prefix using pagination.
6366
@@ -79,7 +82,21 @@ def _list_objects(prefix, bucket, client):
7982 contents .extend (resp .get ('Contents' , []))
8083
8184 if not contents :
82- raise DemoResourceNotFoundError (f"No objects found under '{ prefix } ' in bucket '{ bucket } '." )
85+ prefix_parts = prefix .split ('/' )
86+ modality = prefix_parts [0 ]
87+ dataset_name = _get_dataset_name_from_prefix (prefix )
88+ if dataset_name :
89+ raise DemoResourceNotFoundError (
90+ f'Could not download dataset { dataset_name } from bucket { bucket } . '
91+ 'Make sure the bucket name is correct. If the bucket is private '
92+ 'make sure to provide your credentials.'
93+ )
94+ else :
95+ raise DemoResourceNotFoundError (
96+ f'Could not list datasets in modality { modality } from bucket { bucket } . '
97+ 'Make sure the bucket name is correct. If the bucket is private '
98+ 'make sure to provide your credentials.'
99+ )
83100
84101 return contents
85102
@@ -109,7 +126,7 @@ def _search_contents_keys(contents, match_fn):
109126 return matches
110127
111128
112- def _find_data_zip_key (contents , dataset_prefix ):
129+ def _find_data_zip_key (contents , dataset_prefix , bucket ):
113130 """Find the 'data.zip' object key under dataset prefix, case-insensitive.
114131
115132 Args:
@@ -131,7 +148,11 @@ def is_data_zip(key):
131148 if matches :
132149 return matches [0 ]
133150
134- raise DemoResourceNotFoundError ("Could not find 'data.zip' for the requested dataset." )
151+ dataset_name = _get_dataset_name_from_prefix (dataset_prefix )
152+ raise DemoResourceNotFoundError (
153+ f"Could not download dataset '{ dataset_name } ' from bucket '{ bucket } '. "
154+ "The dataset is missing 'data.zip' file."
155+ )
135156
136157
137158def _get_first_v1_metadata_bytes (contents , dataset_prefix , bucket , client ):
@@ -167,61 +188,13 @@ def is_direct_json_under_prefix(key):
167188 except Exception :
168189 continue
169190
191+ dataset_name = _get_dataset_name_from_prefix (dataset_prefix )
170192 raise DemoResourceNotFoundError (
171- 'Could not find a valid metadata JSON with METADATA_SPEC_VERSION "V1".'
193+ f"Could not download dataset '{ dataset_name } ' from bucket '{ bucket } '. "
194+ 'The dataset is missing a valid metadata.'
172195 )
173196
174197
175- def handle_download_failure ():
176- """Decorator to handle download exceptions.
177-
178- Returns:
179- func:
180- A wrapped function.
181- """
182-
183- def decorator (func ):
184- @wraps (func )
185- def wrapper (* args , ** kwargs ):
186- try :
187- function_result = func (* args , ** kwargs )
188- except DemoResourceNotFoundError as error :
189- bucket = kwargs .get ('bucket' , args [2 ])
190- dataset_name = kwargs .get ('dataset_name' , args [1 ])
191- error_msg = error .message
192- if 'No objects found under' in error_msg :
193- raise DemoResourceNotFoundError (
194- f'Could not download dataset { dataset_name } from bucket { bucket } . '
195- 'Make sure the bucket name is correct. If the bucket is private '
196- 'make sure to provide your credentials.'
197- ) from error
198-
199- if 'METADATA_SPEC_VERSION' in error_msg or 'metadata' in error_msg :
200- raise DemoResourceNotFoundError (
201- f'Could not download dataset { dataset_name } from bucket { bucket } . '
202- 'The dataset is missing a valid metadata.'
203- ) from error
204-
205- if 'no csv files were found in data.zip' in error_msg :
206- raise DemoResourceNotFoundError (
207- f'Could not download dataset { dataset_name } from bucket { bucket } . '
208- 'The dataset is missing `csv` file/s.'
209- ) from error
210-
211- if "Could not find 'data.zip'" in error_msg :
212- raise DemoResourceNotFoundError (
213- f'Could not download dataset { dataset_name } from bucket { bucket } . '
214- "The dataset is missing 'data.zip' file."
215- ) from error
216-
217- return function_result
218-
219- return wrapper
220-
221- return decorator
222-
223-
224- @handle_download_failure ()
225198def _download (modality , dataset_name , bucket , credentials = None ):
226199 """Download dataset resources from a bucket.
227200
@@ -237,7 +210,7 @@ def _download(modality, dataset_name, bucket, credentials=None):
237210 f'{ bucket_url } /{ dataset_prefix } '
238211 )
239212 contents = _list_objects (dataset_prefix , bucket = bucket , client = client )
240- zip_key = _find_data_zip_key (contents , dataset_prefix )
213+ zip_key = _find_data_zip_key (contents , dataset_prefix , bucket )
241214 zip_bytes = _get_data_from_bucket (zip_key , bucket = bucket , client = client )
242215 metadata_bytes = _get_first_v1_metadata_bytes (
243216 contents , dataset_prefix , bucket = bucket , client = client
@@ -313,7 +286,7 @@ def _get_data_without_output_folder(in_memory_directory):
313286 return data , skipped_files
314287
315288
316- def _get_data (modality , output_folder_name , in_memory_directory ):
289+ def _get_data (modality , output_folder_name , in_memory_directory , bucket , dataset_name ):
317290 if output_folder_name :
318291 data , skipped_files = _get_data_with_output_folder (output_folder_name )
319292 else :
@@ -324,7 +297,8 @@ def _get_data(modality, output_folder_name, in_memory_directory):
324297
325298 if not data :
326299 raise DemoResourceNotFoundError (
327- 'Demo data could not be downloaded because no csv files were found in data.zip'
300+ f"Could not download dataset '{ dataset_name } ' from bucket '{ bucket } '. "
301+ 'The dataset is missing `csv` file/s.'
328302 )
329303
330304 if modality != 'multi_table' :
@@ -352,7 +326,10 @@ def _get_metadata(metadata_bytes, dataset_name, output_folder_name=None):
352326 metadict = json .loads (metadata_bytes )
353327 metadata = Metadata ().load_from_dict (metadict , dataset_name )
354328 except Exception as e :
355- raise DemoResourceNotFoundError ('Failed to parse metadata JSON for the dataset.' ) from e
329+ raise DemoResourceNotFoundError (
330+ f"Could not parse the metadata for dataset '{ dataset_name } '. "
331+ 'The dataset is missing a valid metadata file.'
332+ ) from e
356333
357334 if output_folder_name :
358335 try :
@@ -413,7 +390,13 @@ def download_demo(
413390
414391 data_io , metadata_bytes = _download (modality , dataset_name , s3_bucket_name , credentials )
415392 in_memory_directory = _extract_data (data_io , output_folder_name )
416- data = _get_data (modality , output_folder_name , in_memory_directory )
393+ data = _get_data (
394+ modality ,
395+ output_folder_name ,
396+ in_memory_directory ,
397+ s3_bucket_name ,
398+ dataset_name ,
399+ )
417400 metadata = _get_metadata (metadata_bytes , dataset_name , output_folder_name )
418401
419402 return data , metadata
0 commit comments