Commit e94e153

Raise errors from the functions themselves instead of a try/except approach.
1 parent 2f3c8a2 commit e94e153

3 files changed: +96 -68 lines
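For context, this commit removes a decorator that re-raised `DemoResourceNotFoundError` based on substring matches in the original message, and instead builds the contextual error at the point of failure, where the dataset name and bucket are already known. The sketch below is an illustrative, self-contained contrast of the two patterns; `rewrap_errors` and `list_objects` are placeholder names, and the `DemoResourceNotFoundError` class defined here is only a stand-in to make the sketch runnable, not the project's code.

```python
class DemoResourceNotFoundError(Exception):
    """Stand-in for sdv's error class, defined here only so the sketch runs."""


# Before (the approach removed in this commit): a wrapper catches an already-raised
# error, inspects its message, and re-raises a friendlier one.
def rewrap_errors(func):
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except DemoResourceNotFoundError as error:
            if 'No objects found' in str(error):
                raise DemoResourceNotFoundError(
                    'Could not download the dataset. Make sure the bucket name is correct.'
                ) from error
            raise

    return wrapper


# After (the pattern this commit moves to): the function that detects the failure
# already has the context, so it raises the final message directly.
def list_objects(contents, dataset_name, bucket):
    if not contents:
        raise DemoResourceNotFoundError(
            f'Could not download dataset {dataset_name} from bucket {bucket}. '
            'Make sure the bucket name is correct.'
        )
    return contents
```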


sdv/datasets/demo.py

Lines changed: 43 additions & 60 deletions
@@ -6,7 +6,6 @@
 import os
 import warnings
 from collections import defaultdict
-from functools import wraps
 from pathlib import Path
 from zipfile import ZipFile

@@ -58,6 +57,10 @@ def _get_data_from_bucket(object_key, bucket, client):
     return response['Body'].read()


+def _get_dataset_name_from_prefix(prefix):
+    return prefix.split('/')[1]
+
+
 def _list_objects(prefix, bucket, client):
     """List all objects under a given prefix using pagination.
@@ -79,7 +82,21 @@ def _list_objects(prefix, bucket, client):
         contents.extend(resp.get('Contents', []))

     if not contents:
-        raise DemoResourceNotFoundError(f"No objects found under '{prefix}' in bucket '{bucket}'.")
+        prefix_parts = prefix.split('/')
+        modality = prefix_parts[0]
+        dataset_name = _get_dataset_name_from_prefix(prefix)
+        if dataset_name:
+            raise DemoResourceNotFoundError(
+                f'Could not download dataset {dataset_name} from bucket {bucket}. '
+                'Make sure the bucket name is correct. If the bucket is private '
+                'make sure to provide your credentials.'
+            )
+        else:
+            raise DemoResourceNotFoundError(
+                f'Could not list datasets in modality {modality} from bucket {bucket}. '
+                'Make sure the bucket name is correct. If the bucket is private '
+                'make sure to provide your credentials.'
+            )

     return contents

@@ -109,7 +126,7 @@ def _search_contents_keys(contents, match_fn):
     return matches


-def _find_data_zip_key(contents, dataset_prefix):
+def _find_data_zip_key(contents, dataset_prefix, bucket):
    """Find the 'data.zip' object key under dataset prefix, case-insensitive.

    Args:
@@ -131,7 +148,11 @@ def is_data_zip(key):
     if matches:
         return matches[0]

-    raise DemoResourceNotFoundError("Could not find 'data.zip' for the requested dataset.")
+    dataset_name = _get_dataset_name_from_prefix(dataset_prefix)
+    raise DemoResourceNotFoundError(
+        f"Could not download dataset '{dataset_name}' from bucket '{bucket}'. "
+        "The dataset is missing 'data.zip' file."
+    )


 def _get_first_v1_metadata_bytes(contents, dataset_prefix, bucket, client):
@@ -167,61 +188,13 @@ def is_direct_json_under_prefix(key):
         except Exception:
             continue

+    dataset_name = _get_dataset_name_from_prefix(dataset_prefix)
     raise DemoResourceNotFoundError(
-        'Could not find a valid metadata JSON with METADATA_SPEC_VERSION "V1".'
+        f"Could not download dataset '{dataset_name}' from bucket '{bucket}'. "
+        'The dataset is missing a valid metadata.'
     )


-def handle_download_failure():
-    """Decorator to handle download exceptions.
-
-    Returns:
-        func:
-            A wrapped function.
-    """
-
-    def decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            try:
-                function_result = func(*args, **kwargs)
-            except DemoResourceNotFoundError as error:
-                bucket = kwargs.get('bucket', args[2])
-                dataset_name = kwargs.get('dataset_name', args[1])
-                error_msg = error.message
-                if 'No objects found under' in error_msg:
-                    raise DemoResourceNotFoundError(
-                        f'Could not download dataset {dataset_name} from bucket {bucket}. '
-                        'Make sure the bucket name is correct. If the bucket is private '
-                        'make sure to provide your credentials.'
-                    ) from error
-
-                if 'METADATA_SPEC_VERSION' in error_msg or 'metadata' in error_msg:
-                    raise DemoResourceNotFoundError(
-                        f'Could not download dataset {dataset_name} from bucket {bucket}. '
-                        'The dataset is missing a valid metadata.'
-                    ) from error
-
-                if 'no csv files were found in data.zip' in error_msg:
-                    raise DemoResourceNotFoundError(
-                        f'Could not download dataset {dataset_name} from bucket {bucket}. '
-                        'The dataset is missing `csv` file/s.'
-                    ) from error
-
-                if "Could not find 'data.zip'" in error_msg:
-                    raise DemoResourceNotFoundError(
-                        f'Could not download dataset {dataset_name} from bucket {bucket}. '
-                        "The dataset is missing 'data.zip' file."
-                    ) from error
-
-            return function_result
-
-        return wrapper
-
-    return decorator
-
-
-@handle_download_failure()
 def _download(modality, dataset_name, bucket, credentials=None):
     """Download dataset resources from a bucket.

@@ -237,7 +210,7 @@ def _download(modality, dataset_name, bucket, credentials=None):
         f'{bucket_url}/{dataset_prefix}'
     )
     contents = _list_objects(dataset_prefix, bucket=bucket, client=client)
-    zip_key = _find_data_zip_key(contents, dataset_prefix)
+    zip_key = _find_data_zip_key(contents, dataset_prefix, bucket)
     zip_bytes = _get_data_from_bucket(zip_key, bucket=bucket, client=client)
     metadata_bytes = _get_first_v1_metadata_bytes(
         contents, dataset_prefix, bucket=bucket, client=client
@@ -313,7 +286,7 @@ def _get_data_without_output_folder(in_memory_directory):
     return data, skipped_files


-def _get_data(modality, output_folder_name, in_memory_directory):
+def _get_data(modality, output_folder_name, in_memory_directory, bucket, dataset_name):
     if output_folder_name:
         data, skipped_files = _get_data_with_output_folder(output_folder_name)
     else:
@@ -324,7 +297,8 @@ def _get_data(modality, output_folder_name, in_memory_directory):

     if not data:
         raise DemoResourceNotFoundError(
-            'Demo data could not be downloaded because no csv files were found in data.zip'
+            f"Could not download dataset '{dataset_name}' from bucket '{bucket}'. "
+            'The dataset is missing `csv` file/s.'
         )

     if modality != 'multi_table':
@@ -352,7 +326,10 @@ def _get_metadata(metadata_bytes, dataset_name, output_folder_name=None):
         metadict = json.loads(metadata_bytes)
         metadata = Metadata().load_from_dict(metadict, dataset_name)
     except Exception as e:
-        raise DemoResourceNotFoundError('Failed to parse metadata JSON for the dataset.') from e
+        raise DemoResourceNotFoundError(
+            f"Could not parse the metadata for dataset '{dataset_name}'. "
+            'The dataset is missing a valid metadata file.'
+        ) from e

     if output_folder_name:
         try:
@@ -413,7 +390,13 @@ def download_demo(

     data_io, metadata_bytes = _download(modality, dataset_name, s3_bucket_name, credentials)
     in_memory_directory = _extract_data(data_io, output_folder_name)
-    data = _get_data(modality, output_folder_name, in_memory_directory)
+    data = _get_data(
+        modality,
+        output_folder_name,
+        in_memory_directory,
+        s3_bucket_name,
+        dataset_name,
+    )
     metadata = _get_metadata(metadata_bytes, dataset_name, output_folder_name)

     return data, metadata
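As a usage sketch (not part of the diff): with these changes a failed download surfaces a single `DemoResourceNotFoundError` whose message already names the dataset and bucket, so callers no longer depend on the decorator's message rewriting. The tests in this commit reference the error class but do not show its import path, so the `sdv.errors` import below is an assumption; `download_demo('single_table', 'fake_hotel_guests')` matches the call shape used in the tests.

```python
from sdv.datasets.demo import download_demo

# Assumed import path for the error class; adjust to wherever sdv defines it.
from sdv.errors import DemoResourceNotFoundError

try:
    data, metadata = download_demo('single_table', 'fake_hotel_guests')
except DemoResourceNotFoundError as error:
    # e.g. "Could not download dataset 'fake_hotel_guests' from bucket '...'. ..."
    print(error)
```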

tests/integration/datasets/test_demo.py

Lines changed: 40 additions & 1 deletion
@@ -1,6 +1,8 @@
 import pandas as pd
+import pytest

-from sdv.datasets.demo import get_available_demos
+from sdv.datasets.demo import download_demo, get_available_demos
+from sdv.metadata import Metadata


 def test_get_available_demos_single_table():
@@ -79,3 +81,40 @@ def test_get_available_demos_multi_table():
     assert not tables_info.empty
     assert (tables_info['num_tables'] > 1).all()
     assert (tables_info['size_MB'] >= 0).all()
+
+
+@pytest.mark.parametrize('output_path', [None, 'tmp_path'])
+def test_download_demo_single_table(output_path, tmp_path):
+    """Test that the `download_demo` function works as intended for single-table."""
+    # Run
+    output_folder_name = tmp_path / 'sdv' if output_path else None
+    data, metadata = download_demo(
+        modality='single_table',
+        dataset_name='fake_hotel_guests',
+        output_folder_name=output_folder_name,
+    )
+
+    # Assert
+    metadata.validate_data({'fake_hotel_guests': data})
+    assert len(data) > 1
+    assert isinstance(metadata, Metadata)
+
+
+@pytest.mark.parametrize('output_path', [None, 'tmp_path'])
+def test_download_demo_multi_table(output_path, tmp_path):
+    """Test that the `download_demo` function works as intended for multi-table."""
+    # Run
+    output_folder_name = tmp_path / 'sdv' if output_path else None
+    data, metadata = download_demo(
+        modality='multi_table',
+        dataset_name='fake_hotels',
+        output_folder_name=output_folder_name,
+    )
+
+    # Assert
+    metadata.validate_data(data)
+    expected_tables = ['hotels', 'guests']
+    assert set(expected_tables) == set(data)
+    assert isinstance(metadata, Metadata)
+    assert len(data['hotels']) > 1
+    assert len(data['guests']) > 1

tests/unit/datasets/test_demo.py

Lines changed: 13 additions & 7 deletions
@@ -355,7 +355,7 @@ def test__find_data_zip_key():
     dataset_prefix = 'single_table/fake_hotel_guests/'

     # Run
-    zip_key = _find_data_zip_key(contents, dataset_prefix)
+    zip_key = _find_data_zip_key(contents, dataset_prefix, 'bucket')

     # Assert
     assert zip_key == 'single_table/fake_hotel_guests/data.ZIP'
@@ -619,7 +619,7 @@ def test_download_demo_missing_zip_raises(mock_list):

     # Run and Assert
     expected_msg = (
-        'Could not download dataset word from bucket sdv-datasets-public. '
+        "Could not download dataset 'word' from bucket 'sdv-datasets-public'. "
         "The dataset is missing 'data.zip' file."
     )
     with pytest.raises(DemoResourceNotFoundError, match=expected_msg):
@@ -640,7 +640,7 @@ def test_download_demo_no_v1_metadata_raises(mock_list, mock_get):

     # Run and Assert
     error_msg = (
-        'Could not download dataset word from bucket sdv-datasets-public. '
+        "Could not download dataset 'word' from bucket 'sdv-datasets-public'. "
         'The dataset is missing a valid metadata.'
     )
     with pytest.raises(DemoResourceNotFoundError, match=error_msg):
@@ -677,8 +677,11 @@ def test__get_metadata_warns_on_save_error(_mock_open, tmp_path):
 def test__get_metadata_raises_on_invalid_json():
     """_get_metadata should raise a helpful error when JSON is invalid."""
     # Run / Assert
-    err = 'Failed to parse metadata JSON for the dataset.'
-    with pytest.raises(DemoResourceNotFoundError, match=re.escape(err)):
+    error_msg = (
+        "Could not parse the metadata for dataset 'dataset1'. "
+        'The dataset is missing a valid metadata file.'
+    )
+    with pytest.raises(DemoResourceNotFoundError, match=error_msg):
         _get_metadata(b'not-json', 'dataset1')


@@ -1105,8 +1108,11 @@ def test_download_demo_raises_when_no_csv_in_zip_single_table(mock_list, mock_ge
     )

     # Run and Assert
-    msg = 'Demo data could not be downloaded because no csv files were found in data.zip'
-    with pytest.raises(DemoResourceNotFoundError, match=re.escape(msg)):
+    error_msg = (
+        "Could not download dataset 'word' from bucket 'sdv-datasets-public'. "
+        'The dataset is missing `csv` file/s.'
+    )
+    with pytest.raises(DemoResourceNotFoundError, match=error_msg):
         download_demo('single_table', 'word')


