Skip to content

Commit b7819cd

Browse files
fix: raise error in FolderBasedBuilder when data_dir and data_files are missing (#7623)
* Update folder_based_builder.py * Update folder_based_builder.py * Update test_folder_based_builder.py --------- Co-authored-by: Quentin Lhoest <[email protected]>
1 parent e199f19 commit b7819cd

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ class FolderBasedBuilder(datasets.GeneratorBasedBuilder):
5858
METADATA_FILENAMES: list[str] = ["metadata.csv", "metadata.jsonl", "metadata.parquet"]
5959

6060
def _info(self):
61+
if not self.config.data_dir and not self.config.data_files:
62+
raise ValueError(
63+
"Folder-based datasets require either `data_dir` or `data_files` to be specified. "
64+
"Neither was provided."
65+
)
66+
6167
return datasets.DatasetInfo(features=self.config.features)
6268

6369
def _split_generators(self, dl_manager):

tests/packaged_modules/test_folder_based_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def test_default_folder_builder_not_usable(data_files_with_labels_no_metadata, c
285285
# test that AutoFolder is extended for streaming when its child class is instantiated:
286286
# see line 115 in src/datasets/streaming.py
287287
def test_streaming_patched():
288-
_ = DummyFolderBasedBuilder()
288+
_ = DummyFolderBasedBuilder(data_dir=".")
289289
module = importlib.import_module(FolderBasedBuilder.__module__)
290290
assert hasattr(module, "_patched_for_streaming")
291291
assert module._patched_for_streaming

0 commit comments

Comments
 (0)