Skip to content

Commit d735a99

Browse files
authored
Update folder_based_builder.py
1 parent fc9efbc commit d735a99

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,10 @@ def _info(self):
6767

6868
return datasets.DatasetInfo(features=self.config.features)
6969

70-
def _split_generators(self, dl_manager):
70+
def _available_splits(self) -> Optional[List[str]]:
71+
return [str(split) for split in self.config.data_files] if isinstance(self.config.data_files, dict) else None
72+
73+
def _split_generators(self, dl_manager, splits: Optional[List[str]] = None):
7174
if not self.config.data_files:
7275
raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
7376
dl_manager.download_config.extract_on_the_fly = True
@@ -119,7 +122,8 @@ def analyze(files_or_archives, downloaded_files_or_dirs, split):
119122
f"The file '{original_file_name}' from the archive '{archive_file_name}' was ignored: it is not a {self.BASE_COLUMN_NAME}, and is not {metadata_filenames} either."
120123
)
121124

122-
data_files = self.config.data_files
125+
if splits and isinstance(data_files, dict):
126+
data_files = {split: data_files[split] for split in splits}
123127
splits = []
124128
for split_name, files in data_files.items():
125129
if isinstance(files, str):

0 commit comments

Comments
 (0)