Skip to content

Commit 2df6140

Browse files
authored
Update parquet.py
1 parent 486e66b commit 2df6140

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

src/datasets/packaged_modules/parquet/parquet.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,12 +41,18 @@ def _info(self):
4141
)
4242
return datasets.DatasetInfo(features=self.config.features)
4343

44-
def _split_generators(self, dl_manager):
44+
def _available_splits(self) -> Optional[List[str]]:
45+
return [str(split) for split in self.config.data_files] if isinstance(self.config.data_files, dict) else None
46+
47+
def _split_generators(self, dl_manager, splits: Optional[List[str]] = None):
4548
"""We handle string, list and dicts in datafiles"""
4649
if not self.config.data_files:
4750
raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
4851
dl_manager.download_config.extract_on_the_fly = True
49-
data_files = dl_manager.download_and_extract(self.config.data_files)
52+
data_files = self.config.data_files
53+
if splits and isinstance(data_files, dict):
54+
data_files = {split: data_files[split] for split in splits}
55+
data_files = dl_manager.download_and_extract(data_files)
5056
splits = []
5157
for split_name, files in data_files.items():
5258
if isinstance(files, str):

0 commit comments

Comments
 (0)