Skip to content

Commit 486e66b

Browse files
authored
Update json.py
1 parent d735a99 commit 486e66b

File tree

1 file changed

+9
-3
lines changed
  • src/datasets/packaged_modules/json

1 file changed

+9
-3
lines changed

src/datasets/packaged_modules/json/json.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import io
22
import itertools
33
from dataclasses import dataclass
4-
from typing import Optional
4+
from typing import List, Optional
55

66
import pandas as pd
77
import pyarrow as pa
@@ -70,12 +70,18 @@ def _info(self):
7070
raise ValueError("The JSON loader parameter `newlines_in_values` is no longer supported")
7171
return datasets.DatasetInfo(features=self.config.features)
7272

73-
def _split_generators(self, dl_manager):
73+
def _available_splits(self) -> Optional[List[str]]:
74+
return [str(split) for split in self.config.data_files] if isinstance(self.config.data_files, dict) else None
75+
76+
def _split_generators(self, dl_manager, splits: Optional[List[str]] = None):
7477
"""We handle string, list and dicts in datafiles"""
7578
if not self.config.data_files:
7679
raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
7780
dl_manager.download_config.extract_on_the_fly = True
78-
data_files = dl_manager.download_and_extract(self.config.data_files)
81+
data_files = self.config.data_files
82+
if splits and isinstance(data_files, dict):
83+
data_files = {split: data_files[split] for split in splits}
84+
data_files = dl_manager.download_and_extract(data_files)
7985
splits = []
8086
for split_name, files in data_files.items():
8187
if isinstance(files, str):

0 commit comments

Comments
 (0)