Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/datasets/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -1343,6 +1343,11 @@ def _prepare_split(
"""
raise NotImplementedError()

@property
def subset_name(self) -> str:
    """Name of the selected configuration.

    Provided as an alias of ``self.config.name`` so code can use the
    Hugging Face Hub terminology ('Subset') interchangeably with
    'configuration name'.
    """
    config_name = self.config.name
    return config_name

def _get_examples_iterable_for_split(self, split_generator: SplitGenerator) -> ExamplesIterable:
"""Generate the examples on the fly.

Expand Down
9 changes: 5 additions & 4 deletions src/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,7 @@ def load_dataset_builder(
def load_dataset(
path: str,
name: Optional[str] = None,
subset_name: Optional[str] = None, # <-- New alias parameter
data_dir: Optional[str] = None,
data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
split: Optional[Union[str, Split, list[str], list[Split]]] = None,
Expand All @@ -1202,6 +1203,10 @@ def load_dataset(
storage_options: Optional[dict] = None,
**config_kwargs,
) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]:
if name and subset_name and name != subset_name:
raise ValueError("'name' and 'subset_name' cannot both be set with different values.")
name = name or subset_name # Prefer 'name', fallback to 'subset_name'

"""Load a dataset from the Hugging Face Hub, or a local dataset.

You can find the list of datasets on the [Hub](https://huggingface.co/datasets) or with [`huggingface_hub.list_datasets`].
Expand Down Expand Up @@ -1388,7 +1393,6 @@ def load_dataset(
(verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
)

# Create a dataset builder
builder_instance = load_dataset_builder(
path=path,
name=name,
Expand All @@ -1404,11 +1408,9 @@ def load_dataset(
**config_kwargs,
)

# Return iterable dataset in case of streaming
if streaming:
return builder_instance.as_streaming_dataset(split=split)

# Download and prepare data
builder_instance.download_and_prepare(
download_config=download_config,
download_mode=download_mode,
Expand All @@ -1417,7 +1419,6 @@ def load_dataset(
storage_options=storage_options,
)

# Build dataset for splits
keep_in_memory = (
keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
)
Expand Down