diff --git a/src/datasets/load.py b/src/datasets/load.py
index 1218262a856..adfe0767179 100644
--- a/src/datasets/load.py
+++ b/src/datasets/load.py
@@ -1167,11 +1167,22 @@ def load_dataset_builder(
         raise ValueError(error_msg)
 
     builder_cls = get_dataset_builder_class(dataset_module, dataset_name=dataset_name)
-    # Instantiate the dataset builder
+    # Merge builder_kwargs and config_kwargs, letting user-provided config_kwargs
+    # take precedence over module-provided builder_kwargs
+    merged_kwargs = {**builder_kwargs, **config_kwargs}
+
+    # Pop arguments that are also passed explicitly below, so that the builder
+    # doesn't receive the same keyword twice (TypeError: got multiple values)
+    config_name = merged_kwargs.pop("config_name", config_name)
+    dataset_name = merged_kwargs.pop("dataset_name", dataset_name)
+    data_dir = merged_kwargs.pop("data_dir", data_dir)
+    data_files = merged_kwargs.pop("data_files", data_files)
+
+    # Instantiate the dataset builder
     builder_instance: DatasetBuilder = builder_cls(
         cache_dir=cache_dir,
         dataset_name=dataset_name,
         config_name=config_name,
         data_dir=data_dir,
         data_files=data_files,
         hash=dataset_module.hash,
@@ -1179,9 +1190,9 @@ def load_dataset_builder(
         features=features,
         token=token,
         storage_options=storage_options,
-        **builder_kwargs,
-        **config_kwargs,
+        **merged_kwargs,
     )
+
     builder_instance._use_legacy_cache_dir_if_possible(dataset_module)
     return builder_instance
 
diff --git a/tests/test_load.py b/tests/test_load.py
index 422e6cd3180..1e36ad35b5e 100644
--- a/tests/test_load.py
+++ b/tests/test_load.py
@@ -1312,3 +1312,11 @@ def test_update_dataset_card_data_with_standalone_yaml():
         assert isinstance(
             builder.info.features["label"], datasets.ClassLabel
         )  # correctly loaded from long labels list in standalone yaml
+
+
+@pytest.mark.integration
+def test_builder_kwargs_and_config_kwargs_do_not_conflict():
+    # "glue" resolves its config name internally, and we pass config_name again
+    # as a keyword argument to reproduce the "got multiple values" conflict
+    builder = load_dataset_builder("glue", config_name="sst2")
+    assert builder.config.name == "sst2"