File tree Expand file tree Collapse file tree 3 files changed +8
-2
lines changed
Expand file tree Collapse file tree 3 files changed +8
-2
lines changed Original file line number Diff line number Diff line change @@ -1514,7 +1514,7 @@ def _rename_shard(shard_and_job: tuple[int]):
15141514 fpath .replace (SUFFIX , "" ),
15151515 )
15161516
1517- if total_original_shards > 1 :
1517+ if total_original_shards > 1 and config . SAVE_ORIGINAL_SHARD_LENGTHS :
15181518 split_generator .split_info .original_shard_lengths = [
15191519 original_shard_length
15201520 for original_shard_lengths in original_shard_lengths_per_job
@@ -1792,7 +1792,7 @@ def _rename_shard(shard_id_and_job: tuple[int]):
17921792 fpath .replace (SUFFIX , "" ),
17931793 )
17941794
1795- if total_original_shards > 1 :
1795+ if total_original_shards > 1 and config . SAVE_ORIGINAL_SHARD_LENGTHS :
17961796 split_generator .split_info .original_shard_lengths = [
17971797 original_shard_length
17981798 for original_shard_lengths in original_shard_lengths_per_job
Original file line number Diff line number Diff line change 167167DEFAULT_EXTRACTED_DATASETS_PATH = os .path .join (DEFAULT_DOWNLOADED_DATASETS_PATH , EXTRACTED_DATASETS_DIR )
168168EXTRACTED_DATASETS_PATH = Path (os .getenv ("HF_DATASETS_EXTRACTED_DATASETS_PATH" , DEFAULT_EXTRACTED_DATASETS_PATH ))
169169
170+ # Cached dataset info options
171+ SAVE_ORIGINAL_SHARD_LENGTHS = False
172+
170173# Download count for the website
171174HF_UPDATE_DOWNLOAD_COUNTS = (
172175 os .environ .get ("HF_UPDATE_DOWNLOAD_COUNTS" , "AUTO" ).upper () in ENV_VARS_TRUE_AND_AUTO_VALUES
Original file line number Diff line number Diff line change @@ -27,6 +27,9 @@ def set_test_cache_config(tmp_path_factory, monkeypatch):
2727 test_extracted_datasets_path = test_hf_datasets_cache / "downloads" / "extracted"
2828 monkeypatch .setattr ("datasets.config.EXTRACTED_DATASETS_PATH" , str (test_extracted_datasets_path ))
2929
30+ # used in dataset viewer, we may set it to true by default in the future
31+ monkeypatch .setattr ("datasets.config.SAVE_ORIGINAL_SHARD_LENGTHS" , True )
32+
3033
3134@pytest .fixture (autouse = True )
3235def disable_implicit_token (monkeypatch ):
You can’t perform that action at this time.
0 commit comments