Skip to content

Commit adaddcd

Browse files
authored
Update dataset_dict push_to_hub (#7711)
* update dataset_dict push_to_hub
* minor
* fix test
1 parent c6c381d commit adaddcd

File tree

3 files changed, +327 -250 lines changed

src/datasets/arrow_dataset.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5797,7 +5797,7 @@ def push_to_hub(
5797 5797    num_proc=num_proc,
5798 5798    )
5799 5799
5800      -  def get_deletions_and_dataset_card() -> tuple[str, str, Optional[str]]:
     5800 +  def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], str, Optional[str]]:
5801 5801    parent_commit = api.repo_info(repo_id, repo_type="dataset", revision=revision).sha
5802 5802
5803 5803    # Check if the repo already has a README.md and/or a dataset_infos.json to update them with the new split info (size and pattern)
@@ -5977,7 +5977,9 @@ def get_deletions_and_dataset_card() -> tuple[str, str, Optional[str]]:
5977 5977    + (f" (still {num_commits - i - 1} to go)" if num_commits - i - 1 else "")
5978 5978    + "."
5979 5979    )
5980      -  additions = []
     5980 +  last_commit_additions = []
     5981 +  else:
     5982 +  last_commit_additions = additions
5981 5983
5982 5984    for retry, sleep_time in enumerate(itertools.chain(range(10), itertools.repeat(30)), start=1):
5983 5985    # We need to retry if there was a commit in between in case it touched the dataset card data
@@ -5997,7 +5999,7 @@ def get_deletions_and_dataset_card() -> tuple[str, str, Optional[str]]:
5997 5999    try:
5998 6000    commit_info = api.create_commit(
5999 6001    repo_id,
6000      -  operations=additions + dataset_card_additions + deletions,
     6002 +  operations=last_commit_additions + dataset_card_additions + deletions,
6001 6003    commit_message=commit_message,
6002 6004    commit_description=commit_description,
6003 6005    repo_type="dataset",

0 commit comments

Comments (0)