Skip to content

Commit 891d794

Browse files
fix: retry LFS pointer errors in push_to_hub
1 parent 3ea8de7 commit 891d794

File tree

4 files changed

+111
-111
lines changed

4 files changed

+111
-111
lines changed

src/datasets/arrow_dataset.py

Lines changed: 26 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6005,19 +6005,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
60056005
create_pr=create_pr,
60066006
)
60076007
except HfHubHTTPError as err:
6008-
if (
6009-
err.__context__
6010-
and isinstance(err.__context__, HfHubHTTPError)
6011-
and err.__context__.response.status_code == 409
6012-
):
6008+
response = (
6009+
err.response if err.response is not None else getattr(err.__context__, "response", None)
6010+
)
6011+
status_code = response.status_code if response is not None else None
6012+
if status_code == 409:
60136013
# 409 is Conflict (another commit is in progress)
60146014
time.sleep(sleep_time)
60156015
logger.info(
6016-
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
6016+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
60176017
)
60186018
continue
6019-
else:
6020-
raise
6019+
elif status_code == 400 and "lfs pointer" in str(err).lower():
6020+
# 400 with LFS error indicates LFS objects not yet propagated
6021+
time.sleep(sleep_time)
6022+
logger.info(
6023+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
6024+
)
6025+
continue
6026+
raise
60216027
break
60226028
logger.info(
60236029
f"Commit #{i + 1} completed"
@@ -6055,20 +6061,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
60556061
parent_commit=parent_commit,
60566062
)
60576063
except HfHubHTTPError as err:
6058-
if (
6059-
err.__context__
6060-
and isinstance(err.__context__, HfHubHTTPError)
6061-
and err.__context__.response.status_code in (412, 409)
6062-
):
6064+
response = err.response if err.response is not None else getattr(err.__context__, "response", None)
6065+
status_code = response.status_code if response is not None else None
6066+
if status_code in (412, 409):
60636067
# 412 is Precondition failed (parent_commit isn't satisfied)
60646068
# 409 is Conflict (another commit is in progress)
60656069
time.sleep(sleep_time)
60666070
logger.info(
6067-
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
6071+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
60686072
)
60696073
continue
6070-
else:
6071-
raise
6074+
elif status_code == 400 and "lfs pointer" in str(err).lower():
6075+
# 400 with LFS error indicates LFS objects not yet propagated
6076+
time.sleep(sleep_time)
6077+
logger.info(
6078+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
6079+
)
6080+
continue
6081+
raise
60726082
break
60736083

60746084
return commit_info

src/datasets/dataset_dict.py

Lines changed: 52 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1914,19 +1914,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
19141914
create_pr=create_pr,
19151915
)
19161916
except HfHubHTTPError as err:
1917-
if (
1918-
err.__context__
1919-
and isinstance(err.__context__, HfHubHTTPError)
1920-
and err.__context__.response.status_code == 409
1921-
):
1917+
response = (
1918+
err.response if err.response is not None else getattr(err.__context__, "response", None)
1919+
)
1920+
status_code = response.status_code if response is not None else None
1921+
if status_code == 409:
19221922
# 409 is Conflict (another commit is in progress)
19231923
time.sleep(sleep_time)
19241924
logger.info(
1925-
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
1925+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
1926+
)
1927+
continue
1928+
elif status_code == 400 and "lfs pointer" in str(err).lower():
1929+
# 400 with LFS error indicates LFS objects not yet propagated
1930+
time.sleep(sleep_time)
1931+
logger.info(
1932+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
19261933
)
19271934
continue
1928-
else:
1929-
raise
1935+
raise
19301936
break
19311937
logger.info(
19321938
f"Commit #{i + 1} completed"
@@ -1964,20 +1970,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
19641970
parent_commit=parent_commit,
19651971
)
19661972
except HfHubHTTPError as err:
1967-
if (
1968-
err.__context__
1969-
and isinstance(err.__context__, HfHubHTTPError)
1970-
and err.__context__.response.status_code in (412, 409)
1971-
):
1973+
response = err.response if err.response is not None else getattr(err.__context__, "response", None)
1974+
status_code = response.status_code if response is not None else None
1975+
if status_code in (412, 409):
19721976
# 412 is Precondition failed (parent_commit isn't satisfied)
19731977
# 409 is Conflict (another commit is in progress)
19741978
time.sleep(sleep_time)
19751979
logger.info(
1976-
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
1980+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
1981+
)
1982+
continue
1983+
elif status_code == 400 and "lfs pointer" in str(err).lower():
1984+
# 400 with LFS error indicates LFS objects not yet propagated
1985+
time.sleep(sleep_time)
1986+
logger.info(
1987+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
19771988
)
19781989
continue
1979-
else:
1980-
raise
1990+
raise
19811991
break
19821992

19831993
return commit_info
@@ -2783,19 +2793,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
27832793
create_pr=create_pr,
27842794
)
27852795
except HfHubHTTPError as err:
2786-
if (
2787-
err.__context__
2788-
and isinstance(err.__context__, HfHubHTTPError)
2789-
and err.__context__.response.status_code == 409
2790-
):
2796+
response = (
2797+
err.response if err.response is not None else getattr(err.__context__, "response", None)
2798+
)
2799+
status_code = response.status_code if response is not None else None
2800+
if status_code == 409:
27912801
# 409 is Conflict (another commit is in progress)
27922802
time.sleep(sleep_time)
27932803
logger.info(
2794-
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
2804+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
2805+
)
2806+
continue
2807+
elif status_code == 400 and "lfs pointer" in str(err).lower():
2808+
# 400 with LFS error indicates LFS objects not yet propagated
2809+
time.sleep(sleep_time)
2810+
logger.info(
2811+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
27952812
)
27962813
continue
2797-
else:
2798-
raise
2814+
raise
27992815
break
28002816
logger.info(
28012817
f"Commit #{i + 1} completed"
@@ -2833,20 +2849,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
28332849
parent_commit=parent_commit,
28342850
)
28352851
except HfHubHTTPError as err:
2836-
if (
2837-
err.__context__
2838-
and isinstance(err.__context__, HfHubHTTPError)
2839-
and err.__context__.response.status_code in (412, 409)
2840-
):
2852+
response = err.response if err.response is not None else getattr(err.__context__, "response", None)
2853+
status_code = response.status_code if response is not None else None
2854+
if status_code in (412, 409):
28412855
# 412 is Precondition failed (parent_commit isn't satisfied)
28422856
# 409 is Conflict (another commit is in progress)
28432857
time.sleep(sleep_time)
28442858
logger.info(
2845-
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
2859+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
2860+
)
2861+
continue
2862+
elif status_code == 400 and "lfs pointer" in str(err).lower():
2863+
# 400 with LFS error indicates LFS objects not yet propagated
2864+
time.sleep(sleep_time)
2865+
logger.info(
2866+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
28462867
)
28472868
continue
2848-
else:
2849-
raise
2869+
raise
28502870
break
28512871

28522872
return commit_info

src/datasets/iterable_dataset.py

Lines changed: 26 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4439,19 +4439,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
44394439
create_pr=create_pr,
44404440
)
44414441
except HfHubHTTPError as err:
4442-
if (
4443-
err.__context__
4444-
and isinstance(err.__context__, HfHubHTTPError)
4445-
and err.__context__.response.status_code == 409
4446-
):
4442+
response = (
4443+
err.response if err.response is not None else getattr(err.__context__, "response", None)
4444+
)
4445+
status_code = response.status_code if response is not None else None
4446+
if status_code == 409:
44474447
# 409 is Conflict (another commit is in progress)
44484448
time.sleep(sleep_time)
44494449
logger.info(
4450-
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
4450+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
4451+
)
4452+
continue
4453+
elif status_code == 400 and "lfs pointer" in str(err).lower():
4454+
# 400 with LFS error indicates LFS objects not yet propagated
4455+
time.sleep(sleep_time)
4456+
logger.info(
4457+
f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
44514458
)
44524459
continue
4453-
else:
4454-
raise
4460+
raise
44554461
break
44564462
logger.info(
44574463
f"Commit #{i + 1} completed"
@@ -4489,20 +4495,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
44894495
parent_commit=parent_commit,
44904496
)
44914497
except HfHubHTTPError as err:
4492-
if (
4493-
err.__context__
4494-
and isinstance(err.__context__, HfHubHTTPError)
4495-
and err.__context__.response.status_code in (412, 409)
4496-
):
4498+
response = err.response if err.response is not None else getattr(err.__context__, "response", None)
4499+
status_code = response.status_code if response is not None else None
4500+
if status_code in (412, 409):
44974501
# 412 is Precondition failed (parent_commit isn't satisfied)
44984502
# 409 is Conflict (another commit is in progress)
44994503
time.sleep(sleep_time)
45004504
logger.info(
4501-
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})"
4505+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})"
45024506
)
45034507
continue
4504-
else:
4505-
raise
4508+
elif status_code == 400 and "lfs pointer" in str(err).lower():
4509+
# 400 with LFS error indicates LFS objects not yet propagated
4510+
time.sleep(sleep_time)
4511+
logger.info(
4512+
f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)"
4513+
)
4514+
continue
4515+
raise
45064516
break
45074517

45084518
return commit_info

0 commit comments

Comments
 (0)