diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 28a3dc9b2bf..0af60cf64ee 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -6005,19 +6005,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], create_pr=create_pr, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code == 409 - ): + response = ( + err.response if err.response is not None else getattr(err.__context__, "response", None) + ) + status_code = response.status_code if response is not None else None + if status_code == 409: # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" ) continue - else: - raise + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" + ) + continue + raise break logger.info( f"Commit #{i + 1} completed" @@ -6055,20 +6061,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], parent_commit=parent_commit, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code in (412, 409) - ): + response = err.response if err.response is not None else getattr(err.__context__, "response", None) + status_code = response.status_code if response is not None else None + if status_code in (412, 409): # 412 is Precondition failed (parent_commit isn't satisfied) # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" ) continue - else: - raise + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" + ) + continue + raise break return commit_info diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 995103d26e0..36ceb303911 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -1914,19 +1914,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], create_pr=create_pr, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code == 409 - ): + response = ( + err.response if err.response is not None else getattr(err.__context__, "response", None) + ) + status_code = response.status_code if response is not None else None + if status_code == 409: # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" + ) + continue + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" ) continue - else: - raise + raise break logger.info( f"Commit #{i + 1} completed" @@ -1964,20 +1970,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], parent_commit=parent_commit, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code in (412, 409) - ): + response = err.response if err.response is not None else getattr(err.__context__, "response", None) + status_code = response.status_code if response is not None else None + if status_code in (412, 409): # 412 is Precondition failed (parent_commit isn't satisfied) # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" + ) + continue + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" ) continue - else: - raise + raise break return commit_info @@ -2783,19 +2793,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], create_pr=create_pr, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code == 409 - ): + response = ( + err.response if err.response is not None else getattr(err.__context__, "response", None) + ) + status_code = response.status_code if response is not None else None + if status_code == 409: # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" + ) + continue + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" ) continue - else: - raise + raise break logger.info( f"Commit #{i + 1} completed" @@ -2833,20 +2849,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], parent_commit=parent_commit, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code in (412, 409) - ): + response = err.response if err.response is not None else getattr(err.__context__, "response", None) + status_code = response.status_code if response is not None else None + if status_code in (412, 409): # 412 is Precondition failed (parent_commit isn't satisfied) # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" + ) + continue + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" ) continue - else: - raise + raise break return commit_info diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index b66c8977ade..c41b39ea545 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -4439,19 +4439,25 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], create_pr=create_pr, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code == 409 - ): + response = ( + err.response if err.response is not None else getattr(err.__context__, "response", None) + ) + status_code = response.status_code if response is not None else None + if status_code == 409: # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" + ) + continue + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying intermediate commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" ) continue - else: - raise + raise break logger.info( f"Commit #{i + 1} completed" @@ -4489,20 +4495,24 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete], parent_commit=parent_commit, ) except HfHubHTTPError as err: - if ( - err.__context__ - and isinstance(err.__context__, HfHubHTTPError) - and err.__context__.response.status_code in (412, 409) - ): + response = err.response if err.response is not None else getattr(err.__context__, "response", None) + status_code = response.status_code if response is not None else None + if status_code in (412, 409): # 412 is Precondition failed (parent_commit isn't satisfied) # 409 is Conflict (another commit is in progress) time.sleep(sleep_time) logger.info( - f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {err.__context__.response.status_code})" + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code})" ) continue - else: - raise + elif status_code == 400 and "lfs pointer" in str(err).lower(): + # 400 with LFS error indicates LFS objects not yet propagated + time.sleep(sleep_time) + logger.info( + f"Retrying commit for {repo_id}, {config_name} ({retry}/n with status_code {status_code} - LFS propagation)" + ) + continue + raise break return commit_info