diff --git a/src/huggingface_hub/_commit_api.py b/src/huggingface_hub/_commit_api.py index 3d32d78960..2e42cf727f 100644 --- a/src/huggingface_hub/_commit_api.py +++ b/src/huggingface_hub/_commit_api.py @@ -45,6 +45,13 @@ UploadMode = Literal["lfs", "regular"] +# Type alias for commit modes +# immediate: commit is processed immediately (default) +# queued: commit is pending +# flush: all pending commits are processed and merged into one commit +CommitMode = Literal["immediate", "queued", "flush"] + + # Max is 1,000 per request on the Hub for HfApi.get_paths_info # Otherwise we get: # HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters @@ -872,6 +879,7 @@ def _prepare_commit_payload( commit_message: str, commit_description: Optional[str] = None, parent_commit: Optional[str] = None, + commit_mode: Optional[CommitMode] = None, ) -> Iterable[dict[str, Any]]: """ Builds the payload to POST to the `/commit` API of the Hub. @@ -889,6 +897,8 @@ def _prepare_commit_payload( header_value = {"summary": commit_message, "description": commit_description} if parent_commit is not None: header_value["parentCommit"] = parent_commit + if commit_mode is not None: + header_value["mode"] = commit_mode yield {"key": "header", "value": header_value} nb_ignored_files = 0 diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 08254a8dd5..f229354e55 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -49,6 +49,7 @@ from . import constants from ._commit_api import ( + CommitMode, CommitOperation, CommitOperationAdd, CommitOperationCopy, @@ -4011,6 +4012,43 @@ def create_commit( # type: ignore create_pr: Optional[bool] = None, num_threads: int = 5, parent_commit: Optional[str] = None, + commit_mode: Literal["queued"], + run_as_future: Literal[False] = ..., + ) -> None: ... + + @overload + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + commit_mode: Literal["queued"], + run_as_future: Literal[True], + ) -> Future[None]: ... + + @overload + def create_commit( + self, + repo_id: str, + operations: Iterable[CommitOperation], + *, + commit_message: str, + commit_description: Optional[str] = None, + token: Union[str, bool, None] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + create_pr: Optional[bool] = None, + num_threads: int = 5, + parent_commit: Optional[str] = None, + commit_mode: Optional[CommitMode] = None, run_as_future: Literal[False] = ..., ) -> CommitInfo: ... @@ -4028,7 +4066,8 @@ def create_commit( create_pr: Optional[bool] = None, num_threads: int = 5, parent_commit: Optional[str] = None, - run_as_future: Literal[True] = ..., + commit_mode: Optional[CommitMode] = None, + run_as_future: Literal[True], ) -> Future[CommitInfo]: ... @validate_hf_hub_args @@ -4046,8 +4085,9 @@ def create_commit( create_pr: Optional[bool] = None, num_threads: int = 5, parent_commit: Optional[str] = None, + commit_mode: Optional[CommitMode] = None, run_as_future: bool = False, - ) -> Union[CommitInfo, Future[CommitInfo]]: + ) -> Union[CommitInfo, Future[CommitInfo], None, Future[None]]: """ Creates a commit in the given repo, deleting & uploading files as needed. @@ -4117,6 +4157,13 @@ def create_commit( is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be especially useful if the repo is updated / committed to concurrently. + + commit_mode (`str`, *optional*): + The commit mode to use. Possible values are: + - `"immediate"`: commit is processed immediately (default) + - `"queued"`: commit is pending. No commit info returned. + - `"flush"`: all pending commits are processed and merged into one commit + run_as_future (`bool`, *optional*): Whether or not to run this method in the background. Background jobs are run sequentially without blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) @@ -4185,7 +4232,7 @@ def create_commit( logger.debug( f"About to commit to the hub: {len(additions)} addition(s), {len(copies)} copie(s) and" - f" {nb_deletions} deletion(s)." + f" {nb_deletions} deletion(s) (mode: {commit_mode or 'immediate'})." ) # If updating a README.md file, make sure the metadata format is valid @@ -4275,6 +4322,7 @@ def create_commit( commit_message=commit_message, commit_description=commit_description, parent_commit=parent_commit, + commit_mode=commit_mode, ) commit_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commit/{revision}" @@ -4309,6 +4357,9 @@ def _payload_as_ndjson() -> Iterable[bytes]: for addition in additions: addition._is_committed = True + if commit_mode == "queued": + return None # TODO: return something (there is a queue commit id in the payload) + commit_data = commit_resp.json() return CommitInfo( commit_url=commit_data["commitUrl"],