Skip to content

Commit 2bc9a05

Browse files
authored
Git prune (#450)
* Git lfs prune * Tests * Add it to push and context manager * Adjust documentation
1 parent 345bf67 commit 2bc9a05

File tree

2 files changed

+210
-3
lines changed

2 files changed

+210
-3
lines changed

src/huggingface_hub/repository.py

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,27 @@ def __init__(
2929
is_done_method: Callable,
3030
status_method: Callable,
3131
process: subprocess.Popen,
32+
post_method: Optional[Callable] = None,
3233
):
3334
self.title = title
3435
self._is_done = is_done_method
3536
self._status = status_method
3637
self._process = process
3738
self._stderr = ""
3839
self._stdout = ""
40+
self._post_method = post_method
3941

4042
@property
4143
def is_done(self) -> bool:
4244
"""
4345
Whether the process is done.
4446
"""
45-
return self._is_done()
47+
result = self._is_done()
48+
49+
if result and self._post_method is not None:
50+
self._post_method()
51+
52+
return result
4653

4754
@property
4855
def status(self) -> int:
@@ -840,6 +847,27 @@ def auto_track_large_files(self, pattern: Optional[str] = ".") -> List[str]:
840847

841848
return files_to_be_tracked_with_lfs
842849

850+
def lfs_prune(self, recent=False):
851+
"""
852+
git lfs prune
853+
"""
854+
args = "git lfs prune".split()
855+
if recent:
856+
args.append("--recent")
857+
try:
858+
with lfs_log_progress():
859+
result = subprocess.run(
860+
args,
861+
stderr=subprocess.PIPE,
862+
stdout=subprocess.PIPE,
863+
check=True,
864+
encoding="utf-8",
865+
cwd=self.local_dir,
866+
)
867+
logger.info(result.stdout)
868+
except subprocess.CalledProcessError as exc:
869+
raise EnvironmentError(exc.stderr)
870+
843871
def git_pull(self, rebase: Optional[bool] = False):
844872
"""
845873
git pull
@@ -914,6 +942,7 @@ def git_push(
914942
self,
915943
upstream: Optional[str] = None,
916944
blocking: Optional[bool] = True,
945+
auto_lfs_prune: Optional[bool] = False,
917946
) -> Union[str, Tuple[str, CommandInProgress]]:
918947
"""
919948
git push
@@ -931,6 +960,8 @@ def git_push(
931960
Setting this to `False` will return a `CommandInProgress` object
932961
which has an `is_done` property. This property will be set to
933962
`True` when the push is finished.
963+
auto_lfs_prune (`bool`, defaults to `False`):
964+
Whether to automatically prune files once they have been pushed to the remote.
934965
"""
935966
command = "git push"
936967

@@ -986,12 +1017,16 @@ def status_method():
9861017
is_done_method=lambda: process.poll() is not None,
9871018
status_method=status_method,
9881019
process=process,
1020+
post_method=self.lfs_prune if auto_lfs_prune else None,
9891021
)
9901022

9911023
self.command_queue.append(command)
9921024

9931025
return self.git_head_commit_url(), command
9941026

1027+
if auto_lfs_prune:
1028+
self.lfs_prune()
1029+
9951030
return self.git_head_commit_url()
9961031

9971032
def git_checkout(self, revision: str, create_branch_ok: Optional[bool] = False):
@@ -1172,6 +1207,7 @@ def push_to_hub(
11721207
commit_message: Optional[str] = "commit files to HF hub",
11731208
blocking: Optional[bool] = True,
11741209
clean_ok: Optional[bool] = False,
1210+
auto_lfs_prune: Optional[bool] = False,
11751211
) -> Optional[str]:
11761212
"""
11771213
Helper to add, commit, and push files to remote repository on the HuggingFace Hub.
@@ -1185,14 +1221,18 @@ def push_to_hub(
11851221
clean_ok (`bool`, `optional`, defaults to `False`):
11861222
If True, this function will return None if the repo is untouched.
11871223
Default behavior is to fail because the git command fails.
1224+
auto_lfs_prune (`bool`, defaults to `False`):
1225+
Whether to automatically prune files once they have been pushed to the remote.
11881226
"""
11891227
if clean_ok and self.is_repo_clean():
11901228
logger.info("Repo currently clean. Ignoring push_to_hub")
11911229
return None
11921230
self.git_add(auto_lfs_track=True)
11931231
self.git_commit(commit_message)
11941232
return self.git_push(
1195-
upstream=f"origin {self.current_branch}", blocking=blocking
1233+
upstream=f"origin {self.current_branch}",
1234+
blocking=blocking,
1235+
auto_lfs_prune=auto_lfs_prune,
11961236
)
11971237

11981238
@contextmanager
@@ -1202,6 +1242,7 @@ def commit(
12021242
branch: Optional[str] = None,
12031243
track_large_files: Optional[bool] = True,
12041244
blocking: Optional[bool] = True,
1245+
auto_lfs_prune: Optional[bool] = False,
12051246
):
12061247
"""
12071248
Context manager utility to handle committing to a repository. This automatically tracks large files (>10Mb)
@@ -1216,6 +1257,8 @@ def commit(
12161257
Whether to automatically track large files or not. Will do so by default.
12171258
blocking (`bool`, `optional`, defaults to `True`):
12181259
Whether the function should return only when the `git push` has finished.
1260+
auto_lfs_prune (`bool`, defaults to `False`):
1261+
Whether to automatically prune files once they have been pushed to the remote.
12191262
12201263
Examples:
12211264
@@ -1270,7 +1313,9 @@ def commit(
12701313

12711314
try:
12721315
self.git_push(
1273-
upstream=f"origin {self.current_branch}", blocking=blocking
1316+
upstream=f"origin {self.current_branch}",
1317+
blocking=blocking,
1318+
auto_lfs_prune=auto_lfs_prune,
12741319
)
12751320
except OSError as e:
12761321
# If no changes are detected, there is nothing to commit.

tests/test_repository.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,168 @@ def test_delete_tag(self):
735735
repo.delete_tag("v4.6.0", remote="origin")
736736
self.assertFalse(repo.tag_exists("v4.6.0", remote="origin"))
737737

738+
def test_lfs_prune(self):
739+
repo = Repository(
740+
WORKING_REPO_DIR,
741+
clone_from=f"{USER}/{REPO_NAME}",
742+
use_auth_token=self._token,
743+
git_user="ci",
744+
git_email="[email protected]",
745+
revision="main",
746+
)
747+
748+
with repo.commit("Committing LFS file"):
749+
with open("file.bin", "w+") as f:
750+
f.write("Random string 1")
751+
752+
with repo.commit("Committing LFS file"):
753+
with open("file.bin", "w+") as f:
754+
f.write("Random string 2")
755+
756+
root_directory = pathlib.Path(repo.local_dir) / ".git" / "lfs"
757+
git_lfs_files_size = sum(
758+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
759+
)
760+
repo.lfs_prune()
761+
post_prune_git_lfs_files_size = sum(
762+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
763+
)
764+
765+
# Size of the directory holding LFS files was reduced
766+
self.assertLess(post_prune_git_lfs_files_size, git_lfs_files_size)
767+
768+
def test_lfs_prune_git_push(self):
769+
repo = Repository(
770+
WORKING_REPO_DIR,
771+
clone_from=f"{USER}/{REPO_NAME}",
772+
use_auth_token=self._token,
773+
git_user="ci",
774+
git_email="[email protected]",
775+
revision="main",
776+
)
777+
778+
with repo.commit("Committing LFS file"):
779+
with open("file.bin", "w+") as f:
780+
f.write("Random string 1")
781+
782+
root_directory = pathlib.Path(repo.local_dir) / ".git" / "lfs"
783+
git_lfs_files_size = sum(
784+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
785+
)
786+
787+
with open(os.path.join(repo.local_dir, "file.bin"), "w+") as f:
788+
f.write("Random string 2")
789+
790+
repo.git_add()
791+
repo.git_commit("New commit")
792+
repo.git_push(auto_lfs_prune=True)
793+
794+
post_prune_git_lfs_files_size = sum(
795+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
796+
)
797+
798+
# Size of the directory holding LFS files is the exact same
799+
self.assertEqual(post_prune_git_lfs_files_size, git_lfs_files_size)
800+
801+
def test_lfs_prune_git_push_non_blocking(self):
802+
repo = Repository(
803+
WORKING_REPO_DIR,
804+
clone_from=f"{USER}/{REPO_NAME}",
805+
use_auth_token=self._token,
806+
git_user="ci",
807+
git_email="[email protected]",
808+
revision="main",
809+
)
810+
811+
with repo.commit("Committing LFS file"):
812+
with open("file.bin", "w+") as f:
813+
f.write("Random string 1")
814+
815+
root_directory = pathlib.Path(repo.local_dir) / ".git" / "lfs"
816+
git_lfs_files_size = sum(
817+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
818+
)
819+
820+
with open(os.path.join(repo.local_dir, "file.bin"), "w+") as f:
821+
f.write("Random string 2")
822+
823+
repo.git_add()
824+
repo.git_commit("New commit")
825+
repo.git_push(blocking=False, auto_lfs_prune=True)
826+
827+
while len(repo.commands_in_progress):
828+
time.sleep(0.2)
829+
830+
post_prune_git_lfs_files_size = sum(
831+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
832+
)
833+
834+
# Size of the directory holding LFS files is the exact same
835+
self.assertEqual(post_prune_git_lfs_files_size, git_lfs_files_size)
836+
837+
def test_lfs_prune_context_manager(self):
838+
repo = Repository(
839+
WORKING_REPO_DIR,
840+
clone_from=f"{USER}/{REPO_NAME}",
841+
use_auth_token=self._token,
842+
git_user="ci",
843+
git_email="[email protected]",
844+
revision="main",
845+
)
846+
847+
with repo.commit("Committing LFS file"):
848+
with open("file.bin", "w+") as f:
849+
f.write("Random string 1")
850+
851+
root_directory = pathlib.Path(repo.local_dir) / ".git" / "lfs"
852+
git_lfs_files_size = sum(
853+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
854+
)
855+
856+
with repo.commit("Committing LFS file", auto_lfs_prune=True):
857+
with open("file.bin", "w+") as f:
858+
f.write("Random string 2")
859+
860+
post_prune_git_lfs_files_size = sum(
861+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
862+
)
863+
864+
# Size of the directory holding LFS files is the exact same
865+
self.assertEqual(post_prune_git_lfs_files_size, git_lfs_files_size)
866+
867+
def test_lfs_prune_context_manager_non_blocking(self):
868+
repo = Repository(
869+
WORKING_REPO_DIR,
870+
clone_from=f"{USER}/{REPO_NAME}",
871+
use_auth_token=self._token,
872+
git_user="ci",
873+
git_email="[email protected]",
874+
revision="main",
875+
)
876+
877+
with repo.commit("Committing LFS file"):
878+
with open("file.bin", "w+") as f:
879+
f.write("Random string 1")
880+
881+
root_directory = pathlib.Path(repo.local_dir) / ".git" / "lfs"
882+
git_lfs_files_size = sum(
883+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
884+
)
885+
886+
with repo.commit("Committing LFS file", auto_lfs_prune=True, blocking=False):
887+
with open("file.bin", "w+") as f:
888+
f.write("Random string 2")
889+
890+
while len(repo.commands_in_progress):
891+
time.sleep(0.2)
892+
893+
post_prune_git_lfs_files_size = sum(
894+
f.stat().st_size for f in root_directory.glob("**/*") if f.is_file()
895+
)
896+
897+
# Size of the directory holding LFS files is the exact same
898+
self.assertEqual(post_prune_git_lfs_files_size, git_lfs_files_size)
899+
738900

739901
class RepositoryOfflineTest(RepositoryCommonTest):
740902
@classmethod

0 commit comments

Comments
 (0)