Skip to content

Commit 2d35b80

Browse files
committed
Use git's partial clone feature to speed up pip
Clone with --filter=blob:none, which fetches all metadata up front but fetches blobs only on demand, as checkout needs them. Since pip typically only needs the blobs for a single revision, this can be a big improvement, especially when fetching from repositories with a lot of history or over slower network connections. Added a unit test for the rev-less path. Confirmed that both of the if/else paths are exercised by the unit tests.
1 parent b9f8295 commit 2d35b80

File tree

3 files changed

+122
-1
lines changed

3 files changed

+122
-1
lines changed

news/9086.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use git's partial clone feature (``--filter=blob:none``, available with git 2.17 and newer) to speed up source retrieval from Git URLs, whether or not a revision is specified.

src/pip/_internal/vcs/git.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,12 +262,27 @@ def fetch_new(self, dest, url, rev_options):
262262
# type: (str, HiddenText, RevOptions) -> None
263263
rev_display = rev_options.to_display()
264264
logger.info("Cloning %s%s to %s", url, rev_display, display_path(dest))
265-
self.run_command(make_command("clone", "-q", url, dest))
265+
if self.get_git_version() >= (2, 17):
266+
# Git added support for partial clone in 2.17
267+
# https://git-scm.com/docs/partial-clone
268+
# Speeds up cloning by functioning without a complete copy of repository
269+
self.run_command(
270+
make_command(
271+
"clone",
272+
"--filter=blob:none",
273+
"-q",
274+
url,
275+
dest,
276+
)
277+
)
278+
else:
279+
self.run_command(make_command("clone", "-q", url, dest))
266280

267281
if rev_options.rev:
268282
# Then a specific revision was requested.
269283
rev_options = self.resolve_revision(dest, url, rev_options)
270284
branch_name = getattr(rev_options, "branch_name", None)
285+
logger.debug("Rev options %s, branch_name %s", rev_options, branch_name)
271286
if branch_name is None:
272287
# Only do a checkout if the current commit id doesn't match
273288
# the requested revision.

tests/functional/test_vcs_git.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import os
6+
from unittest.mock import patch
67

78
import pytest
89

@@ -282,3 +283,107 @@ def test_resolve_commit_not_on_branch(script, tmp_path):
282283
# check we can fetch our commit
283284
rev_options = Git.make_rev_options(commit)
284285
Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options)
286+
287+
288+
def _initialize_clonetest_server(repo_path, script, enable_partial_clone):
289+
repo_path.mkdir()
290+
script.run("git", "init", cwd=str(repo_path))
291+
repo_file = repo_path / "file.txt"
292+
repo_file.write_text(u".")
293+
script.run("git", "add", "file.txt", cwd=str(repo_path))
294+
script.run("git", "commit", "-m", "initial commit", cwd=str(repo_path))
295+
296+
# Enable filtering support on server
297+
if enable_partial_clone:
298+
script.run("git", "config", "uploadpack.allowFilter", "true", cwd=repo_path)
299+
script.run(
300+
"git", "config", "uploadpack.allowanysha1inwant", "true", cwd=repo_path
301+
)
302+
303+
return repo_file
304+
305+
306+
@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone(script, tmp_path):
    """Test partial clone w/ a git-server that supports it.

    Clones the source repository twice (once at HEAD, once at an explicit
    commit), adds a commit to the source, and checks that ``git pull`` in
    each clone does not report the "filtering not recognized" warning.
    """
    filter_warning = "warning: filtering not recognized by server, ignoring"

    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=True
    )
    # Keep the clones beside the source repository rather than nested
    # inside its working tree.
    clone_path1 = tmp_path / "clone1"
    clone_path2 = tmp_path / "clone2"

    commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()

    # Check that we can clone at HEAD
    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
    # Check that we can clone to commit
    Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))

    # Write some additional stuff to git pull
    repo_file.write_text("..")
    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

    # Make sure git pull works - with server supporting filtering
    for clone_path in (clone_path1, clone_path2):
        pull_result = script.run("git", "pull", cwd=str(clone_path))
        assert filter_warning not in pull_result.stderr
336+
337+
338+
@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone_without_server_support(script, tmp_path):
    """Test partial clone w/ a git-server that does not support it.

    Same scenario as the server-supported case, but the source repository
    is created without the upload-pack filter settings, so ``git pull`` in
    each clone is expected to emit the "filtering not recognized" warning
    while still succeeding.
    """
    filter_warning = "warning: filtering not recognized by server, ignoring"

    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=False
    )
    # Keep the clones beside the source repository rather than nested
    # inside its working tree.
    clone_path1 = tmp_path / "clone1"
    clone_path2 = tmp_path / "clone2"

    commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()

    # Check that we can clone at HEAD
    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
    # Check that we can clone to commit
    Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))

    # Write some additional stuff to git pull
    repo_file.write_text("..")
    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

    # Make sure git pull works - even though server doesn't support filtering
    for clone_path in (clone_path1, clone_path2):
        pull_result = script.run("git", "pull", cwd=str(clone_path))
        assert filter_warning in pull_result.stderr
368+
369+
370+
def test_clone_without_partial_clone_support(script, tmp_path):
    """Older git clients don't support partial clone. Test the fallback path.

    ``Git.get_git_version`` is patched to report 2.16 for the duration of
    the clone so that ``fetch_new`` takes the plain ``git clone`` branch.
    """
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=True
    )
    # Keep the clone beside the source repository rather than nested
    # inside its working tree.
    clone_path = tmp_path / "clone1"

    # Check that we can clone w/ old version of git w/o --filter
    with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)):
        Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options())

    repo_file.write_text("...")
    script.run("git", "commit", "-am", "third commit", cwd=str(repo_path))

    # Should work fine w/o attempting to use `--filter` args
    pull_result = script.run("git", "pull", cwd=str(clone_path))
    assert (
        "warning: filtering not recognized by server, ignoring"
        not in pull_result.stderr
    )

0 commit comments

Comments
 (0)