Skip to content

Commit f4db457

Browse files
committed
feat(run-task): implement shallow git clones
Shallow clones yield a massive improvement to clone performance, at the expense of making it tricky to determine the files that were modified.
1 parent 619b206 commit f4db457

File tree

2 files changed

+152
-11
lines changed

2 files changed

+152
-11
lines changed

src/taskgraph/run-task/run-task

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -571,13 +571,17 @@ def git_fetch(
571571
*targets: str,
572572
remote: str = "origin",
573573
tags: bool = False,
574+
shallow: bool = False,
574575
env: Optional[dict[str, str]] = None,
575576
):
576577
args = ["git", "fetch"]
577578
if tags:
578579
# `--force` is needed to be able to update an existing outdated tag.
579580
args.extend(["--tags", "--force"])
580581

582+
if shallow:
583+
args.append("--depth=1")
584+
581585
args.extend([remote, *set(targets)])
582586
retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
583587

@@ -643,6 +647,7 @@ def git_checkout(
643647
head_rev: Optional[str],
644648
ssh_key_file: Optional[Path],
645649
ssh_known_hosts_file: Optional[Path],
650+
shallow: bool = False,
646651
):
647652
assert head_ref or head_rev
648653

@@ -688,10 +693,18 @@ def git_checkout(
688693
args = [
689694
"git",
690695
"clone",
691-
base_repo if base_repo else head_repo,
692-
destination_path,
693696
]
694697

698+
if shallow:
699+
args.extend(["--depth=1", "--no-checkout"])
700+
701+
args.extend(
702+
[
703+
base_repo if base_repo else head_repo,
704+
destination_path,
705+
]
706+
)
707+
695708
retry_required_command(b"vcs", args, extra_env=env)
696709

697710
# For Github based repos, base_rev often doesn't refer to an ancestor of
@@ -700,7 +713,7 @@ def git_checkout(
700713
# that consumers can compute the merge-base or files modified between the
701714
# two as needed.
702715
if base_rev and base_rev != NULL_REVISION:
703-
git_fetch(destination_path, base_rev, env=env)
716+
git_fetch(destination_path, base_rev, shallow=shallow, env=env)
704717

705718
# If a head_ref was provided, it might be a tag, so we need to make sure we fetch
706719
# those. This is explicitly only done when base and head repo match,
@@ -715,15 +728,17 @@ def git_checkout(
715728
targets = []
716729
if head_ref:
717730
targets.append(head_ref)
718-
if not targets:
719-
# If head_ref wasn't provided, we fallback to head_rev.
731+
if not head_ref or (shallow and head_rev):
732+
# If head_ref wasn't provided, we fall back to head_rev. If we have a
733+
# shallow clone, head_rev needs to be fetched independently regardless.
720734
targets.append(head_rev)
721735

722736
git_fetch(
723737
destination_path,
724738
*targets,
725739
remote=head_repo,
726740
tags=tags,
741+
shallow=shallow,
727742
env=env,
728743
)
729744

@@ -911,11 +926,17 @@ def add_vcs_arguments(parser, project, name):
911926
f"--{project}-sparse-profile",
912927
help=f"Path to sparse profile for {name} checkout",
913928
)
929+
parser.add_argument(
930+
f"--{project}-shallow-clone",
931+
action="store_true",
932+
help=f"Use shallow clone for {name}",
933+
)
914934

915935

916936
def collect_vcs_options(args, project, name):
917937
checkout = getattr(args, f"{project}_checkout")
918938
sparse_profile = getattr(args, f"{project}_sparse_profile")
939+
shallow_clone = getattr(args, f"{project}_shallow_clone")
919940

920941
env_prefix = project.upper()
921942

@@ -960,6 +981,7 @@ def collect_vcs_options(args, project, name):
960981
"repo-type": repo_type,
961982
"ssh-secret-name": private_key_secret,
962983
"pip-requirements": pip_requirements,
984+
"shallow-clone": shallow_clone,
963985
}
964986

965987

@@ -1008,6 +1030,7 @@ def vcs_checkout_from_args(options):
10081030
head_rev,
10091031
ssh_key_file,
10101032
ssh_known_hosts_file,
1033+
shallow=options.get("shallow-clone", False),
10111034
)
10121035
elif options["repo-type"] == "hg":
10131036
revision = hg_checkout(

test/test_scripts_run_task.py

Lines changed: 124 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,10 @@ def test_install_pip_requirements_with_uv(
152152

153153

154154
@pytest.mark.parametrize(
155-
"env,extra_expected",
155+
"args,env,extra_expected",
156156
[
157157
pytest.param(
158+
{},
158159
{
159160
"REPOSITORY_TYPE": "hg",
160161
"BASE_REPOSITORY": "https://hg.mozilla.org/mozilla-central",
@@ -165,20 +166,38 @@ def test_install_pip_requirements_with_uv(
165166
{
166167
"base-repo": "https://hg.mozilla.org/mozilla-unified",
167168
},
168-
)
169+
id="hg",
170+
),
171+
pytest.param(
172+
{"myrepo_shallow_clone": True},
173+
{
174+
"REPOSITORY_TYPE": "git",
175+
"HEAD_REPOSITORY": "https://github.com/test/repo.git",
176+
"HEAD_REV": "abc123",
177+
},
178+
{"shallow-clone": True},
179+
id="git_with_shallow_clone",
180+
),
169181
],
170182
)
171-
def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected):
183+
def test_collect_vcs_options(
184+
monkeypatch,
185+
run_task_mod,
186+
args,
187+
env,
188+
extra_expected,
189+
):
172190
name = "myrepo"
173191
checkout = "checkout"
174192

175193
monkeypatch.setattr(os, "environ", {})
176194
for k, v in env.items():
177195
monkeypatch.setenv(f"{name.upper()}_{k.upper()}", v)
178196

179-
args = Namespace()
180-
setattr(args, f"{name}_checkout", checkout)
181-
setattr(args, f"{name}_sparse_profile", False)
197+
args.setdefault(f"{name}_checkout", checkout)
198+
args.setdefault(f"{name}_shallow_clone", False)
199+
args.setdefault(f"{name}_sparse_profile", False)
200+
args = Namespace(**args)
182201

183202
result = run_task_mod.collect_vcs_options(args, name, name)
184203

@@ -194,6 +213,7 @@ def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected):
194213
"head-ref": env.get("HEAD_REF"),
195214
"head-rev": env.get("HEAD_REV"),
196215
"repo-type": env.get("REPOSITORY_TYPE"),
216+
"shallow-clone": False,
197217
"ssh-secret-name": env.get("SSH_SECRET_NAME"),
198218
"sparse-profile": False,
199219
"store-path": env.get("HG_STORE_PATH"),
@@ -456,6 +476,104 @@ def test_git_checkout_with_commit(
456476
assert current_rev == head_rev
457477

458478

479+
def test_git_checkout_shallow(
480+
mock_stdin,
481+
run_task_mod,
482+
mock_git_repo,
483+
tmp_path,
484+
):
485+
destination = tmp_path / "destination"
486+
487+
# Git ignores `--depth` when cloning from local directories, so use file://
488+
# protocol to force a shallow clone.
489+
repo_url = f"file://{mock_git_repo['path']}"
490+
base_rev = mock_git_repo["main"][-1]
491+
head_rev = mock_git_repo["branch"][-1]
492+
493+
# Use shallow clone with head_ref != head_rev
494+
run_task_mod.git_checkout(
495+
destination_path=str(destination),
496+
head_repo=repo_url,
497+
base_repo=repo_url,
498+
base_rev=base_rev,
499+
head_ref="mybranch",
500+
head_rev=head_rev,
501+
ssh_key_file=None,
502+
ssh_known_hosts_file=None,
503+
shallow=True,
504+
)
505+
shallow_file = destination / ".git" / "shallow"
506+
assert shallow_file.exists()
507+
508+
# Verify we're on the correct commit
509+
final_rev = subprocess.check_output(
510+
["git", "rev-parse", "HEAD"],
511+
cwd=str(destination),
512+
universal_newlines=True,
513+
).strip()
514+
assert final_rev == head_rev
515+
516+
# Verify both base_rev and head_rev are available.
517+
for sha in (base_rev, head_rev):
518+
result = subprocess.run(
519+
["git", "cat-file", "-t", sha],
520+
cwd=str(destination),
521+
capture_output=True,
522+
text=True,
523+
)
524+
assert result.returncode == 0, f"Commit {sha} should be available"
525+
assert result.stdout.strip() == "commit"
526+
527+
528+
def test_git_fetch_shallow(
529+
mock_stdin,
530+
run_task_mod,
531+
mock_git_repo,
532+
tmp_path,
533+
):
534+
destination = tmp_path / "destination"
535+
536+
# Git ignores `--depth` when cloning from local directories, so use file://
537+
# protocol to force a shallow clone.
538+
repo_url = f"file://{mock_git_repo['path']}"
539+
540+
run_task_mod.run_command(
541+
b"vcs",
542+
[
543+
"git",
544+
"clone",
545+
"--depth=1",
546+
"--no-checkout",
547+
repo_url,
548+
str(destination),
549+
],
550+
)
551+
shallow_file = destination / ".git" / "shallow"
552+
assert shallow_file.exists()
553+
554+
# Verify base_rev doesn't exist yet
555+
base_rev = mock_git_repo["branch"][-1]
556+
result = subprocess.run(
557+
["git", "cat-file", "-t", base_rev],
558+
cwd=str(destination),
559+
capture_output=True,
560+
text=True,
561+
)
562+
assert result.returncode != 0
563+
564+
run_task_mod.git_fetch(str(destination), base_rev, remote=repo_url, shallow=True)
565+
566+
# Verify base_rev is now available
567+
result = subprocess.run(
568+
["git", "cat-file", "-t", base_rev],
569+
cwd=str(destination),
570+
capture_output=True,
571+
text=True,
572+
)
573+
assert result.returncode == 0
574+
assert result.stdout.strip() == "commit"
575+
576+
459577
def test_display_python_version_should_output_python_versions_title(
460578
run_task_mod, capsys
461579
):

0 commit comments

Comments
 (0)