@@ -28,15 +28,13 @@ import os
2828import platform
2929import re
3030import shutil
31- import signal
3231import socket
3332import stat
3433import subprocess
3534import time
3635import urllib .error
3736import urllib .request
3837from pathlib import Path
39- from threading import Thread
4038from typing import Optional
4139
4240SECRET_BASEURL_TPL = "{}/secrets/v1/secret/{{}}" .format (os .environ .get ("TASKCLUSTER_PROXY_URL" , "http://taskcluster" ).rstrip ('/' ))
@@ -600,6 +598,8 @@ def git_checkout(
600598 commit : Optional [str ],
601599 ssh_key_file : Optional [Path ],
602600 ssh_known_hosts_file : Optional [Path ],
601+ efficient_clone : bool = False ,
602+ sparse_dirs : Optional [str ] = None ,
603603):
604604 env = {
605605 # abort if transfer speed is lower than 1kB/s for 1 minute
@@ -636,22 +636,43 @@ def git_checkout(
636636 args = [
637637 "git" ,
638638 "clone" ,
639+ ]
640+
641+ if efficient_clone :
642+ # Use blobless clone for faster initial clone
643+ # This fetches commit and tree objects but not file contents
644+ args .extend (["--filter=blob:none" ])
645+ # Use shallow clone with depth 1 for minimal history
646+ args .extend (["--depth=1" ])
647+ # Skip checkout initially, we'll do sparse checkout later
648+ args .extend (["--no-checkout" ])
649+ elif sparse_dirs :
650+ # For sparse checkout without efficient clone, still skip initial checkout
651+ # so we can set up sparse checkout before checking out files
652+ args .extend (["--no-checkout" ])
653+
654+ args .extend ([
639655 base_repo if base_repo else head_repo ,
640656 destination_path ,
641- ]
657+ ])
642658
643659 retry_required_command (b"vcs" , args , extra_env = env )
644660
645661 if base_ref :
646- args = ["git" , "fetch" , "origin" , base_ref ]
662+ args = ["git" , "fetch" ]
663+ if efficient_clone :
664+ # For shallow clones, we need to deepen to fetch more history
665+ args .extend (["--depth=100" ])
666+ args .extend (["origin" , base_ref ])
647667
648668 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
649669
650670 # Create local branch so that taskgraph is able to compute differences
651671 # between the head branch and the base one, if needed
652- args = ["git" , "checkout" , base_ref ]
653-
654- retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
672+ if not efficient_clone and not sparse_dirs :
673+ # Only checkout if we didn't use --no-checkout initially
674+ args = ["git" , "checkout" , base_ref ]
675+ retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
655676
656677 # When commits are force-pushed (like on a testing branch), base_rev doesn't
657678 # exist on base_ref. Fetching it allows taskgraph to compute differences
@@ -660,7 +681,11 @@ def git_checkout(
660681 # Unlike base_ref just above, there is no need to checkout the revision:
661682 # it's immediately available after the fetch.
662683 if base_rev and base_rev != NULL_REVISION :
663- args = ["git" , "fetch" , "origin" , base_rev ]
684+ args = ["git" , "fetch" ]
685+ if efficient_clone :
686+ # For shallow clones, we need to deepen to fetch more history
687+ args .extend (["--depth=100" ])
688+ args .extend (["origin" , base_rev ])
664689
665690 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
666691
@@ -671,28 +696,44 @@ def git_checkout(
671696 # in not having a tag, or worse: having an outdated version of one.
672697 # `--force` is needed to be able to update an existing tag.
673698 if ref and base_repo == head_repo :
674- args = [
675- "git" ,
676- "fetch" ,
677- "--tags" ,
678- "--force" ,
679- base_repo ,
680- ref ,
681- ]
699+ args = ["git" , "fetch" ]
700+ if efficient_clone :
701+ # For shallow clones, we need to deepen to fetch more history
702+ args .extend (["--depth=100" ])
703+ args .extend (["--tags" , "--force" , base_repo , ref ])
682704
683705 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
684706
685707 # If a ref isn't provided, we fetch all refs from head_repo, which may be slow
686- args = [
687- "git" ,
688- "fetch" ,
689- "--no-tags" ,
690- head_repo ,
691- ref if ref else "+refs/heads/*:refs/remotes/work/*" ,
692- ]
708+ args = ["git" , "fetch" ]
709+ if efficient_clone :
710+ # For shallow clones, we need to deepen to fetch more history
711+ args . extend ([ "--depth=100" ])
712+ # With blobless clones, we only fetch the blobs we need
713+ args . extend ([ "--filter=blob:none" ])
714+ args . extend ([ "--no-tags" , head_repo , ref if ref else "+refs/heads/*:refs/remotes/work/*" ])
693715
694716 retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
695717
718+ if sparse_dirs :
719+ # When sparse directories/files are specified, set up sparse checkout
720+ # The sparse_dirs should be a colon-separated list of directories or files
721+ #
722+ # Note: Git's sparse-checkout behavior in cone mode (default since Git 2.37):
723+ # - Root-level files: always checked out in cone mode, whether or not they are listed
724+ # - Files in subdirectories: Entire parent directory is included
725+ # - Directories: All contents included
726+
727+ # Enable sparse checkout (cone mode is default since Git 2.37)
728+ args = ["git" , "sparse-checkout" , "init" ]
729+ run_required_command (b"vcs" , args , cwd = destination_path )
730+
731+ # Set the sparse entries
732+ entries = sparse_dirs .split (":" )
733+ args = ["git" , "sparse-checkout" , "set" ] + entries
734+ run_required_command (b"vcs" , args , cwd = destination_path )
735+
736+ # Now do the actual checkout
696737 args = [
697738 "git" ,
698739 "checkout" ,
@@ -879,11 +920,17 @@ def add_vcs_arguments(parser, project, name):
879920 "--%s-sparse-profile" % project ,
880921 help = "Path to sparse profile for %s checkout" % name ,
881922 )
923+ parser .add_argument (
924+ "--%s-efficient-clone" % project ,
925+ action = "store_true" ,
926+ help = "Use efficient cloning strategies (blobless, shallow, no-checkout) for %s" % name ,
927+ )
882928
883929
884930def collect_vcs_options (args , project , name ):
885931 checkout = getattr (args , "%s_checkout" % project )
886932 sparse_profile = getattr (args , "%s_sparse_profile" % project )
933+ efficient_clone = getattr (args , "%s_efficient_clone" % project )
887934
888935 env_prefix = project .upper ()
889936
@@ -896,6 +943,7 @@ def collect_vcs_options(args, project, name):
896943 ref = os .environ .get ("%s_HEAD_REF" % env_prefix )
897944 pip_requirements = os .environ .get ("%s_PIP_REQUIREMENTS" % env_prefix )
898945 private_key_secret = os .environ .get ("%s_SSH_SECRET_NAME" % env_prefix )
946+ sparse_dirs = os .environ .get ("%s_SPARSE_DIRS" % env_prefix )
899947
900948 store_path = os .environ .get ("HG_STORE_PATH" )
901949
@@ -930,6 +978,8 @@ def collect_vcs_options(args, project, name):
930978 "repo-type" : repo_type ,
931979 "ssh-secret-name" : private_key_secret ,
932980 "pip-requirements" : pip_requirements ,
981+ "efficient-clone" : efficient_clone ,
982+ "sparse-dirs" : sparse_dirs ,
933983 }
934984
935985
@@ -978,6 +1028,8 @@ def vcs_checkout_from_args(options):
9781028 revision ,
9791029 ssh_key_file ,
9801030 ssh_known_hosts_file ,
1031+ options .get ("efficient-clone" , False ),
1032+ options .get ("sparse-dirs" ),
9811033 )
9821034 elif options ["repo-type" ] == "hg" :
9831035 if not revision and not ref :
0 commit comments