Skip to content

Commit 23702d0

Browse files
GitGetter: Re-allow ref=COMMIT_ID
Earlier versions of this code allowed "ref" to take any value that would be accepted by "git checkout" as a valid target of a symbolic ref. We inadvertently made a breaking change that broke that as part of introducing a shallow clone optimization, because shallow clone requires selecting a single branch. To restore the previous capabilities while retaining the "depth" argument, here we accept a compromise where "ref" has the stronger requirement of being a valid named ref in the remote repository if and only if "depth" is set to a value greater than zero. If depth isn't set or is less than one, we will do the old behavior of just cloning all of the refs in the remote repository in full and then switching to refer to the selected branch, tag, or naked commit ID as a separate step. This includes a heuristic to generate an additional error message hint if we get an error from "git clone" and it looks like the user might've been trying to use "depth" and "ref=COMMIT" together. We can't recognize that error accurately because it's only reported as human-oriented git command output, but this heuristic should hopefully minimize situations where we show it inappropriately.
1 parent 64f1c03 commit 23702d0

File tree

2 files changed

+139
-3
lines changed

2 files changed

+139
-3
lines changed

get_git.go

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ func (g *GitGetter) Get(dst string, u *url.URL) error {
5353

5454
// Extract some query parameters we use
5555
var ref, sshKey string
56-
var depth int
56+
depth := 0 // 0 means "don't use shallow clone"
5757
q := u.Query()
5858
if len(q) > 0 {
5959
ref = q.Get("ref")
@@ -167,20 +167,53 @@ func (g *GitGetter) checkout(dst string, ref string) error {
167167
return getRunCommand(cmd)
168168
}
169169

170+
// gitCommitIDRegex is a pattern intended to match strings that seem
171+
// "likely to be" git commit IDs, rather than named refs. This cannot be
172+
// an exact decision because it's valid to name a branch or tag after a series
173+
// of hexadecimal digits too.
174+
//
175+
// We require at least 7 digits here because that's the smallest size git
176+
// itself will typically generate, and so it'll reduce the risk of false
177+
// positives on short branch names that happen to also be "hex words".
178+
var gitCommitIDRegex = regexp.MustCompile("^[0-9a-fA-F]{7,40}$")
179+
170180
func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error {
171181
args := []string{"clone"}
172182

183+
originalRef := ref // we handle an unspecified ref differently than explicitly selecting the default branch below
173184
if ref == "" {
174185
ref = findRemoteDefaultBranch(u)
175186
}
176187
if depth > 0 {
177188
args = append(args, "--depth", strconv.Itoa(depth))
189+
args = append(args, "--branch", ref)
178190
}
191+
args = append(args, u.String(), dst)
179192

180-
args = append(args, "--branch", ref, u.String(), dst)
181193
cmd := exec.CommandContext(ctx, "git", args...)
182194
setupGitEnv(cmd, sshKeyFile)
183-
return getRunCommand(cmd)
195+
err := getRunCommand(cmd)
196+
if err != nil {
197+
if depth > 0 && originalRef != "" {
198+
// If we're creating a shallow clone then the given ref must be
199+
// a named ref (branch or tag) rather than a commit directly.
200+
// We can't accurately recognize the resulting error here without
201+
// hard-coding assumptions about git's human-readable output, but
202+
// we can at least try a heuristic.
203+
if gitCommitIDRegex.MatchString(originalRef) {
204+
return fmt.Errorf("%w (note that setting 'depth' requires 'ref' to be a branch or tag name)", err)
205+
}
206+
}
207+
return err
208+
}
209+
210+
if depth < 1 && originalRef != "" {
211+
// If we didn't add --depth and --branch above then we will now be
212+
// on the remote repository's default branch, rather than the selected
213+
// ref, so we'll need to fix that before we return.
214+
return g.checkout(dst, originalRef)
215+
}
216+
return nil
184217
}
185218

186219
func (g *GitGetter) update(ctx context.Context, dst, sshKeyFile, ref string, depth int) error {

get_git_test.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,58 @@ func TestGitGetter_branch(t *testing.T) {
8888
}
8989
}
9090

91+
func TestGitGetter_commitID(t *testing.T) {
92+
if !testHasGit {
93+
t.Skip("git not found, skipping")
94+
}
95+
96+
g := new(GitGetter)
97+
dst := tempDir(t)
98+
99+
// We're going to create different content on the main branch vs.
100+
// another branch here, so that below we can recognize if we
101+
// correctly cloned the commit actually requested (from the
102+
// "other branch"), not the one at HEAD.
103+
repo := testGitRepo(t, "commit_id")
104+
repo.git("checkout", "-b", "main-branch")
105+
repo.commitFile("wrong.txt", "Nope")
106+
repo.git("checkout", "-b", "other-branch")
107+
repo.commitFile("hello.txt", "Yep")
108+
commitID, err := repo.latestCommit()
109+
if err != nil {
110+
t.Fatal(err)
111+
}
112+
// Return to the main branch so that HEAD of this repository
113+
// will be that, rather than "test-branch".
114+
repo.git("checkout", "main-branch")
115+
116+
q := repo.url.Query()
117+
q.Add("ref", commitID)
118+
repo.url.RawQuery = q.Encode()
119+
120+
t.Logf("Getting %s", repo.url)
121+
if err := g.Get(dst, repo.url); err != nil {
122+
t.Fatalf("err: %s", err)
123+
}
124+
125+
// Verify the main file exists
126+
mainPath := filepath.Join(dst, "hello.txt")
127+
if _, err := os.Stat(mainPath); err != nil {
128+
t.Fatalf("err: %s", err)
129+
}
130+
131+
// Get again should work
132+
if err := g.Get(dst, repo.url); err != nil {
133+
t.Fatalf("err: %s", err)
134+
}
135+
136+
// Verify the main file exists
137+
mainPath = filepath.Join(dst, "hello.txt")
138+
if _, err := os.Stat(mainPath); err != nil {
139+
t.Fatalf("err: %s", err)
140+
}
141+
}
142+
91143
func TestGitGetter_remoteWithoutMaster(t *testing.T) {
92144
if !testHasGit {
93145
t.Log("git not found, skipping")
@@ -212,6 +264,44 @@ func TestGitGetter_shallowCloneWithTag(t *testing.T) {
212264
}
213265
}
214266

267+
func TestGitGetter_shallowCloneWithCommitID(t *testing.T) {
268+
if !testHasGit {
269+
t.Log("git not found, skipping")
270+
t.Skip()
271+
}
272+
273+
g := new(GitGetter)
274+
dst := tempDir(t)
275+
276+
repo := testGitRepo(t, "upstream")
277+
repo.commitFile("v1.0.txt", "0")
278+
repo.git("tag", "v1.0")
279+
repo.commitFile("v1.1.txt", "1")
280+
281+
commitID, err := repo.latestCommit()
282+
if err != nil {
283+
t.Fatal(err)
284+
}
285+
286+
// Specify a clone depth of 1 with a naked commit ID
287+
// This is intentionally invalid: shallow clone always requires a named ref.
288+
q := repo.url.Query()
289+
q.Add("ref", commitID[:8])
290+
q.Add("depth", "1")
291+
repo.url.RawQuery = q.Encode()
292+
293+
t.Logf("Getting %s", repo.url)
294+
err = g.Get(dst, repo.url)
295+
if err == nil {
296+
t.Fatalf("success; want error")
297+
}
298+
// We use a heuristic to generate an extra hint in the error message if
299+
// it looks like the user was trying to combine ref=COMMIT with depth.
300+
if got, want := err.Error(), "(note that setting 'depth' requires 'ref' to be a branch or tag name)"; !strings.Contains(got, want) {
301+
t.Errorf("missing error message hint\ngot: %s\nwant substring: %s", got, want)
302+
}
303+
}
304+
215305
func TestGitGetter_branchUpdate(t *testing.T) {
216306
if !testHasGit {
217307
t.Skip("git not found, skipping")
@@ -661,6 +751,19 @@ func (r *gitRepo) commitFile(file, content string) {
661751
r.git("commit", "-m", "Adding "+file)
662752
}
663753

754+
// latestCommit returns the full commit id of the latest commit on the current
755+
// branch.
756+
func (r *gitRepo) latestCommit() (string, error) {
757+
cmd := exec.Command("git", "rev-parse", "HEAD")
758+
cmd.Dir = r.dir
759+
rawOut, err := cmd.Output()
760+
if err != nil {
761+
return "", err
762+
}
763+
rawOut = bytes.TrimSpace(rawOut)
764+
return string(rawOut), nil
765+
}
766+
664767
// This is a read-only deploy key for an empty test repository.
665768
// Note: This is split over multiple lines to avoid being disabled by key
666769
// scanners automatically.

0 commit comments

Comments
 (0)