Skip to content

Commit c22b28e

Browse files
authored
fix: update() now respects depth, filter, and sparse checkout; short hashes are searched efficiently (#8)
* fix: Sparse checkout on existing dir
* fix: Avoid pulling all tags and reachable objects
* fix: Please stop giving me 11k tags
* fix: Add --no-tags to more code paths
* improve short hash matching
1 parent f261362 commit c22b28e

File tree

2 files changed

+153
-25
lines changed

2 files changed

+153
-25
lines changed

get_git.go

Lines changed: 84 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ func (g *GitGetter) Get(ctx context.Context, dst string, u *url.URL) error {
121121
return err
122122
}
123123
if err == nil {
124-
err = g.update(ctx, dst, sshKeyFile, u, ref, depth)
124+
err = g.update(ctx, dst, sshKeyFile, u, ref, depth, subdir)
125125
} else {
126126
err = g.clone(ctx, dst, sshKeyFile, u, ref, depth, subdir)
127127
}
@@ -204,6 +204,7 @@ func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.UR
204204
args = append(args, "--filter=blob:none")
205205
args = append(args, "--sparse")
206206
args = append(args, "--no-checkout")
207+
args = append(args, "--no-tags")
207208
}
208209

209210
args = append(args, "--", u.String(), dst)
@@ -212,6 +213,7 @@ func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.UR
212213
setupGitEnv(cmd, sshKeyFile)
213214
err := getRunCommand(cmd)
214215
if err != nil {
216+
_ = os.RemoveAll(dst)
215217
if depth > 0 && originalRef != "" {
216218
// If we're creating a shallow clone then the given ref must be
217219
// a named ref (branch or tag) rather than a commit directly.
@@ -230,31 +232,61 @@ func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.UR
230232
cmd.Dir = dst
231233
err = getRunCommand(cmd)
232234
if err != nil {
235+
_ = os.RemoveAll(dst)
233236
return err
234237
}
235238

236239
// If the commit is a long commit sha then we can fetch it
237240
if isCommitID && len(ref) == 40 {
238-
cmd = exec.CommandContext(ctx, "git", "fetch", "origin", ref, "--depth", "1")
241+
cmd = exec.CommandContext(ctx, "git", "fetch", "origin", ref, "--depth", "1", "--no-tags")
239242
cmd.Dir = dst
240243
err = getRunCommand(cmd)
241244
if err != nil {
245+
_ = os.RemoveAll(dst)
242246
return err
243247
}
244248
}
245249

246-
// If the commit is a short commit sha then we will need to fetch the full history to find the commit
247-
// since we can't fetch a commit by short sha
250+
// If the commit is a short commit sha then we will need to fetch the
251+
// commit graph to resolve it to a full hash. We use --filter=tree:0
252+
// to fetch only commit objects (no trees or blobs), which is much
253+
// smaller than --filter=blob:none. Once resolved, we fetch just that
254+
// single commit with its trees via sparse checkout.
248255
if isCommitID && len(ref) < 40 {
249-
cmd = exec.CommandContext(ctx, "git", "fetch", "--unshallow", "--filter=blob:none")
256+
cmd = exec.CommandContext(ctx, "git", "fetch", "--unshallow", "--filter=tree:0", "--no-tags")
250257
cmd.Dir = dst
251258
err = getRunCommand(cmd)
252259
if err != nil {
260+
_ = os.RemoveAll(dst)
261+
return err
262+
}
263+
264+
// Resolve the short hash to a full hash
265+
cmd = exec.CommandContext(ctx, "git", "rev-parse", "--verify", ref)
266+
cmd.Dir = dst
267+
out, err := cmd.Output()
268+
if err != nil {
269+
_ = os.RemoveAll(dst)
253270
return err
254271
}
272+
fullRef := strings.TrimSpace(string(out))
273+
274+
// Now fetch just that commit with depth 1 to get trees/blobs
275+
// for the sparse checkout
276+
cmd = exec.CommandContext(ctx, "git", "fetch", "origin", fullRef, "--depth", "1", "--no-tags")
277+
cmd.Dir = dst
278+
if err := getRunCommand(cmd); err != nil {
279+
_ = os.RemoveAll(dst)
280+
return err
281+
}
282+
ref = fullRef
255283
}
256284

257-
return g.checkout(ctx, dst, ref)
285+
if err := g.checkout(ctx, dst, ref); err != nil {
286+
_ = os.RemoveAll(dst)
287+
return err
288+
}
289+
return nil
258290
}
259291

260292
if depth < 1 && originalRef != "" {
@@ -271,7 +303,7 @@ func (g *GitGetter) clone(ctx context.Context, dst, sshKeyFile string, u *url.UR
271303
return nil
272304
}
273305

274-
func (g *GitGetter) update(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int) error {
306+
func (g *GitGetter) update(ctx context.Context, dst, sshKeyFile string, u *url.URL, ref string, depth int, subdir string) error {
275307
// Remove all variations of .git directories
276308
err := removeCaseInsensitiveGitDirectory(dst)
277309
if err != nil {
@@ -294,16 +326,30 @@ func (g *GitGetter) update(ctx context.Context, dst, sshKeyFile string, u *url.U
294326
return err
295327
}
296328

297-
// Fetch the remote ref
298-
cmd = exec.CommandContext(ctx, "git", "fetch", "--tags")
299-
cmd.Dir = dst
300-
err = getRunCommand(cmd)
301-
if err != nil {
302-
return err
329+
// Fetch all tags so that tag-based refs can be resolved during checkout.
330+
// Skip this when depth > 0 because --tags fetches every tag reference
331+
// (e.g. 11k+ tags in large repos) regardless of --depth, and we already
332+
// fetch the specific ref we need below.
333+
if depth <= 0 {
334+
cmd = exec.CommandContext(ctx, "git", "fetch", "--tags")
335+
cmd.Dir = dst
336+
err = getRunCommand(cmd)
337+
if err != nil {
338+
return err
339+
}
303340
}
304341

305342
// Fetch the remote ref
306-
cmd = exec.CommandContext(ctx, "git", "fetch", "origin", "--", ref)
343+
fetchArgs := []string{"fetch", "origin"}
344+
if depth > 0 {
345+
fetchArgs = append(fetchArgs, "--depth", strconv.Itoa(depth))
346+
}
347+
if subdir != "" {
348+
fetchArgs = append(fetchArgs, "--filter=blob:none")
349+
fetchArgs = append(fetchArgs, "--no-tags")
350+
}
351+
fetchArgs = append(fetchArgs, "--", ref)
352+
cmd = exec.CommandContext(ctx, "git", fetchArgs...)
307353
cmd.Dir = dst
308354
err = getRunCommand(cmd)
309355
if err != nil {
@@ -318,22 +364,38 @@ func (g *GitGetter) update(ctx context.Context, dst, sshKeyFile string, u *url.U
318364
return err
319365
}
320366

367+
// Set up sparse checkout if subdir is specified
368+
if subdir != "" {
369+
cmd = exec.CommandContext(ctx, "git", "sparse-checkout", "set", subdir)
370+
cmd.Dir = dst
371+
if err := getRunCommand(cmd); err != nil {
372+
return err
373+
}
374+
}
375+
321376
// Checkout ref branch
322377
err = g.checkout(ctx, dst, ref)
323378
if err != nil {
324379
return err
325380
}
326381

327-
// Pull the latest changes from the ref branch
328-
if depth > 0 {
329-
cmd = exec.CommandContext(ctx, "git", "pull", "origin", "--depth", strconv.Itoa(depth), "--ff-only", "--", ref)
330-
} else {
331-
cmd = exec.CommandContext(ctx, "git", "pull", "origin", "--ff-only", "--", ref)
382+
// Pull the latest changes from the ref branch.
383+
// Skip this when subdir is set because we've already fetched the exact
384+
// ref we need above, and pull would re-fetch without --no-tags/--filter,
385+
// defeating our sparse/shallow optimisations.
386+
if subdir == "" {
387+
if depth > 0 {
388+
cmd = exec.CommandContext(ctx, "git", "pull", "origin", "--depth", strconv.Itoa(depth), "--ff-only", "--", ref)
389+
} else {
390+
cmd = exec.CommandContext(ctx, "git", "pull", "origin", "--ff-only", "--", ref)
391+
}
392+
393+
cmd.Dir = dst
394+
setupGitEnv(cmd, sshKeyFile)
395+
return getRunCommand(cmd)
332396
}
333397

334-
cmd.Dir = dst
335-
setupGitEnv(cmd, sshKeyFile)
336-
return getRunCommand(cmd)
398+
return nil
337399
}
338400

339401
// fetchSubmodules downloads any configured submodules recursively.

get_git_test.go

Lines changed: 69 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -946,7 +946,7 @@ func TestGitGetter_BadGitConfig(t *testing.T) {
946946
if err == nil {
947947
// Update the repository containing the bad git config.
948948
// This should remove the bad git config file and initialize a new one.
949-
err = g.update(ctx, dst, testGitToken, url, "main", 1)
949+
err = g.update(ctx, dst, testGitToken, url, "main", 1, "")
950950
} else {
951951
// Clone a repository with a git config file
952952
err = g.clone(ctx, dst, testGitToken, url, "main", 1, "")
@@ -963,7 +963,7 @@ func TestGitGetter_BadGitConfig(t *testing.T) {
963963

964964
// Update the repository containing the bad git config.
965965
// This should remove the bad git config file and initialize a new one.
966-
err = g.update(ctx, dst, testGitToken, url, "main", 1)
966+
err = g.update(ctx, dst, testGitToken, url, "main", 1, "")
967967
}
968968
if err != nil {
969969
t.Fatal(err.Error())
@@ -1064,7 +1064,7 @@ func TestGitGetter_BadRef(t *testing.T) {
10641064
// Clone a repository with non-existent ref
10651065
err = g.clone(ctx, dst, "", url, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, "")
10661066
if err == nil {
1067-
t.Fatal(err.Error())
1067+
t.Fatal("expected clone to fail with bad ref")
10681068
}
10691069

10701070
// Expect that the dst was cleaned up after failed ref checkout
@@ -1184,6 +1184,72 @@ func TestGitGetter_sparseCheckoutWithShortCommitID(t *testing.T) {
11841184
}
11851185
}
11861186

1187+
func TestGitGetter_updateSparseCheckout(t *testing.T) {
1188+
if !testHasGit {
1189+
t.Skip("git not found, skipping")
1190+
}
1191+
1192+
g := new(GitGetter)
1193+
dst := filepath.Join(t.TempDir(), "target")
1194+
1195+
repo := testGitRepo(t, "update-sparse")
1196+
repo.git("checkout", "-b", "main")
1197+
repo.commitFile("subdir1/file1.txt", "hello")
1198+
repo.commitFile("subdir2/file2.txt", "world")
1199+
1200+
q := repo.url.Query()
1201+
q.Add("ref", "main")
1202+
q.Add("subdir", "subdir1")
1203+
repo.url.RawQuery = q.Encode()
1204+
1205+
// First Get: triggers clone() path with sparse checkout
1206+
if err := g.Get(context.Background(), dst, repo.url); err != nil {
1207+
t.Fatalf("first get (clone) err: %s", err)
1208+
}
1209+
1210+
// Verify sparse checkout worked on clone
1211+
if _, err := os.Stat(filepath.Join(dst, "subdir1/file1.txt")); err != nil {
1212+
t.Fatalf("subdir1/file1.txt should exist after clone: %s", err)
1213+
}
1214+
if _, err := os.Stat(filepath.Join(dst, "subdir2/file2.txt")); err == nil {
1215+
t.Fatalf("subdir2/file2.txt should not exist after clone")
1216+
}
1217+
1218+
// Add a new file in the tracked subdir
1219+
repo.commitFile("subdir1/file1-update.txt", "updated")
1220+
// Also add a file in the untracked subdir
1221+
repo.commitFile("subdir2/file2-update.txt", "also updated")
1222+
1223+
// Second Get: dst exists, triggers update() path
1224+
if err := g.Get(context.Background(), dst, repo.url); err != nil {
1225+
t.Fatalf("second get (update) err: %s", err)
1226+
}
1227+
1228+
// Verify the updated file in subdir1 exists
1229+
if _, err := os.Stat(filepath.Join(dst, "subdir1/file1-update.txt")); err != nil {
1230+
t.Fatalf("subdir1/file1-update.txt should exist after update: %s", err)
1231+
}
1232+
1233+
// Verify files in subdir2 still do not exist (sparse checkout in update)
1234+
if _, err := os.Stat(filepath.Join(dst, "subdir2/file2.txt")); err == nil {
1235+
t.Fatalf("subdir2/file2.txt should not exist after update")
1236+
}
1237+
if _, err := os.Stat(filepath.Join(dst, "subdir2/file2-update.txt")); err == nil {
1238+
t.Fatalf("subdir2/file2-update.txt should not exist after update")
1239+
}
1240+
1241+
// Verify the repo is shallow (depth=1 is set automatically when subdir is specified)
1242+
cmd := exec.Command("git", "rev-list", "HEAD", "--count")
1243+
cmd.Dir = dst
1244+
b, err := cmd.Output()
1245+
if err != nil {
1246+
t.Fatalf("rev-list err: %s", err)
1247+
}
1248+
if count := strings.TrimSpace(string(b)); count != "1" {
1249+
t.Fatalf("expected shallow clone with 1 commit after update, got %s", count)
1250+
}
1251+
}
1252+
11871253
// gitRepo is a helper struct which controls a single temp git repo.
11881254
type gitRepo struct {
11891255
t *testing.T

0 commit comments

Comments
 (0)