Skip to content

Commit 7cf577a

Browse files
authored
Merge pull request #6238 from jsternberg/exclude-pattern-too-broad-cache-key
contenthash: change how checksum is calculated with wildcards and patterns
2 parents 31c7091 + 9403302 commit 7cf577a

File tree

2 files changed

+78
-28
lines changed

2 files changed

+78
-28
lines changed

cache/contenthash/checksum.go

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable,
427427
return cc.lazyChecksum(ctx, m, p, opts.FollowLinks)
428428
}
429429

430-
includedPaths, err := cc.includedPaths(ctx, m, p, opts)
430+
prefix, includedPaths, err := cc.includedPaths(ctx, m, p, opts)
431431
if err != nil {
432432
return "", err
433433
}
@@ -451,17 +451,19 @@ func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable,
451451
}
452452

453453
h := cachedigest.NewHash(cachedigest.TypeFileList)
454-
for i, w := range includedPaths {
455-
if i != 0 {
456-
h.Write([]byte{0})
454+
for _, w := range includedPaths {
455+
path := strings.TrimPrefix(w.path, prefix)
456+
k := convertPathToKey(path)
457+
if len(k) == 0 {
458+
k = []byte{0}
457459
}
458-
h.Write([]byte(path.Base(w.path)))
460+
h.Write(k)
459461
h.Write([]byte(w.record.Digest))
460462
}
461463
return h.Sum(), nil
462464
}
463465

464-
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) ([]*includedPath, error) {
466+
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) (string, []*includedPath, error) {
465467
cc.mu.Lock()
466468
defer cc.mu.Unlock()
467469

@@ -472,11 +474,11 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
472474
root := cc.tree.Root()
473475
scan, err := cc.needsScan(root, "", false)
474476
if err != nil {
475-
return nil, err
477+
return "", nil, err
476478
}
477479
if scan {
478480
if err := cc.scanPath(ctx, m, "", false); err != nil {
479-
return nil, err
481+
return "", nil, err
480482
}
481483
}
482484

@@ -494,15 +496,15 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
494496
if len(opts.IncludePatterns) != 0 {
495497
includePatternMatcher, err = patternmatcher.New(opts.IncludePatterns)
496498
if err != nil {
497-
return nil, errors.Wrapf(err, "invalid includepatterns: %s", opts.IncludePatterns)
499+
return "", nil, errors.Wrapf(err, "invalid includepatterns: %s", opts.IncludePatterns)
498500
}
499501
}
500502

501503
var excludePatternMatcher *patternmatcher.PatternMatcher
502504
if len(opts.ExcludePatterns) != 0 {
503505
excludePatternMatcher, err = patternmatcher.New(opts.ExcludePatterns)
504506
if err != nil {
505-
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", opts.ExcludePatterns)
507+
return "", nil, errors.Wrapf(err, "invalid excludepatterns: %s", opts.ExcludePatterns)
506508
}
507509
}
508510

@@ -524,7 +526,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
524526
if opts.Wildcard {
525527
origPrefix, k, keyOk, err = wildcardPrefix(root, p)
526528
if err != nil {
527-
return nil, err
529+
return "", nil, err
528530
}
529531
} else {
530532
origPrefix = p
@@ -536,7 +538,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
536538
var cr *CacheRecord
537539
k, cr, err = getFollowLinks(root, k, false)
538540
if err != nil {
539-
return nil, err
541+
return "", nil, err
540542
}
541543
keyOk = (cr != nil)
542544
}
@@ -606,7 +608,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
606608
if p != "" && (lastMatchedDir == "" || !strings.HasPrefix(fn, lastMatchedDir+"/")) {
607609
include, err := path.Match(p, fn)
608610
if err != nil {
609-
return nil, err
611+
return "", nil, err
610612
}
611613
if !include {
612614
k, _, keyOk = iter.Next()
@@ -623,7 +625,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
623625
parentDir,
624626
)
625627
if err != nil {
626-
return nil, err
628+
return "", nil, err
627629
}
628630
} else {
629631
if !strings.HasPrefix(fn+"/", p+"/") {
@@ -638,7 +640,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
638640
parentDir,
639641
)
640642
if err != nil {
641-
return nil, err
643+
return "", nil, err
642644
}
643645
}
644646

@@ -649,7 +651,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
649651

650652
cr, upt, err := cc.checksum(ctx, root, txn, m, k, false)
651653
if err != nil {
652-
return nil, err
654+
return "", nil, err
653655
}
654656
if upt {
655657
updated = true
@@ -688,7 +690,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
688690
cc.tree = txn.Commit()
689691
cc.dirty = updated
690692

691-
return includedPaths, nil
693+
return origPrefix, includedPaths, nil
692694
}
693695

694696
func shouldIncludePath(
@@ -1092,7 +1094,6 @@ func (cc *cacheContext) scanPath(ctx context.Context, m *mount, p string, follow
10921094
}
10931095

10941096
err = cc.walk(scanPath, walkFunc)
1095-
10961097
if err != nil {
10971098
return err
10981099
}

cache/contenthash/checksum_test.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ import (
3535
const (
3636
dgstFileData0 = digest.Digest("sha256:cd8e75bca50f2d695f220d0cb0997d8ead387e4f926e8669a92d7f104cc9885b")
3737
dgstDirD0 = digest.Digest("sha256:d47454417d2c554067fbefe5f5719edc49f3cfe969c36b62e34a187a4da0cc9a")
38-
dgstDirD0FileByFile = digest.Digest("sha256:231c3293e329de47fec9e79056686477891fd1f244ed7b1c1fa668489a1f0d50")
38+
dgstDirD0FileByFile = digest.Digest("sha256:6b612ad5c13159112ae26357ef7bd34df29916941ba40ec2ce38dfb70c6c60c3")
3939
dgstDirD0Modified = digest.Digest("sha256:555ffa3028630d97ba37832b749eda85ab676fd64ffb629fbf0f4ec8c1e3bff1")
40-
dgstDoubleStar = digest.Digest("sha256:853b46abef38d02c9e29fdd1557c6002903b262541e60064bc84518d4d3a6f11")
40+
dgstDoubleStar = digest.Digest("sha256:aa7448215bbf5b837b35ce64467857d8b9747965573d14d4b31b370586370971")
4141
)
4242

4343
func TestChecksumSymlinkNoParentScan(t *testing.T) {
@@ -426,9 +426,9 @@ func TestChecksumWildcardOrFilter(t *testing.T) {
426426

427427
dgst, err := cc.Checksum(context.TODO(), ref, "f*o", ChecksumOpts{Wildcard: true}, nil)
428428
require.NoError(t, err)
429-
require.Equal(t, digest.FromBytes(append([]byte("foo"), []byte(dgstFileData0)...)), dgst)
429+
require.Equal(t, digest.FromBytes(append([]byte{0}, append([]byte("foo"), []byte(dgstFileData0)...)...)), dgst)
430430

431-
expFoos := digest.Digest("sha256:7f51c821895cfc116d3f64231dfb438e87a237ecbbe027cd96b7ee5e763cc569")
431+
expFoos := digest.Digest("sha256:0b6731924f0a32a812bf8a729202e55e54ded331f9e4ff2397f681e43694e086")
432432

433433
dgst, err = cc.Checksum(context.TODO(), ref, "f*", ChecksumOpts{Wildcard: true}, nil)
434434
require.NoError(t, err)
@@ -442,7 +442,7 @@ func TestChecksumWildcardOrFilter(t *testing.T) {
442442
require.NoError(t, err)
443443
require.Equal(t, dgstFileData0, dgst)
444444

445-
expFoos2 := digest.Digest("sha256:8afc09c7018d65d5eb318a9ef55cb704dec1f06d288181d913fc27a571aa042d")
445+
expFoos2 := digest.Digest("sha256:982153600b9653a1decb0f961e09e2bc1be335cfcaac9b39dbd1120b65fdf92c")
446446

447447
dgst, err = cc.Checksum(context.TODO(), ref, "y*", ChecksumOpts{FollowLinks: true, Wildcard: true}, nil)
448448
require.NoError(t, err)
@@ -534,13 +534,15 @@ func TestSymlinksNoFollow(t *testing.T) {
534534
require.NoError(t, err)
535535
require.Equal(t, expectedSym, dgst)
536536

537-
expectedSym = digest.Digest("sha256:e14a98332f46b81993ff4a3b7898bb00fc3d245d84e4c42e57c9ee6b129c7c41")
537+
expectedSym = digest.Digest("sha256:2797e710c6d1a89ff2d91c834b828b1dc500f2982430a58241df8e146f4c4bb4")
538538

539539
// Same with wildcard used.
540540
dgst, err = cc.Checksum(context.TODO(), ref, "fo?", ChecksumOpts{FollowLinks: true, Wildcard: true}, nil)
541541
require.NoError(t, err)
542542
require.Equal(t, expectedSym, dgst)
543543

544+
expectedSym = digest.Digest("sha256:9b761577efcb1239cf4be971914c5d7404914dd32ff436401af1764dc5446b83")
545+
544546
// Still works with exclude pattern.
545547
dgst, err = cc.Checksum(context.TODO(), ref, "foo", ChecksumOpts{FollowLinks: true, ExcludePatterns: []string{"*.git"}}, nil)
546548
require.NoError(t, err)
@@ -935,6 +937,11 @@ func TestChecksumIncludeSymlink(t *testing.T) {
935937
// File should be included
936938
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD0)
937939

940+
dgstD0Wildcard, err := cc.Checksum(context.TODO(), ref, "data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
941+
require.NoError(t, err)
942+
// File should be included
943+
require.NotEqual(t, dgstD0Wildcard, digest.FromBytes([]byte{}), dgstD0Wildcard)
944+
938945
dgstMntD0, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
939946
require.NoError(t, err)
940947
// File should be included despite symlink
@@ -945,6 +952,16 @@ func TestChecksumIncludeSymlink(t *testing.T) {
945952
// File should be included
946953
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)
947954

955+
dgstD2Wildcard, err := cc.Checksum(context.TODO(), ref, "data/d0/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
956+
require.NoError(t, err)
957+
// File should be included
958+
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)
959+
960+
dgstD2InnerWildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
961+
require.NoError(t, err)
962+
// File should be included
963+
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)
964+
948965
dgstMntD2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
949966
require.NoError(t, err)
950967
// File should be included despite symlink
@@ -957,23 +974,23 @@ func TestChecksumIncludeSymlink(t *testing.T) {
957974

958975
dgstMntD0Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
959976
require.NoError(t, err)
960-
require.Equal(t, dgstD0, dgstMntD0Wildcard2)
977+
require.Equal(t, dgstD0Wildcard, dgstMntD0Wildcard2)
961978

962979
dgstMntD2Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
963980
require.NoError(t, err)
964981
require.Equal(t, dgstD2, dgstMntD2Wildcard)
965982

966983
dgstMntD2Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
967984
require.NoError(t, err)
968-
require.Equal(t, dgstD2, dgstMntD2Wildcard2)
985+
require.Equal(t, dgstD2Wildcard, dgstMntD2Wildcard2)
969986

970987
dgstMntInnerWildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
971988
require.NoError(t, err)
972-
require.Equal(t, dgstD2, dgstMntInnerWildcard)
989+
require.Equal(t, dgstD2InnerWildcard, dgstMntInnerWildcard)
973990

974991
dgstMntInnerWildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/symlink-to-d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
975992
require.NoError(t, err)
976-
require.Equal(t, dgstD2, dgstMntInnerWildcard2)
993+
require.Equal(t, dgstD2InnerWildcard, dgstMntInnerWildcard2)
977994
}
978995

979996
func TestHandleChange(t *testing.T) {
@@ -1497,6 +1514,38 @@ func TestChecksumUpdateDirectory(t *testing.T) {
14971514
require.ErrorContains(t, err, "not found")
14981515
}
14991516

1517+
func TestChecksumIdenticalWithNoopExclude(t *testing.T) {
1518+
t.Parallel()
1519+
tmpdir := t.TempDir()
1520+
1521+
snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots"))
1522+
require.NoError(t, err)
1523+
cm, cleanup := setupCacheManager(t, tmpdir, "native", snapshotter)
1524+
t.Cleanup(cleanup)
1525+
1526+
ch := []string{
1527+
"ADD test dir",
1528+
"ADD test/foo file data0",
1529+
}
1530+
1531+
ref := createRef(t, cm, ch)
1532+
1533+
cc, err := newCacheContext(ref)
1534+
require.NoError(t, err)
1535+
1536+
expectedDgst := "sha256:8f36dfd60011a21345427f4d3177b1223e11fbb732c18dd07cd8b2a27a0b53ca"
1537+
1538+
dgst, err := cc.Checksum(context.TODO(), ref, "test", ChecksumOpts{}, nil)
1539+
require.NoError(t, err)
1540+
require.Equal(t, expectedDgst, string(dgst))
1541+
1542+
dgst, err = cc.Checksum(context.TODO(), ref, "test", ChecksumOpts{
1543+
ExcludePatterns: []string{"*.git"},
1544+
}, nil)
1545+
require.NoError(t, err)
1546+
require.Equal(t, expectedDgst, string(dgst))
1547+
}
1548+
15001549
func createRef(t *testing.T, cm cache.Manager, files []string) cache.ImmutableRef {
15011550
if runtime.GOOS == "windows" && len(files) > 0 {
15021551
// lm.Mount() will fail

0 commit comments

Comments
 (0)