Skip to content

Commit 9403302

Browse files
committed
contenthash: change how checksum is calculated with wildcards and patterns
This changes the way the checksum is calculated for content that has wildcards or include/exclude paths. The previous method would result in a different checksum with the same file contents but different patterns due to a different method of hashing the data. This normalizes the way the data is hashed to use similar methods. The wildcard path now strips the prefix from the path instead of using the base path. It no longer adds an additional zero byte between entries but instead uses the zero byte from the directory path key. Signed-off-by: Jonathan A. Sternberg <[email protected]>
1 parent 2777c1b commit 9403302

File tree

2 files changed

+78
-28
lines changed

2 files changed

+78
-28
lines changed

cache/contenthash/checksum.go

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable,
427427
return cc.lazyChecksum(ctx, m, p, opts.FollowLinks)
428428
}
429429

430-
includedPaths, err := cc.includedPaths(ctx, m, p, opts)
430+
prefix, includedPaths, err := cc.includedPaths(ctx, m, p, opts)
431431
if err != nil {
432432
return "", err
433433
}
@@ -451,17 +451,19 @@ func (cc *cacheContext) Checksum(ctx context.Context, mountable cache.Mountable,
451451
}
452452

453453
h := cachedigest.NewHash(cachedigest.TypeFileList)
454-
for i, w := range includedPaths {
455-
if i != 0 {
456-
h.Write([]byte{0})
454+
for _, w := range includedPaths {
455+
path := strings.TrimPrefix(w.path, prefix)
456+
k := convertPathToKey(path)
457+
if len(k) == 0 {
458+
k = []byte{0}
457459
}
458-
h.Write([]byte(path.Base(w.path)))
460+
h.Write(k)
459461
h.Write([]byte(w.record.Digest))
460462
}
461463
return h.Sum(), nil
462464
}
463465

464-
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) ([]*includedPath, error) {
466+
func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, opts ChecksumOpts) (string, []*includedPath, error) {
465467
cc.mu.Lock()
466468
defer cc.mu.Unlock()
467469

@@ -472,11 +474,11 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
472474
root := cc.tree.Root()
473475
scan, err := cc.needsScan(root, "", false)
474476
if err != nil {
475-
return nil, err
477+
return "", nil, err
476478
}
477479
if scan {
478480
if err := cc.scanPath(ctx, m, "", false); err != nil {
479-
return nil, err
481+
return "", nil, err
480482
}
481483
}
482484

@@ -494,15 +496,15 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
494496
if len(opts.IncludePatterns) != 0 {
495497
includePatternMatcher, err = patternmatcher.New(opts.IncludePatterns)
496498
if err != nil {
497-
return nil, errors.Wrapf(err, "invalid includepatterns: %s", opts.IncludePatterns)
499+
return "", nil, errors.Wrapf(err, "invalid includepatterns: %s", opts.IncludePatterns)
498500
}
499501
}
500502

501503
var excludePatternMatcher *patternmatcher.PatternMatcher
502504
if len(opts.ExcludePatterns) != 0 {
503505
excludePatternMatcher, err = patternmatcher.New(opts.ExcludePatterns)
504506
if err != nil {
505-
return nil, errors.Wrapf(err, "invalid excludepatterns: %s", opts.ExcludePatterns)
507+
return "", nil, errors.Wrapf(err, "invalid excludepatterns: %s", opts.ExcludePatterns)
506508
}
507509
}
508510

@@ -524,7 +526,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
524526
if opts.Wildcard {
525527
origPrefix, k, keyOk, err = wildcardPrefix(root, p)
526528
if err != nil {
527-
return nil, err
529+
return "", nil, err
528530
}
529531
} else {
530532
origPrefix = p
@@ -536,7 +538,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
536538
var cr *CacheRecord
537539
k, cr, err = getFollowLinks(root, k, false)
538540
if err != nil {
539-
return nil, err
541+
return "", nil, err
540542
}
541543
keyOk = (cr != nil)
542544
}
@@ -606,7 +608,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
606608
if p != "" && (lastMatchedDir == "" || !strings.HasPrefix(fn, lastMatchedDir+"/")) {
607609
include, err := path.Match(p, fn)
608610
if err != nil {
609-
return nil, err
611+
return "", nil, err
610612
}
611613
if !include {
612614
k, _, keyOk = iter.Next()
@@ -623,7 +625,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
623625
parentDir,
624626
)
625627
if err != nil {
626-
return nil, err
628+
return "", nil, err
627629
}
628630
} else {
629631
if !strings.HasPrefix(fn+"/", p+"/") {
@@ -638,7 +640,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
638640
parentDir,
639641
)
640642
if err != nil {
641-
return nil, err
643+
return "", nil, err
642644
}
643645
}
644646

@@ -649,7 +651,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
649651

650652
cr, upt, err := cc.checksum(ctx, root, txn, m, k, false)
651653
if err != nil {
652-
return nil, err
654+
return "", nil, err
653655
}
654656
if upt {
655657
updated = true
@@ -688,7 +690,7 @@ func (cc *cacheContext) includedPaths(ctx context.Context, m *mount, p string, o
688690
cc.tree = txn.Commit()
689691
cc.dirty = updated
690692

691-
return includedPaths, nil
693+
return origPrefix, includedPaths, nil
692694
}
693695

694696
func shouldIncludePath(
@@ -1092,7 +1094,6 @@ func (cc *cacheContext) scanPath(ctx context.Context, m *mount, p string, follow
10921094
}
10931095

10941096
err = cc.walk(scanPath, walkFunc)
1095-
10961097
if err != nil {
10971098
return err
10981099
}

cache/contenthash/checksum_test.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ import (
3535
const (
3636
dgstFileData0 = digest.Digest("sha256:cd8e75bca50f2d695f220d0cb0997d8ead387e4f926e8669a92d7f104cc9885b")
3737
dgstDirD0 = digest.Digest("sha256:d47454417d2c554067fbefe5f5719edc49f3cfe969c36b62e34a187a4da0cc9a")
38-
dgstDirD0FileByFile = digest.Digest("sha256:231c3293e329de47fec9e79056686477891fd1f244ed7b1c1fa668489a1f0d50")
38+
dgstDirD0FileByFile = digest.Digest("sha256:6b612ad5c13159112ae26357ef7bd34df29916941ba40ec2ce38dfb70c6c60c3")
3939
dgstDirD0Modified = digest.Digest("sha256:555ffa3028630d97ba37832b749eda85ab676fd64ffb629fbf0f4ec8c1e3bff1")
40-
dgstDoubleStar = digest.Digest("sha256:853b46abef38d02c9e29fdd1557c6002903b262541e60064bc84518d4d3a6f11")
40+
dgstDoubleStar = digest.Digest("sha256:aa7448215bbf5b837b35ce64467857d8b9747965573d14d4b31b370586370971")
4141
)
4242

4343
func TestChecksumSymlinkNoParentScan(t *testing.T) {
@@ -426,9 +426,9 @@ func TestChecksumWildcardOrFilter(t *testing.T) {
426426

427427
dgst, err := cc.Checksum(context.TODO(), ref, "f*o", ChecksumOpts{Wildcard: true}, nil)
428428
require.NoError(t, err)
429-
require.Equal(t, digest.FromBytes(append([]byte("foo"), []byte(dgstFileData0)...)), dgst)
429+
require.Equal(t, digest.FromBytes(append([]byte{0}, append([]byte("foo"), []byte(dgstFileData0)...)...)), dgst)
430430

431-
expFoos := digest.Digest("sha256:7f51c821895cfc116d3f64231dfb438e87a237ecbbe027cd96b7ee5e763cc569")
431+
expFoos := digest.Digest("sha256:0b6731924f0a32a812bf8a729202e55e54ded331f9e4ff2397f681e43694e086")
432432

433433
dgst, err = cc.Checksum(context.TODO(), ref, "f*", ChecksumOpts{Wildcard: true}, nil)
434434
require.NoError(t, err)
@@ -442,7 +442,7 @@ func TestChecksumWildcardOrFilter(t *testing.T) {
442442
require.NoError(t, err)
443443
require.Equal(t, dgstFileData0, dgst)
444444

445-
expFoos2 := digest.Digest("sha256:8afc09c7018d65d5eb318a9ef55cb704dec1f06d288181d913fc27a571aa042d")
445+
expFoos2 := digest.Digest("sha256:982153600b9653a1decb0f961e09e2bc1be335cfcaac9b39dbd1120b65fdf92c")
446446

447447
dgst, err = cc.Checksum(context.TODO(), ref, "y*", ChecksumOpts{FollowLinks: true, Wildcard: true}, nil)
448448
require.NoError(t, err)
@@ -534,13 +534,15 @@ func TestSymlinksNoFollow(t *testing.T) {
534534
require.NoError(t, err)
535535
require.Equal(t, expectedSym, dgst)
536536

537-
expectedSym = digest.Digest("sha256:e14a98332f46b81993ff4a3b7898bb00fc3d245d84e4c42e57c9ee6b129c7c41")
537+
expectedSym = digest.Digest("sha256:2797e710c6d1a89ff2d91c834b828b1dc500f2982430a58241df8e146f4c4bb4")
538538

539539
// Same with wildcard used.
540540
dgst, err = cc.Checksum(context.TODO(), ref, "fo?", ChecksumOpts{FollowLinks: true, Wildcard: true}, nil)
541541
require.NoError(t, err)
542542
require.Equal(t, expectedSym, dgst)
543543

544+
expectedSym = digest.Digest("sha256:9b761577efcb1239cf4be971914c5d7404914dd32ff436401af1764dc5446b83")
545+
544546
// Still works with exclude pattern.
545547
dgst, err = cc.Checksum(context.TODO(), ref, "foo", ChecksumOpts{FollowLinks: true, ExcludePatterns: []string{"*.git"}}, nil)
546548
require.NoError(t, err)
@@ -935,6 +937,11 @@ func TestChecksumIncludeSymlink(t *testing.T) {
935937
// File should be included
936938
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD0)
937939

940+
dgstD0Wildcard, err := cc.Checksum(context.TODO(), ref, "data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
941+
require.NoError(t, err)
942+
// File should be included
943+
require.NotEqual(t, dgstD0Wildcard, digest.FromBytes([]byte{}), dgstD0Wildcard)
944+
938945
dgstMntD0, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
939946
require.NoError(t, err)
940947
// File should be included despite symlink
@@ -945,6 +952,16 @@ func TestChecksumIncludeSymlink(t *testing.T) {
945952
// File should be included
946953
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)
947954

955+
dgstD2Wildcard, err := cc.Checksum(context.TODO(), ref, "data/d0/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
956+
require.NoError(t, err)
957+
// File should be included
958+
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)
959+
960+
dgstD2InnerWildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
961+
require.NoError(t, err)
962+
// File should be included
963+
require.NotEqual(t, digest.FromBytes([]byte{}), dgstD2)
964+
948965
dgstMntD2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}}, nil)
949966
require.NoError(t, err)
950967
// File should be included despite symlink
@@ -957,23 +974,23 @@ func TestChecksumIncludeSymlink(t *testing.T) {
957974

958975
dgstMntD0Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
959976
require.NoError(t, err)
960-
require.Equal(t, dgstD0, dgstMntD0Wildcard2)
977+
require.Equal(t, dgstD0Wildcard, dgstMntD0Wildcard2)
961978

962979
dgstMntD2Wildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
963980
require.NoError(t, err)
964981
require.Equal(t, dgstD2, dgstMntD2Wildcard)
965982

966983
dgstMntD2Wildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d1/d*", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
967984
require.NoError(t, err)
968-
require.Equal(t, dgstD2, dgstMntD2Wildcard2)
985+
require.Equal(t, dgstD2Wildcard, dgstMntD2Wildcard2)
969986

970987
dgstMntInnerWildcard, err := cc.Checksum(context.TODO(), ref, "mnt/data/d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
971988
require.NoError(t, err)
972-
require.Equal(t, dgstD2, dgstMntInnerWildcard)
989+
require.Equal(t, dgstD2InnerWildcard, dgstMntInnerWildcard)
973990

974991
dgstMntInnerWildcard2, err := cc.Checksum(context.TODO(), ref, "mnt/data/symlink-to-d0/d*/d2", ChecksumOpts{IncludePatterns: []string{"**/foo"}, Wildcard: true}, nil)
975992
require.NoError(t, err)
976-
require.Equal(t, dgstD2, dgstMntInnerWildcard2)
993+
require.Equal(t, dgstD2InnerWildcard, dgstMntInnerWildcard2)
977994
}
978995

979996
func TestHandleChange(t *testing.T) {
@@ -1497,6 +1514,38 @@ func TestChecksumUpdateDirectory(t *testing.T) {
14971514
require.ErrorContains(t, err, "not found")
14981515
}
14991516

1517+
func TestChecksumIdenticalWithNoopExclude(t *testing.T) {
1518+
t.Parallel()
1519+
tmpdir := t.TempDir()
1520+
1521+
snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots"))
1522+
require.NoError(t, err)
1523+
cm, cleanup := setupCacheManager(t, tmpdir, "native", snapshotter)
1524+
t.Cleanup(cleanup)
1525+
1526+
ch := []string{
1527+
"ADD test dir",
1528+
"ADD test/foo file data0",
1529+
}
1530+
1531+
ref := createRef(t, cm, ch)
1532+
1533+
cc, err := newCacheContext(ref)
1534+
require.NoError(t, err)
1535+
1536+
expectedDgst := "sha256:8f36dfd60011a21345427f4d3177b1223e11fbb732c18dd07cd8b2a27a0b53ca"
1537+
1538+
dgst, err := cc.Checksum(context.TODO(), ref, "test", ChecksumOpts{}, nil)
1539+
require.NoError(t, err)
1540+
require.Equal(t, expectedDgst, string(dgst))
1541+
1542+
dgst, err = cc.Checksum(context.TODO(), ref, "test", ChecksumOpts{
1543+
ExcludePatterns: []string{"*.git"},
1544+
}, nil)
1545+
require.NoError(t, err)
1546+
require.Equal(t, expectedDgst, string(dgst))
1547+
}
1548+
15001549
func createRef(t *testing.T, cm cache.Manager, files []string) cache.ImmutableRef {
15011550
if runtime.GOOS == "windows" && len(files) > 0 {
15021551
// lm.Mount() will fail

0 commit comments

Comments
 (0)