Skip to content

Commit edcf130

Browse files
authored
Merge pull request moby#5060 from cyphar/contenthash-scanpath-slash
contenthash: improve the correctness of needsScan
2 parents 9aa07e6 + 7b630eb commit edcf130

File tree

2 files changed

+236
-41
lines changed

2 files changed

+236
-41
lines changed

cache/contenthash/checksum.go

Lines changed: 78 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"path/filepath"
1111
"strings"
1212
"sync"
13+
"sync/atomic"
1314

1415
iradix "github.com/hashicorp/go-immutable-radix/v2"
1516
simplelru "github.com/hashicorp/golang-lru/v2/simplelru"
@@ -928,31 +929,64 @@ func (cc *cacheContext) checksum(ctx context.Context, root *iradix.Node[*CacheRe
928929
return cr2, true, nil
929930
}
930931

932+
// pathSet is a set of path prefixes that can be used to see if a given path is
933+
// lexically a child of any path in the set. All paths provided to this set
934+
// MUST be absolute and use / as the separator.
935+
type pathSet struct {
936+
// prefixes contains paths of the form "/a/b/", so that we correctly detect
937+
// /a/b as being a parent of /a/b/c but not /a/bc.
938+
prefixes []string
939+
}
940+
941+
// add a path to the set. This is a no-op if includes(path) == true.
942+
func (s *pathSet) add(p string) {
943+
// Ensure the path is absolute and clean.
944+
p = path.Join("/", p)
945+
if !s.includes(p) {
946+
if p != "/" {
947+
p += "/"
948+
}
949+
s.prefixes = append(s.prefixes, p)
950+
}
951+
}
952+
953+
// includes returns true iff there is a path in the pathSet which is a lexical
954+
// parent of the given path. The provided path MUST be an absolute path and
955+
// MUST NOT contain any ".." components, as they will be path.Clean'd.
956+
func (s pathSet) includes(p string) bool {
957+
// Ensure the path is absolute and clean.
958+
p = path.Join("/", p)
959+
if p != "/" {
960+
p += "/"
961+
}
962+
for _, prefix := range s.prefixes {
963+
if strings.HasPrefix(p, prefix) {
964+
return true
965+
}
966+
}
967+
return false
968+
}
969+
931970
// needsScan returns false if path is in the tree or a parent path is in tree
932971
// and subpath is missing.
933972
func (cc *cacheContext) needsScan(root *iradix.Node[*CacheRecord], path string, followTrailing bool) (bool, error) {
934973
var (
935-
lastGoodPath string
974+
goodPaths pathSet
936975
hasParentInTree bool
937976
)
938977
k := convertPathToKey(path)
939978
_, cr, err := getFollowLinksCallback(root, k, followTrailing, func(subpath string, cr *CacheRecord) error {
979+
// If we found a path that exists in the cache, add it to the set of
980+
// known-scanned paths. Otherwise, verify whether the not-found subpath
981+
// is inside a known-scanned path (we might have hit a "..", taking us
982+
// out of the scanned paths, or we might hit a non-existent path inside
983+
// a scanned path). getFollowLinksCallback iterates left-to-right, so
984+
// we will always hit ancestors first.
940985
if cr != nil {
941-
// If the path is not a symlink, then for now we have a parent in
942-
// the tree. Otherwise, we reset hasParentInTree because we
943-
// might've jumped to a part of the tree that hasn't been scanned.
944-
hasParentInTree = (cr.Type != CacheRecordTypeSymlink)
945-
if hasParentInTree {
946-
lastGoodPath = subpath
947-
}
948-
} else if hasParentInTree {
949-
// If the current component was something like ".." and the subpath
950-
// couldn't be found, then we need to invalidate hasParentInTree.
951-
// In practice this means that our subpath needs to be prefixed by
952-
// the last good path. We add a trailing slash to make sure the
953-
// prefix is a proper lexical prefix (as opposed to /a/b being seen
954-
// as a prefix of /a/bc).
955-
hasParentInTree = strings.HasPrefix(subpath+"/", lastGoodPath+"/")
986+
hasParentInTree = cr.Type != CacheRecordTypeSymlink
987+
goodPaths.add(subpath)
988+
} else {
989+
hasParentInTree = goodPaths.includes(subpath)
956990
}
957991
return nil
958992
})
@@ -962,6 +996,13 @@ func (cc *cacheContext) needsScan(root *iradix.Node[*CacheRecord], path string,
962996
return cr == nil && !hasParentInTree, nil
963997
}
964998

999+
// Only used by TestNeedScanChecksumRegression to make sure scanPath is not
1000+
// called for paths we have already scanned.
1001+
var (
1002+
scanCounterEnable bool
1003+
scanCounter atomic.Uint64
1004+
)
1005+
9651006
func (cc *cacheContext) scanPath(ctx context.Context, m *mount, p string, followTrailing bool) (retErr error) {
9661007
p = path.Join("/", p)
9671008

@@ -994,6 +1035,9 @@ func (cc *cacheContext) scanPath(ctx context.Context, m *mount, p string, follow
9941035
}
9951036

9961037
err = filepath.Walk(scanPath, func(itemPath string, fi os.FileInfo, err error) error {
1038+
if scanCounterEnable {
1039+
scanCounter.Add(1)
1040+
}
9971041
if err != nil {
9981042
// If the root doesn't exist, ignore the error.
9991043
if itemPath == scanPath && errors.Is(err, os.ErrNotExist) {
@@ -1005,11 +1049,7 @@ func (cc *cacheContext) scanPath(ctx context.Context, m *mount, p string, follow
10051049
if err != nil {
10061050
return err
10071051
}
1008-
p := path.Join("/", filepath.ToSlash(rel))
1009-
if p == "/" {
1010-
p = ""
1011-
}
1012-
k := convertPathToKey(p)
1052+
k := convertPathToKey(keyPath(rel))
10131053
if _, ok := n.Get(k); !ok {
10141054
cr := &CacheRecord{
10151055
Type: CacheRecordTypeFile,
@@ -1060,9 +1100,18 @@ func getFollowLinks(root *iradix.Node[*CacheRecord], k []byte, followTrailing bo
10601100
// Linux APIs), followTrailing is obeyed even if the key has a trailing slash
10611101
// (though paths like "foo/link/." will cause the link to be resolved).
10621102
//
1063-
// The callback cb is called after each cache lookup done by
1064-
// getFollowLinksCallback, except for the first lookup where the verbatim key
1065-
// is looked up in the cache.
1103+
// cb is a callback that is called for each path component encountered during
1104+
// path resolution (after the path component is looked up in the cache). This
1105+
// means for a path like /a/b/c, the callback will be called for at least
1106+
//
1107+
// {/, /a, /a/b, /a/b/c}
1108+
//
1109+
// Note that if any of the components are symlinks, the paths will depend on
1110+
// the symlink contents and there will be more callbacks. If the requested key
1111+
// has a trailing slash, the callback will also be called for the final
1112+
// trailing-slash lookup (/a/b/c/ in the above example). Note that
1113+
// getFollowLinksCallback will try to look up the original key directly first
1114+
// and the callback is not called for this first lookup.
10661115
func getFollowLinksCallback(root *iradix.Node[*CacheRecord], k []byte, followTrailing bool, cb followLinksCallback) ([]byte, *CacheRecord, error) {
10671116
v, ok := root.Get(k)
10681117
if ok && (!followTrailing || v.Type != CacheRecordTypeSymlink) {
@@ -1093,16 +1142,11 @@ func getFollowLinksCallback(root *iradix.Node[*CacheRecord], k []byte, followTra
10931142
// Apply the component to the path. Since it is a single component, and
10941143
// our current path contains no symlinks, we can just apply it
10951144
// leixically.
1096-
nextPath := path.Join("/", currentPath, part)
1097-
if nextPath == "/" {
1098-
// If we hit the root, just treat it as a directory.
1099-
currentPath = "/"
1100-
continue
1101-
}
1102-
if nextPath == currentPath {
1103-
// noop component
1104-
continue
1105-
}
1145+
nextPath := keyPath(path.Join("/", currentPath, part))
1146+
// In contrast to rootPath, we don't skip lookups for no-op components
1147+
// or / because we need to call the callback for every path component
1148+
// we hit (including /) and we need to make sure that the CacheRecord
1149+
// we return is correct after every iteration.
11061150

11071151
cr, ok = root.Get(convertPathToKey(nextPath))
11081152
if cb != nil {

cache/contenthash/checksum_test.go

Lines changed: 158 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,157 @@ func TestChecksumSymlinkNoParentScan(t *testing.T) {
6565
dgst, err := cc.Checksum(context.TODO(), ref, "aa/ln/bb/cc/dd", ChecksumOpts{FollowLinks: true}, nil)
6666
require.NoError(t, err)
6767
require.Equal(t, dgstFileData0, dgst)
68+
69+
// The above checksum request should have only checksummed aa/bb/cc, and so
70+
// any parent directories should need a scan but non-existent (or existent)
71+
// children should not.
72+
root := cc.tree.Root()
73+
74+
for _, path := range []string{
75+
// Paths not within the scanned /aa/bb/cc/.
76+
"/", "/aa", "/aa/bb", "/aa/bb/ff", "/non-exist",
77+
} {
78+
needs1, err := cc.needsScan(root, path, false)
79+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=false)", path)
80+
require.Truef(t, needs1, "needsScan(%q, followTrailing=false)", path)
81+
82+
needs2, err := cc.needsScan(root, path, true)
83+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=true)", path)
84+
require.Truef(t, needs2, "needsScan(%q, followTrailing=true)", path)
85+
}
86+
87+
for _, path := range []string{
88+
// Paths within the scanned /aa/bb/cc, even if they don't exist.
89+
"/aa/bb/cc", "/aa/bb/cc/non-exist", "/aa/bb/cc/dd/ee/ff", "/aa/bb/cc/non-exist/xx/yy/zz",
90+
} {
91+
needs1, err := cc.needsScan(root, path, false)
92+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=false)", path)
93+
require.Falsef(t, needs1, "needsScan(%q, followTrailing=false)", path)
94+
95+
needs2, err := cc.needsScan(root, path, true)
96+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=true)", path)
97+
require.Falsef(t, needs2, "needsScan(%q, followTrailing=true)", path)
98+
}
99+
100+
// /aa was not scanned, but during the walk we went through /aa/ln and so
101+
// we know the contents of the link. However, if we want to scan it with
102+
// followTrailing=true, we will need a scan because we didn't scan /aa.
103+
path := "/aa/ln"
104+
needs1, err := cc.needsScan(root, path, false)
105+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=false)", path)
106+
require.Falsef(t, needs1, "needsScan(%q, followTrailing=false)", path)
107+
108+
needs2, err := cc.needsScan(root, path, true)
109+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=true)", path)
110+
require.Truef(t, needs2, "needsScan(%q, followTrailing=true)", path)
111+
}
112+
113+
// https://github.com/moby/buildkit/issues/5042
114+
func TestNeedScanChecksumRegression(t *testing.T) {
115+
// This test cannot be run in parallel because we use scanCounter.
116+
scanCounterEnable = true
117+
defer func() {
118+
scanCounterEnable = false
119+
}()
120+
121+
tmpdir := t.TempDir()
122+
123+
snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots"))
124+
require.NoError(t, err)
125+
cm, cleanup := setupCacheManager(t, tmpdir, "native", snapshotter)
126+
t.Cleanup(cleanup)
127+
128+
ch := []string{
129+
"ADD aa dir",
130+
"ADD aa/bb dir",
131+
"ADD aa/bb/cc file data0",
132+
"ADD aa/ln symlink /aa",
133+
"ADD aa/root symlink /",
134+
"ADD bb symlink aa/bb",
135+
}
136+
137+
ref := createRef(t, cm, ch)
138+
139+
cc, err := newCacheContext(ref)
140+
require.NoError(t, err)
141+
142+
// Checksumming /aa/bb while following links will result in /aa being scanned.
143+
_, err = cc.Checksum(context.TODO(), ref, "/bb", ChecksumOpts{FollowLinks: true}, nil)
144+
require.NoError(t, err)
145+
146+
root := cc.tree.Root()
147+
for _, test := range []struct {
148+
path string
149+
followTrailing, expectNeedsScan bool
150+
}{
151+
// Any path under /aa will not result in a re-scan.
152+
{"/aa", true, false},
153+
{"/aa/ln", true, false},
154+
{"/aa/ln", false, false},
155+
{"/aa/non-exist", true, false},
156+
{"/aa/bb/non-exist", true, false},
157+
{"/aa/bb/cc", true, false},
158+
{"/aa/bb/cc/non-exist", true, false},
159+
// followTrailing=false on a symlink to /.
160+
{"/aa/root", false, false},
161+
// /bb itself was scanned during the lookup in Checksum.
162+
{"/bb", true, false},
163+
{"/bb", false, false},
164+
// A path outside /aa will need a scan.
165+
{"/non-exist", true, true},
166+
{"/non-exist", false, true},
167+
{"/aa/root", true, true},
168+
{"/", true, true},
169+
} {
170+
needs, err := cc.needsScan(root, test.path, test.followTrailing)
171+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=%v)", test.path, test.followTrailing)
172+
require.Equalf(t, test.expectNeedsScan, needs, "needsScan(%q, followTrailing=%v)", test.path, test.followTrailing)
173+
}
174+
175+
// Make sure trying to checksum a subpath results in no further scans.
176+
initialScanCounter := scanCounter.Load()
177+
_, err = cc.Checksum(context.TODO(), ref, "/bb/cc", ChecksumOpts{FollowLinks: true}, nil)
178+
require.NoError(t, err)
179+
require.Equal(t, initialScanCounter, scanCounter.Load())
180+
_, err = cc.Checksum(context.TODO(), ref, "/bb/non-existent", ChecksumOpts{FollowLinks: true}, nil)
181+
require.Error(t, err)
182+
require.Equal(t, initialScanCounter, scanCounter.Load())
183+
184+
// Looking up a non-existent path in / will checksum the whole tree. See
185+
// <https://github.com/moby/buildkit/issues/5042> for more information.
186+
// This means that needsScan will return true for any path.
187+
_, err = cc.Checksum(context.TODO(), ref, "/non-existent", ChecksumOpts{FollowLinks: true}, nil)
188+
require.Error(t, err)
189+
fullScanCounter := scanCounter.Load()
190+
require.NotEqual(t, fullScanCounter, initialScanCounter)
191+
192+
root = cc.tree.Root()
193+
for _, path := range []string{
194+
"/", "/non-exist", "/ff", "/aa/root", "/non-exist/child", "/different-non-exist",
195+
} {
196+
needs1, err := cc.needsScan(root, path, false)
197+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=false)", path)
198+
require.Falsef(t, needs1, "needsScan(%q, followTrailing=false)", path)
199+
200+
needs2, err := cc.needsScan(root, path, true)
201+
require.NoErrorf(t, err, "needsScan(%q, followTrailing=true)", path)
202+
require.Falsef(t, needs2, "needsScan(%q, followTrailing=true)", path)
203+
}
204+
205+
// Looking up any more paths should not result in any more scans because we
206+
// already know / was scanned.
207+
_, err = cc.Checksum(context.TODO(), ref, "/non-existent", ChecksumOpts{FollowLinks: true}, nil)
208+
require.Error(t, err)
209+
require.Equal(t, fullScanCounter, scanCounter.Load())
210+
_, err = cc.Checksum(context.TODO(), ref, "/different/non/existent", ChecksumOpts{FollowLinks: true}, nil)
211+
require.Error(t, err)
212+
require.Equal(t, fullScanCounter, scanCounter.Load())
213+
_, err = cc.Checksum(context.TODO(), ref, "/aa/root/aa/non-exist", ChecksumOpts{FollowLinks: true}, nil)
214+
require.Error(t, err)
215+
require.Equal(t, fullScanCounter, scanCounter.Load())
216+
_, err = cc.Checksum(context.TODO(), ref, "/aa/root/bb/cc", ChecksumOpts{FollowLinks: true}, nil)
217+
require.NoError(t, err)
218+
require.Equal(t, fullScanCounter, scanCounter.Load())
68219
}
69220

70221
func TestChecksumNonLexicalSymlinks(t *testing.T) {
@@ -114,8 +265,8 @@ func TestChecksumNonLexicalSymlinks(t *testing.T) {
114265
"link3/target/file",
115266
} {
116267
dgst, err := cc.Checksum(context.TODO(), ref, path, ChecksumOpts{FollowLinks: true}, nil)
117-
require.NoError(t, err)
118-
require.Equal(t, dgstFileData0, dgst)
268+
require.NoErrorf(t, err, "Checksum(%q)", path)
269+
require.Equalf(t, dgstFileData0, dgst, "Checksum(%q)", path)
119270
}
120271

121272
// FollowLinks only affects final component resolution, so make sure that
@@ -130,8 +281,8 @@ func TestChecksumNonLexicalSymlinks(t *testing.T) {
130281
"link3/target/file",
131282
} {
132283
dgst, err := cc.Checksum(context.TODO(), ref, path, ChecksumOpts{FollowLinks: false}, nil)
133-
require.NoError(t, err)
134-
require.Equal(t, dgstFileData0, dgst)
284+
require.NoErrorf(t, err, "Checksum(%q)", path)
285+
require.Equalf(t, dgstFileData0, dgst, "Checksum(%q)", path)
135286
}
136287

137288
dgstLink1TargetFile, err := cc.Checksum(context.TODO(), ref, "link1/target_file", ChecksumOpts{FollowLinks: false}, nil)
@@ -159,9 +310,9 @@ func TestChecksumNonLexicalSymlinks(t *testing.T) {
159310
{"link3/target_file", dgstLink3TargetFile},
160311
} {
161312
dgst, err := cc.Checksum(context.TODO(), ref, test.path, ChecksumOpts{FollowLinks: false}, nil)
162-
require.NoError(t, err)
163-
require.NotEqual(t, dgstFileData0, dgst)
164-
require.Equal(t, test.expectedDgst, dgst)
313+
require.NoErrorf(t, err, "Checksum(%q)", test.path)
314+
require.NotEqualf(t, dgstFileData0, dgst, "Checksum(%q)", test.path)
315+
require.Equalf(t, test.expectedDgst, dgst, "Checksum(%q)", test.path)
165316
}
166317
}
167318

0 commit comments

Comments
 (0)