Skip to content

Commit 4b308b7

Browse files
authored
perf: cache fs traversal across --watch invocations (#117)
When caching is enabled for things like tree-sitter queries, the slowest remaining step is gazelle traversing the fs, which essentially means doing `ReadDir` and parsing the BUILD files in each directory. This change patches gazelle so that: 1. `aspect_gazelle_runner` gets a reference to the gazelle fs cache and saves it for the next time gazelle runs; 2. saved fs reads are invalidated based on the list of changed files reported by watchman; 3. the gazelle fs cache is populated from previous invocations, before gazelle starts to traverse the fs. ### Changes are visible to end-users: no ### Test plan - Covered by existing test cases - Manual testing: run `configure --watch`, trigger an incremental invocation, and ensure it preloads the fs cache
1 parent be0c9d6 commit 4b308b7

File tree

5 files changed

+83
-5
lines changed

5 files changed

+83
-5
lines changed

runner/MODULE.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ archive_override(
4242
patch_strip = 1,
4343
patches = [
4444
"//patches:bazelbuild_bazel-gazelle_aspect-cli.patch",
45+
"//patches:bazelbuild_bazel-gazelle_aspect-watchman.patch",
4546
"//pkg/git:gazelle-gitignore.patch",
4647
],
4748
strip_prefix = "bazel-gazelle-2de7b829fef136795c68d9d3b8644a379693cf55",
@@ -68,6 +69,7 @@ go_deps2.module_override(
6869
patch_strip = 1,
6970
patches = [
7071
"//patches:bazelbuild_bazel-gazelle_aspect-cli.patch",
72+
"//patches:bazelbuild_bazel-gazelle_aspect-watchman.patch",
7173
"//pkg/git:gazelle-gitignore.patch",
7274
],
7375
path = "github.com/bazelbuild/bazel-gazelle",
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
diff --git a/walk/walk.go b/walk/walk.go
2+
index 0e59bcf..b8d774c 100644
3+
--- a/walk/walk.go
4+
+++ b/walk/walk.go
5+
@@ -373,6 +373,12 @@ func newWalker(c *config.Config, cexts []config.Configurer, dirs []string, mode
6+
relsToVisitSeen: make(map[string]struct{}),
7+
}
8+
9+
+ // PATCH(caching): additional aspect-gazelle caching of walker
10+
+ if c.Exts["aspect:walkCache:load"] != nil {
11+
+ walkCacheLoad := c.Exts["aspect:walkCache:load"].(func(m interface{}))
12+
+ walkCacheLoad(&w.cache.entryMap)
13+
+ }
14+
+
15+
// Asynchronously populate the walker cache in the background.
16+
go w.populateCache()
17+

runner/pkg/watchman/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ go_test(
2626
],
2727
embed = [":watchman"],
2828
tags = ["manual"], # TODO: we need watchman in our runners.
29+
deps = ["@gazelle//config"],
2930
)
3031

3132
# At least make sure watch tests can build despite being tagged as manual

runner/pkg/watchman/cache.go

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"path"
99
"path/filepath"
1010
"runtime"
11+
"strings"
1112
"sync"
1213

1314
"github.com/aspect-build/aspect-gazelle/common/cache"
@@ -34,6 +35,10 @@ type watchmanCache struct {
3435
old map[string]map[string]any
3536
new *sync.Map
3637
lastClockSpec string
38+
39+
// A reference to the walk cache used in the last gazelle invocation
40+
// to allow restoration in the next invocation.
41+
walkCache *sync.Map
3742
}
3843

3944
var _ cache.Cache = (*watchmanCache)(nil)
@@ -53,10 +58,14 @@ func NewWatchmanCache(c *config.Config) cache.Cache {
5358
log.Fatalf("failed to start the watcher: %v", err)
5459
}
5560

56-
return newWatchmanCache(w, diskCachePath)
61+
return newWatchmanCache(c, w, diskCachePath)
5762
}
5863

59-
func newWatchmanCache(w *WatchmanWatcher, diskCachePath string) *watchmanCache {
64+
// The walk cache of a previous invocation of gazelle.
65+
// Must be a global var that persists across multiple gazelle invocations.
66+
var previousWalkCache *sync.Map
67+
68+
func newWatchmanCache(c *config.Config, w *WatchmanWatcher, diskCachePath string) *watchmanCache {
6069
wc := &watchmanCache{
6170
w: w,
6271
file: diskCachePath,
@@ -65,6 +74,7 @@ func newWatchmanCache(w *WatchmanWatcher, diskCachePath string) *watchmanCache {
6574

6675
symlinks: &sync.Map{},
6776
}
77+
wc.populateWalkCache(c)
6878
wc.read()
6979

7080
runtime.SetFinalizer(wc, closeWatchmanCache)
@@ -76,13 +86,53 @@ func closeWatchmanCache(c *watchmanCache) {
7686
c.w.Close()
7787
}
7888

89+
func (c *watchmanCache) populateWalkCache(cfg *config.Config) {
90+
// Always register a loader that copies entries from the previous walk cache (if any)
91+
// into any fresh walk cache. This must be invoked from a patched gazelle walk.
92+
cfg.Exts["aspect:walkCache:load"] = func(m interface{}) {
93+
cc := 0
94+
95+
newWalkCache := m.(*sync.Map)
96+
if c.walkCache != nil {
97+
c.walkCache.Range(func(key, value any) bool {
98+
cc++
99+
newWalkCache.Store(key, value)
100+
return true
101+
})
102+
}
103+
104+
BazelLog.Debugf("Loaded %d walk cache entries into new walk cache\n", cc)
105+
106+
// Keep a reference to the walk cache for the new gazelle walk invocation
107+
// in case of subsequent gazelle invocations.
108+
previousWalkCache = newWalkCache
109+
}
110+
}
111+
112+
func invalidateWalkCache(m *sync.Map, staleD string) {
113+
if staleD == "." || staleD == "" {
114+
m.Clear()
115+
return
116+
}
117+
118+
m.Range(func(key, value any) bool {
119+
d := key.(string)
120+
// Delete the stale directory and any children that may inherit the state
121+
if staleD == d || len(d) > len(staleD) && strings.HasPrefix(d, staleD) && d[len(staleD)] == '/' {
122+
m.Delete(key)
123+
}
124+
return true
125+
})
126+
}
127+
79128
func (c *watchmanCache) read() {
80129
cacheReader, err := os.Open(c.file)
81130
if err != nil {
82131
BazelLog.Tracef("Failed to open cache %q: %v", c.file, err)
83132
return
84133
}
85134
defer cacheReader.Close()
135+
defer func() { previousWalkCache = nil }()
86136

87137
var v cacheState
88138

@@ -111,14 +161,20 @@ func (c *watchmanCache) read() {
111161
return
112162
}
113163

114-
// Discard entries which have changed since the last cache write.
115164
for _, p := range cs.Paths {
165+
// Discard entries which have changed since the last cache write.
116166
delete(v.Entries, p)
167+
168+
// Discard walk cache entries for the directory containing the removed/changed path, and for all of its subdirectories.
169+
if previousWalkCache != nil {
170+
invalidateWalkCache(previousWalkCache, path.Dir(p))
171+
}
117172
}
118173

119174
// Persist the still valid entries as the "old" cache state
120175
c.old = v.Entries
121176
c.lastClockSpec = cs.ClockSpec
177+
c.walkCache = previousWalkCache
122178

123179
// Persist the fact that all persisted paths are not symlinks.
124180
// Only new paths with no cache entries will require a stat call.

runner/pkg/watchman/cache_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package watchman
33
import (
44
"os"
55
"testing"
6+
7+
"github.com/bazelbuild/bazel-gazelle/config"
68
)
79

810
func TestLoadOrStoreFile(t *testing.T) {
@@ -22,7 +24,7 @@ func TestLoadOrStoreFile(t *testing.T) {
2224
defer w.Stop()
2325
defer w.Close()
2426

25-
c := newWatchmanCache(w, getTempFile(t))
27+
c := newWatchmanCache(config.New(), w, getTempFile(t))
2628

2729
computes := 0
2830
compute := func(path string, content []byte) (any, error) {
@@ -73,7 +75,7 @@ func TestLoadOrStoreFileSymlink(t *testing.T) {
7375
defer w.Stop()
7476
defer w.Close()
7577

76-
c := newWatchmanCache(w, getTempFile(t))
78+
c := newWatchmanCache(config.New(), w, getTempFile(t))
7779

7880
computes := 0
7981
compute := func(path string, content []byte) (any, error) {

0 commit comments

Comments
 (0)