Skip to content

Commit 9651276

Browse files
committed
internal/lsp/cache: optimize Snapshot.clone
This change replaces the single large map used for snapshot.goFiles by a map of 256 stripes, each of which becomes immutable once shared. This optimizes the common case in which the copy is nearly identical to the original. We still need to visit each map entry to see whether it needs to be deleted (which is rare) and to inherit the handle in the usual case. This is now done concurrently. Also, share the (logically immutable) []PackageIDs slices across old and new snapshots. This was worth 5% of CPU and 1/3 of allocations (all small). Benchmark on darwin/arm64 shows a 29% reduction for DidChange. $ go test -v ./gopls/internal/regtest/bench -run=TestBenchmarkDidChange -didchange_dir=$HOME/w/kubernetes -didchange_file=pkg/util/hash/hash.go Before: BenchmarkStatistics 100 22955469 ns/op 11308095 B/op 47412 allocs/op BenchmarkStatistics 100 23454630 ns/op 11226742 B/op 46882 allocs/op BenchmarkStatistics 100 23618532 ns/op 11258619 B/op 47068 allocs/op After goFilesMap: BenchmarkStatistics 100 16643972 ns/op 8770787 B/op 46238 allocs/op BenchmarkStatistics 100 17805864 ns/op 8862926 B/op 46762 allocs/op BenchmarkStatistics 100 18618255 ns/op 9308864 B/op 49776 allocs/op After goFilesMap and ids sharing: BenchmarkStatistics 100 16703623 ns/op 8772626 B/op 33812 allocs/op BenchmarkStatistics 100 16927378 ns/op 8529491 B/op 32328 allocs/op BenchmarkStatistics 100 16632762 ns/op 8557533 B/op 32497 allocs/op Also: - Add comments documenting findings of profiling. - preallocate slice for knownSubdirs. - remove unwanted loop over slice in Generation.Inherit Updates golang/go#45686 Change-Id: Id953699191b8404cf36ba3a7ab9cd78b1d19c0a2 Reviewed-on: https://go-review.googlesource.com/c/tools/+/410176 TryBot-Result: Gopher Robot <[email protected]> Reviewed-by: Robert Findley <[email protected]> gopls-CI: kokoro <[email protected]> Run-TryBot: Alan Donovan <[email protected]>
1 parent 697795d commit 9651276

File tree

7 files changed

+191
-37
lines changed

7 files changed

+191
-37
lines changed

internal/lsp/cache/cache.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,14 @@ func (h *fileHandle) Read() ([]byte, error) {
183183
return h.bytes, h.err
184184
}
185185

186+
// hashContents returns a string of hex digits denoting the hash of contents.
187+
//
188+
// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
189+
// to hex digits on demand (rare). The array is larger when it appears as a
190+
// struct field (32B vs 16B) but smaller overall (string data is 64B), has
191+
// better locality, and is more efficiently hashed by runtime maps.
186192
func hashContents(contents []byte) string {
187-
return fmt.Sprintf("%x", sha256.Sum256(contents))
193+
return fmt.Sprintf("%64x", sha256.Sum256(contents))
188194
}
189195

190196
var cacheIndex, sessionIndex, viewIndex int64

internal/lsp/cache/session.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ func (s *Session) createView(ctx context.Context, name string, folder span.URI,
234234
packages: make(map[packageKey]*packageHandle),
235235
meta: NewMetadataGraph(),
236236
files: make(map[span.URI]source.VersionedFileHandle),
237-
goFiles: make(map[parseKey]*parseGoHandle),
237+
goFiles: newGoFileMap(),
238238
symbols: make(map[span.URI]*symbolHandle),
239239
actions: make(map[actionKey]*actionHandle),
240240
workspacePackages: make(map[PackageID]PackagePath),

internal/lsp/cache/snapshot.go

Lines changed: 162 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ type snapshot struct {
7676
files map[span.URI]source.VersionedFileHandle
7777

7878
// goFiles maps a parseKey to its parseGoHandle.
79-
goFiles map[parseKey]*parseGoHandle
79+
goFiles *goFileMap
8080

8181
// TODO(rfindley): consider merging this with files to reduce burden on clone.
8282
symbols map[span.URI]*symbolHandle
@@ -663,16 +663,17 @@ func (s *snapshot) transitiveReverseDependencies(id PackageID, ids map[PackageID
663663
func (s *snapshot) getGoFile(key parseKey) *parseGoHandle {
664664
s.mu.Lock()
665665
defer s.mu.Unlock()
666-
return s.goFiles[key]
666+
return s.goFiles.get(key)
667667
}
668668

669669
func (s *snapshot) addGoFile(key parseKey, pgh *parseGoHandle) *parseGoHandle {
670670
s.mu.Lock()
671671
defer s.mu.Unlock()
672-
if existing, ok := s.goFiles[key]; ok {
673-
return existing
672+
673+
if prev := s.goFiles.get(key); prev != nil {
674+
return prev
674675
}
675-
s.goFiles[key] = pgh
676+
s.goFiles.set(key, pgh)
676677
return pgh
677678
}
678679

@@ -811,6 +812,8 @@ func (s *snapshot) fileWatchingGlobPatterns(ctx context.Context) map[string]stru
811812
patterns := map[string]struct{}{
812813
fmt.Sprintf("**/*.{%s}", extensions): {},
813814
}
815+
816+
// Add a pattern for each Go module in the workspace that is not within the view.
814817
dirs := s.workspace.dirs(ctx, s)
815818
for _, dir := range dirs {
816819
dirName := dir.Filename()
@@ -830,14 +833,19 @@ func (s *snapshot) fileWatchingGlobPatterns(ctx context.Context) map[string]stru
830833
// contain Go code (golang/go#42348). To handle this, explicitly watch all
831834
// of the directories in the workspace. We find them by adding the
832835
// directories of every file in the snapshot's workspace directories.
833-
var dirNames []string
834-
for _, uri := range s.getKnownSubdirs(dirs) {
835-
dirNames = append(dirNames, uri.Filename())
836-
}
837-
sort.Strings(dirNames)
838-
if len(dirNames) > 0 {
836+
// There may be thousands.
837+
knownSubdirs := s.getKnownSubdirs(dirs)
838+
if n := len(knownSubdirs); n > 0 {
839+
dirNames := make([]string, 0, n)
840+
for _, uri := range knownSubdirs {
841+
dirNames = append(dirNames, uri.Filename())
842+
}
843+
sort.Strings(dirNames)
844+
// The double allocation of Sprintf(Join()) accounts for 8%
845+
// of DidChange, but specializing doesn't appear to help. :(
839846
patterns[fmt.Sprintf("{%s}", strings.Join(dirNames, ","))] = struct{}{}
840847
}
848+
841849
return patterns
842850
}
843851

@@ -874,7 +882,7 @@ func (s *snapshot) getKnownSubdirs(wsDirs []span.URI) []span.URI {
874882
}
875883
s.unprocessedSubdirChanges = nil
876884

877-
var result []span.URI
885+
result := make([]span.URI, 0, len(s.knownSubdirs))
878886
for uri := range s.knownSubdirs {
879887
result = append(result, uri)
880888
}
@@ -1719,7 +1727,7 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC
17191727
packages: make(map[packageKey]*packageHandle, len(s.packages)),
17201728
actions: make(map[actionKey]*actionHandle, len(s.actions)),
17211729
files: make(map[span.URI]source.VersionedFileHandle, len(s.files)),
1722-
goFiles: make(map[parseKey]*parseGoHandle, len(s.goFiles)),
1730+
goFiles: s.goFiles.clone(),
17231731
symbols: make(map[span.URI]*symbolHandle, len(s.symbols)),
17241732
workspacePackages: make(map[PackageID]PackagePath, len(s.workspacePackages)),
17251733
unloadableFiles: make(map[span.URI]struct{}, len(s.unloadableFiles)),
@@ -1764,12 +1772,27 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC
17641772
result.parseWorkHandles[k] = v
17651773
}
17661774

1767-
for k, v := range s.goFiles {
1768-
if _, ok := changes[k.file.URI]; ok {
1769-
continue
1775+
// Copy the handles of all Go source files.
1776+
// There may be tens of thousands of files,
1777+
// but changes are typically few, so we
1778+
// use a striped map optimized for this case
1779+
// and visit its stripes in parallel.
1780+
var (
1781+
toDeleteMu sync.Mutex
1782+
toDelete []parseKey
1783+
)
1784+
s.goFiles.forEachConcurrent(func(k parseKey, v *parseGoHandle) {
1785+
if changes[k.file.URI] == nil {
1786+
// no change (common case)
1787+
newGen.Inherit(v.handle)
1788+
} else {
1789+
toDeleteMu.Lock()
1790+
toDelete = append(toDelete, k)
1791+
toDeleteMu.Unlock()
17701792
}
1771-
newGen.Inherit(v.handle)
1772-
result.goFiles[k] = v
1793+
})
1794+
for _, k := range toDelete {
1795+
result.goFiles.delete(k)
17731796
}
17741797

17751798
// Copy all of the go.mod-related handles. They may be invalidated later,
@@ -1975,21 +1998,34 @@ func (s *snapshot) clone(ctx, bgCtx context.Context, changes map[span.URI]*fileC
19751998
deleteInvalidMetadata := forceReloadMetadata || workspaceModeChanged
19761999
idsInSnapshot := map[PackageID]bool{} // track all known IDs
19772000
for uri, ids := range s.meta.ids {
1978-
var resultIDs []PackageID
1979-
for _, id := range ids {
2001+
// Optimization: ids slices are typically numerous, short (<3),
2002+
// and rarely modified by this loop, so don't allocate copies
2003+
// until necessary.
2004+
var resultIDs []PackageID // nil implies equal to ids[:i:i]
2005+
for i, id := range ids {
19802006
if skipID[id] || deleteInvalidMetadata && idsToInvalidate[id] {
2007+
resultIDs = ids[:i:i] // unshare
19812008
continue
19822009
}
19832010
// The ID is not reachable from any workspace package, so it should
19842011
// be deleted.
19852012
if !reachableID[id] {
2013+
resultIDs = ids[:i:i] // unshare
19862014
continue
19872015
}
19882016
idsInSnapshot[id] = true
1989-
resultIDs = append(resultIDs, id)
2017+
if resultIDs != nil {
2018+
resultIDs = append(resultIDs, id)
2019+
}
2020+
}
2021+
if resultIDs == nil {
2022+
resultIDs = ids
19902023
}
19912024
result.meta.ids[uri] = resultIDs
19922025
}
2026+
// TODO(adonovan): opt: represent PackageID as an index into a process-global
2027+
// dup-free list of all package names ever seen, then use a bitmap instead of
2028+
// a hash table for "PackageSet" (e.g. idsInSnapshot).
19932029

19942030
// Copy the package metadata. We only need to invalidate packages directly
19952031
// containing the affected file, and only if it changed in a relevant way.
@@ -2259,7 +2295,7 @@ func metadataChanges(ctx context.Context, lockedSnapshot *snapshot, oldFH, newFH
22592295
// lockedSnapshot must be locked.
22602296
func peekOrParse(ctx context.Context, lockedSnapshot *snapshot, fh source.FileHandle, mode source.ParseMode) (*source.ParsedGoFile, error) {
22612297
key := parseKey{file: fh.FileIdentity(), mode: mode}
2262-
if pgh := lockedSnapshot.goFiles[key]; pgh != nil {
2298+
if pgh := lockedSnapshot.goFiles.get(key); pgh != nil {
22632299
cached := pgh.handle.Cached(lockedSnapshot.generation)
22642300
if cached != nil {
22652301
cached := cached.(*parseGoData)
@@ -2547,3 +2583,107 @@ func readGoSum(dst map[module.Version][]string, file string, data []byte) error
25472583
}
25482584
return nil
25492585
}
2586+
2587+
// -- goFileMap --
2588+
2589+
// A goFileMap is conceptually a map[parseKey]*parseGoHandle,
2590+
// optimized for cloning all or nearly all entries.
2591+
type goFileMap struct {
2592+
// The map is represented as a map of 256 stripes, one per
2593+
// distinct value of the top 8 bits of key.file.Hash.
2594+
// Each stripe has an associated boolean indicating whether it
2595+
// is shared, and thus immutable, and thus must be copied before any update.
2596+
// (The bits could be packed but it hasn't been worth it yet.)
2597+
stripes [256]map[parseKey]*parseGoHandle
2598+
exclusive [256]bool // exclusive[i] means stripe[i] is not shared and may be safely mutated
2599+
}
2600+
2601+
// newGoFileMap returns a new empty goFileMap.
2602+
func newGoFileMap() *goFileMap {
2603+
return new(goFileMap) // all stripes are shared (non-exclusive) nil maps
2604+
}
2605+
2606+
// clone returns a copy of m.
2607+
// For concurrency, it counts as an update to m.
2608+
func (m *goFileMap) clone() *goFileMap {
2609+
m.exclusive = [256]bool{} // original and copy are now nonexclusive
2610+
copy := *m
2611+
return &copy
2612+
}
2613+
2614+
// get returns the value for key k.
2615+
func (m *goFileMap) get(k parseKey) *parseGoHandle {
2616+
return m.stripes[m.hash(k)][k]
2617+
}
2618+
2619+
// set updates the value for key k to v.
2620+
func (m *goFileMap) set(k parseKey, v *parseGoHandle) {
2621+
m.unshare(k)[k] = v
2622+
}
2623+
2624+
// delete deletes the value for key k, if any.
2625+
func (m *goFileMap) delete(k parseKey) {
2626+
// TODO(adonovan): opt?: skip unshare if k isn't present.
2627+
delete(m.unshare(k), k)
2628+
}
2629+
2630+
// forEachConcurrent calls f for each entry in the map.
2631+
// Calls may be concurrent.
2632+
// f must not modify m.
2633+
func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) {
2634+
// Visit stripes in parallel chunks.
2635+
const p = 16 // concurrency level
2636+
var wg sync.WaitGroup
2637+
wg.Add(p)
2638+
for i := 0; i < p; i++ {
2639+
chunk := m.stripes[i*p : (i+1)*p]
2640+
go func() {
2641+
for _, stripe := range chunk {
2642+
for k, v := range stripe {
2643+
f(k, v)
2644+
}
2645+
}
2646+
wg.Done()
2647+
}()
2648+
}
2649+
wg.Wait()
2650+
}
2651+
2652+
// -- internal--
2653+
2654+
// hash returns 8 bits from the key's file digest.
2655+
func (m *goFileMap) hash(k parseKey) int {
2656+
h := k.file.Hash
2657+
if h == "" {
2658+
// Sadly the Hash isn't always a hash because cache.GetFile may
2659+
// successfully return a *fileHandle containing an error and no hash.
2660+
// Lump the duds together for now.
2661+
// TODO(adonovan): fix the underlying bug.
2662+
return 0
2663+
}
2664+
return unhex(h[0])<<4 | unhex(h[1])
2665+
}
2666+
2667+
// unhex returns the value of a valid hex digit.
2668+
func unhex(b byte) int {
2669+
if '0' <= b && b <= '9' {
2670+
return int(b - '0')
2671+
}
2672+
return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
2673+
}
2674+
2675+
// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
2676+
func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
2677+
i := m.hash(k)
2678+
if !m.exclusive[i] {
2679+
m.exclusive[i] = true
2680+
2681+
// Copy the map.
2682+
copy := make(map[parseKey]*parseGoHandle, len(m.stripes[i]))
2683+
for k, v := range m.stripes[i] {
2684+
copy[k] = v
2685+
}
2686+
m.stripes[i] = copy
2687+
}
2688+
return m.stripes[i]
2689+
}

internal/lsp/source/view.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ type FileHandle interface {
532532
type FileIdentity struct {
533533
URI span.URI
534534

535-
// Identifier represents a unique identifier for the file's content.
535+
// Hash is a string of hex digits denoting the cryptographic digest of the file's content.
536536
Hash string
537537
}
538538

internal/memoize/memoize.go

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -234,19 +234,18 @@ func (s *Store) DebugOnlyIterate(f func(k, v interface{})) {
234234
}
235235
}
236236

237-
func (g *Generation) Inherit(hs ...*Handle) {
238-
for _, h := range hs {
239-
if atomic.LoadUint32(&g.destroyed) != 0 {
240-
panic("inherit on generation " + g.name + " destroyed by " + g.destroyedBy)
241-
}
237+
// Inherit makes h valid in generation g. It is concurrency-safe.
238+
func (g *Generation) Inherit(h *Handle) {
239+
if atomic.LoadUint32(&g.destroyed) != 0 {
240+
panic("inherit on generation " + g.name + " destroyed by " + g.destroyedBy)
241+
}
242242

243-
h.mu.Lock()
244-
if h.state == stateDestroyed {
245-
panic(fmt.Sprintf("inheriting destroyed handle %#v (type %T) into generation %v", h.key, h.key, g.name))
246-
}
247-
h.generations[g] = struct{}{}
248-
h.mu.Unlock()
243+
h.mu.Lock()
244+
if h.state == stateDestroyed {
245+
panic(fmt.Sprintf("inheriting destroyed handle %#v (type %T) into generation %v", h.key, h.key, g.name))
249246
}
247+
h.generations[g] = struct{}{}
248+
h.mu.Unlock()
250249
}
251250

252251
// Cached returns the value associated with a handle.

internal/memoize/memoize_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ func TestCleanup(t *testing.T) {
8787
expectGet(t, h1, g1, &v1)
8888
expectGet(t, h2, g1, &v2)
8989
g2 := s.Generation("g2")
90-
g2.Inherit(h1, h2)
90+
g2.Inherit(h1)
91+
g2.Inherit(h2)
9192

9293
g1.Destroy("TestCleanup")
9394
expectGet(t, h1, g2, &v1)

internal/span/uri.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ func (uri URI) Filename() string {
3535
}
3636

3737
func filename(uri URI) (string, error) {
38+
// This function is frequently called and its cost is
39+
// dominated by the allocation of a net.URL.
40+
// TODO(adonovan): opt: replace by a bespoke parseFileURI
41+
// function that doesn't allocate.
3842
if uri == "" {
3943
return "", nil
4044
}
@@ -80,6 +84,10 @@ func URIFromURI(s string) URI {
8084
return URI(u.String())
8185
}
8286

87+
// CompareURI performs a three-valued comparison of two URIs.
88+
// Lexically unequal URIs may compare equal if they are "file:" URIs
89+
// that share the same base name (ignoring case) and denote the same
90+
// file device/inode, according to stat(2).
8391
func CompareURI(a, b URI) int {
8492
if equalURI(a, b) {
8593
return 0

0 commit comments

Comments
 (0)