Skip to content

Commit 3e94830

Browse files
committed
internal/imports: optimize package path filtering
As reported in golang/go#67889, a short unresolved identifier can cause goimports fixes to be quite slow, because the substring match heuristic used to filter import paths matches a large fraction of the module cache. Fix this by improving the precision of the matching heuristic used to filter potential packages. We now match only full segments of the import path (ignoring '.' and '-'), or subsegments delimited by '.' and '-'. Add a gopls benchmark that reproduces this initial slowness, along with a command to force a scan of the module cache. On my (overpowered) linux development machine, with a 5GB module cache, this change reduces the benchmark time ~90%, from 2s to 200ms. With a smaller machine, slower operating system, or larger module cache, I imagine the starting point could be significantly more than 2s. Fixes golang/go#67889 Change-Id: Id8f7ea20040b059b42366333adeb4add684dee61 Reviewed-on: https://go-review.googlesource.com/c/tools/+/591715 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Alan Donovan <[email protected]>
1 parent dddd55d commit 3e94830

File tree

10 files changed

+305
-52
lines changed

10 files changed

+305
-52
lines changed

gopls/doc/commands.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,10 @@ Args:
568568
}
569569
```
570570

571+
## `gopls.scan_imports`: **force a sychronous scan of the imports cache.**
572+
573+
This command is intended for use by gopls tests only.
574+
571575
## `gopls.start_debugging`: **Start the gopls debug server**
572576

573577
Start the gopls debug server if it isn't running, and return the debug

gopls/internal/cache/view.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"encoding/json"
1515
"errors"
1616
"fmt"
17+
"log"
1718
"os"
1819
"os/exec"
1920
"path"
@@ -481,6 +482,14 @@ func (v *View) shutdown() {
481482
v.snapshotMu.Unlock()
482483
}
483484

485+
// ScanImports scans the module cache synchronously.
486+
// For use in tests.
487+
func (v *View) ScanImports() {
488+
gomodcache := v.folder.Env.GOMODCACHE
489+
dirCache := v.importsState.modCache.dirCache(gomodcache)
490+
imports.ScanModuleCache(gomodcache, dirCache, log.Printf)
491+
}
492+
484493
// IgnoredFile reports if a file would be ignored by a `go list` of the whole
485494
// workspace.
486495
//

gopls/internal/doc/api.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,13 @@
11071107
"ArgDoc": "{\n\t// The test file containing the tests to run.\n\t\"URI\": string,\n\t// Specific test names to run, e.g. TestFoo.\n\t\"Tests\": []string,\n\t// Specific benchmarks to run, e.g. BenchmarkFoo.\n\t\"Benchmarks\": []string,\n}",
11081108
"ResultDoc": ""
11091109
},
1110+
{
1111+
"Command": "gopls.scan_imports",
1112+
"Title": "force a sychronous scan of the imports cache.",
1113+
"Doc": "This command is intended for use by gopls tests only.",
1114+
"ArgDoc": "",
1115+
"ResultDoc": ""
1116+
},
11101117
{
11111118
"Command": "gopls.start_debugging",
11121119
"Title": "Start the gopls debug server",

gopls/internal/protocol/command/command_gen.go

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gopls/internal/protocol/command/interface.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ type Interface interface {
259259
// specified function symbol (plus any nested lambdas and defers).
260260
// The machine architecture is determined by the view.
261261
Assembly(_ context.Context, viewID, packageID, symbol string) error
262+
263+
// ScanImports: force a sychronous scan of the imports cache.
264+
//
265+
// This command is intended for use by gopls tests only.
266+
ScanImports(context.Context) error
262267
}
263268

264269
type RunTestsArgs struct {

gopls/internal/server/command.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,3 +1484,10 @@ func (c *commandHandler) Assembly(ctx context.Context, viewID, packageID, symbol
14841484
openClientBrowser(ctx, c.s.client, url)
14851485
return nil
14861486
}
1487+
1488+
func (c *commandHandler) ScanImports(ctx context.Context) error {
1489+
for _, v := range c.s.session.Views() {
1490+
v.ScanImports()
1491+
}
1492+
return nil
1493+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package bench
6+
7+
import (
8+
"context"
9+
"flag"
10+
"testing"
11+
"time"
12+
13+
"golang.org/x/tools/gopls/internal/protocol"
14+
"golang.org/x/tools/gopls/internal/protocol/command"
15+
. "golang.org/x/tools/gopls/internal/test/integration"
16+
"golang.org/x/tools/gopls/internal/test/integration/fake"
17+
)
18+
19+
var gopath = flag.String("gopath", "", "if set, run goimports scan with this GOPATH value")
20+
21+
func BenchmarkInitialGoimportsScan(b *testing.B) {
22+
if *gopath == "" {
23+
// This test doesn't make much sense with a tiny module cache.
24+
// For now, don't bother trying to construct a huge cache, since it likely
25+
// wouldn't work well on the perf builder. Instead, this benchmark only
26+
// runs with a pre-existing GOPATH.
27+
b.Skip("imports scan requires an explicit GOPATH to be set with -gopath")
28+
}
29+
30+
repo := getRepo(b, "tools") // since this a test of module cache scanning, any repo will do
31+
32+
b.ResetTimer()
33+
34+
for i := 0; i < b.N; i++ {
35+
func() {
36+
// Unfortunately we (intentionally) don't support resetting the module
37+
// cache scan state, so in order to have an accurate benchmark we must
38+
// effectively restart gopls on every iteration.
39+
//
40+
// Warning: this can cause this benchmark to run quite slowly if the
41+
// observed time (when the timer is running) is a tiny fraction of the
42+
// actual time.
43+
b.StopTimer()
44+
config := fake.EditorConfig{
45+
Env: map[string]string{"GOPATH": *gopath},
46+
}
47+
env := repo.newEnv(b, config, "imports", false)
48+
defer env.Close()
49+
env.Await(InitialWorkspaceLoad)
50+
51+
// Create a buffer with a dangling selctor where the receiver is a single
52+
// character ('a') that matches a large fraction of the module cache.
53+
env.CreateBuffer("internal/lsp/cache/temp.go", `
54+
// This is a temp file to exercise goimports scan of the module cache.
55+
package cache
56+
57+
func _() {
58+
_ = a.B // a dangling selector causes goimports to scan many packages
59+
}
60+
`)
61+
env.AfterChange()
62+
63+
// Force a scan of the imports cache, so that the goimports algorithm
64+
// observes all directories.
65+
env.ExecuteCommand(&protocol.ExecuteCommandParams{
66+
Command: command.ScanImports.String(),
67+
}, nil)
68+
69+
if stopAndRecord := startProfileIfSupported(b, env, "importsscan"); stopAndRecord != nil {
70+
defer stopAndRecord()
71+
}
72+
73+
b.StartTimer()
74+
if false {
75+
// golang/go#67923: testing resuming imports scanning after a
76+
// cancellation.
77+
//
78+
// Cancelling and then resuming the scan should take around the same
79+
// amount of time.
80+
ctx, cancel := context.WithTimeout(env.Ctx, 50*time.Millisecond)
81+
defer cancel()
82+
if err := env.Editor.OrganizeImports(ctx, "internal/lsp/cache/temp.go"); err != nil {
83+
b.Logf("organize imports failed: %v", err)
84+
}
85+
}
86+
env.OrganizeImports("internal/lsp/cache/temp.go")
87+
}()
88+
}
89+
}

gopls/internal/test/integration/misc/import_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func main() {
4646
t.Fatal(err)
4747
}
4848
env.ExecuteCommand(&protocol.ExecuteCommandParams{
49-
Command: "gopls.add_import",
49+
Command: command.AddImport.String(),
5050
Arguments: cmd.Arguments,
5151
}, nil)
5252
got := env.BufferText("main.go")

internal/imports/fix.go

Lines changed: 82 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,58 +1776,24 @@ func pkgIsCandidate(filename string, refs references, pkg *pkg) bool {
17761776
}
17771777

17781778
// Speed optimization to minimize disk I/O:
1779-
// the last two components on disk must contain the
1780-
// package name somewhere.
17811779
//
1782-
// This permits mismatch naming like directory
1783-
// "go-foo" being package "foo", or "pkg.v3" being "pkg",
1784-
// or directory "google.golang.org/api/cloudbilling/v1"
1785-
// being package "cloudbilling", but doesn't
1786-
// permit a directory "foo" to be package
1787-
// "bar", which is strongly discouraged
1788-
// anyway. There's no reason goimports needs
1789-
// to be slow just to accommodate that.
1780+
// Use the matchesPath heuristic to filter to package paths that could
1781+
// reasonably match a dangling reference.
1782+
//
1783+
// This permits mismatch naming like directory "go-foo" being package "foo",
1784+
// or "pkg.v3" being "pkg", or directory
1785+
// "google.golang.org/api/cloudbilling/v1" being package "cloudbilling", but
1786+
// doesn't permit a directory "foo" to be package "bar", which is strongly
1787+
// discouraged anyway. There's no reason goimports needs to be slow just to
1788+
// accommodate that.
17901789
for pkgIdent := range refs {
1791-
lastTwo := lastTwoComponents(pkg.importPathShort)
1792-
if strings.Contains(lastTwo, pkgIdent) {
1793-
return true
1794-
}
1795-
if hasHyphenOrUpperASCII(lastTwo) && !hasHyphenOrUpperASCII(pkgIdent) {
1796-
lastTwo = lowerASCIIAndRemoveHyphen(lastTwo)
1797-
if strings.Contains(lastTwo, pkgIdent) {
1798-
return true
1799-
}
1800-
}
1801-
}
1802-
return false
1803-
}
1804-
1805-
func hasHyphenOrUpperASCII(s string) bool {
1806-
for i := 0; i < len(s); i++ {
1807-
b := s[i]
1808-
if b == '-' || ('A' <= b && b <= 'Z') {
1790+
if matchesPath(pkgIdent, pkg.importPathShort) {
18091791
return true
18101792
}
18111793
}
18121794
return false
18131795
}
18141796

1815-
func lowerASCIIAndRemoveHyphen(s string) (ret string) {
1816-
buf := make([]byte, 0, len(s))
1817-
for i := 0; i < len(s); i++ {
1818-
b := s[i]
1819-
switch {
1820-
case b == '-':
1821-
continue
1822-
case 'A' <= b && b <= 'Z':
1823-
buf = append(buf, b+('a'-'A'))
1824-
default:
1825-
buf = append(buf, b)
1826-
}
1827-
}
1828-
return string(buf)
1829-
}
1830-
18311797
// canUse reports whether the package in dir is usable from filename,
18321798
// respecting the Go "internal" and "vendor" visibility rules.
18331799
func canUse(filename, dir string) bool {
@@ -1868,19 +1834,84 @@ func canUse(filename, dir string) bool {
18681834
return !strings.Contains(relSlash, "/vendor/") && !strings.Contains(relSlash, "/internal/") && !strings.HasSuffix(relSlash, "/internal")
18691835
}
18701836

1871-
// lastTwoComponents returns at most the last two path components
1872-
// of v, using either / or \ as the path separator.
1873-
func lastTwoComponents(v string) string {
1837+
// matchesPath reports whether ident may match a potential package name
1838+
// referred to by path, using heuristics to filter out unidiomatic package
1839+
// names.
1840+
//
1841+
// Specifically, it checks whether either of the last two '/'- or '\'-delimited
1842+
// path segments matches the identifier. The segment-matching heuristic must
1843+
// allow for various conventions around segment naming, including go-foo,
1844+
// foo-go, and foo.v3. To handle all of these, matching considers both (1) the
1845+
// entire segment, ignoring '-' and '.', as well as (2) the last subsegment
1846+
// separated by '-' or '.'. So the segment foo-go matches all of the following
1847+
// identifiers: foo, go, and foogo. All matches are case insensitive (for ASCII
1848+
// identifiers).
1849+
//
1850+
// See the docstring for [pkgIsCandidate] for an explanation of how this
1851+
// heuristic filters potential candidate packages.
1852+
func matchesPath(ident, path string) bool {
1853+
// Ignore case, for ASCII.
1854+
lowerIfASCII := func(b byte) byte {
1855+
if 'A' <= b && b <= 'Z' {
1856+
return b + ('a' - 'A')
1857+
}
1858+
return b
1859+
}
1860+
1861+
// match reports whether path[start:end] matches ident, ignoring [.-].
1862+
match := func(start, end int) bool {
1863+
ii := len(ident) - 1 // current byte in ident
1864+
pi := end - 1 // current byte in path
1865+
for ; pi >= start && ii >= 0; pi-- {
1866+
pb := path[pi]
1867+
if pb == '-' || pb == '.' {
1868+
continue
1869+
}
1870+
pb = lowerIfASCII(pb)
1871+
ib := lowerIfASCII(ident[ii])
1872+
if pb != ib {
1873+
return false
1874+
}
1875+
ii--
1876+
}
1877+
return ii < 0 && pi < start // all bytes matched
1878+
}
1879+
1880+
// segmentEnd and subsegmentEnd hold the end points of the current segment
1881+
// and subsegment intervals.
1882+
segmentEnd := len(path)
1883+
subsegmentEnd := len(path)
1884+
1885+
// Count slashes; we only care about the last two segments.
18741886
nslash := 0
1875-
for i := len(v) - 1; i >= 0; i-- {
1876-
if v[i] == '/' || v[i] == '\\' {
1887+
1888+
for i := len(path) - 1; i >= 0; i-- {
1889+
switch b := path[i]; b {
1890+
// TODO(rfindley): we handle backlashes here only because the previous
1891+
// heuristic handled backslashes. This is perhaps overly defensive, but is
1892+
// the result of many lessons regarding Chesterton's fence and the
1893+
// goimports codebase.
1894+
//
1895+
// However, this function is only ever called with something called an
1896+
// 'importPath'. Is it possible that this is a real import path, and
1897+
// therefore we need only consider forward slashes?
1898+
case '/', '\\':
1899+
if match(i+1, segmentEnd) || match(i+1, subsegmentEnd) {
1900+
return true
1901+
}
18771902
nslash++
18781903
if nslash == 2 {
1879-
return v[i:]
1904+
return false // did not match above
1905+
}
1906+
segmentEnd, subsegmentEnd = i, i // reset
1907+
case '-', '.':
1908+
if match(i+1, subsegmentEnd) {
1909+
return true
18801910
}
1911+
subsegmentEnd = i
18811912
}
18821913
}
1883-
return v
1914+
return match(0, segmentEnd) || match(0, subsegmentEnd)
18841915
}
18851916

18861917
type visitFn func(node ast.Node) ast.Visitor

0 commit comments

Comments
 (0)