Skip to content

Commit 8e32503

Browse files
authored
perf: detect and simplify source glob filters (#120)
### Changes are visible to end-users: no

### Test plan

- Covered by existing test cases
- New test cases added
1 parent 4a3c2d2 commit 8e32503

File tree

17 files changed

+408
-95
lines changed

17 files changed

+408
-95
lines changed

common/BUILD.bazel

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go_library(
55
srcs = [
66
"directives.go",
77
"error.go",
8+
"glob.go",
89
"regex.go",
910
"set.go",
1011
"walk.go",
@@ -15,6 +16,7 @@ go_library(
1516
deps = [
1617
"//common/logger",
1718
"@com_github_bazelbuild_buildtools//build",
19+
"@com_github_bmatcuk_doublestar_v4//:doublestar",
1820
"@com_github_emirpasic_gods//sets/treeset",
1921
"@gazelle//config",
2022
"@gazelle//label",
@@ -25,10 +27,14 @@ go_library(
2527

2628
go_test(
2729
name = "common_test",
28-
srcs = ["set_test.go"],
30+
srcs = [
31+
"glob_test.go",
32+
"set_test.go",
33+
],
2934
embed = [":common"],
3035
deps = [
3136
"@com_github_bazelbuild_buildtools//build",
37+
"@com_github_bmatcuk_doublestar_v4//:doublestar",
3238
"@gazelle//label",
3339
],
3440
)

common/glob.go

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
package gazelle
2+
3+
import (
4+
"fmt"
5+
"regexp"
6+
"strings"
7+
"sync"
8+
9+
"github.com/bmatcuk/doublestar/v4"
10+
)
11+
12+
// GlobExpr is a compiled glob matcher: it reports whether the given path
// matches the expression(s) it was parsed from.
type GlobExpr func(string) bool
13+
14+
// Expressions that are not even globs
15+
var nonGlobRe = regexp.MustCompile(`^[\w./@-]+$`)
16+
17+
// Doublestar globs that can be simplified to only a prefix and suffix
18+
var prePostGlobRe = regexp.MustCompile(`^([\w./@-]*)\*\*(?:/\*?)?([\w./@-]+)$`)
19+
20+
// Globs with a literal prefix or suffix that can be checked cheaply before invoking the doublestar matcher
21+
var preGlobRe = regexp.MustCompile(`^([\w./@-]+).*$`)
22+
var postGlobRe = regexp.MustCompile(`^.*?([\w./@-]+)$`)
23+
24+
// parsedExpCache memoizes the matchers built by ParseGlobExpression and
// ParseGlobExpressions so repeated calls with the same input reuse one
// matcher. It is a sync.Map, so it may be used from multiple goroutines.
var parsedExpCache sync.Map
25+
26+
func ParseGlobExpression(exp string) (GlobExpr, error) {
27+
loaded, ok := parsedExpCache.Load(exp)
28+
if ok {
29+
return loaded.(GlobExpr), nil
30+
}
31+
32+
if !doublestar.ValidatePattern(exp) {
33+
return nil, fmt.Errorf("invalid glob pattern: %s", exp)
34+
}
35+
36+
expr := parseGlobExpression(exp)
37+
loaded, _ = parsedExpCache.LoadOrStore(exp, expr)
38+
return loaded.(GlobExpr), nil
39+
}
40+
41+
func parseGlobExpression(exp string) GlobExpr {
42+
if nonGlobRe.MatchString(exp) {
43+
return func(p string) bool {
44+
return p == exp
45+
}
46+
}
47+
48+
if extGlob := prePostGlobRe.FindStringSubmatch(exp); len(extGlob) > 0 {
49+
// Globs that can be expressed as pre + ** + ext
50+
pre, ext := extGlob[1], extGlob[2]
51+
minLen := len(pre) + len(ext)
52+
return func(p string) bool {
53+
return len(p) >= minLen && strings.HasPrefix(p, pre) && strings.HasSuffix(p, ext)
54+
}
55+
}
56+
57+
if preGlob := preGlobRe.FindStringSubmatch(exp); len(preGlob) > 0 {
58+
pre := preGlob[1]
59+
return func(p string) bool {
60+
if !strings.HasPrefix(p, pre) {
61+
return false
62+
}
63+
return doublestar.MatchUnvalidated(exp, p)
64+
}
65+
}
66+
67+
if postGlob := postGlobRe.FindStringSubmatch(exp); len(postGlob) > 0 {
68+
post := postGlob[1]
69+
return func(p string) bool {
70+
if !strings.HasSuffix(p, post) {
71+
return false
72+
}
73+
return doublestar.MatchUnvalidated(exp, p)
74+
}
75+
}
76+
77+
return func(p string) bool {
78+
return doublestar.MatchUnvalidated(exp, p)
79+
}
80+
}
81+
82+
func ParseGlobExpressions(exps []string) (GlobExpr, error) {
83+
if len(exps) == 1 {
84+
return ParseGlobExpression(exps[0])
85+
}
86+
87+
key := strings.Join(exps, ",")
88+
loaded, ok := parsedExpCache.Load(key)
89+
if ok {
90+
return loaded.(GlobExpr), nil
91+
}
92+
93+
expr, err := parseGlobExpressions(exps)
94+
if err != nil {
95+
return nil, err
96+
}
97+
98+
loaded, _ = parsedExpCache.LoadOrStore(key, expr)
99+
return loaded.(GlobExpr), nil
100+
}
101+
102+
func parseGlobExpressions(exps []string) (GlobExpr, error) {
103+
exacts := make(map[string]struct{})
104+
prePosts := make(map[string][]string)
105+
preGlobs := make(map[string][]string)
106+
postGlobs := make(map[string][]string)
107+
globs := make([]string, 0)
108+
109+
for _, exp := range exps {
110+
if !doublestar.ValidatePattern(exp) {
111+
return nil, fmt.Errorf("invalid glob pattern: %s", exp)
112+
}
113+
114+
if nonGlobRe.MatchString(exp) {
115+
exacts[exp] = struct{}{}
116+
} else if extGlob := prePostGlobRe.FindStringSubmatch(exp); len(extGlob) > 0 {
117+
// Globs that can be expressed as pre + ** + ext
118+
pre, ext := extGlob[1], extGlob[2]
119+
prePosts[pre] = append(prePosts[pre], ext)
120+
} else if preGlob := preGlobRe.FindStringSubmatch(exp); len(preGlob) > 0 {
121+
pre := preGlob[1]
122+
preGlobs[pre] = append(preGlobs[pre], exp)
123+
} else if postGlob := postGlobRe.FindStringSubmatch(exp); len(postGlob) > 0 {
124+
post := postGlob[1]
125+
postGlobs[post] = append(postGlobs[post], exp)
126+
} else {
127+
globs = append(globs, exp)
128+
}
129+
}
130+
131+
exprFuncs := make([]GlobExpr, 0, 5)
132+
133+
if len(exacts) > 0 {
134+
exprFuncs = append(exprFuncs, func(p string) bool {
135+
_, e := exacts[p]
136+
return e
137+
})
138+
}
139+
140+
if len(prePosts) > 0 {
141+
exprFuncs = append(exprFuncs, func(p string) bool {
142+
for pre, exts := range prePosts {
143+
if strings.HasPrefix(p, pre) {
144+
for _, ext := range exts {
145+
if len(p) >= len(pre)+len(ext) && strings.HasSuffix(p, ext) {
146+
return true
147+
}
148+
}
149+
}
150+
}
151+
return false
152+
})
153+
}
154+
155+
if len(preGlobs) > 0 {
156+
exprFuncs = append(exprFuncs, func(p string) bool {
157+
for pre, globs := range preGlobs {
158+
if strings.HasPrefix(p, pre) {
159+
for _, glob := range globs {
160+
if doublestar.MatchUnvalidated(glob, p) {
161+
return true
162+
}
163+
}
164+
}
165+
}
166+
return false
167+
})
168+
}
169+
170+
if len(postGlobs) > 0 {
171+
exprFuncs = append(exprFuncs, func(p string) bool {
172+
for post, globs := range postGlobs {
173+
if strings.HasSuffix(p, post) {
174+
for _, glob := range globs {
175+
if doublestar.MatchUnvalidated(glob, p) {
176+
return true
177+
}
178+
}
179+
}
180+
}
181+
return false
182+
})
183+
}
184+
185+
if len(globs) > 0 {
186+
exprFuncs = append(exprFuncs, func(p string) bool {
187+
for _, glob := range globs {
188+
if doublestar.MatchUnvalidated(glob, p) {
189+
return true
190+
}
191+
}
192+
return false
193+
})
194+
}
195+
196+
return func(p string) bool {
197+
for _, expr := range exprFuncs {
198+
if expr(p) {
199+
return true
200+
}
201+
}
202+
return false
203+
}, nil
204+
}

common/glob_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package gazelle
2+
3+
import (
4+
"testing"
5+
6+
"github.com/bmatcuk/doublestar/v4"
7+
)
8+
9+
func TestParseGlobExpressionVsDoublestar(t *testing.T) {
10+
// Ensure any shortcuts that ParseGlobExpression takes preserve the same behaviour
11+
// as running doublestar directly.
12+
// The results of the expression are not checked, only that any shortcuts ParseGlobExpression
13+
// adds still match the result of doublestar without those shortcuts.
14+
tests := map[string][]string{
15+
// Exact matches
16+
"file.txt": {"file.txt", "./file.txt", "file", ".file", "file.", "a/file.txt"},
17+
"WORKSPACE": {"WORKSPACE", "WORKSPACE.bazel", "a/WORKSPACE", "WORKSPACE.txt", "a/WORKSPACE.bazel"},
18+
"WORKSPACE.bazel": {"WORKSPACE", "WORKSPACE.bazel", "a/WORKSPACE", "WORKSPACE.txt", "a/WORKSPACE.bazel"},
19+
"@foo/bar": {"@foo/bar/baz", "@foo/bar", "foo/bar", "a/@foo/bar"},
20+
21+
"@foo/*@1.2.3": {"@foo/bar/[email protected]", "@foo/[email protected]", "foo/[email protected]", "@foo/[email protected]"},
22+
23+
// Exact matches with paths
24+
"path/to/file.txt": {"path/to/file.txt", "a/path/to/file.txt", "path/to/file.txt2"},
25+
26+
// Doublestar with prefix
27+
"src/**/*.go": {"src/main.go", "src/deep/nested/file.go", "src/foo.go", "src/", "src/.go"},
28+
"src/foo/**/*.go": {"src/main.go", "src/foo/main.go", "src/foo/bar/main.go", "foo/src/main.go", "main.go", "src/foo/src/main.go"},
29+
30+
// With prefix and suffix that are equal
31+
"foo/**/foo": {"foo", "foo/foo", "foo/bar/foo", "foo/foo/foo"},
32+
33+
// Body with doublestars
34+
"**/foo/**": {"foo/bar", "a/foo/baz", "a/b/c/foo/d/e", "foo", "a/b/c/foo", "foo/a/b/c"},
35+
36+
// Starting doublestars
37+
"**/WORKSPACE": {"WORKSPACE", "WORKSPACE.bazel", "a/WORKSPACE", "WORKSPACE.txt", "a/WORKSPACE.bazel"},
38+
"**/WORKSPACE.bazel": {"WORKSPACE", "WORKSPACE.bazel", "a/WORKSPACE", "WORKSPACE.txt", "a/WORKSPACE.bazel"},
39+
"**/@foo/bar": {"@foo/bar/baz", "@foo/bar", "foo/bar", "a/@foo/bar"},
40+
"**/*.go": {"main.go", "src/main.go", "src/deep/nested/file.go"},
41+
"**/*_test.go": {"src/test_file.go", "src/path/test_file.go", "deep/nested/test_file.go"},
42+
"**/*.pb.go": {"generated.pb.go", "src/generated.pb.go"},
43+
"**/*.d.ts": {"src/types.d.ts", "types.d.ts"},
44+
45+
// Prefix without doublestars
46+
"src/*.go": {"src/main.go", "main.go", "src/a/b/main.go", "foo/src/main.go"},
47+
"src/*/test_*.go": {"src/path/test_file.go", "src/a/test_b/c.go", "src/test_file.go"},
48+
"**/*.test.js": {"src/test.main.js"},
49+
"src/**/test_*.spec.ts": {"src/path/test_file.spec.ts", "src/test_foo.spec.ts"},
50+
"very/long/path/with/many/segments/file.go": {"very/long/path/with/many/segments/file.go"},
51+
"path/with/unicode/测试文件.txt": {"path/with/unicode/测试文件.txt"},
52+
53+
// Odd cases
54+
"": {""},
55+
"**": {"", "a", "a/b/c"},
56+
"**/*": {"", "a", "a.b", "a/b/c", "a/b/c.d"},
57+
}
58+
59+
for testPattern, testCases := range tests {
60+
expr, err := ParseGlobExpression(testPattern)
61+
62+
// Verify doublestar agrees on validity
63+
if (err == nil) != doublestar.ValidatePattern(testPattern) {
64+
t.Errorf("ParseGlobExpression(%q) returned error %v and doublestar returned the opposite", testPattern, err)
65+
}
66+
67+
// Verify matching behaviour
68+
for _, c := range testCases {
69+
if expr(c) != doublestar.MatchUnvalidated(testPattern, c) {
70+
t.Errorf("pattern %q did not align with doublestar with case %q", testPattern, c)
71+
}
72+
}
73+
}
74+
}

language/js/BUILD.bazel

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ go_library(
4747
"@aspect_gazelle//common/logger",
4848
"@aspect_gazelle//common/rule",
4949
"@com_github_bazelbuild_buildtools//build",
50-
"@com_github_bmatcuk_doublestar_v4//:doublestar",
51-
"@com_github_emirpasic_gods//maps/linkedhashmap",
5250
"@com_github_emirpasic_gods//maps/treemap",
5351
"@com_github_emirpasic_gods//sets/treeset",
5452
"@gazelle//config",

language/js/MODULE.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ bazel_dep(name = "gazelle", version = "0.45.0") # must match go.mod
1111
# Go modules
1212
go_deps = use_extension("@gazelle//:extensions.bzl", "go_deps")
1313
go_deps.from_file(go_mod = "//:go.mod")
14-
use_repo(go_deps, "com_github_bazelbuild_buildtools", "com_github_bmatcuk_doublestar_v4", "com_github_emirpasic_gods", "com_github_masterminds_semver_v3", "com_github_msolo_jsonr", "in_gopkg_yaml_v3")
14+
use_repo(go_deps, "com_github_bazelbuild_buildtools", "com_github_emirpasic_gods", "com_github_masterminds_semver_v3", "com_github_msolo_jsonr", "in_gopkg_yaml_v3")
1515

1616
####### Dev dependencies ########
1717

0 commit comments

Comments
 (0)