Skip to content

Commit 24d2185

Browse files
authored
Use include and exclude directories to reduce filepath traversal (#4025)
1 parent 2358ba2 commit 24d2185

File tree

5 files changed

+599
-36
lines changed

5 files changed

+599
-36
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
- Update `PROTOVALIDATE` lint rule to check `IGNORE_IF_ZERO_VALUE` on fields that track presence.
66
- Fix `buf format` on fields with missing field number tags.
7+
- Optimize `include` and `exclude` path handling for workspaces to avoid unnecessary file system
8+
operations. This change can result in a performance improvement for large workspaces.
79

810
## [v1.57.2] - 2025-09-16
911

private/buf/bufworkspace/workspace_targeting.go

Lines changed: 8 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -593,49 +593,22 @@ func getMappedModuleBucketAndModuleTargeting(
593593
// https://github.com/bufbuild/buf/issues/113
594594
storage.MatchPathExt(".proto"),
595595
}
596-
if len(excludes) != 0 {
597-
var notOrMatchers []storage.Matcher
598-
for _, exclude := range excludes {
599-
notOrMatchers = append(
600-
notOrMatchers,
601-
storage.MatchPathContained(exclude),
602-
)
603-
}
604-
matchers = append(
605-
matchers,
606-
storage.MatchNot(
607-
storage.MatchOr(
608-
notOrMatchers...,
609-
),
610-
),
611-
)
612-
}
613-
// An includes with length 0 adds no filter to the proto files.
614-
if len(includes) > 0 {
615-
var orMatchers []storage.Matcher
616-
for _, include := range includes {
617-
orMatchers = append(
618-
orMatchers,
619-
storage.MatchPathContained(include),
620-
)
621-
}
622-
matchers = append(
623-
matchers,
624-
storage.MatchOr(
625-
orMatchers...,
626-
),
627-
)
628-
}
629-
rootBuckets = append(
630-
rootBuckets,
596+
// Use MaskReadBucket over FilterReadBucket to optimize filtering on large file sets.
597+
filteredBucket, err := storage.MaskReadBucket(
631598
storage.FilterReadBucket(
632599
storage.MapReadBucket(
633600
moduleBucket,
634601
mappers...,
635602
),
636603
matchers...,
637604
),
605+
includes,
606+
excludes,
638607
)
608+
if err != nil {
609+
return nil, nil, err
610+
}
611+
rootBuckets = append(rootBuckets, filteredBucket)
639612
}
640613
docStorageReadBucket, err := bufmodule.GetDocStorageReadBucket(ctx, moduleBucket)
641614
if err != nil {

private/bufpkg/bufmodule/module_read_bucket.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ func (b *moduleReadBucket) WalkFileInfos(
399399
return fn(fileInfo)
400400
}
401401

402-
// If we have target paths, we do not want to walk to whole bucket.
402+
// If we have target paths, we do not want to walk the whole bucket.
403403
// For example, we do --path path/to/file.proto for googleapis, we don't want to
404404
// walk all of googleapis to find the single file.
405405
//

private/pkg/storage/mask.go

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
// Copyright 2020-2025 Buf Technologies, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package storage
16+
17+
import (
18+
"context"
19+
"io/fs"
20+
"slices"
21+
22+
"github.com/bufbuild/buf/private/pkg/normalpath"
23+
)
24+
25+
// MaskReadBucket creates a ReadBucket that masks the include and exclude prefixes,
26+
// with optimized walking that only traverses include prefixes.
27+
//
28+
// If includePrefixes is empty, all paths are included (no prefix filtering on walk).
29+
// If excludePrefixes is provided, those paths are excluded from results.
30+
// If both includePrefixes and excludePrefixes are empty, the original ReadBucket is returned.
31+
func MaskReadBucket(readBucket ReadBucket, includePrefixes, excludePrefixes []string) (ReadBucket, error) {
32+
if len(includePrefixes) == 0 && len(excludePrefixes) == 0 {
33+
return readBucket, nil
34+
}
35+
return newMaskReadBucketCloser(readBucket, nil, includePrefixes, excludePrefixes)
36+
}
37+
38+
// MaskReadBucketCloser creates a ReadBucketCloser that masks using include and exclude prefixes,
39+
// with optimized walking that only traverses include prefixes.
40+
//
41+
// If includePrefixes is empty, all paths are included (no prefix filtering on walk).
42+
// If excludePrefixes is provided, those paths are excluded from results.
43+
// If both includePrefixes and excludePrefixes are empty, the original ReadBucketCloser is returned.
44+
func MaskReadBucketCloser(readBucketCloser ReadBucketCloser, includePrefixes, excludePrefixes []string) (ReadBucketCloser, error) {
45+
if len(includePrefixes) == 0 && len(excludePrefixes) == 0 {
46+
return readBucketCloser, nil
47+
}
48+
return newMaskReadBucketCloser(readBucketCloser, readBucketCloser.Close, includePrefixes, excludePrefixes)
49+
}
50+
51+
type maskReadBucketCloser struct {
52+
delegate ReadBucket
53+
closeFunc func() error
54+
includePrefixes []string
55+
excludePrefixes []string
56+
}
57+
58+
func newMaskReadBucketCloser(
59+
delegate ReadBucket,
60+
closeFunc func() error,
61+
includePrefixes, excludePrefixes []string,
62+
) (*maskReadBucketCloser, error) {
63+
normalizedIncludes, err := normalizeValidateAndCompactPrefixes(includePrefixes)
64+
if err != nil {
65+
return nil, err
66+
}
67+
normalizedExcludes, err := normalizeValidateAndCompactPrefixes(excludePrefixes)
68+
if err != nil {
69+
return nil, err
70+
}
71+
return &maskReadBucketCloser{
72+
delegate: delegate,
73+
closeFunc: closeFunc,
74+
includePrefixes: normalizedIncludes,
75+
excludePrefixes: normalizedExcludes,
76+
}, nil
77+
}
78+
79+
func (r *maskReadBucketCloser) Get(ctx context.Context, path string) (ReadObjectCloser, error) {
80+
path, err := normalpath.NormalizeAndValidate(path)
81+
if err != nil {
82+
return nil, err
83+
}
84+
if !r.matchPath(path) {
85+
return nil, &fs.PathError{Op: "read", Path: path, Err: fs.ErrNotExist}
86+
}
87+
return r.delegate.Get(ctx, path)
88+
}
89+
90+
func (r *maskReadBucketCloser) Stat(ctx context.Context, path string) (ObjectInfo, error) {
91+
path, err := normalpath.NormalizeAndValidate(path)
92+
if err != nil {
93+
return nil, err
94+
}
95+
if !r.matchPath(path) {
96+
return nil, &fs.PathError{Op: "read", Path: path, Err: fs.ErrNotExist}
97+
}
98+
return r.delegate.Stat(ctx, path)
99+
}
100+
101+
func (r *maskReadBucketCloser) Walk(ctx context.Context, prefix string, f func(ObjectInfo) error) error {
102+
prefix, err := normalpath.NormalizeAndValidate(prefix)
103+
if err != nil {
104+
return err
105+
}
106+
for _, excludePrefix := range r.excludePrefixes {
107+
isChild := normalpath.EqualsOrContainsPath(excludePrefix, prefix, normalpath.Relative)
108+
if isChild {
109+
// The requested prefix is under an exclude prefix, so nothing to walk.
110+
return nil
111+
}
112+
}
113+
walkFunc := func(objectInfo ObjectInfo) error {
114+
if !r.matchPath(objectInfo.Path()) {
115+
return nil
116+
}
117+
return f(objectInfo)
118+
}
119+
if len(r.includePrefixes) == 0 {
120+
// No include prefixes, so walk normally.
121+
return r.delegate.Walk(ctx, prefix, walkFunc)
122+
}
123+
// Find all include prefixes under the requests root prefix.
124+
var effectivePrefixes []string
125+
for _, includePrefix := range r.includePrefixes {
126+
isParent := normalpath.EqualsOrContainsPath(includePrefix, prefix, normalpath.Relative)
127+
if isParent {
128+
// The requested prefix is under an include prefix, so walk normally.
129+
return r.delegate.Walk(ctx, prefix, walkFunc)
130+
}
131+
isChild := normalpath.EqualsOrContainsPath(prefix, includePrefix, normalpath.Relative)
132+
if isChild {
133+
effectivePrefixes = append(effectivePrefixes, includePrefix)
134+
}
135+
}
136+
// Walk each effective prefix that is a child of the requested prefix.
137+
// The effective prefixes are sorted and compacted on creation of the Bucket,
138+
// so no need to sort or compact here.
139+
for _, effectivePrefix := range effectivePrefixes {
140+
if err := r.delegate.Walk(ctx, effectivePrefix, walkFunc); err != nil {
141+
return err
142+
}
143+
}
144+
return nil
145+
}
146+
147+
func (r *maskReadBucketCloser) Close() error {
148+
if r.closeFunc != nil {
149+
return r.closeFunc()
150+
}
151+
return nil
152+
}
153+
154+
// matchPath checks if a path matches the include/exclude criteria
155+
func (r *maskReadBucketCloser) matchPath(path string) bool {
156+
// Check excludes first (if any exclude matches, reject the path)
157+
for _, excludePrefix := range r.excludePrefixes {
158+
// Check if the exclude prefix contains the path (path is under exclude prefix)
159+
if normalpath.EqualsOrContainsPath(excludePrefix, path, normalpath.Relative) {
160+
return false
161+
}
162+
}
163+
// If no include prefixes, accept all paths (that weren't excluded)
164+
if len(r.includePrefixes) == 0 {
165+
return true
166+
}
167+
// Check includes (at least one include must match)
168+
for _, includePrefix := range r.includePrefixes {
169+
// Check if the include prefix contains the path (path is under include prefix)
170+
if normalpath.EqualsOrContainsPath(includePrefix, path, normalpath.Relative) {
171+
return true
172+
}
173+
}
174+
return false
175+
}
176+
177+
// normalizeValidateAndCompactPrefixes normalizes, validates, and compacts a list of path prefixes.
178+
// It removes redundant child prefixes that are already covered by parent prefixes.
179+
// For example, ["foo", "foo/v1", "foo/v1/v2"] becomes ["foo"].
180+
func normalizeValidateAndCompactPrefixes(prefixes []string) ([]string, error) {
181+
if len(prefixes) == 0 {
182+
return nil, nil
183+
}
184+
var normalized []string
185+
for _, prefix := range prefixes {
186+
normalizedPrefix, err := normalpath.NormalizeAndValidate(prefix)
187+
if err != nil {
188+
return nil, err
189+
}
190+
normalized = append(normalized, normalizedPrefix)
191+
}
192+
slices.Sort(normalized)
193+
// Remove redundant child prefixes that are covered by parent prefixes.
194+
// For example, ["bar", "foo", "foo/v1", "foo/v1/v2"] becomes ["bar", "foo"].
195+
reduced := normalized[:1]
196+
for _, prefix := range normalized[1:] {
197+
if !normalpath.EqualsOrContainsPath(reduced[len(reduced)-1], prefix, normalpath.Relative) {
198+
reduced = append(reduced, prefix)
199+
}
200+
}
201+
return reduced, nil
202+
}

0 commit comments

Comments
 (0)