Skip to content

Commit de874d3

Browse files
Allow filesystem exclusion to eagerly prune the enumerated tree (#4008)
* Allow filesystem exclusion to eagerly prune the enumerated tree rather than skipping files afterwards
* added test case
* resolved charlie's comment
* resolved cody comments
* updated test case
* remove shouldExclude method
* added test cases and fixed the logic
* removed print statement
* renamed test case
* assert->require in tests
1 parent 1ce08e4 commit de874d3

File tree

3 files changed

+96
-3
lines changed

3 files changed

+96
-3
lines changed

pkg/common/filter.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,10 @@ func (filter *Filter) Pass(object string) bool {
9797
if filter == nil {
9898
return true
9999
}
100+
100101
excluded := filter.exclude.Matches(object)
101102
included := filter.include.Matches(object)
103+
102104
return !excluded && included
103105
}
104106

@@ -114,3 +116,8 @@ func (rules *FilterRuleSet) Matches(object string) bool {
114116
}
115117
return false
116118
}
119+
120+
// ShouldExclude return true if any regular expressions in the exclude FilterRuleSet matches the path.
121+
func (filter *Filter) ShouldExclude(path string) bool {
122+
return filter.exclude.Matches(path)
123+
}

pkg/sources/filesystem/filesystem.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,25 @@ func (s *Source) scanDir(ctx context.Context, path string, chunksChan chan *sour
127127
ctx.Logger().Error(err, "error walking directory")
128128
return nil
129129
}
130+
130131
fullPath := filepath.Join(path, relativePath)
131132

133+
// Skip any path that matches an exclude rule or fails to match the include rules.
134+
if s.filter != nil && !s.filter.Pass(fullPath) {
135+
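// Prune only directories that are explicitly excluded; a directory that merely fails the include rules may still contain included files.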
136+
if d.IsDir() && s.filter.ShouldExclude(fullPath) {
137+
return fs.SkipDir
138+
}
139+
140+
return nil // skip the file
141+
}
142+
132143
// Skip over non-regular files. We do this check here to suppress noisy
133144
// logs for trying to scan directories and other non-regular files in
134145
// our traversal.
135146
if !d.Type().IsRegular() {
136147
return nil
137148
}
138-
if s.filter != nil && !s.filter.Pass(fullPath) {
139-
return nil
140-
}
141149

142150
workerPool.Go(func() error {
143151
if err = s.scanFile(ctx, fullPath, chunksChan); err != nil {

pkg/sources/filesystem/filesystem_test.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/go-logr/logr"
1111
"github.com/kylelemons/godebug/pretty"
1212
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
1314
"google.golang.org/protobuf/types/known/anypb"
1415

1516
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
@@ -300,6 +301,83 @@ func TestChunkUnitReporterErr(t *testing.T) {
300301
assert.Error(t, err)
301302
}
302303

304+
func TestSkipDir(t *testing.T) {
305+
t.Parallel()
306+
ctx := context.Background()
307+
308+
// create a temp directory with files
309+
ignoreDir, cleanupDir, err := createTempDir("", "ignore1", "ignore2", "ignore3")
310+
require.NoError(t, err)
311+
defer cleanupDir()
312+
313+
// create an ExcludePathsFile that contains the ignoreDir path
314+
excludeFile, cleanupFile, err := createTempFile("", ignoreDir+"\n")
315+
require.NoError(t, err)
316+
defer cleanupFile()
317+
318+
conn, err := anypb.New(&sourcespb.Filesystem{
319+
ExcludePathsFile: excludeFile.Name(),
320+
})
321+
require.NoError(t, err)
322+
323+
// initialize the source.
324+
s := Source{}
325+
err = s.Init(ctx, "exclude directory", 0, 0, true, conn, 1)
326+
require.NoError(t, err)
327+
328+
reporter := sourcestest.TestReporter{}
329+
err = s.ChunkUnit(ctx, sources.CommonSourceUnit{
330+
ID: ignoreDir,
331+
}, &reporter)
332+
require.NoError(t, err)
333+
334+
require.Equal(t, 0, len(reporter.Chunks), "Expected no chunks from excluded directory")
335+
require.Equal(t, 0, len(reporter.ChunkErrs), "Expected no errors for excluded directory")
336+
}
337+
338+
func TestScanSubDirFile(t *testing.T) {
339+
t.Parallel()
340+
ctx := context.Background()
341+
342+
// create a temp directory with files
343+
parentDir, cleanupParentDir, err := createTempDir("", "file1")
344+
require.NoError(t, err)
345+
defer cleanupParentDir()
346+
347+
childDir, cleanupChildDir, err := createTempDir(parentDir, "file2")
348+
require.NoError(t, err)
349+
defer cleanupChildDir()
350+
351+
// create a file in child directory
352+
file, cleanupFile, err := createTempFile(childDir, "should scan this file")
353+
require.NoError(t, err)
354+
defer cleanupFile()
355+
356+
// create an IncludePathsFile that contains the file path
357+
includeFile, cleanupFile, err := createTempFile("", file.Name()+"\n")
358+
require.NoError(t, err)
359+
defer cleanupFile()
360+
361+
conn, err := anypb.New(&sourcespb.Filesystem{
362+
IncludePathsFile: includeFile.Name(),
363+
})
364+
require.NoError(t, err)
365+
366+
// initialize the source.
367+
s := Source{}
368+
err = s.Init(ctx, "include sub directory file", 0, 0, true, conn, 1)
369+
require.NoError(t, err)
370+
371+
reporter := sourcestest.TestReporter{}
372+
err = s.ChunkUnit(ctx, sources.CommonSourceUnit{
373+
ID: parentDir,
374+
}, &reporter)
375+
require.NoError(t, err)
376+
377+
require.Equal(t, 1, len(reporter.Chunks), "Expected chunks from included file")
378+
require.Equal(t, 0, len(reporter.ChunkErrs), "Expected no errors")
379+
}
380+
303381
// createTempFile is a helper function to create a temporary file in the given
304382
// directory with the provided contents. If dir is "", the operating system's
305383
// temp directory is used.

0 commit comments

Comments
 (0)