From 6e67aac7b8155c7d2683c98cc5abead356215224 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Wed, 19 Nov 2025 13:30:26 -0500 Subject: [PATCH] dirent: no longer cache the results of Stat and Info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit changes fastwalk.DirEntry to no longer cache the result of calling the Stat and Info methods. goos: darwin goarch: arm64 pkg: github.com/charlievieth/fastwalk cpu: Apple M4 Pro │ base.10.txt │ new.10.txt │ │ sec/op │ sec/op vs base │ UnixDirentInfo/Serial-14 648.6n ± 2% 616.6n ± 1% -4.92% (p=0.000 n=10) UnixDirentInfo/Parallel-14 969.4n ± 0% 932.5n ± 0% -3.81% (p=0.000 n=10) geomean 792.9n 758.3n -4.37% │ base.10.txt │ new.10.txt │ │ B/op │ B/op vs base │ UnixDirentInfo/Serial-14 384.0 ± 0% 336.0 ± 0% -12.50% (p=0.000 n=10) UnixDirentInfo/Parallel-14 384.0 ± 0% 336.0 ± 0% -12.50% (p=0.000 n=10) geomean 384.0 336.0 -12.50% │ base.10.txt │ new.10.txt │ │ allocs/op │ allocs/op vs base │ UnixDirentInfo/Serial-14 4.000 ± 0% 3.000 ± 0% -25.00% (p=0.000 n=10) UnixDirentInfo/Parallel-14 4.000 ± 0% 3.000 ± 0% -25.00% (p=0.000 n=10) geomean 4.000 3.000 -25.00% goos: linux goarch: amd64 pkg: github.com/charlievieth/fastwalk cpu: AMD EPYC-Milan Processor │ base.10.txt │ new.10.txt │ │ sec/op │ sec/op vs base │ UnixDirentInfo/Serial-4 2.235µ ± 1% 2.162µ ± 3% -3.24% (p=0.001 n=10) UnixDirentInfo/Parallel-4 732.7n ± 2% 695.9n ± 2% -5.03% (p=0.000 n=10) geomean 1.280µ 1.227µ -4.14% │ base.10.txt │ new.10.txt │ │ B/op │ B/op vs base │ UnixDirentInfo/Serial-4 384.0 ± 0% 336.0 ± 0% -12.50% (p=0.000 n=10) UnixDirentInfo/Parallel-4 384.0 ± 0% 336.0 ± 0% -12.50% (p=0.000 n=10) geomean 384.0 336.0 -12.50% │ base.10.txt │ new.10.txt │ │ allocs/op │ allocs/op vs base │ UnixDirentInfo/Serial-4 4.000 ± 0% 3.000 ± 0% -25.00% (p=0.000 n=10) UnixDirentInfo/Parallel-4 4.000 ± 0% 3.000 ± 0% -25.00% (p=0.000 n=10) geomean 4.000 3.000 -25.00% --- dirent.go | 47 ++++++------------------ dirent_portable.go | 16 ++++----- dirent_test.go | 88 --------------------------------------------- dirent_unix.go | 28 ++------------- dirent_unix_test.go | 84 +++++++++++-------------------------------- fastwalk.go | 8 ++--- 6 files changed, 45 insertions(+), 226 deletions(-) diff --git a/dirent.go b/dirent.go index 1059320..7654f95 100644 --- a/dirent.go +++ b/dirent.go @@ -3,51 +3,26 @@ package fastwalk import ( "io/fs" "os" - "sync" - "sync/atomic" "syscall" - "unsafe" ) -type fileInfo struct { - once sync.Once - fs.FileInfo - err error -} - -func loadFileInfo(pinfo **fileInfo) *fileInfo { - ptr := (*unsafe.Pointer)(unsafe.Pointer(pinfo)) - fi := (*fileInfo)(atomic.LoadPointer(ptr)) - if fi == nil { - fi = &fileInfo{} - if !atomic.CompareAndSwapPointer( - (*unsafe.Pointer)(unsafe.Pointer(pinfo)), // adrr - nil, // old - unsafe.Pointer(fi), // new - ) { - fi = (*fileInfo)(atomic.LoadPointer(ptr)) - } - } - return fi -} - -// StatDirEntry returns a [fs.FileInfo] describing the named file ([os.Stat]). -// If de is a [fastwalk.DirEntry] its Stat method is used and the returned -// FileInfo may be cached from a prior call to Stat. If a cached result is not -// desired, users should just call [os.Stat] directly. +// StatDirEntry returns a [fs.FileInfo] describing the named file. If de is a +// [fastwalk.DirEntry] its Stat method is used. Otherwise, [os.Stat] is called +// on the path. Therefore, de should be the DirEntry describing path. // -// This is a helper function for calling Stat on the DirEntry passed to the -// walkFn argument to [Walk]. +// Note: This function was added when fastwalk used to always cache the result +// of DirEntry.Stat. Now that fastwalk no longer explicitly caches the result +// of Stat this function is slightly less useful and is equivalent to the below +// code: // -// The path argument is only used if de is not of type [fastwalk.DirEntry]. -// Therefore, de should be the DirEntry describing path. +// if d, ok := de.(DirEntry); ok { +// return d.Stat() +// } +// return os.Stat(path) func StatDirEntry(path string, de fs.DirEntry) (fs.FileInfo, error) { if de == nil { return nil, &os.PathError{Op: "stat", Path: path, Err: syscall.EINVAL} } - if de.Type()&os.ModeSymlink == 0 { - return de.Info() - } if d, ok := de.(DirEntry); ok { return d.Stat() } diff --git a/dirent_portable.go b/dirent_portable.go index ed9c3c7..85a4d18 100644 --- a/dirent_portable.go +++ b/dirent_portable.go @@ -8,6 +8,7 @@ package fastwalk import ( "io/fs" "os" + "runtime" "sort" "sync" @@ -19,7 +20,6 @@ var _ DirEntry = (*portableDirent)(nil) type portableDirent struct { fs.DirEntry parent string - stat *fileInfo depth uint32 } @@ -32,14 +32,14 @@ func (d *portableDirent) Depth() int { } func (d *portableDirent) Stat() (fs.FileInfo, error) { - if d.DirEntry.Type()&os.ModeSymlink == 0 { - return d.DirEntry.Info() + if runtime.GOOS == "windows" { + // On Windows use Info() if the file is not a symlink since + // IIRC the result is cached when the FileInfo is created. + if d.DirEntry.Type()&fs.ModeSymlink == 0 { + return d.DirEntry.Info() + } } - stat := loadFileInfo(&d.stat) - stat.once.Do(func() { - stat.FileInfo, stat.err = os.Stat(d.parent + string(os.PathSeparator) + d.Name()) - }) - return stat.FileInfo, stat.err + return os.Stat(d.parent + string(os.PathSeparator) + d.Name()) } func newDirEntry(dirName string, info fs.DirEntry, depth int) DirEntry { diff --git a/dirent_test.go b/dirent_test.go index ce00531..2441166 100644 --- a/dirent_test.go +++ b/dirent_test.go @@ -6,8 +6,6 @@ import ( "io/fs" "os" "path/filepath" - "runtime" - "sync" "testing" "github.com/charlievieth/fastwalk" @@ -75,91 +73,5 @@ func TestDirent(t *testing.T) { t.Errorf("lstat mismatch\n got:\n%s\nwant:\n%s", fastwalk.FormatFileInfo(got), fastwalk.FormatFileInfo(want)) } - fi, err := fileEnt.Info() - if err != nil { - t.Fatal(err) - } - if fi != got { - t.Error("failed to return or cache FileInfo") - } - de := fileEnt.(fastwalk.DirEntry) - fi, err = de.Stat() - if err != nil { - t.Fatal(err) - } - if fi != got { - t.Error("failed to use cached Info result for non-symlink") - } - }) - - t.Run("Parallel", func(t *testing.T) { - testParallel := func(t *testing.T, de fs.DirEntry, fn func() (fs.FileInfo, error)) { - numCPU := runtime.NumCPU() - - infos := make([][]fs.FileInfo, numCPU) - for i := range infos { - infos[i] = make([]fs.FileInfo, 100) - } - - // Start all the goroutines at the same time to - // maximise the chance of a race - start := make(chan struct{}) - var wg, ready sync.WaitGroup - ready.Add(numCPU) - wg.Add(numCPU) - for i := 0; i < numCPU; i++ { - go func(fis []fs.FileInfo, de fs.DirEntry) { - defer wg.Done() - ready.Done() - <-start - for i := range fis { - fis[i], _ = de.Info() - } - }(infos[i], de) - } - - ready.Wait() - close(start) // start all goroutines at once - wg.Wait() - - first := infos[0][0] - if first == nil { - t.Fatal("failed to stat file:", de.Name()) - } - for _, fis := range infos { - for _, fi := range fis { - if fi != first { - t.Errorf("Expected the same fs.FileInfo to always "+ - "be returned got: %#v want: %#v", fi, first) - } - } - } - } - - t.Run("File", func(t *testing.T) { - t.Run("Stat", func(t *testing.T) { - _, fileEnt := getDirEnts(t) - de := fileEnt.(fastwalk.DirEntry) - testParallel(t, de, de.Stat) - }) - t.Run("Lstat", func(t *testing.T) { - _, fileEnt := getDirEnts(t) - de := fileEnt.(fastwalk.DirEntry) - testParallel(t, de, de.Info) - }) - }) - - t.Run("Link", func(t *testing.T) { - t.Run("Stat", func(t *testing.T) { - linkEnt, _ := getDirEnts(t) - de := linkEnt.(fastwalk.DirEntry) - testParallel(t, de, de.Stat) - }) - t.Run("Lstat", func(t *testing.T) { - linkEnt, _ := getDirEnts(t) - de := linkEnt.(fastwalk.DirEntry) - testParallel(t, de, de.Info) - }) - }) }) } diff --git a/dirent_unix.go b/dirent_unix.go index 9f785a2..e314bfe 100644 --- a/dirent_unix.go +++ b/dirent_unix.go @@ -16,8 +16,6 @@ type unixDirent struct { name string typ fs.FileMode depth uint32 // uint32 so that we can pack it next to typ - info *fileInfo - stat *fileInfo } func (d *unixDirent) Name() string { return d.name } @@ -27,22 +25,11 @@ func (d *unixDirent) Depth() int { return int(d.depth) } func (d *unixDirent) String() string { return fmtdirent.FormatDirEntry(d) } func (d *unixDirent) Info() (fs.FileInfo, error) { - info := loadFileInfo(&d.info) - info.once.Do(func() { - info.FileInfo, info.err = os.Lstat(d.parent + "/" + d.name) - }) - return info.FileInfo, info.err + return os.Lstat(d.parent + "/" + d.name) } func (d *unixDirent) Stat() (fs.FileInfo, error) { - if d.typ&os.ModeSymlink == 0 { - return d.Info() - } - stat := loadFileInfo(&d.stat) - stat.once.Do(func() { - stat.FileInfo, stat.err = os.Stat(d.parent + "/" + d.name) - }) - return stat.FileInfo, stat.err + return os.Stat(d.parent + "/" + d.name) } func newUnixDirent(parent, name string, typ fs.FileMode, depth int) *unixDirent { @@ -55,16 +42,7 @@ func newUnixDirent(parent, name string, typ fs.FileMode, depth int) *unixDirent } func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry { - info := &fileInfo{ - FileInfo: fi, - } - info.once.Do(func() {}) - return &unixDirent{ - parent: dirname, - name: fi.Name(), - typ: fi.Mode().Type(), - info: info, - } + return newUnixDirent(dirname, fi.Name(), fi.Mode().Type(), 0) } var direntSlicePool = sync.Pool{ diff --git a/dirent_unix_test.go b/dirent_unix_test.go index dc6f510..03ba738 100644 --- a/dirent_unix_test.go +++ b/dirent_unix_test.go @@ -10,10 +10,8 @@ import ( "reflect" "runtime" "sync" - "sync/atomic" "testing" "time" - "unsafe" ) func testUnixDirentParallel(t *testing.T, ent *unixDirent, want fs.FileInfo, @@ -23,7 +21,7 @@ func testUnixDirentParallel(t *testing.T, ent *unixDirent, want fs.FileInfo, return fi1.Name() == fi2.Name() && fi1.Size() == fi2.Size() && fi1.Mode() == fi2.Mode() && - fi1.ModTime() == fi2.ModTime() && + fi1.ModTime().Equal(fi2.ModTime()) && fi1.IsDir() == fi2.IsDir() && os.SameFile(fi1, fi2) } @@ -38,10 +36,6 @@ func testUnixDirentParallel(t *testing.T, ent *unixDirent, want fs.FileInfo, var wg sync.WaitGroup start := make(chan struct{}) - var mu sync.Mutex - infos := make(map[*fileInfo]int) - stats := make(map[*fileInfo]int) - for i := 0; i < numCPU; i++ { wg.Add(1) go func() { @@ -53,12 +47,6 @@ func testUnixDirentParallel(t *testing.T, ent *unixDirent, want fs.FileInfo, t.Error(err) return } - info := (*fileInfo)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&ent.info)))) - stat := (*fileInfo)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&ent.stat)))) - mu.Lock() - infos[info]++ - stats[stat]++ - mu.Unlock() if !sameFile(fi, want) { t.Errorf("FileInfo not equal:\nwant: %s\ngot: %s\n", FormatFileInfo(want), FormatFileInfo(fi)) @@ -70,8 +58,6 @@ func testUnixDirentParallel(t *testing.T, ent *unixDirent, want fs.FileInfo, close(start) wg.Wait() - - t.Logf("Infos: %d Stats: %d\n", len(infos), len(stats)) } func TestUnixDirent(t *testing.T) { @@ -222,24 +208,6 @@ func TestSortDirents(t *testing.T) { }) } -func BenchmarkUnixDirentLoadFileInfo(b *testing.B) { - wd, err := os.Getwd() - if err != nil { - b.Fatal(err) - } - fi, err := os.Lstat(wd) - if err != nil { - b.Fatal(err) - } - parent, name := filepath.Split(wd) - d := newUnixDirent(parent, name, fi.Mode().Type(), 0) - - for i := 0; i < b.N; i++ { - loadFileInfo(&d.info) - d.info = nil - } -} - func BenchmarkUnixDirentInfo(b *testing.B) { wd, err := os.Getwd() if err != nil { @@ -250,38 +218,26 @@ func BenchmarkUnixDirentInfo(b *testing.B) { b.Fatal(err) } parent, name := filepath.Split(wd) - d := newUnixDirent(parent, name, fi.Mode().Type(), 0) - for i := 0; i < b.N; i++ { - fi, err := d.Info() - if err != nil { - b.Fatal(err) - } - if fi == nil { - b.Fatal("Nil FileInfo") + b.Run("Serial", func(b *testing.B) { + d := newUnixDirent(parent, name, fi.Mode().Type(), 0) + for i := 0; i < b.N; i++ { + _, err := d.Info() + if err != nil { + b.Fatal(err) + } } - } -} - -func BenchmarkUnixDirentStat(b *testing.B) { - wd, err := os.Getwd() - if err != nil { - b.Fatal(err) - } - fi, err := os.Lstat(wd) - if err != nil { - b.Fatal(err) - } - parent, name := filepath.Split(wd) - d := newUnixDirent(parent, name, fi.Mode().Type(), 0) + }) - for i := 0; i < b.N; i++ { - fi, err := d.Stat() - if err != nil { - b.Fatal(err) - } - if fi == nil { - b.Fatal("Nil FileInfo") - } - } + b.Run("Parallel", func(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + d := newUnixDirent(parent, name, fi.Mode().Type(), 0) + for pb.Next() { + _, err := d.Info() + if err != nil { + b.Fatal(err) + } + } + }) + }) } diff --git a/fastwalk.go b/fastwalk.go index 4fd930a..4aaa74c 100644 --- a/fastwalk.go +++ b/fastwalk.go @@ -325,7 +325,7 @@ func (c *Config) Copy() *Config { // A DirEntry extends the [fs.DirEntry] interface to add a Stat() method // that returns the result of calling [os.Stat] on the underlying file. -// The results of Info() and Stat() are cached. +// The result of Info() may be cached. The result of Stat() is never cached. // // The [fs.DirEntry] argument passed to the [fs.WalkDirFunc] by [Walk] is // always a DirEntry. @@ -333,10 +333,8 @@ type DirEntry interface { fs.DirEntry // Stat returns the fs.FileInfo for the file or subdirectory described - // by the entry. The returned FileInfo may be from the time of the - // original directory read or from the time of the call to os.Stat. - // If the entry denotes a symbolic link, Stat reports the information - // about the target itself, not the link. + // by the entry. If the entry denotes a symbolic link, Stat reports the + // information about the target itself, not the link. Stat() (fs.FileInfo, error) // Depth returns the depth at which this entry was generated relative to the