Skip to content

Commit 0b86fd6

Browse files
committed
pipeline: call .ReadDir instead of fs.WalkDir to gather all file descriptors
1 parent 9a953ce commit 0b86fd6

File tree

5 files changed

+162
-110
lines changed

5 files changed

+162
-110
lines changed

go.mod

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ require (
1414
github.com/hashicorp/go-retryablehttp v0.7.4
1515
github.com/igrmk/treemap/v2 v2.0.1
1616
github.com/jlaffaye/ftp v0.2.0
17-
github.com/moov-io/ach v1.37.1
17+
github.com/moov-io/ach v1.37.2
1818
github.com/moov-io/base v0.48.5
1919
github.com/moov-io/cryptfs v0.7.1
2020
github.com/moov-io/go-ftp v0.3.2
@@ -138,7 +138,7 @@ require (
138138
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
139139
github.com/jmespath/go-jmespath v0.4.0 // indirect
140140
github.com/json-iterator/go v1.1.12 // indirect
141-
github.com/klauspost/compress v1.17.0 // indirect
141+
github.com/klauspost/compress v1.17.7 // indirect
142142
github.com/kr/fs v0.1.0 // indirect
143143
github.com/kylelemons/godebug v1.1.0 // indirect
144144
github.com/magiconair/properties v1.8.7 // indirect
@@ -188,7 +188,7 @@ require (
188188
go.uber.org/multierr v1.11.0 // indirect
189189
golang.org/x/exp v0.0.0-20240314144324-c7f7c6466f7f // indirect
190190
golang.org/x/mod v0.15.0 // indirect
191-
golang.org/x/net v0.22.0 // indirect
191+
golang.org/x/net v0.23.0 // indirect
192192
golang.org/x/oauth2 v0.16.0 // indirect
193193
golang.org/x/sys v0.18.0 // indirect
194194
golang.org/x/time v0.5.0 // indirect

go.sum

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,8 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm
320320
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
321321
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
322322
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
323-
github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
324-
github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
323+
github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg=
324+
github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
325325
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
326326
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
327327
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
@@ -368,8 +368,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w
368368
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
369369
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
370370
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
371-
github.com/moov-io/ach v1.37.1 h1:cIH/W/S0PbmoQjGxJBHhgYKwfIXKyXUygN6NyJ1c8nY=
372-
github.com/moov-io/ach v1.37.1/go.mod h1:g/XLp337pbEMctCDazuCHMnmNcoV04aMqZGoQfOsrpI=
371+
github.com/moov-io/ach v1.37.2 h1:Onh8QmINf+z0ZfFmugO7An7l59JmQt8f3txpU0lw+g4=
372+
github.com/moov-io/ach v1.37.2/go.mod h1:g/XLp337pbEMctCDazuCHMnmNcoV04aMqZGoQfOsrpI=
373373
github.com/moov-io/base v0.48.5 h1:QaTyTo6eFFFV35R9l/GdePQN40IJti9knD5hqdWPnnM=
374374
github.com/moov-io/base v0.48.5/go.mod h1:D5ZV9COV/qtCjTQuYpq7gGInCk64AhOQI6UY4kt4Rq8=
375375
github.com/moov-io/cryptfs v0.7.1 h1:aT79fcxvX3vLJ25GMFlsl6WtPLijZ3VWMaRSq8OJu4M=
@@ -567,8 +567,8 @@ golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
567567
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
568568
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
569569
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
570-
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
571-
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
570+
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
571+
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
572572
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
573573
golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ=
574574
golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o=

internal/pipeline/events_api_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ func TestEventsAPI_FileUploaded(t *testing.T) {
127127
switch i {
128128
case 0:
129129
// Example: TESTING-143425.52750.ach
130-
require.True(t, strings.HasPrefix(v.Filename, "TESTING-"))
131-
require.True(t, strings.HasSuffix(v.Filename, ".ach"))
130+
require.True(t, strings.HasPrefix(v.Filename, "TESTING-"), v.Filename)
131+
require.True(t, strings.HasSuffix(v.Filename, ".ach"), v.Filename)
132132
case 1:
133133
require.Equal(t, "foo.ach", v.Filename)
134134
}

internal/pipeline/merging.go

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"encoding/hex"
2525
"encoding/json"
2626
"fmt"
27-
"io/fs"
2827
"path/filepath"
2928
"slices"
3029
"strings"
@@ -102,7 +101,9 @@ func (m *filesystemMerging) writeACHFile(ctx context.Context, xfer incoming.ACHF
102101
if err := ach.NewWriter(&buf).Write(xfer.File); err != nil {
103102
return err
104103
}
105-
path := filepath.Join("mergable", m.shard.Name, fmt.Sprintf("%s.ach", xfer.FileID))
104+
105+
fileID := strings.TrimSuffix(xfer.FileID, ".ach")
106+
path := filepath.Join("mergable", m.shard.Name, fmt.Sprintf("%s.ach", fileID))
106107
if err := m.storage.WriteFile(path, buf.Bytes()); err != nil {
107108
return err
108109
}
@@ -116,7 +117,8 @@ func (m *filesystemMerging) writeACHFile(ctx context.Context, xfer incoming.ACHF
116117
"shardKey": log.String(xfer.ShardKey),
117118
}).Logf("ERROR encoding ValidateOpts: %v", err)
118119
}
119-
path := filepath.Join("mergable", m.shard.Name, fmt.Sprintf("%s.json", xfer.FileID))
120+
121+
path := filepath.Join("mergable", m.shard.Name, fmt.Sprintf("%s.json", fileID))
120122
if err := m.storage.WriteFile(path, buf.Bytes()); err != nil {
121123
m.logger.Warn().With(log.Fields{
122124
"fileID": log.String(xfer.FileID),
@@ -129,7 +131,8 @@ func (m *filesystemMerging) writeACHFile(ctx context.Context, xfer incoming.ACHF
129131
}
130132

131133
func (m *filesystemMerging) HandleCancel(ctx context.Context, cancel incoming.CancelACHFile) (incoming.FileCancellationResponse, error) {
132-
path := filepath.Join("mergable", m.shard.Name, fmt.Sprintf("%s.ach", cancel.FileID))
134+
fileID := strings.TrimSuffix(cancel.FileID, ".ach")
135+
path := filepath.Join("mergable", m.shard.Name, fmt.Sprintf("%s.ach", fileID))
133136

134137
// Check if the file exists already
135138
file, _ := m.storage.Open(path)
@@ -335,7 +338,7 @@ func (m *filesystemMerging) WithEachMerged(ctx context.Context, f func(context.C
335338
attribute.Int("achgateway.successful_remote_writes", successfulRemoteWrites),
336339
)
337340

338-
// Build a mapping of BatchHeader + EntryDetail from dir
341+
// Build a mapping of BatchHeader + EntryDetail from dir (input files)
339342
mappings, err := m.buildDirMapping(dir, canceledFiles)
340343
if err != nil {
341344
el.Add(err)
@@ -395,79 +398,87 @@ func fileAcceptor(canceledFiles []string) func(string) ach.FileAcceptance {
395398
}
396399
}
397400

401+
// buildDirMapping computes a tree of the input files and their entries together so that we can quickly find
402+
// where they were merged into.
398403
func (m *filesystemMerging) buildDirMapping(dir string, canceledFiles []string) (*treemap.TreeMap[string, string], error) {
399404
tree := treemap.New[string, string]()
400405

406+
fds, err := m.storage.ReadDir(dir)
407+
if err != nil {
408+
return nil, err
409+
}
410+
401411
acceptor := fileAcceptor(canceledFiles)
402412

403-
err := fs.WalkDir(m.storage, dir, func(path string, d fs.DirEntry, err error) error {
404-
if err != nil {
405-
if strings.Contains(err.Error(), "is a directory") {
406-
return nil
407-
}
408-
return err
409-
}
410-
if d.IsDir() {
411-
return nil // skip directories
412-
}
413+
for i := range fds {
414+
path := fds[i].Name()
413415

414-
if strings.Contains(path, "uploaded") || strings.HasSuffix(path, ".json") {
415-
// Skip /uploaded/ as we're only interested in the input files.
416-
// Skip .json files as they contain ValidateOpts
417-
return nil
416+
// Ignore directories as ReadDir continues inside of them.
417+
// .json files contain ValidateOpts which we can skip
418+
if fds[i].IsDir() || strings.HasSuffix(path, ".json") {
419+
continue
418420
}
419421

420422
// Skip the file if merging would have skipped it
421423
if acceptor(path) == ach.SkipFile {
422-
return nil
424+
continue
423425
}
424426

425-
fd, err := m.storage.Open(path)
427+
err = m.accumulateMappings(tree, filepath.Join(dir, path))
426428
if err != nil {
427-
return fmt.Errorf("opening %s failed: %w", path, err)
429+
return nil, fmt.Errorf("accumulating mappings from %s failed: %w", path, err)
428430
}
429-
defer fd.Close()
430-
431-
// Check for validate opts
432-
validateOptsPath := strings.TrimSuffix(path, filepath.Ext(path)) + ".json"
433-
var validateOpts *ach.ValidateOpts
434-
if optsFD, err := m.storage.Open(validateOptsPath); err == nil {
435-
if optsFD != nil {
436-
defer optsFD.Close()
437-
}
431+
}
438432

439-
err = json.NewDecoder(optsFD).Decode(&validateOpts)
440-
if err != nil {
441-
return fmt.Errorf("reading %s as validate opts failed: %w", validateOptsPath, err)
442-
}
443-
}
433+
return tree, nil
434+
}
444435

445-
rdr := ach.NewReader(fd)
446-
if validateOpts != nil {
447-
rdr.SetValidation(validateOpts)
436+
func (m *filesystemMerging) accumulateMappings(tree *treemap.TreeMap[string, string], path string) error {
437+
fd, err := m.storage.Open(path)
438+
if err != nil {
439+
return fmt.Errorf("opening %s failed: %w", path, err)
440+
}
441+
defer fd.Close()
442+
443+
// Check for validate opts
444+
validateOptsPath := strings.TrimSuffix(path, filepath.Ext(path)) + ".json"
445+
var validateOpts *ach.ValidateOpts
446+
if optsFD, err := m.storage.Open(validateOptsPath); err == nil {
447+
if optsFD != nil {
448+
defer optsFD.Close()
448449
}
449450

450-
file, err := rdr.Read()
451+
err = json.NewDecoder(optsFD).Decode(&validateOpts)
451452
if err != nil {
452-
return fmt.Errorf("reading %s failed: %w", path, err)
453+
return fmt.Errorf("reading %s as validate opts failed: %w", validateOptsPath, err)
453454
}
455+
}
454456

455-
_, filename := filepath.Split(path)
457+
rdr := ach.NewReader(fd)
458+
if validateOpts != nil {
459+
rdr.SetValidation(validateOpts)
460+
}
461+
462+
file, err := rdr.Read()
463+
if err != nil {
464+
return fmt.Errorf("reading %s failed: %w", path, err)
465+
}
456466

457-
// Add each BatchHeader and Entry to the map
458-
for i := range file.Batches {
459-
bh := file.Batches[i].GetHeader().String()
467+
_, filename := filepath.Split(path)
460468

461-
entries := file.Batches[i].GetEntries()
462-
for m := range entries {
463-
tree.Set(makeKey(bh, entries[m]), filename)
464-
}
465-
}
469+
// Add each BatchHeader and Entry to the map
470+
for i := range file.Batches {
471+
bh := file.Batches[i].GetHeader().String()
466472

467-
return nil
468-
})
473+
entries := file.Batches[i].GetEntries()
469474

470-
return tree, err
475+
for m := range entries {
476+
key := makeKey(bh, entries[m])
477+
tree.Set(key, filename)
478+
}
479+
}
480+
481+
return nil
471482
}
472483

473484
func makeKey(bh string, entry *ach.EntryDetail) string {

0 commit comments

Comments
 (0)