Skip to content

Commit 86279b4

Browse files
authored
chore(redactors): memory consumption improvements (#1332)
* Document additional go tool profiling flags * Add a regex cache to avoid compiling regular expressions all the time * Reduce max buffer capacity * Prefer bytes to strings. Strings are immutable and hence we need to create a new one all the time when operating on them * Some more changes * More bytes * Use writer.Write instead of fmt.Fprintf * Clear regex cache when resetting redactors * Log errors when redactors error since they get swallowed * Add an improvement comment * Limit the number of goroutines spawned when redacting * Minor improvement * Write byte slices one at a time instead of concatenating them first * Add a test for writeBytes * Additional tests
1 parent 514c86d commit 86279b4

File tree

10 files changed

+227
-81
lines changed

10 files changed

+227
-81
lines changed

CONTRIBUTING.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ go tool pprof -http=":8000" cpu.prof
9494
go tool pprof -http=":8001" mem.prof
9595
```
9696

97+
**Additional flags for memory profiling**
98+
- `inuse_space`: Amount of memory allocated and not released yet (default).
99+
- `inuse_objects`: Number of objects allocated and not released yet.
100+
- `alloc_space`: Total amount of memory allocated (regardless of whether it has been released).
101+
- `alloc_objects`: Total number of objects allocated (regardless of whether they have been released).
102+
97103
For more on profiling, please visit https://go.dev/doc/diagnostics#profiling
98104

99105
## Contribution workflow

pkg/collect/redact.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"bytes"
66
"compress/gzip"
77
"io"
8-
"io/ioutil"
98
"os"
109
"path/filepath"
1110
"strings"
@@ -17,18 +16,29 @@ import (
1716
"k8s.io/klog/v2"
1817
)
1918

19+
// Max number of concurrent redactors to run
20+
// Ensure the number is low enough since each of the redactors
21+
// also spawns goroutines to redact files in tar archives and
22+
// other goroutines for each redactor spec.
23+
const MAX_CONCURRENT_REDACTORS = 10
24+
2025
func RedactResult(bundlePath string, input CollectorResult, additionalRedactors []*troubleshootv1beta2.Redact) error {
2126
wg := &sync.WaitGroup{}
2227

2328
// Error channel to capture errors from goroutines
2429
errorCh := make(chan error, len(input))
30+
limitCh := make(chan struct{}, MAX_CONCURRENT_REDACTORS)
31+
defer close(limitCh)
2532

2633
for k, v := range input {
34+
limitCh <- struct{}{}
2735

2836
wg.Add(1)
2937

3038
go func(file string, data []byte) {
3139
defer wg.Done()
40+
defer func() { <-limitCh }() // free up after the function execution has run
41+
3242
var reader io.Reader
3343
if data == nil {
3444

@@ -84,7 +94,7 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors
8494
// If the file is .tar, .tgz or .tar.gz, it must not be redacted. Instead it is
8595
// decompressed and each file inside the tar redacted and compressed back into the archive.
8696
if filepath.Ext(file) == ".tar" || filepath.Ext(file) == ".tgz" || strings.HasSuffix(file, ".tar.gz") {
87-
tmpDir, err := ioutil.TempDir("", "troubleshoot-subresult-")
97+
tmpDir, err := os.MkdirTemp("", "troubleshoot-subresult-")
8898
if err != nil {
8999
errorCh <- errors.Wrap(err, "failed to create temp dir")
90100
return

pkg/collect/result.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func (r CollectorResult) SaveResult(bundlePath string, relativePath string, read
132132
return errors.Wrap(err, "failed to stat file")
133133
}
134134

135-
klog.V(2).Infof("Added %q (%d MB) to bundle output", relativePath, fileInfo.Size()/(1024*1024))
135+
klog.V(2).Infof("Added %q (%d KB) to bundle output", relativePath, fileInfo.Size()/(1024))
136136
return nil
137137
}
138138

pkg/constants/constants.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ const (
8383
MESSAGE_TEXT_PADDING = 4
8484
MESSAGE_TEXT_LINES_MARGIN_TO_BOTTOM = 4
8585

86-
// Bufio Reader Constants
87-
MAX_BUFFER_CAPACITY = 1024 * 1024
86+
// This is the initial size of the buffer allocated.
87+
// Under the hood, an array of size N is allocated in memory
88+
BUF_INIT_SIZE = 4096 // 4KB
89+
90+
// This is the maximum size the buffer can grow to
91+
// It's not what the buffer will be allocated to initially
92+
SCANNER_MAX_SIZE = 10 * 1024 * 1024 // 10MB
8893
)

pkg/redact/literal.go

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,26 @@ package redact
22

33
import (
44
"bufio"
5+
"bytes"
56
"fmt"
67
"io"
7-
"strings"
8+
9+
"github.com/replicatedhq/troubleshoot/pkg/constants"
10+
"k8s.io/klog/v2"
811
)
912

1013
type literalRedactor struct {
11-
matchString string
12-
filePath string
13-
redactName string
14-
isDefault bool
14+
match []byte
15+
filePath string
16+
redactName string
17+
isDefault bool
1518
}
1619

17-
func literalString(matchString, path, name string) Redactor {
20+
func literalString(match []byte, path, name string) Redactor {
1821
return literalRedactor{
19-
matchString: matchString,
20-
filePath: path,
21-
redactName: name,
22+
match: match,
23+
filePath: path,
24+
redactName: name,
2225
}
2326
}
2427

@@ -28,32 +31,37 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
2831
go func() {
2932
var err error
3033
defer func() {
31-
if err == io.EOF {
34+
if err == nil || err == io.EOF {
3235
writer.Close()
3336
} else {
37+
if err == bufio.ErrTooLong {
38+
s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024)
39+
klog.V(2).Info(s)
40+
} else {
41+
klog.V(2).Info(fmt.Sprintf("Error redacting %q: %v", path, err))
42+
}
3443
writer.CloseWithError(err)
3544
}
3645
}()
3746

38-
reader := bufio.NewReader(input)
47+
buf := make([]byte, constants.BUF_INIT_SIZE)
48+
scanner := bufio.NewScanner(input)
49+
scanner.Buffer(buf, constants.SCANNER_MAX_SIZE)
50+
3951
lineNum := 0
40-
for {
52+
for scanner.Scan() {
4153
lineNum++
42-
var line string
43-
line, err = readLine(reader)
44-
if err != nil {
45-
return
46-
}
54+
line := scanner.Bytes()
4755

48-
clean := strings.ReplaceAll(line, r.matchString, MASK_TEXT)
56+
clean := bytes.ReplaceAll(line, r.match, maskTextBytes)
4957

50-
// io.WriteString would be nicer, but scanner strips new lines
51-
fmt.Fprintf(writer, "%s\n", clean)
58+
// Append newline since scanner strips it
59+
err = writeBytes(writer, clean, NEW_LINE)
5260
if err != nil {
5361
return
5462
}
5563

56-
if clean != line {
64+
if !bytes.Equal(clean, line) {
5765
addRedaction(Redaction{
5866
RedactorName: r.redactName,
5967
CharactersRemoved: len(line) - len(clean),
@@ -63,6 +71,9 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
6371
})
6472
}
6573
}
74+
if scanErr := scanner.Err(); scanErr != nil {
75+
err = scanErr
76+
}
6677
}()
6778
return out
6879
}

pkg/redact/multi_line.go

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ package redact
22

33
import (
44
"bufio"
5-
"fmt"
5+
"bytes"
66
"io"
77
"regexp"
8-
"strings"
98
)
109

1110
type MultiLineRedactor struct {
@@ -20,19 +19,19 @@ type MultiLineRedactor struct {
2019

2120
func NewMultiLineRedactor(re1 LineRedactor, re2 string, maskText, path, name string, isDefault bool) (*MultiLineRedactor, error) {
2221
var scanCompiled *regexp.Regexp
23-
compiled1, err := regexp.Compile(re1.regex)
22+
compiled1, err := compileRegex(re1.regex)
2423
if err != nil {
2524
return nil, err
2625
}
2726

2827
if re1.scan != "" {
29-
scanCompiled, err = regexp.Compile(re1.scan)
28+
scanCompiled, err = compileRegex(re1.scan)
3029
if err != nil {
3130
return nil, err
3231
}
3332
}
3433

35-
compiled2, err := regexp.Compile(re2)
34+
compiled2, err := compileRegex(re2)
3635
if err != nil {
3736
return nil, err
3837
}
@@ -48,14 +47,18 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
4847
writer.CloseWithError(err)
4948
}()
5049

51-
substStr := getReplacementPattern(r.re2, r.maskText)
50+
substStr := []byte(getReplacementPattern(r.re2, r.maskText))
5251

5352
reader := bufio.NewReader(input)
5453
line1, line2, err := getNextTwoLines(reader, nil)
5554
if err != nil {
5655
// this will print 2 blank lines for empty input...
57-
fmt.Fprintf(writer, "%s\n", line1)
58-
fmt.Fprintf(writer, "%s\n", line2)
56+
// Append newlines since scanner strips them
57+
err = writeBytes(writer, line1, NEW_LINE, line2, NEW_LINE)
58+
if err != nil {
59+
return
60+
}
61+
5962
return
6063
}
6164

@@ -66,33 +69,41 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
6669

6770
// if scan is not nil, then check if line1 matches scan by lowercasing it
6871
if r.scan != nil {
69-
lowerLine1 := strings.ToLower(line1)
70-
if !r.scan.MatchString(lowerLine1) {
71-
fmt.Fprintf(writer, "%s\n", line1)
72-
line1, line2, err = getNextTwoLines(reader, &line2)
72+
lowerLine1 := bytes.ToLower(line1)
73+
if !r.scan.Match(lowerLine1) {
74+
// Append newline since scanner strips it
75+
err = writeBytes(writer, line1, NEW_LINE)
76+
if err != nil {
77+
return
78+
}
79+
line1, line2, err = getNextTwoLines(reader, line2)
7380
flushLastLine = true
7481
continue
7582
}
7683
}
7784

7885
// If line1 matches re1, then transform line2 using re2
79-
if !r.re1.MatchString(line1) {
80-
fmt.Fprintf(writer, "%s\n", line1)
81-
line1, line2, err = getNextTwoLines(reader, &line2)
86+
if !r.re1.Match(line1) {
87+
// Append newline since scanner strips it
88+
err = writeBytes(writer, line1, NEW_LINE)
89+
if err != nil {
90+
return
91+
}
92+
line1, line2, err = getNextTwoLines(reader, line2)
8293
flushLastLine = true
8394
continue
8495
}
8596
flushLastLine = false
86-
clean := r.re2.ReplaceAllString(line2, substStr)
97+
clean := r.re2.ReplaceAll(line2, substStr)
8798

88-
// io.WriteString would be nicer, but reader strips new lines
89-
fmt.Fprintf(writer, "%s\n%s\n", line1, clean)
99+
// Append newlines since scanner strips them
100+
err = writeBytes(writer, line1, NEW_LINE, clean, NEW_LINE)
90101
if err != nil {
91102
return
92103
}
93104

94105
// if clean is not equal to line2, a redaction was performed
95-
if clean != line2 {
106+
if !bytes.Equal(clean, line2) {
96107
addRedaction(Redaction{
97108
RedactorName: r.redactName,
98109
CharactersRemoved: len(line2) - len(clean),
@@ -106,15 +117,18 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
106117
}
107118

108119
if flushLastLine {
109-
fmt.Fprintf(writer, "%s\n", line1)
120+
// Append newline since scanner strips it
121+
err = writeBytes(writer, line1, NEW_LINE)
122+
if err != nil {
123+
return
124+
}
110125
}
111126
}()
112127
return out
113128
}
114129

115-
func getNextTwoLines(reader *bufio.Reader, curLine2 *string) (line1 string, line2 string, err error) {
116-
line1 = ""
117-
line2 = ""
130+
func getNextTwoLines(reader *bufio.Reader, curLine2 []byte) (line1 []byte, line2 []byte, err error) {
131+
line2 = []byte{}
118132

119133
if curLine2 == nil {
120134
line1, err = readLine(reader)
@@ -126,11 +140,23 @@ func getNextTwoLines(reader *bufio.Reader, curLine2 *string) (line1 string, line
126140
return
127141
}
128142

129-
line1 = *curLine2
143+
line1 = curLine2
130144
line2, err = readLine(reader)
131145
if err != nil {
132146
return
133147
}
134148

135149
return
136150
}
151+
152+
// writeBytes writes all byte slices to the writer
153+
// in the order they are passed in the variadic argument
154+
func writeBytes(w io.Writer, bs ...[]byte) error {
155+
for _, b := range bs {
156+
_, err := w.Write(b)
157+
if err != nil {
158+
return err
159+
}
160+
}
161+
return nil
162+
}

0 commit comments

Comments
 (0)