Skip to content

Commit d730be0

Browse files
authored
feat(redact): use a scan regex for default redact rule of lines to improve cpu usage and reduce time cost (#1291)
1 parent 38ff340 commit d730be0

File tree

9 files changed

+647
-148
lines changed

9 files changed

+647
-148
lines changed

pkg/collect/redact.go

Lines changed: 98 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"path/filepath"
1111
"strings"
12+
"sync"
1213

1314
"github.com/pkg/errors"
1415
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
@@ -17,97 +18,128 @@ import (
1718
)
1819

1920
func RedactResult(bundlePath string, input CollectorResult, additionalRedactors []*troubleshootv1beta2.Redact) error {
21+
wg := &sync.WaitGroup{}
22+
23+
// Error channel to capture errors from goroutines
24+
errorCh := make(chan error, len(input))
25+
2026
for k, v := range input {
21-
file := k
2227

23-
var reader io.Reader
24-
if v == nil {
25-
// Collected contents are in a file. Get a reader to the file.
26-
info, err := os.Lstat(filepath.Join(bundlePath, file))
27-
if err != nil {
28-
if os.IsNotExist(errors.Cause(err)) {
29-
// File not found, moving on.
30-
continue
31-
}
32-
return errors.Wrap(err, "failed to stat file")
33-
}
28+
wg.Add(1)
3429

35-
// Redact the target file of a symlink
36-
// There is an opportunity for improving performance here by skipping symlinks
37-
// if a target has been redacted already, but that would require
38-
// some extra logic to ensure that a spec filtering only symlinks still works.
39-
if info.Mode().Type() == os.ModeSymlink {
40-
symlink := file
41-
target, err := os.Readlink(filepath.Join(bundlePath, symlink))
30+
go func(file string, data []byte) {
31+
defer wg.Done()
32+
var reader io.Reader
33+
if data == nil {
34+
35+
// Collected contents are in a file. Get a reader to the file.
36+
info, err := os.Lstat(filepath.Join(bundlePath, file))
4237
if err != nil {
43-
return errors.Wrap(err, "failed to read symlink")
38+
if os.IsNotExist(errors.Cause(err)) {
39+
// File not found, moving on.
40+
return
41+
}
42+
errorCh <- errors.Wrap(err, "failed to stat file")
43+
return
4444
}
4545

46-
// Get the relative path to the target file to conform with
47-
// the path formats of the CollectorResult
48-
file, err = filepath.Rel(bundlePath, target)
46+
// Redact the target file of a symlink
47+
// There is an opportunity for improving performance here by skipping symlinks
48+
// if a target has been redacted already, but that would require
49+
// some extra logic to ensure that a spec filtering only symlinks still works.
50+
if info.Mode().Type() == os.ModeSymlink {
51+
symlink := file
52+
target, err := os.Readlink(filepath.Join(bundlePath, symlink))
53+
if err != nil {
54+
errorCh <- errors.Wrap(err, "failed to read symlink")
55+
return
56+
}
57+
// Get the relative path to the target file to conform with
58+
// the path formats of the CollectorResult
59+
file, err = filepath.Rel(bundlePath, target)
60+
if err != nil {
61+
errorCh <- errors.Wrap(err, "failed to get relative path")
62+
return
63+
}
64+
klog.V(2).Infof("Redacting %s (symlink => %s)\n", file, symlink)
65+
} else {
66+
klog.V(2).Infof("Redacting %s\n", file)
67+
}
68+
r, err := input.GetReader(bundlePath, file)
4969
if err != nil {
50-
return errors.Wrap(err, "failed to get relative path")
70+
if os.IsNotExist(errors.Cause(err)) {
71+
return
72+
}
73+
errorCh <- errors.Wrap(err, "failed to get reader")
74+
return
5175
}
52-
klog.V(2).Infof("Redacting %s (symlink => %s)\n", file, symlink)
76+
defer r.Close()
77+
78+
reader = r
5379
} else {
54-
klog.V(2).Infof("Redacting %s\n", file)
80+
// Collected contents are in memory. Get a reader to the memory buffer.
81+
reader = bytes.NewBuffer(data)
5582
}
56-
r, err := input.GetReader(bundlePath, file)
57-
if err != nil {
58-
if os.IsNotExist(errors.Cause(err)) {
59-
continue
83+
84+
// If the file is .tar, .tgz or .tar.gz, it must not be redacted. Instead it is
85+
// decompressed and each file inside the tar redacted and compressed back into the archive.
86+
if filepath.Ext(file) == ".tar" || filepath.Ext(file) == ".tgz" || strings.HasSuffix(file, ".tar.gz") {
87+
tmpDir, err := ioutil.TempDir("", "troubleshoot-subresult-")
88+
if err != nil {
89+
errorCh <- errors.Wrap(err, "failed to create temp dir")
90+
return
6091
}
61-
return errors.Wrap(err, "failed to get reader")
62-
}
63-
defer r.Close()
92+
defer os.RemoveAll(tmpDir)
6493

65-
reader = r
66-
} else {
67-
// Collected contents are in memory. Get a reader to the memory buffer.
68-
reader = bytes.NewBuffer(v)
69-
}
94+
subResult, tarHeaders, err := decompressFile(tmpDir, reader, file)
95+
if err != nil {
96+
errorCh <- errors.Wrap(err, "failed to decompress file")
97+
return
98+
}
99+
err = RedactResult(tmpDir, subResult, additionalRedactors)
100+
if err != nil {
101+
errorCh <- errors.Wrap(err, "failed to redact file")
102+
return
103+
}
70104

71-
// If the file is .tar, .tgz or .tar.gz, it must not be redacted. Instead it is
72-
// decompressed and each file inside the tar redacted and compressed back into the archive.
73-
if filepath.Ext(file) == ".tar" || filepath.Ext(file) == ".tgz" || strings.HasSuffix(file, ".tar.gz") {
74-
tmpDir, err := ioutil.TempDir("", "troubleshoot-subresult-")
75-
if err != nil {
76-
return errors.Wrap(err, "failed to create temp dir")
77-
}
78-
defer os.RemoveAll(tmpDir)
105+
dstFilename := filepath.Join(bundlePath, file)
106+
err = compressFiles(tmpDir, subResult, tarHeaders, dstFilename)
107+
if err != nil {
108+
errorCh <- errors.Wrap(err, "failed to re-compress file")
109+
return
110+
}
79111

80-
subResult, tarHeaders, err := decompressFile(tmpDir, reader, file)
81-
if err != nil {
82-
return errors.Wrap(err, "failed to decompress file")
112+
os.RemoveAll(tmpDir) // ensure clean up on each iteration in addition to the defer
113+
114+
// Content of the tar file was redacted and re-compressed. Nothing more to do for this file, so return.
115+
return
83116
}
84-
err = RedactResult(tmpDir, subResult, additionalRedactors)
117+
118+
redacted, err := redact.Redact(reader, file, additionalRedactors)
85119
if err != nil {
86-
return errors.Wrap(err, "failed to redact file")
120+
errorCh <- errors.Wrap(err, "failed to redact io stream")
121+
return
87122
}
88123

89-
dstFilename := filepath.Join(bundlePath, file)
90-
err = compressFiles(tmpDir, subResult, tarHeaders, dstFilename)
124+
err = input.ReplaceResult(bundlePath, file, redacted)
91125
if err != nil {
92-
return errors.Wrap(err, "failed to re-compress file")
126+
errorCh <- errors.Wrap(err, "failed to create redacted result")
127+
return
93128
}
129+
}(k, v)
130+
}
94131

95-
os.RemoveAll(tmpDir) // ensure clean up on each iteration in addition to the defer
96-
97-
//Content of the tar file was redacted. Continue to next file.
98-
continue
99-
}
100-
101-
redacted, err := redact.Redact(reader, file, additionalRedactors)
102-
if err != nil {
103-
return errors.Wrap(err, "failed to redact io stream")
104-
}
132+
go func() {
133+
wg.Wait()
134+
close(errorCh)
135+
}()
105136

106-
err = input.ReplaceResult(bundlePath, file, redacted)
137+
for err := range errorCh {
107138
if err != nil {
108-
return errors.Wrap(err, "failed to create redacted result")
139+
return err
109140
}
110141
}
142+
111143
return nil
112144
}
113145

pkg/collect/result.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,12 @@ func (r CollectorResult) SaveResult(bundlePath string, relativePath string, read
127127
return errors.Wrap(err, "failed to copy data")
128128
}
129129

130-
klog.V(2).Infof("Added %q to bundle output", relativePath)
130+
fileInfo, err := f.Stat()
131+
if err != nil {
132+
return errors.Wrap(err, "failed to stat file")
133+
}
134+
135+
klog.V(2).Infof("Added %q (%d MB) to bundle output", relativePath, fileInfo.Size()/(1024*1024))
131136
return nil
132137
}
133138

pkg/constants/constants.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,4 +82,7 @@ const (
8282
// TermUI Display Constants
8383
MESSAGE_TEXT_PADDING = 4
8484
MESSAGE_TEXT_LINES_MARGIN_TO_BOTTOM = 4
85+
86+
// Bufio Reader Constants
87+
MAX_BUFFER_CAPACITY = 1024 * 1024
8588
)

pkg/redact/multi_line.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ import (
55
"fmt"
66
"io"
77
"regexp"
8+
"strings"
89
)
910

1011
type MultiLineRedactor struct {
12+
scan *regexp.Regexp
1113
re1 *regexp.Regexp
1214
re2 *regexp.Regexp
1315
maskText string
@@ -16,16 +18,26 @@ type MultiLineRedactor struct {
1618
isDefault bool
1719
}
1820

19-
func NewMultiLineRedactor(re1, re2, maskText, path, name string, isDefault bool) (*MultiLineRedactor, error) {
20-
compiled1, err := regexp.Compile(re1)
21+
func NewMultiLineRedactor(re1 LineRedactor, re2 string, maskText, path, name string, isDefault bool) (*MultiLineRedactor, error) {
22+
var scanCompiled *regexp.Regexp
23+
compiled1, err := regexp.Compile(re1.regex)
2124
if err != nil {
2225
return nil, err
2326
}
27+
28+
if re1.scan != "" {
29+
scanCompiled, err = regexp.Compile(re1.scan)
30+
if err != nil {
31+
return nil, err
32+
}
33+
}
34+
2435
compiled2, err := regexp.Compile(re2)
2536
if err != nil {
2637
return nil, err
2738
}
28-
return &MultiLineRedactor{re1: compiled1, re2: compiled2, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil
39+
40+
return &MultiLineRedactor{scan: scanCompiled, re1: compiled1, re2: compiled2, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil
2941
}
3042

3143
func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
@@ -52,6 +64,17 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
5264
for err == nil {
5365
lineNum++ // the first line that can be redacted is line 2
5466

67+
// If scan is not nil, lowercase line1 and check whether it matches scan; if it does not, write line1 unchanged and move on.
68+
if r.scan != nil {
69+
lowerLine1 := strings.ToLower(line1)
70+
if !r.scan.MatchString(lowerLine1) {
71+
fmt.Fprintf(writer, "%s\n", line1)
72+
line1, line2, err = getNextTwoLines(reader, &line2)
73+
flushLastLine = true
74+
continue
75+
}
76+
}
77+
5578
// If line1 matches re1, then transform line2 using re2
5679
if !r.re1.MatchString(line1) {
5780
fmt.Fprintf(writer, "%s\n", line1)
@@ -60,7 +83,6 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
6083
continue
6184
}
6285
flushLastLine = false
63-
6486
clean := r.re2.ReplaceAllString(line2, substStr)
6587

6688
// io.WriteString would be nicer, but reader strips new lines

0 commit comments

Comments
 (0)