Skip to content

Commit 3b1cd65

Browse files
authored
Add base64 detectors. (#414)
1 parent 6d3f27b commit 3b1cd65

File tree

3 files changed

+181
-0
lines changed

3 files changed

+181
-0
lines changed

pkg/decoders/base64.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package decoders
2+
3+
import (
4+
"bytes"
5+
"encoding/base64"
6+
"strings"
7+
8+
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
9+
)
10+
11+
// Base64 is a stateless decoder whose FromChunk method replaces
// base64-encoded substrings of a chunk with their decoded bytes.
type Base64 struct{}
12+
13+
var (
	// b64Charset holds every byte that may appear in standard, padded base64
	// text: A-Z, a-z, 0-9, '+', '/' and '='.
	b64Charset = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")

	// b64EndChars is the cutset trimmed from the edges of a candidate run
	// before it is inspected for a key=value separator.
	b64EndChars = "+/="
)

// getSubstringsOfCharacterSet scans data for maximal runs of characters drawn
// from charset and returns, in order of appearance, every run that is longer
// than threshold characters. Each run is normalized by appendB64Substring
// before it is returned, so the results may be shorter than the run they came
// from. It returns nil when data is empty or no run qualifies.
func getSubstringsOfCharacterSet(data []byte, charset []byte, threshold int) []string {
	if len(data) == 0 {
		return nil
	}

	var substrings []string

	// Runs are sliced straight out of text instead of being accumulated in a
	// strings.Builder, which must not be copied once it has been written to.
	text := string(data)
	start := 0 // byte offset where the current run began; valid while count > 0
	count := 0 // number of characters in the current run

	for i, char := range text {
		if bytes.ContainsRune(charset, char) {
			if count == 0 {
				start = i
			}
			count++
			continue
		}
		// A character outside the set ends the current run. The count > 0
		// guard keeps a negative threshold from emitting empty strings for
		// every separator.
		if count > 0 && count > threshold {
			substrings = appendB64Substring(text[start:i], substrings)
		}
		count = 0
	}

	// Flush a run that extends to the end of data.
	if count > 0 && count > threshold {
		substrings = appendB64Substring(text[start:], substrings)
	}

	return substrings
}

// appendB64Substring normalizes candidate and appends it to substrings.
//
// Leading '+', '/' and '=' characters are removed. If an '=' remains anywhere
// other than the trailing padding, the candidate is treated as key=value and
// only the text after the first '=' is kept. Candidates that end up empty are
// dropped, since they can never decode to anything.
func appendB64Substring(candidate string, substrings []string) []string {
	candidate = strings.TrimLeft(candidate, b64EndChars)

	// handle key=value
	if strings.Contains(strings.TrimRight(candidate, b64EndChars), "=") {
		candidate = candidate[strings.Index(candidate, "=")+1:]
	}

	if candidate == "" {
		return substrings
	}
	return append(substrings, candidate)
}
57+
58+
func (d *Base64) FromChunk(chunk *sources.Chunk) *sources.Chunk {
59+
60+
encodedSubstrings := getSubstringsOfCharacterSet(chunk.Data, b64Charset, 20)
61+
decodedSubstrings := map[string][]byte{}
62+
63+
for _, str := range encodedSubstrings {
64+
dec, err := base64.StdEncoding.DecodeString(str)
65+
if err == nil && len(dec) > 0 {
66+
decodedSubstrings[str] = dec
67+
}
68+
}
69+
70+
if len(decodedSubstrings) > 0 {
71+
for substring, dec := range decodedSubstrings {
72+
chunk.Data = bytes.Replace(chunk.Data, []byte(substring), dec, 1)
73+
}
74+
return chunk
75+
}
76+
77+
return nil
78+
}

pkg/decoders/base64_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package decoders
2+
3+
import (
4+
"testing"
5+
6+
"github.com/kylelemons/godebug/pretty"
7+
8+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
9+
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
10+
)
11+
12+
func TestBase64_FromChunk(t *testing.T) {
13+
tests := []struct {
14+
name string
15+
chunk *sources.Chunk
16+
want *sources.Chunk
17+
}{
18+
{
19+
name: "only b64 chunk",
20+
chunk: &sources.Chunk{
21+
Data: []byte(`bG9uZ2VyLWVuY29kZWQtc2VjcmV0LXRlc3Q=`),
22+
},
23+
want: &sources.Chunk{
24+
Data: []byte(`longer-encoded-secret-test`),
25+
},
26+
},
27+
{
28+
name: "mixed content",
29+
chunk: &sources.Chunk{
30+
Data: []byte(`token: bG9uZ2VyLWVuY29kZWQtc2VjcmV0LXRlc3Q=`),
31+
},
32+
want: &sources.Chunk{
33+
Data: []byte(`token: longer-encoded-secret-test`),
34+
},
35+
},
36+
{
37+
name: "no chunk",
38+
chunk: &sources.Chunk{
39+
Data: []byte(``),
40+
},
41+
want: nil,
42+
},
43+
{
44+
name: "env var (looks like all b64 decodable but has `=` in the middle)",
45+
chunk: &sources.Chunk{
46+
Data: []byte(`some-encoded-secret=dGVzdHNlY3JldA==`),
47+
},
48+
want: &sources.Chunk{
49+
Data: []byte(`some-encoded-secret=testsecret`),
50+
},
51+
},
52+
{
53+
name: "has longer b64 inside",
54+
chunk: &sources.Chunk{
55+
Data: []byte(`some-encoded-secret="bG9uZ2VyLWVuY29kZWQtc2VjcmV0LXRlc3Q="`),
56+
},
57+
want: &sources.Chunk{
58+
Data: []byte(`some-encoded-secret="longer-encoded-secret-test"`),
59+
},
60+
},
61+
{
62+
name: "many possible substrings",
63+
chunk: &sources.Chunk{
64+
Data: []byte(`Many substrings in this slack message could be base64 decoded
65+
but only dGhpcyBlbmNhcHN1bGF0ZWQgc2VjcmV0 should be decoded.`),
66+
},
67+
want: &sources.Chunk{
68+
Data: []byte(`Many substrings in this slack message could be base64 decoded
69+
but only this encapsulated secret should be decoded.`),
70+
},
71+
},
72+
}
73+
for _, tt := range tests {
74+
t.Run(tt.name, func(t *testing.T) {
75+
d := &Base64{}
76+
got := d.FromChunk(tt.chunk)
77+
if tt.want != nil {
78+
if got == nil {
79+
t.Fatal("got nil, did not want nil")
80+
}
81+
if diff := pretty.Compare(string(got.Data), string(tt.want.Data)); diff != "" {
82+
t.Errorf("Base64FromChunk() %s diff: (-got +want)\n%s", tt.name, diff)
83+
}
84+
} else {
85+
if got != nil {
86+
t.Error("Expected nil chunk")
87+
}
88+
}
89+
})
90+
}
91+
}
92+
93+
func BenchmarkFromChunk(benchmark *testing.B) {
94+
d := Base64{}
95+
for name, data := range detectors.MustGetBenchmarkData() {
96+
benchmark.Run(name, func(b *testing.B) {
97+
for n := 0; n < b.N; n++ {
98+
d.FromChunk(&sources.Chunk{Data: data})
99+
}
100+
})
101+
}
102+
}

pkg/decoders/decoders.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
func DefaultDecoders() []Decoder {
88
return []Decoder{
99
&Plain{},
10+
&Base64{},
1011
}
1112
}
1213

0 commit comments

Comments
 (0)