@@ -10,8 +10,10 @@ import (
10
10
"path/filepath"
11
11
"regexp"
12
12
"strings"
13
+ "sync"
13
14
"time"
14
15
16
+ ahocorasick "github.com/BobuSumisu/aho-corasick"
15
17
"github.com/avast/apkparser"
16
18
dextk "github.com/csnewman/dextk"
17
19
@@ -29,6 +31,80 @@ import (
29
31
// ToDo: Scan nested APKs (aka XAPK files). At the moment, archive.go skips over them.
30
32
// ToDo: Provide file location information to secret output.
31
33
34
+ var (
35
+ keywordMatcherOnce sync.Once
36
+ keywordMatcher * detectorKeywordMatcher
37
+ )
38
+
39
+ func defaultDetectorKeywords () []string {
40
+ allDetectors := defaults .DefaultDetectors ()
41
+
42
+ // Remove keywords that cause lots of false positives.
43
+ var exclusions = []string {
44
+ "AKIA" , "SG." , "pat" , "token" , "gh" , "github" , "sql" , "database" , "http" , "key" , "api-" , "sdk-" , "float" , "-us" , "gh" , "pat" , "token" , "sid" , "http" , "private" , "key" , "segment" , "close" , "protocols" , "verifier" , "box" , "privacy" , "dm" , "sl." , "vf" , "flat" ,
45
+ }
46
+
47
+ var keywords []string
48
+ exclusionSet := make (map [string ]struct {})
49
+ for _ , excl := range exclusions {
50
+ exclusionSet [strings .ToLower (excl )] = struct {}{}
51
+ }
52
+
53
+ // Aggregate all keywords from detectors.
54
+ for _ , detector := range allDetectors {
55
+ for _ , kw := range detector .Keywords () {
56
+ kwLower := strings .ToLower (kw )
57
+ if _ , excluded := exclusionSet [kwLower ]; ! excluded {
58
+ keywords = append (keywords , kwLower )
59
+ }
60
+ }
61
+ }
62
+ return keywords
63
+ }
64
+
65
+ // detectorKeywordMatcher encapsulates the Aho-Corasick trie for efficient keyword matching.
66
+ // It is used to scan APK file contents for keywords associated with our credential detectors.
67
+ // By only processing files/sections that contain these keywords, we can efficiently filter
68
+ // out irrelevant data and focus on content that is more likely to contain credentials.
69
+ // The Aho-Corasick algorithm provides fast, simultaneous matching of multiple patterns in
70
+ // a single pass through the text, which is crucial for performance when scanning large APK files.
71
+ type detectorKeywordMatcher struct { trie * ahocorasick.Trie }
72
+
73
+ // getDefaultDetectorKeywordMatcher creates or returns the singleton detectorKeywordMatcher.
74
+ // This is implemented as a singleton for several important reasons:
75
+ // 1. Building the Aho-Corasick trie is computationally expensive and should only be done once.
76
+ // 2. The trie is immutable after construction and can be safely shared across goroutines.
77
+ // 3. The keyword list from the detectors is static for a given program execution.
78
+ // 4. Memory efficiency - we avoid duplicating the trie structure for each handler instance.
79
+ func getDefaultDetectorKeywordMatcher () * detectorKeywordMatcher {
80
+ keywordMatcherOnce .Do (func () {
81
+ keywords := defaultDetectorKeywords ()
82
+ keywordMatcher = & detectorKeywordMatcher {
83
+ trie : ahocorasick .NewTrieBuilder ().AddStrings (keywords ).Build (),
84
+ }
85
+ })
86
+ return keywordMatcher
87
+ }
88
+
89
+ // FindKeywords scans the input text and returns a slice of matched keywords.
90
+ // The method is thread-safe and uses a read lock since the trie is immutable.
91
+ // It returns unique matches only, eliminating duplicates that may occur when
92
+ // the same keyword appears multiple times in the input text.
93
+ func (km * detectorKeywordMatcher ) FindKeywords (text []byte ) []string {
94
+ matches := km .trie .Match (bytes .ToLower (text ))
95
+ found := make ([]string , 0 , len (matches ))
96
+ seen := make (map [string ]struct {}) // To avoid duplicate entries
97
+
98
+ for _ , match := range matches {
99
+ keyword := match .MatchString ()
100
+ if _ , exists := seen [keyword ]; ! exists {
101
+ found = append (found , keyword )
102
+ seen [keyword ] = struct {}{}
103
+ }
104
+ }
105
+ return found
106
+ }
107
+
32
108
var (
33
109
stringInstructionType = "const-string"
34
110
targetInstructionTypes = []string {stringInstructionType , "iput-object" , "sput-object" , "const-class" , "invoke-virtual" , "invoke-super" , "invoke-direct" , "invoke-static" , "invoke-interface" }
@@ -53,15 +129,15 @@ var (
53
129
54
130
// apkHandler handles apk archive formats.
55
131
type apkHandler struct {
56
- keywordMatcher * defaults. DefaultDetectorKeywordMatcher
132
+ keywordMatcher * detectorKeywordMatcher
57
133
* defaultHandler
58
134
}
59
135
60
136
// newAPKHandler creates an apkHandler.
61
137
func newAPKHandler () * apkHandler {
62
138
return & apkHandler {
63
139
defaultHandler : newDefaultHandler (apkHandlerType ),
64
- keywordMatcher : defaults . NewDefaultDetectorKeywordMatcher (),
140
+ keywordMatcher : getDefaultDetectorKeywordMatcher (),
65
141
}
66
142
}
67
143
0 commit comments