Skip to content

Commit b063628

Browse files
authored
Merge branch 'main' into gha-fix
2 parents 5961eae + 2f1baea commit b063628

File tree

23 files changed

+1333
-638
lines changed

23 files changed

+1333
-638
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ require (
4141
github.com/couchbase/gocb/v2 v2.10.1
4242
github.com/crewjam/rfc5424 v0.1.0
4343
github.com/csnewman/dextk v0.3.0
44-
github.com/docker/docker v28.2.2+incompatible
44+
github.com/docker/docker v28.3.3+incompatible
4545
github.com/dustin/go-humanize v1.0.1
4646
github.com/elastic/go-elasticsearch/v8 v8.17.1
4747
github.com/envoyproxy/protoc-gen-validate v1.2.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,8 @@ github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5Jxw
483483
github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
484484
github.com/docker/docker v28.3.2+incompatible h1:wn66NJ6pWB1vBZIilP8G3qQPqHy5XymfYn5vsqeA5oA=
485485
github.com/docker/docker v28.3.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
486+
github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
487+
github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
486488
github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZSeVMrFgOr3T+zrFAo=
487489
github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
488490
github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8=

main.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ var (
136136
gitlabScanEndpoint = gitlabScan.Flag("endpoint", "GitLab endpoint.").Default("https://gitlab.com").String()
137137
gitlabScanRepos = gitlabScan.Flag("repo", "GitLab repo url. You can repeat this flag. Leave empty to scan all repos accessible with provided credential. Example: https://gitlab.com/org/repo.git").Strings()
138138
gitlabScanToken = gitlabScan.Flag("token", "GitLab token. Can be provided with environment variable GITLAB_TOKEN.").Envar("GITLAB_TOKEN").Required().String()
139+
gitlabScanGroupIds = gitlabScan.Flag("group-id", "GitLab group ID. If provided, it will scan the group and its subgroups. You can repeat this flag.").Strings()
139140
gitlabScanIncludePaths = gitlabScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
140141
gitlabScanExcludePaths = gitlabScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
141142
gitlabScanIncludeRepos = gitlabScan.Flag("include-repos", `Repositories to include in an org scan. This can also be a glob pattern. You can repeat this flag. Must use Gitlab repo full name. Example: "trufflesecurity/trufflehog", "trufflesecurity/t*"`).Strings()
@@ -788,10 +789,15 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
788789
return scanMetrics, fmt.Errorf("could not create filter: %v", err)
789790
}
790791

792+
if len(*gitlabScanRepos) > 0 && len(*gitlabScanGroupIds) > 0 {
793+
return scanMetrics, fmt.Errorf("invalid config: you cannot specify both repositories and groups at the same time")
794+
}
795+
791796
cfg := sources.GitlabConfig{
792797
Endpoint: *gitlabScanEndpoint,
793798
Token: *gitlabScanToken,
794799
Repos: *gitlabScanRepos,
800+
GroupIds: *gitlabScanGroupIds,
795801
IncludeRepos: *gitlabScanIncludeRepos,
796802
ExcludeRepos: *gitlabScanExcludeRepos,
797803
Filter: filter,

pkg/detectors/aha/aha.go

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ func (s Scanner) Keywords() []string {
3434
return []string{"aha.io"}
3535
}
3636

37+
func (s Scanner) Type() detectorspb.DetectorType {
38+
return detectorspb.DetectorType_Aha
39+
}
40+
41+
func (s Scanner) Description() string {
42+
return "Aha is a product management software suite. Aha API keys can be used to access and modify product data and workflows."
43+
}
44+
3745
func (s Scanner) getClient() *http.Client {
3846
if s.client != nil {
3947
return s.client
@@ -44,30 +52,39 @@ func (s Scanner) getClient() *http.Client {
4452
// FromData will find and optionally verify Aha secrets in a given set of bytes.
4553
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
4654
dataStr := string(data)
55+
56+
var uniqueFoundUrls = make(map[string]struct{})
57+
4758
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
48-
URLmatches := URLPat.FindAllStringSubmatch(dataStr, -1)
4959

50-
resURLMatch := "aha.io"
51-
for _, URLmatch := range URLmatches {
52-
resURLMatch = strings.TrimSpace(URLmatch[1])
60+
for _, match := range URLPat.FindAllStringSubmatch(dataStr, -1) {
61+
uniqueFoundUrls[match[1]] = struct{}{}
5362
}
5463

55-
for _, match := range matches {
56-
resMatch := strings.TrimSpace(match[1])
57-
58-
s1 := detectors.Result{
59-
DetectorType: detectorspb.DetectorType_Aha,
60-
Raw: []byte(resMatch),
61-
}
64+
// If no URL was found, fall back to the default host.
65+
if len(uniqueFoundUrls) == 0 {
66+
uniqueFoundUrls["aha.io"] = struct{}{}
67+
}
6268

63-
if verify {
64-
client := s.getClient()
65-
isVerified, verificationErr := verifyAha(ctx, client, resMatch, resURLMatch)
66-
s1.Verified = isVerified
67-
s1.SetVerificationError(verificationErr, resMatch)
69+
for _, match := range matches {
70+
for url := range uniqueFoundUrls {
71+
resMatch := strings.TrimSpace(match[1])
72+
73+
s1 := detectors.Result{
74+
DetectorType: detectorspb.DetectorType_Aha,
75+
Raw: []byte(resMatch),
76+
RawV2: []byte(resMatch + url),
77+
}
78+
79+
if verify {
80+
client := s.getClient()
81+
isVerified, verificationErr := verifyAha(ctx, client, resMatch, url)
82+
s1.Verified = isVerified
83+
s1.SetVerificationError(verificationErr, resMatch)
84+
}
85+
86+
results = append(results, s1)
6887
}
69-
70-
results = append(results, s1)
7188
}
7289

7390
return results, nil
@@ -98,11 +115,3 @@ func verifyAha(ctx context.Context, client *http.Client, resMatch, resURLMatch s
98115
return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
99116
}
100117
}
101-
102-
func (s Scanner) Type() detectorspb.DetectorType {
103-
return detectorspb.DetectorType_Aha
104-
}
105-
106-
func (s Scanner) Description() string {
107-
return "Aha is a product management software suite. Aha API keys can be used to access and modify product data and workflows."
108-
}

pkg/detectors/aha/aha_test.go

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ package aha
22

33
import (
44
"context"
5-
"fmt"
6-
"strings"
75
"testing"
86

97
"github.com/google/go-cmp/cmp"
@@ -12,52 +10,62 @@ import (
1210
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
1311
)
1412

15-
var (
16-
validPattern = "00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff/example.aha.io"
17-
invalidPattern = "00112233445566778899aabbCC$%eeff00112233445566778899aabbccddeeff/example.fake.io"
18-
)
19-
2013
func TestAha_Pattern(t *testing.T) {
2114
d := Scanner{}
2215
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
2316

24-
key := strings.Split(validPattern, "/")[0]
25-
url := strings.Split(validPattern, "/")[1]
26-
2717
tests := []struct {
2818
name string
2919
input string
3020
want []string
3121
}{
3222
{
33-
name: "valid pattern",
34-
input: fmt.Sprintf("aha.io = '%s'", validPattern),
35-
want: []string{key},
36-
},
37-
{
38-
name: "valid pattern - detect URL far away from keyword",
39-
input: fmt.Sprintf("aha.io = '%s\n URL is not close to the keyword but should be detected %s'", key, url),
40-
want: []string{key},
23+
name: "valid pattern",
24+
input: `
25+
[INFO] sending request to the aha.io API
26+
[DEBUG] using key = 81a1411a7e276fd88819df3137eb406e0f281f8a8c417947ca4b025890c8541c
27+
[DEBUG] using host = example.aha.io
28+
[INFO] response received: 200 OK
29+
`,
30+
want: []string{"81a1411a7e276fd88819df3137eb406e0f281f8a8c417947ca4b025890c8541cexample.aha.io"},
4131
},
4232
{
43-
name: "valid pattern - key out of prefix range",
44-
input: fmt.Sprintf("aha.io keyword is not close to the real key and secret = '%s'", validPattern),
45-
want: nil,
33+
name: "valid pattern - key out of prefix range",
34+
input: `
35+
[INFO] sending request to the aha.io API
36+
[WARN] Do not commit the secrets
37+
[DEBUG] using key = 81a1411a7e276fd88819df3137eb406e0f281f8a8c417947ca4b025890c8541c
38+
[DEBUG] using host = example.aha.io
39+
[INFO] response received: 200 OK
40+
`,
41+
want: nil,
4642
},
4743
{
48-
name: "valid pattern - only key",
49-
input: fmt.Sprintf("aha.io %s", key),
50-
want: []string{key},
44+
name: "valid pattern - only key",
45+
input: `
46+
[INFO] sending request to the aha.io API
47+
[DEBUG] using key = 81a1411a7e276fd88819df3137eb406e0f281f8a8c417947ca4b025890c8541c
48+
[INFO] response received: 200 OK
49+
`,
50+
want: []string{"81a1411a7e276fd88819df3137eb406e0f281f8a8c417947ca4b025890c8541caha.io"},
5151
},
5252
{
53-
name: "valid pattern - only URL",
54-
input: fmt.Sprintf("aha.io %s", url),
55-
want: nil,
53+
name: "valid pattern - only URL",
54+
input: `
55+
[INFO] sending request to the example.aha.io API
56+
[INFO] response received: 200 OK
57+
`,
58+
want: nil,
5659
},
5760
{
58-
name: "invalid pattern",
59-
input: fmt.Sprintf("aha.io %s", invalidPattern),
60-
want: nil,
61+
name: "invalid pattern",
62+
input: `
63+
[INFO] sending request to the aha.io API
64+
[DEBUG] using key = 81a1411a7e276fd88819df3137eJ406e0f281f8a8c417947ca4b025890c8541c
65+
[DEBUG] using host = 1test.aha.io
66+
[INFO] response received: 200 OK
67+
`,
68+
want: nil,
6169
},
6270
}
6371

pkg/detectors/github/v1/github_old.go

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/json"
66
"fmt"
77
"net/http"
8+
"strings"
89

910
regexp "github.com/wasilibs/go-re2"
1011

@@ -27,7 +28,8 @@ func (Scanner) CloudEndpoint() string { return "https://api.github.com" }
2728
var (
2829
// Oauth token
2930
// https://developer.github.com/v3/#oauth2-token-sent-in-a-header
30-
keyPat = regexp.MustCompile(`(?i)(?:github|gh|pat|token)[^\.].{0,40}[ =:'"]+([a-f0-9]{40})\b`)
31+
// The middle segment `\b[a-zA-Z0-9.\/?=&]{0,40}` matches any text between the keyword prefix and the token, so the full match can be checked against known non-sensitive patterns.
32+
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"github", "gh", "pat", "token"}) + `\b[a-zA-Z0-9.\/?=&]{0,40}` + `\b([a-f0-9]{40})\b`)
3133

3234
// TODO: Oauth2 client_id and client_secret
3335
// https://developer.github.com/v3/#oauth2-keysecret
@@ -55,13 +57,25 @@ type HeaderInfo struct {
5557
// Keywords are used for efficiently pre-filtering chunks.
5658
// Use identifiers in the secret preferably, or the provider name.
5759
func (s Scanner) Keywords() []string {
58-
return []string{"github", "gh", "pat", "token"}
60+
return []string{"github", "gh"}
61+
}
62+
63+
func (s Scanner) Type() detectorspb.DetectorType {
64+
return detectorspb.DetectorType_Github
65+
}
66+
67+
func (s Scanner) Description() string {
68+
return "GitHub is a web-based platform used for version control and collaborative software development. GitHub tokens can be used to access and modify repositories and other resources."
5969
}
6070

6171
var ghFalsePositives = map[detectors.FalsePositive]struct{}{
6272
detectors.FalsePositive("github commit"): {},
6373
}
6474

75+
var ghKnownNonSensitivePrefixes = []string{
76+
"avatars.githubusercontent.com", // github avatar urls prefix
77+
}
78+
6579
// FromData will find and optionally verify GitHub secrets in a given set of bytes.
6680
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
6781
dataStr := string(data)
@@ -70,7 +84,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
7084

7185
for _, match := range matches {
7286
// First match is entire regex, second is the first group.
73-
87+
matchPrefix := match[0]
7488
token := match[1]
7589

7690
// Note that this false positive check happens **before** verification! I don't know why it's written this way
@@ -79,6 +93,15 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
7993
continue
8094
}
8195

96+
// Skip matches containing a known non-sensitive prefix (e.g. GitHub avatar URLs) to avoid false positives.
97+
if isKnownNonSensitiveCommonPrefix(matchPrefix) {
98+
continue
99+
}
100+
101+
if detectors.StringShannonEntropy(token) < 3.5 {
102+
continue
103+
}
104+
82105
s1 := detectors.Result{
83106
DetectorType: detectorspb.DetectorType_Github,
84107
Raw: []byte(token),
@@ -180,10 +203,16 @@ func SetHeaderInfo(headers *HeaderInfo, s1 *detectors.Result) {
180203
}
181204
}
182205

183-
func (s Scanner) Type() detectorspb.DetectorType {
184-
return detectorspb.DetectorType_Github
185-
}
206+
// isKnownNonSensitiveCommonPrefix checks if the given prefix is a known, non-sensitive value.
207+
// The GitHub v1 detector uses a broad regex that can capture many false positives.
208+
// This function helps filter out matches that begin with common, safe prefixes.
209+
// Example: avatars.githubusercontent.com/u/56769451?u=088102b6160822bc68c25a2a5df170080d0b16a2
210+
func isKnownNonSensitiveCommonPrefix(matchPrefix string) bool {
211+
for _, prefix := range ghKnownNonSensitivePrefixes {
212+
if strings.Contains(matchPrefix, prefix) {
213+
return true
214+
}
215+
}
186216

187-
func (s Scanner) Description() string {
188-
return "GitHub is a web-based platform used for version control and collaborative software development. GitHub tokens can be used to access and modify repositories and other resources."
217+
return false
189218
}

0 commit comments

Comments
 (0)