Skip to content

Commit 7029746

Browse files
bunnyanon and abmussani authored
Added deepseek support (#3980)
* Added deepseek support * Fixed requested changes * Fixed merge conflict * Added 'deepseek' as regex prefix and keywords to avoid false positives; updated the test to align with the regex prefix. --------- Co-authored-by: Abdul Basit <[email protected]> Co-authored-by: Abdul Basit <[email protected]>
1 parent 15fd312 commit 7029746

File tree

6 files changed

+363
-9
lines changed

6 files changed

+363
-9
lines changed

pkg/detectors/deepseek/deepseek.go

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package deepseek
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"net/http"
9+
10+
regexp "github.com/wasilibs/go-re2"
11+
12+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
14+
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
16+
)
17+
18+
type Scanner struct {
19+
client *http.Client
20+
}
21+
22+
// Ensure the Scanner satisfies the interface at compile time.
23+
var _ detectors.Detector = (*Scanner)(nil)
24+
25+
var (
26+
defaultClient = common.SaneHttpClient()
27+
28+
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"deepseek"}) + `\b(sk-[a-z0-9]{32})\b`)
29+
)
30+
31+
// Keywords are used for efficiently pre-filtering chunks.
32+
// Use identifiers in the secret preferably, or the provider name.
33+
func (s Scanner) Keywords() []string {
34+
return []string{"deepseek"}
35+
}
36+
37+
// FromData will find and optionally verify DeepSeek secrets in a given set of bytes.
38+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
39+
dataStr := string(data)
40+
41+
uniqueMatches := make(map[string]struct{})
42+
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
43+
uniqueMatches[match[1]] = struct{}{}
44+
}
45+
46+
for token := range uniqueMatches {
47+
s1 := detectors.Result{
48+
DetectorType: detectorspb.DetectorType_DeepSeek,
49+
Raw: []byte(token),
50+
}
51+
52+
if verify {
53+
client := s.client
54+
if client == nil {
55+
client = defaultClient
56+
}
57+
58+
verified, extraData, verificationErr := verifyToken(ctx, client, token)
59+
s1.Verified = verified
60+
s1.ExtraData = extraData
61+
s1.SetVerificationError(verificationErr)
62+
}
63+
64+
results = append(results, s1)
65+
}
66+
67+
return
68+
}
69+
70+
func verifyToken(ctx context.Context, client *http.Client, token string) (bool, map[string]string, error) {
71+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.deepseek.com/user/balance", nil)
72+
if err != nil {
73+
return false, nil, err
74+
}
75+
76+
req.Header.Set("Content-Type", "application/json; charset=utf-8")
77+
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))
78+
res, err := client.Do(req)
79+
if err != nil {
80+
return false, nil, err
81+
}
82+
defer func() {
83+
_, _ = io.Copy(io.Discard, res.Body)
84+
_ = res.Body.Close()
85+
}()
86+
87+
switch res.StatusCode {
88+
case http.StatusOK:
89+
var resData response
90+
if err = json.NewDecoder(res.Body).Decode(&resData); err != nil {
91+
return false, nil, err
92+
}
93+
94+
extraData := map[string]string{
95+
"is_available": fmt.Sprintf("%t", resData.IsAvailable),
96+
}
97+
return true, extraData, nil
98+
case http.StatusUnauthorized:
99+
// Invalid
100+
return false, nil, nil
101+
default:
102+
return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
103+
}
104+
}
105+
106+
func (s Scanner) Type() detectorspb.DetectorType {
107+
return detectorspb.DetectorType_DeepSeek
108+
}
109+
110+
func (s Scanner) Description() string {
111+
return "DeepSeek is an artificial intelligence company that develops large language models (LLMs)"
112+
}
113+
114+
// response is the subset of the JSON body read by verifyToken on an HTTP 200;
// all other fields of the payload are ignored.
type response struct {
	// IsAvailable is decoded from the "is_available" JSON field.
	IsAvailable bool `json:"is_available"`
}
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
//go:build detectors
2+
// +build detectors
3+
4+
package deepseek
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"github.com/google/go-cmp/cmp"
10+
"github.com/google/go-cmp/cmp/cmpopts"
11+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
12+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
14+
"testing"
15+
"time"
16+
)
17+
18+
func TestDeepseek_FromChunk(t *testing.T) {
19+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
20+
defer cancel()
21+
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
22+
if err != nil {
23+
t.Fatalf("could not get test secrets from GCP: %s", err)
24+
}
25+
26+
apiKey := testSecrets.MustGetField("DEEPSEEK")
27+
inactiveSecret := testSecrets.MustGetField("DEEPSEEK_INACTIVE")
28+
29+
type args struct {
30+
ctx context.Context
31+
data []byte
32+
verify bool
33+
}
34+
tests := []struct {
35+
name string
36+
s Scanner
37+
args args
38+
want []detectors.Result
39+
wantErr bool
40+
wantVerificationErr bool
41+
}{
42+
{
43+
name: "found, verified",
44+
s: Scanner{},
45+
args: args{
46+
ctx: context.Background(),
47+
data: []byte(fmt.Sprintf("You can find a deepseek secret %s within", apiKey)),
48+
verify: true,
49+
},
50+
want: []detectors.Result{
51+
{
52+
DetectorType: detectorspb.DetectorType_DeepSeek,
53+
Verified: true,
54+
},
55+
},
56+
wantErr: false,
57+
wantVerificationErr: false,
58+
},
59+
{
60+
name: "found, unverified",
61+
s: Scanner{},
62+
args: args{
63+
ctx: context.Background(),
64+
data: []byte(fmt.Sprintf("You can find a deepseek secret %s within but not valid", inactiveSecret)),
65+
verify: true,
66+
},
67+
want: []detectors.Result{
68+
{
69+
DetectorType: detectorspb.DetectorType_DeepSeek,
70+
Verified: false,
71+
},
72+
},
73+
wantErr: false,
74+
wantVerificationErr: false,
75+
},
76+
{
77+
name: "not found",
78+
s: Scanner{},
79+
args: args{
80+
ctx: context.Background(),
81+
data: []byte("You cannot find the secret within"),
82+
verify: true,
83+
},
84+
want: nil,
85+
wantErr: false,
86+
wantVerificationErr: false,
87+
},
88+
{
89+
name: "found, would be verified if not for timeout",
90+
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
91+
args: args{
92+
ctx: context.Background(),
93+
data: []byte(fmt.Sprintf("You can find a deepseek secret %s within", apiKey)),
94+
verify: true,
95+
},
96+
want: []detectors.Result{
97+
{
98+
DetectorType: detectorspb.DetectorType_DeepSeek,
99+
Verified: false,
100+
},
101+
},
102+
wantErr: false,
103+
wantVerificationErr: true,
104+
},
105+
}
106+
for _, tt := range tests {
107+
t.Run(tt.name, func(t *testing.T) {
108+
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
109+
if (err != nil) != tt.wantErr {
110+
t.Errorf("Deepseek.FromData() error = %v, wantErr %v", err, tt.wantErr)
111+
return
112+
}
113+
for i := range got {
114+
if len(got[i].Raw) == 0 {
115+
t.Fatalf("no raw secret present: \n %+v", got[i])
116+
}
117+
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
118+
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
119+
}
120+
// Ignore Extra Data for comparison
121+
if tt.want[i].Verified == true {
122+
if got[i].ExtraData != nil {
123+
got[i].ExtraData = nil
124+
} else {
125+
t.Fatalf("no extra data")
126+
}
127+
}
128+
}
129+
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError", "AnalysisInfo")
130+
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
131+
t.Errorf("Deepseek.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
132+
}
133+
})
134+
}
135+
}
136+
137+
func BenchmarkFromData(benchmark *testing.B) {
138+
ctx := context.Background()
139+
s := Scanner{}
140+
for name, data := range detectors.MustGetBenchmarkData() {
141+
benchmark.Run(name, func(b *testing.B) {
142+
b.ResetTimer()
143+
for n := 0; n < b.N; n++ {
144+
_, err := s.FromData(ctx, false, data)
145+
if err != nil {
146+
b.Fatal(err)
147+
}
148+
}
149+
})
150+
}
151+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package deepseek
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/google/go-cmp/cmp"
8+
9+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
10+
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
11+
)
12+
13+
func TestDeepseek_Pattern(t *testing.T) {
14+
d := Scanner{}
15+
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
16+
17+
tests := []struct {
18+
name string
19+
input string
20+
want []string
21+
}{
22+
{
23+
name: "valid pattern",
24+
input: `
25+
other.code()
26+
deepseek.Apikey = sk-abc123def456ghi789jkl012mno345pq
27+
`,
28+
want: []string{
29+
"sk-abc123def456ghi789jkl012mno345pq",
30+
},
31+
},
32+
{
33+
name: "invalid pattern",
34+
input: "deepseek.key = sk-abc123invalid",
35+
want: nil,
36+
},
37+
}
38+
39+
for _, test := range tests {
40+
t.Run(test.name, func(t *testing.T) {
41+
matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
42+
if len(matchedDetectors) == 0 {
43+
t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input)
44+
return
45+
}
46+
47+
results, err := d.FromData(context.Background(), false, []byte(test.input))
48+
if err != nil {
49+
t.Errorf("error = %v", err)
50+
return
51+
}
52+
53+
if len(results) != len(test.want) {
54+
if len(results) == 0 {
55+
t.Errorf("did not receive result")
56+
} else {
57+
t.Errorf("expected %d results, only received %d", len(test.want), len(results))
58+
}
59+
return
60+
}
61+
62+
actual := make(map[string]struct{}, len(results))
63+
for _, r := range results {
64+
if len(r.RawV2) > 0 {
65+
actual[string(r.RawV2)] = struct{}{}
66+
} else {
67+
actual[string(r.Raw)] = struct{}{}
68+
}
69+
}
70+
expected := make(map[string]struct{}, len(test.want))
71+
for _, v := range test.want {
72+
expected[v] = struct{}{}
73+
}
74+
75+
if diff := cmp.Diff(expected, actual); diff != "" {
76+
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
77+
}
78+
})
79+
}
80+
}

pkg/engine/defaults/defaults.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ import (
210210
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/datagov"
211211
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/deepai"
212212
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/deepgram"
213+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/deepseek"
213214
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/delighted"
214215
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/demio"
215216
denodeploy "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/deno"
@@ -1059,6 +1060,7 @@ func buildDetectorList() []detectors.Detector {
10591060
// &debounce.Scanner{},
10601061
&deepai.Scanner{},
10611062
&deepgram.Scanner{},
1063+
&deepseek.Scanner{},
10621064
&delighted.Scanner{},
10631065
&demio.Scanner{},
10641066
&denodeploy.Scanner{},

0 commit comments

Comments
 (0)