Skip to content

Commit b18f3d9

Browse files
snieguu and Damian Sniezek authored
feat: langfuse support (#4079)
* feat: langfuse support * Code review fixes * Add PrefixRegex --------- Co-authored-by: Damian Sniezek <[email protected]>
1 parent a451507 commit b18f3d9

File tree

6 files changed

+369
-7
lines changed

6 files changed

+369
-7
lines changed

pkg/detectors/langfuse/langfuse.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package langfuse
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
9+
regexp "github.com/wasilibs/go-re2"
10+
11+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
12+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
14+
)
15+
16+
// Scanner detects Langfuse public/secret API key pairs and can optionally
// verify them with an HTTP request.
type Scanner struct {
17+
// client is the HTTP client used for verification. When it is nil,
// FromData falls back to the package-level defaultClient.
client *http.Client
18+
}
19+
20+
// Ensure the Scanner satisfies the interface at compile time.
21+
var _ detectors.Detector = (*Scanner)(nil)
22+
23+
var (
24+
// defaultClient is the HTTP client FromData uses for verification when
// Scanner.client is nil.
defaultClient = common.SaneHttpClient()
25+
// publicKey matches a "pk-lf-" token followed by lowercase-hex groups of
// 8-4-4-4-12 characters; FromData reads the key from submatch 1. The
// pattern is prefixed with detectors.PrefixRegex for the keyword
// "langfuse" (presumably requiring that keyword near the match; confirm
// against PrefixRegex).
publicKey = regexp.MustCompile(detectors.PrefixRegex([]string{"langfuse"}) + `\b(pk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`)
26+
// secretKey is the same pattern as publicKey but for the "sk-lf-" token;
// FromData reads the key from submatch 1.
secretKey = regexp.MustCompile(detectors.PrefixRegex([]string{"langfuse"}) + `\b(sk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`)
27+
)
28+
29+
func (s Scanner) Keywords() []string {
30+
return []string{"pk-lf-", "sk-lf-"}
31+
}
32+
33+
// FromData will find and optionally verify Langfuse secrets in a given set of bytes.
34+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
35+
dataStr := string(data)
36+
37+
publicKeyMatches := make(map[string]struct{})
38+
for _, match := range publicKey.FindAllStringSubmatch(dataStr, -1) {
39+
publicKeyMatches[match[1]] = struct{}{}
40+
}
41+
42+
secretKeyMatches := make(map[string]struct{})
43+
for _, match := range secretKey.FindAllStringSubmatch(dataStr, -1) {
44+
secretKeyMatches[match[1]] = struct{}{}
45+
}
46+
47+
for pkMatch := range publicKeyMatches {
48+
for skMatch := range secretKeyMatches {
49+
s1 := detectors.Result{
50+
DetectorType: detectorspb.DetectorType_Langfuse,
51+
Raw: []byte(skMatch),
52+
}
53+
54+
if verify {
55+
client := s.client
56+
if client == nil {
57+
client = defaultClient
58+
}
59+
60+
isVerified, verificationErr := verifyMatch(ctx, client, pkMatch, skMatch)
61+
s1.Verified = isVerified
62+
if verificationErr != nil {
63+
s1.SetVerificationError(verificationErr, pkMatch)
64+
}
65+
}
66+
67+
results = append(results, s1)
68+
}
69+
}
70+
71+
return results, nil
72+
}
73+
74+
// verifyMatch checks a Langfuse key pair by issuing a GET request to the
// Langfuse cloud projects endpoint with HTTP basic auth, using pkMatch as the
// username and skMatch as the password.
//
// It returns (true, nil) on HTTP 200 and (false, nil) on HTTP 401. Failure to
// build or send the request, or any other status code, yields (false, err).
func verifyMatch(ctx context.Context, client *http.Client, pkMatch string, skMatch string) (bool, error) {
	const endpoint = "https://cloud.langfuse.com/api/public/projects"

	request, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return false, err
	}
	request.SetBasicAuth(pkMatch, skMatch)

	response, err := client.Do(request)
	if err != nil {
		return false, err
	}
	// Drain the body before closing it so the transport can reuse the
	// connection; errors here are deliberately ignored (best effort).
	defer func() {
		_, _ = io.Copy(io.Discard, response.Body)
		_ = response.Body.Close()
	}()

	status := response.StatusCode
	if status == http.StatusOK {
		return true, nil
	}
	if status == http.StatusUnauthorized {
		return false, nil
	}
	return false, fmt.Errorf("unexpected HTTP response status %d", status)
}
99+
100+
func (s Scanner) Type() detectorspb.DetectorType {
101+
return detectorspb.DetectorType_Langfuse
102+
}
103+
104+
func (s Scanner) Description() string {
105+
return "Langfuse is a platform for building and scaling AI applications. Langfuse API keys can be used to access these services."
106+
}
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
//go:build detectors
2+
// +build detectors
3+
4+
package langfuse
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"testing"
10+
"time"
11+
12+
"github.com/google/go-cmp/cmp"
13+
"github.com/google/go-cmp/cmp/cmpopts"
14+
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
17+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
18+
)
19+
20+
func TestLangfuse_FromChunk(t *testing.T) {
21+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
22+
defer cancel()
23+
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
24+
if err != nil {
25+
t.Fatalf("could not get test secrets from GCP: %s", err)
26+
}
27+
publicKey := testSecrets.MustGetField("LANGFUSE_PUBLIC_KEY")
28+
secretKey := testSecrets.MustGetField("LANGFUSE_SECRET_KEY")
29+
inactivePublicKey := testSecrets.MustGetField("LANGFUSE_INACTIVE_PUBLIC_KEY")
30+
inactiveSecretKey := testSecrets.MustGetField("LANGFUSE_INACTIVE_SECRET_KEY")
31+
32+
33+
type args struct {
34+
ctx context.Context
35+
data []byte
36+
verify bool
37+
}
38+
tests := []struct {
39+
name string
40+
s Scanner
41+
args args
42+
want []detectors.Result
43+
wantErr bool
44+
wantVerificationErr bool
45+
}{
46+
{
47+
name: "found, verified",
48+
s: Scanner{},
49+
args: args{
50+
ctx: context.Background(),
51+
data: []byte(fmt.Sprintf("You can find a langfuse public key %s and langfuse secret key %s within", publicKey, secretKey)),
52+
verify: true,
53+
},
54+
want: []detectors.Result{
55+
{
56+
DetectorType: detectorspb.DetectorType_Langfuse,
57+
Verified: true,
58+
},
59+
},
60+
wantErr: false,
61+
wantVerificationErr: false,
62+
},
63+
{
64+
name: "found, unverified",
65+
s: Scanner{},
66+
args: args{
67+
ctx: context.Background(),
68+
data: []byte(fmt.Sprintf("You can find a langfuse public key %s and langfuse secret key %s within but not valid", inactivePublicKey, inactiveSecretKey)),
69+
verify: true,
70+
},
71+
want: []detectors.Result{
72+
{
73+
DetectorType: detectorspb.DetectorType_Langfuse,
74+
Verified: false,
75+
},
76+
},
77+
wantErr: false,
78+
wantVerificationErr: false,
79+
},
80+
{
81+
name: "not found",
82+
s: Scanner{},
83+
args: args{
84+
ctx: context.Background(),
85+
data: []byte("You cannot find the secret within"),
86+
verify: true,
87+
},
88+
want: nil,
89+
wantErr: false,
90+
wantVerificationErr: false,
91+
},
92+
{
93+
name: "found, would be verified if not for timeout",
94+
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
95+
args: args{
96+
ctx: context.Background(),
97+
data: []byte(fmt.Sprintf("You can find a langfuse public key %s and langfuse secret key %s within", publicKey, secretKey)),
98+
verify: true,
99+
},
100+
want: []detectors.Result{
101+
{
102+
DetectorType: detectorspb.DetectorType_Langfuse,
103+
Verified: false,
104+
},
105+
},
106+
wantErr: false,
107+
wantVerificationErr: true,
108+
},
109+
{
110+
name: "found, verified but unexpected api surface",
111+
s: Scanner{client: common.ConstantResponseHttpClient(404, "")},
112+
args: args{
113+
ctx: context.Background(),
114+
data: []byte(fmt.Sprintf("You can find a langfuse public key %s and langfuse secret key %s within", publicKey, secretKey)),
115+
verify: true,
116+
},
117+
want: []detectors.Result{
118+
{
119+
DetectorType: detectorspb.DetectorType_Langfuse,
120+
Verified: false,
121+
},
122+
},
123+
wantErr: false,
124+
wantVerificationErr: true,
125+
},
126+
}
127+
for _, tt := range tests {
128+
t.Run(tt.name, func(t *testing.T) {
129+
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
130+
if (err != nil) != tt.wantErr {
131+
t.Errorf("Langfuse.FromData() error = %v, wantErr %v", err, tt.wantErr)
132+
return
133+
}
134+
for i := range got {
135+
if len(got[i].Raw) == 0 {
136+
t.Fatalf("no raw secret present: \n %+v", got[i])
137+
}
138+
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
139+
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
140+
}
141+
}
142+
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError")
143+
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
144+
t.Errorf("Langfuse.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
145+
}
146+
})
147+
}
148+
}
149+
150+
func BenchmarkFromData(benchmark *testing.B) {
151+
ctx := context.Background()
152+
s := Scanner{}
153+
for name, data := range detectors.MustGetBenchmarkData() {
154+
benchmark.Run(name, func(b *testing.B) {
155+
b.ResetTimer()
156+
for n := 0; n < b.N; n++ {
157+
_, err := s.FromData(ctx, false, data)
158+
if err != nil {
159+
b.Fatal(err)
160+
}
161+
}
162+
})
163+
}
164+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package langfuse
2+
3+
import (
4+
"context"
5+
"github.com/google/go-cmp/cmp"
6+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
7+
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
8+
"testing"
9+
)
10+
11+
func TestLangfuse_Pattern(t *testing.T) {
12+
d := Scanner{}
13+
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
14+
tests := []struct {
15+
name string
16+
input string
17+
want []string
18+
}{
19+
{
20+
name: "typical pattern",
21+
input: `langfuse_public_key = pk-lf-00000000-0000-0000-0000-000000000000
22+
langfuse_secret_key = sk-lf-00000000-0000-0000-0000-000000000000`,
23+
want: []string{"sk-lf-00000000-0000-0000-0000-000000000000"},
24+
},
25+
{
26+
name: "finds all matches",
27+
input: `langfuse_public_key1 = pk-lf-00000000-0000-0000-0000-000000000000
28+
langfuse_secret_key1 = sk-lf-00000000-0000-0000-0000-000000000000
29+
langfuse_public_key2 = pk-lf-11111111-1111-1111-1111-111111111111
30+
langfuse_secret_key2 = sk-lf-11111111-1111-1111-1111-111111111111`,
31+
want: []string{"sk-lf-00000000-0000-0000-0000-000000000000",
32+
"sk-lf-11111111-1111-1111-1111-111111111111",
33+
"sk-lf-11111111-1111-1111-1111-111111111111",
34+
"sk-lf-00000000-0000-0000-0000-000000000000"},
35+
},
36+
{
37+
name: "invalid pattern",
38+
input: `langfuse_public_key1 = pk-lf-invalid
39+
langfuse_secret_key1 = sk-lf-invalid`,
40+
want: []string{},
41+
},
42+
}
43+
44+
for _, test := range tests {
45+
t.Run(test.name, func(t *testing.T) {
46+
matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
47+
if len(matchedDetectors) == 0 {
48+
t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input)
49+
return
50+
}
51+
52+
results, err := d.FromData(context.Background(), false, []byte(test.input))
53+
if err != nil {
54+
t.Errorf("error = %v", err)
55+
return
56+
}
57+
58+
if len(results) != len(test.want) {
59+
if len(results) == 0 {
60+
t.Errorf("did not receive result")
61+
} else {
62+
t.Errorf("expected %d results, only received %d", len(test.want), len(results))
63+
}
64+
return
65+
}
66+
67+
actual := make(map[string]struct{}, len(results))
68+
for _, r := range results {
69+
if len(r.RawV2) > 0 {
70+
actual[string(r.RawV2)] = struct{}{}
71+
} else {
72+
actual[string(r.Raw)] = struct{}{}
73+
}
74+
}
75+
expected := make(map[string]struct{}, len(test.want))
76+
for _, v := range test.want {
77+
expected[v] = struct{}{}
78+
}
79+
80+
if diff := cmp.Diff(expected, actual); diff != "" {
81+
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
82+
}
83+
})
84+
}
85+
}

pkg/engine/defaults/defaults.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ import (
399399
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/kraken"
400400
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/kucoin"
401401
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/kylas"
402+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/langfuse"
402403
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/languagelayer"
403404
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/larksuite"
404405
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/larksuiteapikey"
@@ -1247,6 +1248,7 @@ func buildDetectorList() []detectors.Detector {
12471248
&kraken.Scanner{},
12481249
&kucoin.Scanner{},
12491250
&kylas.Scanner{},
1251+
&langfuse.Scanner{},
12501252
&languagelayer.Scanner{},
12511253
&larksuite.Scanner{},
12521254
&larksuiteapikey.Scanner{},

0 commit comments

Comments
 (0)