Skip to content

Commit 1848ff6

Browse files
authored
[Feat] Detector implementation for Azure SAS Tokens (#3963)
* azure sas token detector implementation * regex optimization and some refactoring of code. * fixed breaking changes. * change regex to support allowed Ip address * remove stdout printing statements. used background context for individual test. * implemented CustomFalsePositiveChecker for Azure SAS detector. * raise verification error on restricted ip address * cache and skip invalid hosts * some refactoring * fix spelling mistake * added more positive and negative test cases * add test inputs directly in tests instead of declaring separately
1 parent 3d197de commit 1848ff6

File tree

4 files changed

+417
-0
lines changed

4 files changed

+417
-0
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
package azuresastoken
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"io"
8+
"net/http"
9+
"strings"
10+
11+
regexp "github.com/wasilibs/go-re2"
12+
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
14+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
15+
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
17+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
18+
)
19+
20+
type Scanner struct {
21+
client *http.Client
22+
detectors.DefaultMultiPartCredentialProvider
23+
}
24+
25+
var _ detectors.Detector = (*Scanner)(nil)
26+
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
27+
28+
var (
29+
defaultClient = common.SaneHttpClient()
30+
31+
// microsoft storage resource naming rules: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules#microsoftstorage:~:text=format%3A%0AVaultName_KeyName_KeyVersion.-,Microsoft.Storage,-Expand%20table
32+
urlPat = regexp.MustCompile(`https://([a-zA-Z0-9][a-z0-9_-]{1,22}[a-zA-Z0-9])\.blob\.core\.windows\.net/[a-z0-9]([a-z0-9-]{1,61}[a-z0-9])?(?:/[a-zA-Z0-9._-]+)*`)
33+
34+
keyPat = regexp.MustCompile(
35+
detectors.PrefixRegex([]string{"azure", "sas", "token", "blob", ".blob.core.windows.net"}) +
36+
`(sp=[racwdli]+&st=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z&se=\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z(?:&sip=\d{1,3}(?:\.\d{1,3}){3}(?:-\d{1,3}(?:\.\d{1,3}){3})?)?(&spr=https)?(?:,https)?&sv=\d{4}-\d{2}-\d{2}&sr=[bcfso]&sig=[a-zA-Z0-9%]{10,})`)
37+
38+
invalidStorageAccounts = simple.NewCache[struct{}]()
39+
40+
noSuchHostErr = errors.New("no such host")
41+
)
42+
43+
func (s Scanner) Keywords() []string {
44+
return []string{
45+
"azure",
46+
".blob.core.windows.net",
47+
}
48+
}
49+
50+
func (s Scanner) Type() detectorspb.DetectorType {
51+
return detectorspb.DetectorType_AzureSasToken
52+
}
53+
54+
func (s Scanner) Description() string {
55+
return "An Azure Shared Access Signature (SAS) token is a time-limited, permission-based URL query string that grants secure, granular access to Azure Storage resources (e.g., blobs, containers, files) without exposing account keys."
56+
}
57+
58+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
59+
logger := logContext.AddLogger(ctx).Logger().WithName("azuresas")
60+
61+
dataStr := string(data)
62+
63+
// deduplicate urlMatches
64+
urlMatchesUnique := make(map[string]string)
65+
for _, urlMatch := range urlPat.FindAllStringSubmatch(dataStr, -1) {
66+
urlMatchesUnique[urlMatch[0]] = urlMatch[1]
67+
}
68+
69+
// deduplicate keyMatches
70+
keyMatchesUnique := make(map[string]struct{})
71+
for _, keyMatch := range keyPat.FindAllStringSubmatch(dataStr, -1) {
72+
keyMatchesUnique[keyMatch[1]] = struct{}{}
73+
}
74+
75+
// Check results.
76+
UrlLoop:
77+
for url, storageAccount := range urlMatchesUnique {
78+
for key := range keyMatchesUnique {
79+
s1 := detectors.Result{
80+
DetectorType: detectorspb.DetectorType_AzureSasToken,
81+
Raw: []byte(url),
82+
RawV2: []byte(url + key),
83+
}
84+
85+
if verify {
86+
if invalidStorageAccounts.Exists(storageAccount) {
87+
logger.V(3).Info("Skipping invalid storage account", "storage account", storageAccount)
88+
break
89+
}
90+
91+
client := s.client
92+
if client == nil {
93+
client = defaultClient
94+
}
95+
96+
isVerified, verificationErr := verifyMatch(ctx, client, url, key, true)
97+
s1.Verified = isVerified
98+
99+
if verificationErr != nil {
100+
if errors.Is(verificationErr, noSuchHostErr) {
101+
invalidStorageAccounts.Set(storageAccount, struct{}{})
102+
continue UrlLoop
103+
}
104+
s1.SetVerificationError(verificationErr, key)
105+
}
106+
}
107+
108+
results = append(results, s1)
109+
}
110+
}
111+
112+
return results, nil
113+
}
114+
115+
func (s Scanner) IsFalsePositive(_ detectors.Result) (bool, string) {
116+
return false, ""
117+
}
118+
119+
func verifyMatch(ctx context.Context, client *http.Client, url, key string, retryOn403 bool) (bool, error) {
120+
urlWithToken := url + "?" + key
121+
122+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlWithToken, nil)
123+
if err != nil {
124+
return false, err
125+
}
126+
127+
res, err := client.Do(req)
128+
if err != nil {
129+
if strings.Contains(err.Error(), "no such host") {
130+
return false, noSuchHostErr
131+
}
132+
return false, err
133+
}
134+
defer res.Body.Close()
135+
136+
bodyBytes, err := io.ReadAll(res.Body)
137+
if err != nil {
138+
return false, err
139+
}
140+
141+
switch res.StatusCode {
142+
case http.StatusOK:
143+
return true, nil
144+
case http.StatusForbidden:
145+
if retryOn403 && strings.Contains(string(bodyBytes), "Signature did not match") {
146+
// need to add additional query parameters for container urls
147+
// https://stackoverflow.com/questions/25038429/azure-shared-access-signature-signature-did-not-match
148+
return verifyMatch(ctx, client, url, key+"&comp=list&restype=container", false)
149+
}
150+
if strings.Contains(string(bodyBytes), "AuthorizationFailure") && strings.Contains(key, "&sip=") {
151+
return false, fmt.Errorf("SAS token is restricted to specific IP addresses")
152+
}
153+
return false, nil
154+
default:
155+
return false, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
156+
}
157+
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
//go:build detectors
2+
// +build detectors
3+
4+
package azuresastoken
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"testing"
10+
"time"
11+
12+
"github.com/google/go-cmp/cmp"
13+
"github.com/google/go-cmp/cmp/cmpopts"
14+
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
17+
18+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
19+
)
20+
21+
func TestAzureSasToken_FromChunk(t *testing.T) {
22+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
23+
defer cancel()
24+
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
25+
if err != nil {
26+
t.Fatalf("could not get test secrets from GCP: %s", err)
27+
}
28+
url := testSecrets.MustGetField("AZURESASTOKEN_URL")
29+
secret := testSecrets.MustGetField("AZURESASTOKEN")
30+
inactiveSecret := testSecrets.MustGetField("AZURESASTOKEN_INACTIVE")
31+
32+
type args struct {
33+
ctx context.Context
34+
data []byte
35+
verify bool
36+
}
37+
tests := []struct {
38+
name string
39+
s Scanner
40+
args args
41+
want []detectors.Result
42+
wantErr bool
43+
wantVerificationErr bool
44+
}{
45+
{
46+
name: "found, verified",
47+
s: Scanner{},
48+
args: args{
49+
ctx: context.Background(),
50+
data: []byte(fmt.Sprintf("You can find a azure sas url %s and token %s within", url, secret)),
51+
verify: true,
52+
},
53+
want: []detectors.Result{
54+
{
55+
DetectorType: detectorspb.DetectorType_AzureSasToken,
56+
Verified: true,
57+
},
58+
},
59+
wantErr: false,
60+
wantVerificationErr: false,
61+
},
62+
{
63+
name: "found, unverified",
64+
s: Scanner{},
65+
args: args{
66+
ctx: context.Background(),
67+
data: []byte(fmt.Sprintf("You can find a azure sas url %s and token %s within but not valid", url, inactiveSecret)), // the secret would satisfy the regex but not pass validation
68+
verify: true,
69+
},
70+
want: []detectors.Result{
71+
{
72+
DetectorType: detectorspb.DetectorType_AzureSasToken,
73+
Verified: false,
74+
},
75+
},
76+
wantErr: false,
77+
wantVerificationErr: false,
78+
},
79+
{
80+
name: "not found",
81+
s: Scanner{},
82+
args: args{
83+
ctx: context.Background(),
84+
data: []byte("You cannot find the secret within"),
85+
verify: true,
86+
},
87+
want: nil,
88+
wantErr: false,
89+
wantVerificationErr: false,
90+
},
91+
}
92+
for _, tt := range tests {
93+
t.Run(tt.name, func(t *testing.T) {
94+
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
95+
if (err != nil) != tt.wantErr {
96+
t.Errorf("AzureSasToken.FromData() error = %v, wantErr %v", err, tt.wantErr)
97+
return
98+
}
99+
for i := range got {
100+
if len(got[i].Raw) == 0 {
101+
t.Fatalf("no raw secret present: \n %+v", got[i])
102+
}
103+
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
104+
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
105+
}
106+
}
107+
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "Redacted", "verificationError")
108+
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
109+
t.Errorf("AzureSasToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
110+
}
111+
})
112+
}
113+
}
114+
115+
func BenchmarkFromData(benchmark *testing.B) {
116+
ctx := context.Background()
117+
s := Scanner{}
118+
for name, data := range detectors.MustGetBenchmarkData() {
119+
benchmark.Run(name, func(b *testing.B) {
120+
b.ResetTimer()
121+
for n := 0; n < b.N; n++ {
122+
_, err := s.FromData(ctx, false, data)
123+
if err != nil {
124+
b.Fatal(err)
125+
}
126+
}
127+
})
128+
}
129+
}

0 commit comments

Comments
 (0)