Skip to content

[Testing] Added Detector-level cache to store verification results #4314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
15 changes: 15 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ import (
"errors"
"math/big"
"net/url"
"strconv"
"strings"
"unicode"

"github.com/cespare/xxhash/v2"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
Expand Down Expand Up @@ -316,3 +318,16 @@ func ParseURLAndStripPathAndParams(u string) (*url.URL, error) {
parsedURL.RawQuery = ""
return parsedURL, nil
}

// CachedVerificationResult holds the result of a secret verification.
// It includes whether the secret was verified and any error that occurred during verification.
// It is a plain value type with no behavior of its own.
type CachedVerificationResult struct {
// Verified records whether the secret was verified.
Verified bool
// VerificationErr is the error, if any, that occurred during verification.
VerificationErr error
}

// ComputeXXHash hashes secret with xxhash.Sum64 and returns the resulting
// 64-bit value formatted as a base-10 string.
//
// The string is intended as a compact cache key. Because the hash is only
// 64 bits wide, two different secrets can in principle map to the same key.
func ComputeXXHash(secret []byte) string {
	sum := xxhash.Sum64(secret)
	return strconv.FormatUint(sum, 10)
}
86 changes: 69 additions & 17 deletions pkg/detectors/github/v2/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ package github
import (
"context"
"fmt"
"net/http"
"time"

regexp "github.com/wasilibs/go-re2"
"golang.org/x/sync/singleflight"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
v1 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/github/v1"
Expand All @@ -28,6 +32,7 @@ func (s Scanner) Version() int {
// CloudEndpoint returns the GitHub API base URL, "https://api.github.com".
func (Scanner) CloudEndpoint() string {
	const endpoint = "https://api.github.com"
	return endpoint
}

var (
client = common.SaneHttpClient()
// Oauth token
// https://developer.github.com/v3/#oauth2-token-sent-in-a-header
// Token type list:
Expand All @@ -37,8 +42,21 @@ var (

// TODO: Oauth2 client_id and client_secret
// https://developer.github.com/v3/#oauth2-keysecret

credsCache = simple.NewCache(simple.WithExpirationInterval[detectors.CachedVerificationResult](1*time.Hour),
simple.WithPurgeInterval[detectors.CachedVerificationResult](1*time.Hour))

verificationGroup = new(singleflight.Group)
)

// Type reports the detector type of this scanner, which is always
// detectorspb.DetectorType_Github.
func (s Scanner) Type() detectorspb.DetectorType {
	detectorType := detectorspb.DetectorType_Github
	return detectorType
}

// Description returns a fixed, human-readable summary of what this detector
// looks for.
func (s Scanner) Description() string {
	const description = "GitHub is a platform for version control and collaboration. Personal access tokens (PATs) can be used to access and modify repositories and other resources."
	return description
}

// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
func (s Scanner) Keywords() []string {
Expand All @@ -53,7 +71,6 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result

for _, match := range matches {
// First match is entire regex, second is the first group.

token := match[1]

s1 := detectors.Result{
Expand All @@ -67,17 +84,8 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}

if verify {
client := common.SaneHttpClient()

isVerified, userResponse, headers, err := s.VerifyGithub(ctx, client, token)
s1.Verified = isVerified
s1.SetVerificationError(err, token)

if userResponse != nil {
v1.SetUserResponse(userResponse, &s1)
}
if headers != nil {
v1.SetHeaderInfo(headers, &s1)
if err := s.verifyOrGetCachedResult(ctx, client, token, &s1); err != nil {
return results, err
}
}

Expand All @@ -87,10 +95,54 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
return
}

// Type reports the detector type of this scanner, which is always
// detectorspb.DetectorType_Github.
func (s Scanner) Type() detectorspb.DetectorType {
	detectorType := detectorspb.DetectorType_Github
	return detectorType
}
// verifyOrGetCachedResult checks the cache for a verification result for the given secret and updates the result's verification fields.
// If no cached result exists, it verifies the secret using the GitHub API and caches the result, using singleflight to prevent concurrent verifications of the same secret.
func (s Scanner) verifyOrGetCachedResult(ctx context.Context, client *http.Client, token string, result *detectors.Result) error {
secretHash := detectors.ComputeXXHash(result.Raw)

func (s Scanner) Description() string {
return "GitHub is a platform for version control and collaboration. Personal access tokens (PATs) can be used to access and modify repositories and other resources."
_, err, shared := verificationGroup.Do(secretHash, func() (interface{}, error) {
// check if result for the secret is cached already
credData, exist := credsCache.Get(secretHash)
if exist {
result.Verified = credData.Verified
result.SetVerificationError(credData.VerificationErr)
result.VerificationFromCache = true

return nil, nil
}

// if not cached, verify the secret using github API
isVerified, userResponse, headers, err := s.VerifyGithub(ctx, client, token)
result.Verified = isVerified
result.SetVerificationError(err, token)

if userResponse != nil {
v1.SetUserResponse(userResponse, result)
}
if headers != nil {
v1.SetHeaderInfo(headers, result)
}

credsCache.Set(secretHash, detectors.CachedVerificationResult{
Verified: result.Verified,
VerificationErr: result.VerificationError(),
})

return nil, nil
})
if err != nil {
return err
}

// for shared results, update result fields from cache
// as first request for a secret always set result in cache, in case of shared the cache must exist
if shared {
if credData, exists := credsCache.Get(secretHash); exists {
result.Verified = credData.Verified
result.SetVerificationError(credData.VerificationErr)
result.VerificationFromCache = true
}
}

return nil
}
4 changes: 3 additions & 1 deletion pkg/output/plain.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata)
yellowPrinter.Printf("Verification issue: %s\n", out.VerificationError)
}
}

if r.VerificationFromCache {
cyanPrinter.Print("(Verification info cached)\n")
cyanPrinter.Print("(🔍 Using cached verification)\n")
}

printer.Printf("Detector Type: %s\n", out.DetectorType)
printer.Printf("Decoder Type: %s\n", out.DecoderType)
printer.Printf("Raw result: %s\n", whitePrinter.Sprint(out.Raw))
Expand Down
Loading