diff --git a/pkg/detectors/ldap/ldap.go b/pkg/detectors/ldap/ldap.go index fea2c0d3e12f..be16b0ccf441 100644 --- a/pkg/detectors/ldap/ldap.go +++ b/pkg/detectors/ldap/ldap.go @@ -6,6 +6,7 @@ import ( "fmt" "net" "net/url" + "sort" "strings" "time" @@ -28,109 +29,419 @@ func init() { } var ( - // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. + // Basic patterns for individual credential components. + // These are used when structured patterns don't match. + + // uriPat matches LDAP and LDAPS URIs. + // Examples: + // ldap://127.0.0.1:389 + // ldap://127.0.0.1 + // ldap://mydomain.test + // ldaps://[fe80:4049:92ff:fe44:4bd1]:5060 + // ldap://[fe80::4bd1]:5060 + // ldap://ds.example.com:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe) uriPat = regexp.MustCompile(`\b(?i)ldaps?://[\S]+\b`) - // ldap://127.0.0.1:389 - // ldap://127.0.0.1 - // ldap://mydomain.test - // ldaps://[fe80:4049:92ff:fe44:4bd1]:5060 - // ldap://[fe80::4bd1]:5060 - // ldap://ds.example.com:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe) + + // usernamePat matches common username patterns in configuration. usernamePat = regexp.MustCompile(detectors.PrefixRegex([]string{"user", "bind"}) + `["']([a-zA-Z=,]{4,150})["']`) + + // passwordPat matches password patterns with context. passwordPat = regexp.MustCompile(detectors.PrefixRegex([]string{"pass"}) + `["']([\S]{4,48})["']`) + // High-confidence patterns that capture complete credential sets. + // These patterns have very low false positive rates. + + // iadPat matches Windows IAD/ADSI OpenDSObject calls. // https://learn.microsoft.com/en-us/windows/win32/api/iads/nf-iads-iadsopendsobject-opendsobject?redirectedfrom=MSDN - // I.E. 
Set ou = dso.OpenDSObject("LDAP://DC.business.com/OU=IT,DC=Business,DC=com", "Business\administrator", "Pa$$word01", 1) + // Example: Set ou = dso.OpenDSObject("LDAP://DC.business.com/OU=IT,DC=Business,DC=com", "Business\administrator", "Pa$$word01", 1) iadPat = regexp.MustCompile(`OpenDSObject\(\"(?i)(ldaps?://[\S]+)\", ?\"([\S]+)\", ?\"([\S]+)\",[ \d]+\)`) + + // configBlockPat matches common config file formats where credentials appear together. + // This pattern is flexible but requires all three components in proximity. + // Example: + // ldap://server.com + // user='cn=admin,dc=example,dc=com' + // password='secret' + configBlockPat = regexp.MustCompile(`(?s)(?i)(ldaps?://[^\s"']+).*?(?:user|bind)[^"']*["']([^"'()]+)["'].*?pass[^"']*["']([^"']+)["']`) + + // connectionStringPat matches semicolon/comma-delimited connection strings. + // Example: ldap://server.com;user=admin;pass=secret123 + connectionStringPat = regexp.MustCompile(`(?i)ldap://([^;,\s]+)[;,].*?user[=:]([^;,\s]+)[;,].*?pass[=:]([^;,\s]+)`) + + // yamlPat matches YAML-style LDAP configuration. + // Example: + // ldap: + // url: ldaps://ldap.example.com + // bind_dn: "cn=admin,dc=example,dc=com" + // password: "secretpassword" + yamlPat = regexp.MustCompile(`(?i)ldap:\s*\n\s*url:\s*(ldaps?://[^\s]+)\s*\n\s*bind_?dn:\s*"?([^\n"]+)"?\s*\n\s*password:\s*"?([^\n"]+)"?`) + + // envPat matches environment variable patterns. + // Example: + // LDAP_URL=ldaps://ldap.example.com + // LDAP_BIND_DN=cn=service,dc=example,dc=com + // LDAP_PASSWORD=servicepass123 + envPat = regexp.MustCompile(`(?i)LDAP_URL=(ldaps?://[^\s]+).*?LDAP_BIND_DN=([^\s]+).*?LDAP_PASSWORD=([^\s]+)`) + + // High-confidence pattern processors. 
+ patterns = [...]struct { + name string + pattern *regexp.Regexp + extract func([]string) (uri, user, pass string) + }{ + { + name: "IAD/ADSI", + pattern: iadPat, + extract: func(m []string) (string, string, string) { return m[1], m[2], m[3] }, + }, + { + name: "ConfigBlock", + pattern: configBlockPat, + extract: func(m []string) (string, string, string) { return m[1], m[2], m[3] }, + }, + { + name: "ConnectionString", + pattern: connectionStringPat, + extract: func(m []string) (string, string, string) { + // Reconstruct full URI since pattern only captures the part after the scheme + return "ldap://" + m[1], m[2], m[3] + }, + }, + { + name: "YAML", + pattern: yamlPat, + extract: func(m []string) (string, string, string) { return m[1], m[2], m[3] }, + }, + { + name: "EnvironmentVariables", + pattern: envPat, + extract: func(m []string) (string, string, string) { return m[1], m[2], m[3] }, + }, + } ) -// Keywords are used for efficiently pre-filtering chunks. -// Use identifiers in the secret preferably, or the provider name. +// Keywords returns a small set of substrings that quickly hint that an +// input might contain LDAP-related material. +// +// The surrounding scanner uses these keywords as a low-cost pre-filter +// before handing the data to this much heavier detector. +// The list is therefore intentionally minimal but distinctive. func (s Scanner) Keywords() []string { return []string{"ldaps://", "ldap://"} } -// FromData will find and optionally verify Ldap secrets in a given set of bytes. +// createDeduplicationKey creates a normalized key for deduplication. +// +// The scanner purposely applies several families of regular expressions that can +// overlap: +// +// - High-confidence, format-aware patterns ( iadPat, configBlockPat, … ) +// - The more generic proximity combinator that glues together individually +// detected URI / user / password fragments. 
+// +// Both approaches can surface *the same* credential set, but not necessarily +// with identical strings – most notably the URI may differ in insignificant +// ways that a url.Parse round trip removes (such as the casing of the scheme). +// +// Example +// +// # YAML-style config (matched by yamlPat) +// url: LDAP://directory.example.com +// +// # Later in the same file an inline connection string (matched by +// # connectionStringPat) refers to the very same server: +// ldap://directory.example.com;user=admin;pass=secret +// +// In both cases the credential triple is identical from a security point of +// view, yet string comparison would treat them as different unless the URI is +// normalized first. By running the URI through url.Parse and serializing it +// back with .String() we collapse such cosmetic differences and are able to +// deduplicate results coming from different detection paths. +func createDeduplicationKey(uri, username, password string) string { + if parsedURL, err := url.Parse(uri); err == nil { + uri = parsedURL.String() + } + return strings.Join([]string{uri, username, password}, "\t") +} + +// FromData searches the supplied byte slice for LDAP credential sets. +// +// The scan happens in two passes: +// +// 1. High-confidence regular expressions that match an entire +// URI / username / password triple in a single shot. +// These are cheap and precise and therefore executed first. +// +// 2. A proximity-based heuristic that first captures individual URIs, +// usernames, and passwords and then stitches the nearest triples +// together. This pass is more expensive and may yield false +// positives, so it is executed only after step 1. +// +// When verify is true the detector will attempt to bind to the discovered +// LDAP endpoint to confirm that the credentials are valid. Verification +// is best-effort and may be skipped when the context is canceled. 
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) - // Check for matches in the URI + username + password format - uriMatches := uriPat.FindAllString(dataStr, -1) - for _, uri := range uriMatches { - ldapURL, err := url.Parse(uri) - if err != nil { - continue - } + // Key format: "uri\tusername\tpassword" + found := make(map[string]struct{}) - usernameMatches := usernamePat.FindAllStringSubmatch(dataStr, -1) - for _, username := range usernameMatches { - passwordMatches := passwordPat.FindAllStringSubmatch(dataStr, -1) - for _, password := range passwordMatches { - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_LDAP, - Raw: []byte(strings.Join([]string{ldapURL.String(), username[1], password[1]}, "\t")), - } + // 1. Process high-confidence patterns first (complete credential sets). + highConfidenceResults := s.processHighConfidencePatterns(ctx, dataStr, found, verify) + results = append(results, highConfidenceResults...) - if verify { - verificationErr := verifyLDAP(username[1], password[1], ldapURL) - s1.Verified = verificationErr == nil - if !isErrDeterminate(verificationErr) { - s1.SetVerificationError(verificationErr, password[1]) - } - } + // 2. Process proximity-based combinations for any additional credentials. + proximityResults := s.findProximityCombinations(ctx, data, found, verify) + results = append(results, proximityResults...) + + return results, nil +} + +// processHighConfidencePatterns handles patterns that capture complete credential sets. +// These patterns match specific configuration formats and have very low false positive rates. 
+func (s Scanner) processHighConfidencePatterns(ctx context.Context, dataStr string, found map[string]struct{}, verify bool) []detectors.Result { + var results []detectors.Result + + for _, p := range patterns { + matches := p.pattern.FindAllStringSubmatch(dataStr, -1) + for _, match := range matches { + select { + case <-ctx.Done(): + return results + default: + } - results = append(results, s1) + uri, user, pass := p.extract(match) + credSet := CredentialSet{ + uri: Match{value: uri}, + username: Match{value: user}, + pwd: Match{value: pass}, + score: 0, // High confidence patterns get score 0 + } + + if result := s.createAndVerifyResult(ctx, credSet, verify); result != nil { + key := createDeduplicationKey(uri, user, pass) + if _, ok := found[key]; !ok { + found[key] = struct{}{} + results = append(results, *result) + } } } } - // Check for matches for the IAD library format - iadMatches := iadPat.FindAllStringSubmatch(dataStr, -1) - for _, iad := range iadMatches { - uri := iad[1] - username := iad[2] - password := iad[3] + return results +} - ldapURL, err := url.Parse(uri) - if err != nil { - continue +// findProximityCombinations handles proximity-based matching logic +// The proximity combinator is the work-horse heuristic used once the +// cheaper "high-confidence" expressions have been exhausted. +// It deliberately limits the number of candidate triples evaluated +// (maxCombinations) to avoid pathological runtimes on large files. +func (s Scanner) findProximityCombinations( + ctx context.Context, + data []byte, + found map[string]struct{}, + verify bool, +) []detectors.Result { + var results []detectors.Result + + uris := findMatchesWithPosition(uriPat, data) + usernames := findMatchesWithPosition(usernamePat, data) + passwords := findMatchesWithPosition(passwordPat, data) + + // Skip if we don't have all components. 
+ if len(uris) == 0 || len(usernames) == 0 || len(passwords) == 0 { + return results + } + + // Find optimal combinations based on proximity. + combinations := findOptimalCombinations(uris, usernames, passwords) + + for _, combo := range combinations { + select { + case <-ctx.Done(): + return results + default: } - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_LDAP, - Raw: []byte(strings.Join([]string{ldapURL.String(), username, password}, "\t")), + if result := s.createAndVerifyResult(ctx, combo, verify); result != nil { + key := createDeduplicationKey(combo.uri.value, combo.username.value, combo.pwd.value) + if _, ok := found[key]; !ok { + found[key] = struct{}{} + results = append(results, *result) + + if len(results) >= maxResults { + break + } + } + } + } + + return results +} + +// Match records a single regular-expression capture along with its byte +// offsets within the scanned text. +// It is used as a lightweight value object when computing proximity between +// the URI, username, and password fragments that may form an LDAP credential +// set. +type Match struct { + // value holds the substring captured by the regular expression. + value string + + // start is the starting byte offset of value in the original byte slice. + start int + + // end is the exclusive ending byte offset of value in the original byte + // slice. + end int +} + +// CredentialSet groups the three components—URI, username, and password—that +// together may constitute a valid LDAP credential discovered in source code. +// The zero value is not meaningful; instances are produced internally by +// findOptimalCombinations and processHighConfidencePatterns. +type CredentialSet struct { + // uri is the LDAP endpoint captured from the scanned text. + uri Match + + // username is the bind DN or simple username captured from the scanned + // text. + username Match + + // pwd is the credential associated with username. 
+ pwd Match + + // score ranks this set by proximity; lower values indicate that the three + // fragments were located nearer to each other in the source and therefore + // have a higher likelihood of forming a real credential. + score int +} + +// Configuration constants. +const ( + // maxCombinations limits how many proximity-based combinations are kept after scoring. + // Every triple is still scored first; the cap bounds the result creation and verification that follow. + maxCombinations = 20 + + // maxProximity is the maximum byte span covering all three credential components + // for them to be considered related. Larger values increase false positives. + maxProximity = 200 + + // maxResults is a safety limit for proximity combinations only. + // High-confidence patterns are not subject to this limit. + maxResults = 15 +) + +// findMatchesWithPosition finds all regex matches and returns their positions. +func findMatchesWithPosition(pattern *regexp.Regexp, data []byte) []Match { + dataStr := string(data) + matches := pattern.FindAllStringSubmatchIndex(dataStr, -1) + var results []Match + + for _, match := range matches { + if len(match) >= 4 { // Has capture group + results = append(results, Match{ + value: dataStr[match[2]:match[3]], + start: match[2], + end: match[3], + }) + } else if len(match) >= 2 { // Full match only + results = append(results, Match{ + value: dataStr[match[0]:match[1]], + start: match[0], + end: match[1], + }) } + } + return results +} - if verify { - verificationError := verifyLDAP(username, password, ldapURL) +// findOptimalCombinations finds the best credential combinations based on proximity. 
+func findOptimalCombinations(uris, usernames, passwords []Match) []CredentialSet { + var combinations []CredentialSet - s1.Verified = verificationError == nil - if !isErrDeterminate(verificationError) { - s1.SetVerificationError(verificationError, password) + for _, uri := range uris { + for _, username := range usernames { + for _, password := range passwords { + score := calculateProximityScore(uri, username, password) + + // Skip combinations that are too far apart. + if score > maxProximity { + continue + } + + combinations = append(combinations, CredentialSet{ + uri: uri, + username: username, + pwd: password, + score: score, + }) } } + } + + // Sort by proximity score (lower is better). + sort.Slice(combinations, func(i, j int) bool { + return combinations[i].score < combinations[j].score + }) - results = append(results, s1) + if len(combinations) > maxCombinations { + combinations = combinations[:maxCombinations] } - return results, nil + return combinations } -func isErrDeterminate(err error) bool { - switch e := err.(type) { - case *ldap.Error: - switch e.Err.(type) { - case *net.OpError: - return false +// calculateProximityScore calculates how close together the credential components are. +func calculateProximityScore(uri, username, password Match) int { + positions := []int{uri.start, uri.end, username.start, username.end, password.start, password.end} + sort.Ints(positions) + + // Use the span from first to last position as the score. + return positions[len(positions)-1] - positions[0] +} + +// createAndVerifyResult creates and optionally verifies a detectors.Result. 
+func (s Scanner) createAndVerifyResult(ctx context.Context, credSet CredentialSet, verify bool) *detectors.Result { + ldapURL, err := url.Parse(credSet.uri.value) + if err != nil { + return nil + } + + result := detectors.Result{ + DetectorType: detectorspb.DetectorType_LDAP, + Raw: []byte(strings.Join([]string{ldapURL.String(), credSet.username.value, credSet.pwd.value}, "\t")), + } + + if verify { + select { + case <-ctx.Done(): + return &result + default: + } + + verificationErr := verifyLDAP(credSet.username.value, credSet.pwd.value, ldapURL) + result.Verified = verificationErr == nil + if !isErrDeterminate(verificationErr) { + result.SetVerificationError(verificationErr, credSet.pwd.value) } } - return true + return &result } +// verifyLDAP performs the minimal set of network operations required to +// decide whether the credentials are valid: +// +// - Plain LDAP → Bind, optional STARTTLS + Bind +// - LDAPS (TLS) → Bind over an opportunistically insecure TLS config. +// +// We purposefully set InsecureSkipVerify because scanners very often run +// in environments where the target's certificate chain is not trusted. +// The objective is simply to confirm that the credentials *could* be +// used, not to validate the server's identity. 
func verifyLDAP(username, password string, ldapURL *url.URL) error { - // Tests with non-TLS, TLS, and STARTTLS - uri := ldapURL.String() switch ldapURL.Scheme { @@ -141,6 +452,7 @@ func verifyLDAP(username, password string, ldapURL *url.URL) error { return err } defer l.Close() + // Non-TLS verify err = l.Bind(username, password) if err == nil { @@ -166,13 +478,28 @@ func verifyLDAP(username, password string, ldapURL *url.URL) error { default: return fmt.Errorf("unknown ldap scheme %q", ldapURL.Scheme) } +} +func isErrDeterminate(err error) bool { + switch e := err.(type) { + case *ldap.Error: + switch e.Err.(type) { + case *net.OpError: + return false + } + } + return true } +// Type satisfies the detectors.Detector interface +// and returns the enumerated protobuf value that identifies +// this detector as the LDAP detector. func (s Scanner) Type() detectorspb.DetectorType { return detectorspb.DetectorType_LDAP } +// Description provides a human-readable explanation of what the detector +// looks for. func (s Scanner) Description() string { return "LDAP (Lightweight Directory Access Protocol) is an open, vendor-neutral, industry standard application protocol for accessing and maintaining distributed directory information services over an Internet Protocol (IP) network." 
} diff --git a/pkg/detectors/ldap/ldap_integration_test.go b/pkg/detectors/ldap/ldap_integration_test.go index 94aaff77b6f3..5f831a78e51d 100644 --- a/pkg/detectors/ldap/ldap_integration_test.go +++ b/pkg/detectors/ldap/ldap_integration_test.go @@ -18,7 +18,6 @@ import ( "github.com/kylelemons/godebug/pretty" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) @@ -204,7 +203,7 @@ func TestLdap_Integration_FromChunk(t *testing.T) { t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) } } - ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError", "primarySecret") if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { t.Errorf("Ldap.FromData() %s diff: (-got +want)\n%s", tt.name, diff) } diff --git a/pkg/detectors/ldap/ldap_test.go b/pkg/detectors/ldap/ldap_test.go index 25972f314fee..a5645938aa9e 100644 --- a/pkg/detectors/ldap/ldap_test.go +++ b/pkg/detectors/ldap/ldap_test.go @@ -3,6 +3,7 @@ package ldap import ( "context" "fmt" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -84,3 +85,111 @@ func TestLdap_Pattern(t *testing.T) { }) } } + +// TestCartesianProductExplosion constructs N distinct URIs, usernames and +// passwords. A naïve triple-nested loop inside the detector would produce +// N³ candidate combinations and quickly blow up. The detector instead +// 1. scores combinations by textual proximity, +// 2. keeps only the best few (maxCombinations), and +// 3. caps the results of the proximity pass (maxResults), +// +// so we should never see the full Cartesian product. The test also injects a +// single high-confidence IAD line to guarantee at least one positive result. +func TestCartesianProductExplosion(t *testing.T) { + const N = 20 // number of URIs, usernames and passwords we generate + + var b strings.Builder + + // 1. 
N distinct LDAP URIs. + for i := range N { + b.WriteString(fmt.Sprintf("ldap://host%d:389\n", i)) + } + + // 2. N distinct bind-DNs (letters only to satisfy usernamePat). + for i := range N { + letter := 'A' + rune(i%26) + b.WriteString(fmt.Sprintf(`bind="cn=user%c,dc=example,dc=org"`+"\n", letter)) + } + + // 3. N distinct passwords. + for i := range N { + b.WriteString(fmt.Sprintf(`pass="P@ssw0rd%02d"`+"\n", i)) + } + + // 4. Add one high-confidence IAD line to guarantee at least one hit. + b.WriteString( + `Set ou = dso.OpenDSObject("LDAP://host999:389", ` + + `"cn=admin,dc=example,dc=org", "SuperSecret", 1)` + "\n") + + payload := []byte(b.String()) + + results, err := (Scanner{}).FromData(context.Background(), false, payload) + if err != nil { + t.Fatalf("FromData error: %v", err) + } + + if got := len(results); got == 0 { + t.Fatalf("expected at least 1 result, got 0") + } else if got > maxResults { + t.Fatalf("expected at most %d results (safety cap), got %d", maxResults, got) + } else { + t.Logf("detector returned %d results (cap %d) for %d×%d×%d input combinations", + got, maxResults, N, N, N) + } +} + +// BenchmarkCartesianProductExplosion re-uses the same synthetic payload and +// ensures that Scanner.FromData finishes in reasonable time/allocs despite +// the N³ theoretical combination space. This guards against accidental +// performance regressions that would re-introduce the Cartesian explosion. 
+func BenchmarkCartesianProductExplosion(b *testing.B) { + tests := []struct { + name string + uriCount int + userCount int + passCount int + }{ + {"Small_1x1x1", 1, 1, 1}, + {"Medium_5x5x5", 5, 5, 5}, + {"Large_10x10x10", 10, 10, 10}, + {"ManyURIs_15x5x5", 15, 5, 5}, + {"ManyUsers_5x15x5", 5, 15, 5}, + {"ManyPasswords_5x5x15", 5, 5, 15}, + {"Asymmetric_15x10x5", 15, 10, 5}, + {"VeryLarge_25x25x25", 25, 25, 25}, + } + + scanner := Scanner{} + ctx := context.Background() + + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + var sb strings.Builder + + for i := range tt.uriCount { + sb.WriteString(fmt.Sprintf("ldap://host%d:389\n", i)) + } + + for i := range tt.userCount { + letter := 'A' + rune(i%26) + sb.WriteString(fmt.Sprintf(`bind="cn=user%c,dc=example,dc=org"`+"\n", letter)) + } + + for i := range tt.passCount { + sb.WriteString(fmt.Sprintf(`pass="P@ssw0rd%02d"`+"\n", i)) + } + + payload := []byte(sb.String()) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(payload))) + + for range b.N { + if _, err := scanner.FromData(ctx, false, payload); err != nil { + b.Fatalf("FromData error: %v", err) + } + } + }) + } +}