Skip to content

Commit c48e090

Browse files
authored
feat: smart secret detection looks for high entropy level (#6418)
1 parent df14859 commit c48e090

File tree

5 files changed

+60
-30
lines changed
  • packages/build
    • src/plugins_core/secrets_scanning
    • tests
      • secrets_scanning/fixtures
      • utils_secretscanning

5 files changed

+60
-30
lines changed

packages/build/src/plugins_core/secrets_scanning/utils.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,30 @@ export function getSecretKeysToScanFor(env: Record<string, unknown>, secretKeys:
123123
return filteredSecretKeys.filter((key) => !isValueTrivial(env[key]))
124124
}
125125

126+
/**
 * Computes the Shannon entropy of a string, in bits per character.
 *
 * A "character" here is a Unicode code point (the unit produced by iterating the
 * string). The total used to turn counts into probabilities is measured in the same
 * unit, so the probabilities always sum to 1, including for characters outside the
 * Basic Multilingual Plane, where `str.length` would over-count.
 *
 * @param str - The string to measure.
 * @returns 0 for an empty string; otherwise the sum of `-p * log2(p)` over each
 *   distinct character, where `p` is that character's relative frequency.
 */
const getShannonEntropy = (str: string): number => {
  const counts = new Map<string, number>()
  let total = 0
  for (const char of str) {
    counts.set(char, (counts.get(char) ?? 0) + 1)
    total += 1
  }

  if (total === 0) return 0

  let entropy = 0
  for (const count of counts.values()) {
    const p = count / total
    entropy -= p * Math.log2(p)
  }

  return entropy
}
143+
144+
// Lowest Shannon entropy at which a token is still treated as a likely secret.
const HIGH_ENTROPY_THRESHOLD = 4.5

/**
 * Tells whether a string's Shannon entropy reaches HIGH_ENTROPY_THRESHOLD.
 *
 * @param str - The candidate token.
 * @returns `true` when the entropy is greater than or equal to the threshold.
 */
const doesEntropyMeetThresholdForSecret = (str: string): boolean =>
  getShannonEntropy(str) >= HIGH_ENTROPY_THRESHOLD
149+
126150
// Most prefixes are 4-5 chars, so requiring 12 chars after ensures a reasonable secret length
127151
const MIN_CHARS_AFTER_PREFIX = 12
128152

@@ -187,6 +211,10 @@ export function findLikelySecrets({
187211
if (!token || !prefix || allOmittedValues.includes(token)) {
188212
continue
189213
}
214+
// Despite the prefix, the string does not look random enough for us to be confident it's a secret
215+
if (!doesEntropyMeetThresholdForSecret(token)) {
216+
continue
217+
}
190218
matches.push({
191219
prefix,
192220
index: match.index,
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[build.environment]
2-
ENV_VAR_1 = "sk_12345678901234567890"
2+
ENV_VAR_1 = "sk_dF6gH9jK4mP7nW2xR5tYc6dBmFP5ym"
33
ENV_VAR_2 = "val2-val2-val2"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[build.environment]
22
SECRETS_SCAN_SMART_DETECTION_ENABLED = "false"
3-
ENV_VAR_1 = "sk_12345678901234567890"
3+
ENV_VAR_1 = "sk_dF6gH9jK4mP7nW2xR5tYc6dBmFP5ym"
44
ENV_VAR_2 = "val2-val2-val2"
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[build.environment]
2-
ENV_VAR_1 = "sk_12345678901234567890"
2+
ENV_VAR_1 = "sk_dF6gH9jK4mP7nW2xR5tYc6dBmFP5ym"
33
ENV_VAR_2 = "val2-val2-val2"
4-
SECRETS_SCAN_SMART_DETECTION_OMIT_VALUES = "sk_12345678901234567890"
4+
SECRETS_SCAN_SMART_DETECTION_OMIT_VALUES = "sk_dF6gH9jK4mP7nW2xR5tYc6dBmFP5ym"

packages/build/tests/utils_secretscanning/tests.js

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ import { findLikelySecrets } from '../../lib/plugins_core/secrets_scanning/utils
44

55
test('findLikelySecrets - should not find secrets without quotes or delimiters', async (t) => {
66
const lines = [
7-
'aws_123456789012345678',
8-
'ghp_1234567890123456789',
9-
'xoxb-123456789012345678',
10-
'nf_123456789012345678',
7+
'aws_Kj2P9xL5mN8vB3cX7qA4',
8+
'ghp_zR4wY7hQ9sK2nM5vL8xbkokM0vgXC',
9+
'xoxb-bV8cX3zL6kM9nQ4wR7y3FIASwY6YX',
10+
'nf_pT2hN7mK4jL9wB5vC8xOzHucej7Or',
1111
]
1212

1313
lines.forEach((text) => {
@@ -18,12 +18,12 @@ test('findLikelySecrets - should not find secrets without quotes or delimiters',
1818

1919
test('findLikelySecrets - should find secrets with quotes or equals', async (t) => {
2020
const matchingLines = [
21-
'my_secret_key=aws_123456789012345678',
22-
'mySecretKey = aws_123456789012345678',
23-
'secretKey="aws_123456789012345678"',
24-
'secretKey = "aws_123456789012345678"',
25-
"secretKey='aws_123456789012345678'",
26-
'secretKey=`aws_123456789012345678`',
21+
'my_secret_key=aws_Kj2P9xL5mN8vB3cX7qA4',
22+
'mySecretKey = aws_zR4wY7hQ9sK2nM5vL8xbkokM0vgXC',
23+
'secretKey="aws_dF6gH9jK4mP7nW2xR5tYc6dBmFP5ym"',
24+
'secretKey = "aws_bV8cX3zL6kM9nQ4wR7y3FIASwY6YX"',
25+
"secretKey='aws_pT2hN7mK4jL9wB5vC8xOzHucej7Or'",
26+
'secretKey=`aws_qS3fD8gH5jK2mN6pR9yHfBxkujdx2`',
2727
]
2828
matchingLines.forEach((text) => {
2929
const matches = findLikelySecrets({ text })
@@ -32,18 +32,18 @@ test('findLikelySecrets - should find secrets with quotes or equals', async (t)
3232
})
3333

3434
test('findLikelySecrets - should not match values with spaces after prefix', async (t) => {
35-
const nonMatchingLine = 'aws_ "123456789012345678"'
35+
const nonMatchingLine = 'aws_ "Kj2P9xL5mN8vB3cX7qA4"'
3636
const matches = findLikelySecrets({ text: nonMatchingLine })
3737
t.is(matches.length, 0)
3838
})
3939

4040
test('findLikelySecrets - should not match values that are too short', async (t) => {
41-
const matches = findLikelySecrets({ text: 'aws_key="12345678901"' })
41+
const matches = findLikelySecrets({ text: 'aws_key="aws_x7B9nM4k"' })
4242
t.is(matches.length, 0)
4343
})
4444

4545
test('findLikelySecrets - should return the matched prefix as the key', async (t) => {
46-
const matches = findLikelySecrets({ text: 'mykey = "github_pat_123456789012345678"' })
46+
const matches = findLikelySecrets({ text: 'mykey = "github_pat_Kj2P9xL5mN8vB3cX7qA4"' })
4747
t.is(matches.length, 1)
4848
t.is(matches[0].prefix, 'github_pat_')
4949
})
@@ -57,14 +57,12 @@ test('findLikelySecrets - should handle empty or invalid input', async (t) => {
5757
}
5858
})
5959

60-
test('findLikelySecrets - should match exactly minimum chars after prefix', async (t) => {
61-
const exactMinChars = 'value = "aws_123456789012"' // Exactly 12 chars after prefix
62-
const matches = findLikelySecrets({ text: exactMinChars })
63-
t.is(matches.length, 1)
64-
})
65-
6660
test('findLikelySecrets - should match different prefixes from LIKELY_SECRET_PREFIXES', async (t) => {
67-
const lines = ['key="ghp_123456789012345678"', 'key="sk_123456789012345678"', 'key="aws_123456789012345678"']
61+
const lines = [
62+
'key="ghp_zR4wY7hQ9sK2nM5vL8xbkokM0vgX"',
63+
'key="sk_zR4wY7hQ9sK2nM5vL8xbkokM0vgX"',
64+
'key="aws_zR4wY7hQ9sK2nM5vL8xbkokM0vgX"',
65+
]
6866

6967
lines.forEach((text) => {
7068
const matches = findLikelySecrets({ text })
@@ -79,14 +77,18 @@ test('findLikelySecrets - should skip safe-listed values', async (t) => {
7977
})
8078

8179
test('findLikelySecrets - should allow dashes and alphanumeric characters only', async (t) => {
82-
const validLines = ['key="aws_abc123-456-789"', 'key="ghp_abc-123-def-456"']
80+
const validLines = ['key="aws_zR4wY7hQ-9sK2nM5vL8xbko-kM0vgXKj2P"', 'key="ghp_zR4wY7hQ9sK2n-M5vL8xbkokM0vgX"']
8381

8482
validLines.forEach((line) => {
8583
const matches = findLikelySecrets({ text: line })
8684
t.is(matches.length, 1, `Should match line with dashes: ${line}`)
8785
})
8886

89-
const invalidLines = ['key="aws_abc123!@#$%^&*()_+"', 'key="ghp_abc.123_456.789"', 'key="sk_live_123_456_789"']
87+
const invalidLines = [
88+
'key="aws_zR4wY7hQ9sK2nMgX!@#$%^&*()_+"',
89+
'key="ghp_zR4wY7hQ.9sK2nM5vL8.xbkokM0vgX"',
90+
'key="sk_R4_wY7hQ9sK2_nM5vL8xbkokM0vgX"',
91+
]
9092

9193
invalidLines.forEach((line) => {
9294
const matches = findLikelySecrets({ text: line })
@@ -97,16 +99,16 @@ test('findLikelySecrets - should allow dashes and alphanumeric characters only',
9799
test('findLikelySecrets - should match full secret value against omitValues', async (t) => {
98100
// Test both partial and full matches to ensure proper behavior
99101
const partialMatch = findLikelySecrets({
100-
text: 'key="aws_123456789012extracharshere"',
102+
text: 'key="aws_zR4wY7hQ9sK2nM5vL8xbkokM0vgX"',
101103
// The omitValue only partially matches the secret - we should still detect the secret
102-
omitValuesFromEnhancedScan: ['aws_123456789012'],
104+
omitValuesFromEnhancedScan: ['aws_zR4wY7hQ9'],
103105
})
104106
t.is(partialMatch.length, 1)
105107

106108
const fullMatch = findLikelySecrets({
107-
text: 'key="aws_123456789012extracharshere"',
109+
text: 'key="aws_zR4wY7hQ9sK2nM5vL8xbkokM0vgX"',
108110
// Omit the full secret value - we should not detect the secret
109-
omitValuesFromEnhancedScan: ['aws_123456789012extracharshere'],
111+
omitValuesFromEnhancedScan: ['aws_zR4wY7hQ9sK2nM5vL8xbkokM0vgX'],
110112
})
111113
t.is(fullMatch.length, 0)
112114
})

0 commit comments

Comments
 (0)