Skip to content

Commit 0ba7733

Browse files
authored
DeepSeek API Key pattern (#815)
* DeepSeek API Key pattern
* fix severity
* fix missed test
1 parent 5f1154f commit 0ba7733

File tree

10 files changed

+130
-8
lines changed

10 files changed

+130
-8
lines changed

.github/workflows/check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
run: |
9393
banner="$(python -m credsweeper --banner | grep CredSweeper | head -1)"
9494
echo "banner = '${banner}'"
95-
if [ "CredSweeper 1.14.7 crc32:a70f1be7" != "${banner}" ]; then
95+
if [ "CredSweeper 1.14.7 crc32:adbdc773" != "${banner}" ]; then
9696
echo "Update the check for '${banner}'"
9797
exit 1
9898
fi

credsweeper/rules/config.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,6 +1358,21 @@
13581358
- code
13591359
- doc
13601360

1361+
- name: DeepSeek API Key
1362+
severity: high
1363+
confidence: moderate
1364+
type: pattern
1365+
values:
1366+
- (?P<value>sk-[0-9a-f]{32,64})(?![0-9A-Za-z_-])
1367+
min_line_len: 35
1368+
filter_type:
1369+
- ValuePatternCheck
1370+
required_substrings:
1371+
- sk-
1372+
target:
1373+
- code
1374+
- doc
1375+
13611376
- name: Tavily API Key
13621377
severity: high
13631378
confidence: strong

tests/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from pathlib import Path
22

33
# total number of files in test samples
4-
SAMPLES_FILES_COUNT = 174
4+
SAMPLES_FILES_COUNT = 175
55

66
# ML_DELTA for different platforms which may produce a dribbling in ml_probability
77
ML_DELTA = 0.0001
@@ -10,16 +10,16 @@
1010
ZERO_ML_THRESHOLD = 0.0
1111

1212
# with option --doc & NEGLIGIBLE_ML_THRESHOLD
13-
SAMPLES_IN_DOC = 926
13+
SAMPLES_IN_DOC = 927
1414

1515
# credentials count after scan without filters and ML validations
16-
SAMPLES_REGEX_COUNT = 660
16+
SAMPLES_REGEX_COUNT = 661
1717

1818
# credentials count after scan with filters and without ML validation
19-
SAMPLES_FILTERED_COUNT = 546
19+
SAMPLES_FILTERED_COUNT = 547
2020

2121
# credentials count after default post-processing
22-
SAMPLES_POST_CRED_COUNT = 500
22+
SAMPLES_POST_CRED_COUNT = 501
2323

2424
# archived credentials that are not found without --depth
2525
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138

tests/data/depth_3_pedantic.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,6 +1638,27 @@
16381638
}
16391639
]
16401640
},
1641+
{
1642+
"rule": "DeepSeek API Key",
1643+
"severity": "high",
1644+
"confidence": "moderate",
1645+
"ml_probability": null,
1646+
"line_data_list": [
1647+
{
1648+
"line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
1649+
"line_num": 1,
1650+
"path": "./tests/samples/deepseek",
1651+
"info": "FILE:./tests/samples/deepseek|RAW",
1652+
"variable": null,
1653+
"variable_start": -2,
1654+
"variable_end": -2,
1655+
"value": "sk-615ef152bf865a131f88b23571dc4ee6",
1656+
"value_start": 8,
1657+
"value_end": 43,
1658+
"entropy": 3.85416
1659+
}
1660+
]
1661+
},
16411662
{
16421663
"rule": "Digital Ocean Token",
16431664
"severity": "high",

tests/data/doc.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,27 @@
12851285
}
12861286
]
12871287
},
1288+
{
1289+
"rule": "DeepSeek API Key",
1290+
"severity": "high",
1291+
"confidence": "moderate",
1292+
"ml_probability": null,
1293+
"line_data_list": [
1294+
{
1295+
"line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
1296+
"line_num": 1,
1297+
"path": "./tests/samples/deepseek",
1298+
"info": "FILE:./tests/samples/deepseek|RAW",
1299+
"variable": null,
1300+
"variable_start": -2,
1301+
"variable_end": -2,
1302+
"value": "sk-615ef152bf865a131f88b23571dc4ee6",
1303+
"value_start": 8,
1304+
"value_end": 43,
1305+
"entropy": 3.85416
1306+
}
1307+
]
1308+
},
12881309
{
12891310
"rule": "Digital Ocean Token",
12901311
"severity": "high",

tests/data/no_filters_no_ml.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1807,6 +1807,27 @@
18071807
}
18081808
]
18091809
},
1810+
{
1811+
"rule": "DeepSeek API Key",
1812+
"severity": "high",
1813+
"confidence": "moderate",
1814+
"ml_probability": null,
1815+
"line_data_list": [
1816+
{
1817+
"line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
1818+
"line_num": 1,
1819+
"path": "./tests/samples/deepseek",
1820+
"info": "",
1821+
"variable": null,
1822+
"variable_start": -2,
1823+
"variable_end": -2,
1824+
"value": "sk-615ef152bf865a131f88b23571dc4ee6",
1825+
"value_start": 8,
1826+
"value_end": 43,
1827+
"entropy": 3.85416
1828+
}
1829+
]
1830+
},
18101831
{
18111832
"rule": "Digital Ocean Token",
18121833
"severity": "high",

tests/data/no_ml.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,27 @@
13141314
}
13151315
]
13161316
},
1317+
{
1318+
"rule": "DeepSeek API Key",
1319+
"severity": "high",
1320+
"confidence": "moderate",
1321+
"ml_probability": null,
1322+
"line_data_list": [
1323+
{
1324+
"line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
1325+
"line_num": 1,
1326+
"path": "./tests/samples/deepseek",
1327+
"info": "",
1328+
"variable": null,
1329+
"variable_start": -2,
1330+
"variable_end": -2,
1331+
"value": "sk-615ef152bf865a131f88b23571dc4ee6",
1332+
"value_start": 8,
1333+
"value_end": 43,
1334+
"entropy": 3.85416
1335+
}
1336+
]
1337+
},
13171338
{
13181339
"rule": "Digital Ocean Token",
13191340
"severity": "high",

tests/data/output.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,6 +1293,27 @@
12931293
}
12941294
]
12951295
},
1296+
{
1297+
"rule": "DeepSeek API Key",
1298+
"severity": "high",
1299+
"confidence": "moderate",
1300+
"ml_probability": null,
1301+
"line_data_list": [
1302+
{
1303+
"line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
1304+
"line_num": 1,
1305+
"path": "./tests/samples/deepseek",
1306+
"info": "",
1307+
"variable": null,
1308+
"variable_start": -2,
1309+
"variable_end": -2,
1310+
"value": "sk-615ef152bf865a131f88b23571dc4ee6",
1311+
"value_start": 8,
1312+
"value_end": 43,
1313+
"entropy": 3.85416
1314+
}
1315+
]
1316+
},
12961317
{
12971318
"rule": "Digital Ocean Token",
12981319
"severity": "high",

tests/samples/deepseek

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
>>>sk-615ef152bf865a131f88b23571dc4ee6<<<
2+
<<<sk-2f8d9514cce7a8a3f1f34b64acedfake>>>

tests/test_app.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ def test_depth_p(self) -> None:
503503
cvs_checksum = hashlib.md5(f.read()).digest()
504504
checksum = bytes(a ^ b for a, b in zip(checksum, cvs_checksum))
505505
# update the checksum manually and keep line endings in the samples as is (git config core.autocrlf false)
506-
self.assertEqual("2ff0d78063a93926ffe1eade56df53bb", binascii.hexlify(checksum).decode())
506+
self.assertEqual("4bc7a88ab5e37cbf8c1701a645567e0e", binascii.hexlify(checksum).decode())
507507
normal_report = []
508508
sorted_report = []
509509
with tempfile.TemporaryDirectory() as tmp_dir:
@@ -614,7 +614,7 @@ def test_rules_ml_p(self) -> None:
614614
rules_text = yaml.dump_all(rules, sort_keys=True)
615615
checksum = hashlib.md5(rules_text.encode()).hexdigest()
616616
# update the expected value manually if some changes
617-
self.assertEqual("892a588c69f35bf2aaa2e9bcd0cb7733", checksum)
617+
self.assertEqual("0f79ee56f0b6ea66a72d79dc9df7f843", checksum)
618618
rules_set = set([i["name"] for i in rules if "code" in i["target"]])
619619
self.assertSetEqual(rules_set, report_set)
620620
self.assertEqual(SAMPLES_POST_CRED_COUNT, len(report))

0 commit comments

Comments
 (0)