DeepSeek API Key pattern (#815)

babenek · web-flow · commit 0ba7733e6ee5 · 2026-02-08T14:02:19.000+02:00
* Add DeepSeek API Key pattern

* Fix severity

* Fix missed test
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
@@ -92,7 +92,7 @@ jobs:
         run: |
           banner="$(python -m credsweeper --banner | grep CredSweeper | head -1)"
           echo "banner = '${banner}'"
-          if [ "CredSweeper 1.14.7 crc32:a70f1be7" != "${banner}" ]; then
+          if [ "CredSweeper 1.14.7 crc32:adbdc773" != "${banner}" ]; then
             echo "Update the check for '${banner}'"
             exit 1
           fi
diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml
@@ -1358,6 +1358,21 @@
     - code
     - doc
 
+- name: DeepSeek API Key
+  severity: high
+  confidence: moderate
+  type: pattern
+  values:
+    - (?P<value>sk-[0-9a-f]{32,64})(?![0-9A-Za-z_-])
+  min_line_len: 35
+  filter_type:
+    - ValuePatternCheck
+  required_substrings:
+    - sk-
+  target:
+    - code
+    - doc
+
 - name: Tavily API Key
   severity: high
   confidence: strong
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 # total number of files in test samples
-SAMPLES_FILES_COUNT = 174
+SAMPLES_FILES_COUNT = 175
 
 # ML_DELTA for different platforms which may produce a dribbling in ml_probability
 ML_DELTA = 0.0001
@@ -10,16 +10,16 @@
 ZERO_ML_THRESHOLD = 0.0
 
 # with option --doc & NEGLIGIBLE_ML_THRESHOLD
-SAMPLES_IN_DOC = 926
+SAMPLES_IN_DOC = 927
 
 # credentials count after scan without filters and ML validations
-SAMPLES_REGEX_COUNT = 660
+SAMPLES_REGEX_COUNT = 661
 
 # credentials count after scan with filters and without ML validation
-SAMPLES_FILTERED_COUNT = 546
+SAMPLES_FILTERED_COUNT = 547
 
 # credentials count after default post-processing
-SAMPLES_POST_CRED_COUNT = 500
+SAMPLES_POST_CRED_COUNT = 501
 
 # archived credentials that are not found without --depth
 SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 138
diff --git a/tests/data/depth_3_pedantic.json b/tests/data/depth_3_pedantic.json
@@ -1638,6 +1638,27 @@
             }
         ]
     },
+    {
+        "rule": "DeepSeek API Key",
+        "severity": "high",
+        "confidence": "moderate",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
+                "line_num": 1,
+                "path": "./tests/samples/deepseek",
+                "info": "FILE:./tests/samples/deepseek|RAW",
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "value": "sk-615ef152bf865a131f88b23571dc4ee6",
+                "value_start": 8,
+                "value_end": 43,
+                "entropy": 3.85416
+            }
+        ]
+    },
     {
         "rule": "Digital Ocean Token",
         "severity": "high",
diff --git a/tests/data/doc.json b/tests/data/doc.json
@@ -1285,6 +1285,27 @@
             }
         ]
     },
+    {
+        "rule": "DeepSeek API Key",
+        "severity": "high",
+        "confidence": "moderate",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
+                "line_num": 1,
+                "path": "./tests/samples/deepseek",
+                "info": "FILE:./tests/samples/deepseek|RAW",
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "value": "sk-615ef152bf865a131f88b23571dc4ee6",
+                "value_start": 8,
+                "value_end": 43,
+                "entropy": 3.85416
+            }
+        ]
+    },
     {
         "rule": "Digital Ocean Token",
         "severity": "high",
diff --git a/tests/data/no_filters_no_ml.json b/tests/data/no_filters_no_ml.json
@@ -1807,6 +1807,27 @@
             }
         ]
     },
+    {
+        "rule": "DeepSeek API Key",
+        "severity": "high",
+        "confidence": "moderate",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
+                "line_num": 1,
+                "path": "./tests/samples/deepseek",
+                "info": "",
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "value": "sk-615ef152bf865a131f88b23571dc4ee6",
+                "value_start": 8,
+                "value_end": 43,
+                "entropy": 3.85416
+            }
+        ]
+    },
     {
         "rule": "Digital Ocean Token",
         "severity": "high",
diff --git a/tests/data/no_ml.json b/tests/data/no_ml.json
@@ -1314,6 +1314,27 @@
             }
         ]
     },
+    {
+        "rule": "DeepSeek API Key",
+        "severity": "high",
+        "confidence": "moderate",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
+                "line_num": 1,
+                "path": "./tests/samples/deepseek",
+                "info": "",
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "value": "sk-615ef152bf865a131f88b23571dc4ee6",
+                "value_start": 8,
+                "value_end": 43,
+                "entropy": 3.85416
+            }
+        ]
+    },
     {
         "rule": "Digital Ocean Token",
         "severity": "high",
diff --git a/tests/data/output.json b/tests/data/output.json
@@ -1293,6 +1293,27 @@
             }
         ]
     },
+    {
+        "rule": "DeepSeek API Key",
+        "severity": "high",
+        "confidence": "moderate",
+        "ml_probability": null,
+        "line_data_list": [
+            {
+                "line": ">>>\u001b[93msk-615ef152bf865a131f88b23571dc4ee6\u001b[0m<<<",
+                "line_num": 1,
+                "path": "./tests/samples/deepseek",
+                "info": "",
+                "variable": null,
+                "variable_start": -2,
+                "variable_end": -2,
+                "value": "sk-615ef152bf865a131f88b23571dc4ee6",
+                "value_start": 8,
+                "value_end": 43,
+                "entropy": 3.85416
+            }
+        ]
+    },
     {
         "rule": "Digital Ocean Token",
         "severity": "high",
diff --git a/tests/samples/deepseek b/tests/samples/deepseek
@@ -0,0 +1,2 @@
+>>>[93msk-615ef152bf865a131f88b23571dc4ee6[0m<<<
+<<<[93msk-2f8d9514cce7a8a3f1f34b64acedfake[0m>>>
diff --git a/tests/test_app.py b/tests/test_app.py
@@ -503,7 +503,7 @@ def test_depth_p(self) -> None:
                     cvs_checksum = hashlib.md5(f.read()).digest()
                 checksum = bytes(a ^ b for a, b in zip(checksum, cvs_checksum))
         # update the checksum manually and keep line endings in the samples as is (git config core.autocrlf false)
-        self.assertEqual("2ff0d78063a93926ffe1eade56df53bb", binascii.hexlify(checksum).decode())
+        self.assertEqual("4bc7a88ab5e37cbf8c1701a645567e0e", binascii.hexlify(checksum).decode())
         normal_report = []
         sorted_report = []
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -614,7 +614,7 @@ def test_rules_ml_p(self) -> None:
             rules_text = yaml.dump_all(rules, sort_keys=True)
             checksum = hashlib.md5(rules_text.encode()).hexdigest()
             # update the expected value manually if some changes
-            self.assertEqual("892a588c69f35bf2aaa2e9bcd0cb7733", checksum)
+            self.assertEqual("0f79ee56f0b6ea66a72d79dc9df7f843", checksum)
             rules_set = set([i["name"] for i in rules if "code" in i["target"]])
             self.assertSetEqual(rules_set, report_set)
             self.assertEqual(SAMPLES_POST_CRED_COUNT, len(report))

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+>>>[93msk-615ef152bf865a131f88b23571dc4ee6[0m<<<`
	`2`	`+<<<[93msk-2f8d9514cce7a8a3f1f34b64acedfake[0m>>>`