diff --git a/.ci/benchmark.txt b/.ci/benchmark.txt index 7f6c2e6eb..b182fa17b 100644 --- a/.ci/benchmark.txt +++ b/.ci/benchmark.txt @@ -1,15 +1,15 @@ -META MD5 0097e6ada1b9b6a55fbaa486e2996654 -DATA MD5 0874bbca3b6d927910e8f907aaa33196 -DATA: 16703750 interested lines. MARKUP: 61935 items +META MD5 42f77d135b8784c2cf3e5878cabfe088 +DATA MD5 d1b4a31164dd508138dc4881c335e089 +DATA: 16703140 interested lines. MARKUP: 61262 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- - 676 69398 135 415 72 + 676 69398 134 416 72 .04 2 149 4 -.1 2 641 2 5 +.1 2 641 2 8 .admx 1 26 1 -.adoc 1 158 13 5 1 +.adoc 1 158 18 5 1 .api 2 118 4 -.asciidoc 102 15403 70 336 27 +.asciidoc 102 15403 81 336 17 .axaml 5 286 5 .backup 1 62 2 1 .bash 2 2158 2 1 @@ -17,9 +17,9 @@ FileType FileNumber ValidLines Positives Negatives Templat .bats 15 2804 14 48 9 .bazel 3 424 8 .build 2 40 1 2 -.bundle 4 1512 476 +.bundle 4 1512 479 .bzl 3 2503 11 -.c 181 285824 11 768 5 +.c 181 285824 14 768 5 .cast 2 704 6 .cc 29 30562 616 1 .cf 3 126 2 1 @@ -29,15 +29,15 @@ FileType FileNumber ValidLines Positives Negatives Templat .cljc 5 2421 11 .cls 1 657 1 .cmd 4 401 2 3 -.cnf 8 858 18 15 16 +.cnf 8 858 20 15 14 .coffee 1 585 2 .conf 63 5019 66 59 52 .config 20 492 7 40 1 -.cpp 14 5489 2 60 +.cpp 14 5489 7 60 .creds 1 10 1 1 .crlf 1 27 1 -.crt 2 4979 194 -.cs 268 82752 278 863 95 +.crt 2 4979 126 +.cs 268 82752 279 863 95 .cshtml 5 180 12 .csp 3 379 9 .csproj 1 14 1 @@ -47,18 +47,18 @@ FileType FileNumber ValidLines Positives Negatives Templat .deprecated 1 126 1 .development 1 5 1 .diff 2 2460 8 2 -.dist 4 227 6 12 +.dist 4 227 8 10 .doc 1 2489 3 .dockerfile 1 19 1 .dot 1 160 5 .edited 23 713 30 .eex 4 74 8 .ejs 1 13 1 -.env 10 136 11 3 17 +.env 10 136 12 3 16 .erb 13 323 26 .erl 4 96 7 -.ex 26 4999 7 97 4 -.example 25 2025 83 37 51 +.ex 26 4999 7 99 2 +.example 25 2025 83 38 51 .exs 24 4842 8 187 4 .ext 5 211 1 4 2 
.fsproj 1 75 1 2 @@ -66,35 +66,35 @@ FileType FileNumber ValidLines Positives Negatives Templat .gd 1 37 1 .gml 3 3075 16 .gni 3 5017 19 -.go 1275 717403 1366 4118 741 +.go 1277 718576 1450 4124 729 .golden 5 1168 1 13 29 -.gradle 50 4295 7 90 100 +.gradle 50 4295 8 90 99 .graphql 8 454 2 13 .graphqls 1 30 1 -.groovy 22 5113 24 214 1 +.groovy 22 5113 25 214 1 .gtpl 1 15 2 .h 10 2269 38 .haml 9 191 17 .hbs 2 54 3 -.hs 14 4140 29 61 4 -.html 60 19400 84 107 18 +.hs 14 4140 30 61 4 +.html 60 19400 85 107 17 .idl 3 1625 37 5 .iml 6 699 30 -.in 7 2242 10 40 9 -.inc 2 56 2 1 +.in 7 2242 10 40 10 +.inc 2 56 1 2 .ini 12 1461 27 12 17 .ipynb 1 134 7 .j 1 241 4 -.j2 32 6043 9 168 10 -.java 670 143955 460 1348 170 +.j2 32 6043 8 169 10 +.java 672 144069 489 1338 161 .jenkinsfile 1 58 2 6 .jinja2 1 64 2 -.js 656 532595 570 2461 284 -.json 886 13114472 1302 9991 132 +.js 655 531277 582 2419 283 +.json 886 13114472 1323 9998 125 .jsp 13 3202 1 37 .jsx 7 857 19 .jwt 1 1 2 -.key 118 3646 105 14 +.key 115 3067 105 11 .ks 1 25 1 .kt 121 20235 65 363 3 .l 1 982 1 @@ -113,15 +113,15 @@ FileType FileNumber ValidLines Positives Negatives Templat .lock 24 155844 158 .log 2 199 38 52 .lua 10 1924 37 3 -.m 16 13358 19 157 3 +.m 16 13358 22 160 .manifest 3 102 9 6 -.markdown 38 5862 54 4 1 +.markdown 38 5862 69 4 .markerb 3 12 3 .marko 1 21 2 -.md 760 180503 894 2308 584 +.md 760 180503 982 2311 578 .mdx 3 549 7 .mjml 1 18 1 -.mjs 22 4424 124 341 +.mjs 22 4424 101 369 .mk 1 5878 13 .ml 1 1856 16 .mlir 2 1596 19 @@ -130,7 +130,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .mqh 1 1023 2 .msg 1 26644 1 1 .mysql 1 36 2 -.ndjson 2 5006 76 226 2 +.ndjson 2 5006 78 226 2 .nix 4 211 12 .nolint 1 2 1 .odd 1 1281 43 @@ -140,8 +140,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .pan 2 48 4 .patch 4 109405 4 27 .pbxproj 1 941 1 -.pem 59 1297 58 8 -.php 390 80502 146 1440 71 +.pem 59 1297 58 3 +.php 390 80502 149 1392 70 .pl 16 14727 7 33 .pm 4 1817 8 .po 3 2994 15 @@ 
-152,21 +152,21 @@ FileType FileNumber ValidLines Positives Negatives Templat .ppk 1 45 28 .private 1 15 1 .proj 1 85 5 -.properties 55 1637 63 29 28 +.properties 55 1637 67 29 26 .proto 5 5768 2 49 .ps1 16 8509 15 73 2 .ps1xml 1 5022 1 .pug 2 193 2 .purs 1 69 4 -.pxd 1 150 4 2 -.py 901 294351 730 3214 712 +.pxd 1 150 2 4 +.py 901 294351 746 3229 699 .pyi 4 1361 9 .pyp 1 167 1 .python 1 213 .pyx 2 1094 23 .r 4 62 5 2 .rake 2 51 2 -.rb 852 130684 391 2404 594 +.rb 852 130684 420 2397 570 .re 1 31 1 .red 1 159 1 .release 1 13 4 @@ -181,20 +181,20 @@ FileType FileNumber ValidLines Positives Negatives Templat .rsa 3 3 3 .rsc 1 691 1 .rsp 18 7739 24 10 27 -.rst 92 35083 85 305 68 +.rst 92 35083 91 307 62 .rules 1 6 2 .sample 2 25 3 4 4 .sbt 3 570 5 2 .scala 53 5626 38 99 .scss 16 8553 32 1 .secrets 1 11 1 -.sh 152 24184 68 463 23 +.sh 152 24184 71 462 21 .slim 1 153 1 2 .smali 1 775 18 .snap 3 1708 9 29 2 .spec 2 332 2 .spin 1 565 1 -.sql 26 6468 130 58 3 +.sql 26 6468 131 58 1 .storyboard 20 1802 339 .strings 20 1240 137 .stub 3 84 6 @@ -202,7 +202,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .sum 37 22854 283 .svg 1 638 12 .swift 1 112 2 -.t 10 1946 20 43 13 +.t 10 1946 20 49 11 .td 2 14002 6 .template 19 1633 4 36 11 .test 2 24 22 1 4 @@ -210,17 +210,17 @@ FileType FileNumber ValidLines Positives Negatives Templat .tf 27 1644 14 30 1 .tfstate 6 431 49 9 4 .tfvars 1 31 3 2 -.tl 2 2161 154 2 +.tl 2 2161 161 1 .tmpl 5 336 3 9 .token 1 1 3 -.toml 86 2471 54 104 156 +.toml 86 2471 65 106 145 .tpl 1 43 1 .travis 1 34 2 3 1 -.ts 609 109982 213 1772 197 +.ts 609 109982 226 1777 193 .tsx 54 7914 1 114 5 .ttar 1 452 1 -.txt 322 81679 5240 4984 42 -.utf8 1 77 2 +.txt 322 81679 5242 4340 41 +.utf8 1 77 1 .vsixmanifest 1 36 1 .vsmdi 1 6 2 .vue 50 8736 1 153 1 @@ -228,30 +228,32 @@ FileType FileNumber ValidLines Positives Negatives Templat .xcscheme 1 109 4 .xib 11 503 164 .xsl 1 311 1 -.yaml 168 24422 193 337 42 -.yml 555 54516 1230 902 364 +.yaml 168 24422 195 
337 40 +.yml 555 54516 1865 900 326 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 11478 16703750 15069 46313 4909 -credsweeper result_cnt : 14696, lost_cnt : 0, true_cnt : 14479, false_cnt : 217 +TOTAL: 11478 16703140 16060 45577 4740 +credsweeper result_cnt : 15681, lost_cnt : 0, true_cnt : 15425, false_cnt : 256 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- -------- -API 240 3172 187 229 229 0 3359 11 0.000000 0.045833 0.996944 1.000000 0.954167 0.976546 +API 246 3174 187 235 234 1 3360 12 0.000298 0.048780 0.996396 0.995745 0.951220 0.972973 AWS Client ID 191 19 0 183 183 0 19 8 0.000000 0.041885 0.961905 1.000000 0.958115 0.978610 AWS Multi 82 10 0 34 34 0 10 48 0.000000 0.585366 0.478261 1.000000 0.414634 0.586207 AWS S3 Bucket 67 23 0 92 67 23 0 0 1.000000 0.000000 0.744444 0.744444 1.000000 0.853503 Akamai Credentials 6 2 0 6 6 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 Atlassian Old PAT token 5 8 0 11 5 6 2 0 0.750000 0.000000 0.538462 0.454545 1.000000 0.625000 -Auth 1076 2759 81 1060 1052 8 2832 24 0.002817 0.022305 0.991828 0.992453 0.977695 0.985019 +Auth 1094 2760 77 1064 1055 9 2828 39 0.003172 0.035649 0.987789 0.991541 0.964351 0.977757 Azure Access Token 21 0 0 13 13 0 0 8 0.380952 0.619048 1.000000 0.619048 0.764706 BASE64 Private Key 22 4 0 22 22 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -BASE64 encoded PEM Private Key 12 0 0 11 11 0 0 1 0.083333 0.916667 1.000000 0.916667 0.956522 -Bitbucket Client ID 36 66 0 48 31 16 50 5 0.242424 0.138889 0.794118 0.659574 0.861111 0.746988 -Bitbucket Client Secret 46 90 1 88 41 46 45 5 0.505495 0.108696 0.627737 0.471264 0.891304 0.616541 +BASE64 encoded PEM Private Key 12 0 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 +Basic Authorization 625 554 0 625 625 0 554 0 0.000000 0.000000 1.000000 
1.000000 1.000000 1.000000 +Bearer Authorization 163 0 0 163 163 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 +Bitbucket Client ID 36 66 0 42 25 16 50 11 0.242424 0.305556 0.735294 0.609756 0.694444 0.649351 +Bitbucket Client Secret 38 104 1 86 27 58 47 11 0.552381 0.289474 0.517483 0.317647 0.710526 0.439024 CMD ConvertTo-SecureString 13 4 0 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -CMD Password 27 128 0 27 27 0 128 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -CMD Secret 1 1 0 1 1 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -CMD Token 6 0 0 5 5 0 0 1 0.166667 0.833333 1.000000 0.833333 0.909091 +CMD Password 29 136 1 27 27 0 137 2 0.000000 0.068966 0.987952 1.000000 0.931034 0.964286 +CMD Secret 1 17 0 1 1 0 17 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 +CMD Token 6 2 0 5 5 0 2 1 0.000000 0.166667 0.875000 1.000000 0.833333 0.909091 Credential 99 422 76 100 99 1 497 0 0.002008 0.000000 0.998325 0.990000 1.000000 0.994975 Docker Swarm Token 2 0 0 1 1 0 0 1 0.500000 0.500000 1.000000 0.500000 0.666667 Dropbox App secret 73 144 0 53 39 13 131 34 0.090278 0.465753 0.783410 0.750000 0.534247 0.624000 @@ -264,25 +266,25 @@ Google API Key 12 0 0 1 Google Multi 10 2 0 11 10 1 1 0 0.500000 0.000000 0.916667 0.909091 1.000000 0.952381 Google OAuth Access Token 3 0 0 3 3 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Grafana Provisioned API Key 22 1 0 5 5 0 1 17 0.000000 0.772727 0.260870 1.000000 0.227273 0.370370 -JSON Web Token 180 61 0 141 141 0 61 39 0.000000 0.216667 0.838174 1.000000 0.783333 0.878505 +JSON Web Token 148 61 0 141 141 0 61 7 0.000000 0.047297 0.966507 1.000000 0.952703 0.975779 JWK 55 0 0 55 55 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 36 1 1 31 30 1 1 6 0.500000 0.166667 0.815789 0.967742 0.833333 0.895522 -Key 4186 15781 483 4203 4163 40 16224 23 0.002459 0.005495 0.996919 0.990483 0.994505 0.992490 
-Nonce 113 49 0 111 111 0 49 2 0.000000 0.017699 0.987654 1.000000 0.982301 0.991071 -Other 9 7446 5 0 0 7451 9 0.000000 1.000000 0.998794 0.000000 -PEM Private Key 1142 1483 0 1146 1142 4 1479 0 0.002697 0.000000 0.998476 0.996510 1.000000 0.998252 -Password 2331 7544 2539 2286 2262 24 10059 69 0.002380 0.029601 0.992508 0.989501 0.970399 0.979857 -SQL Password 44 13 0 41 41 0 13 3 0.000000 0.068182 0.947368 1.000000 0.931818 0.964706 +Key 4195 15817 477 4217 4154 63 16231 41 0.003866 0.009774 0.994924 0.985060 0.990226 0.987637 +Nonce 115 50 0 111 111 0 50 4 0.000000 0.034783 0.975758 1.000000 0.965217 0.982301 +Other 9 7439 5 0 0 7444 9 0.000000 1.000000 0.998792 0.000000 +PEM Private Key 1142 76 0 1146 1142 4 72 0 0.052632 0.000000 0.996716 0.996510 1.000000 0.998252 +Password 2509 7560 2398 2452 2428 24 9934 81 0.002410 0.032284 0.991578 0.990212 0.967716 0.978835 +SQL Password 44 13 1 41 41 0 14 3 0.000000 0.068182 0.948276 1.000000 0.931818 0.964706 Salesforce Credentials 6 0 0 5 5 0 0 1 0.166667 0.833333 1.000000 0.833333 0.909091 Salt 83 74 1 80 80 0 75 3 0.000000 0.036145 0.981013 1.000000 0.963855 0.981595 -Secret 1492 1579 798 1489 1482 7 2370 10 0.002945 0.006702 0.995606 0.995299 0.993298 0.994297 +Secret 1501 1586 792 1497 1488 9 2369 13 0.003785 0.008661 0.994328 0.993988 0.991339 0.992662 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 Stripe Credentials 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Tencent WeChat API App ID 8 0 0 8 8 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 933 4174 455 861 858 3 4626 75 0.000648 0.080386 0.985976 0.996516 0.919614 0.956522 +Tencent WeChat API App ID 9 0 0 9 9 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 +Token 947 4186 454 862 859 3 4637 88 0.000647 0.092925 0.983712 0.996520 0.907075 0.949696 Twilio Credentials 30 39 0 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 
-URL Credentials 225 168 197 225 225 0 365 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 +URL Credentials 229 177 184 229 229 0 361 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 UUID 1866 265 0 1849 1848 1 264 18 0.003774 0.009646 0.991084 0.999459 0.990354 0.994886 - 15069 46313 4909 14705 14479 217 46096 590 0.004686 0.039153 0.986853 0.985234 0.960847 0.972888 + 16060 45577 4740 15690 15425 256 45321 635 0.005617 0.039539 0.985544 0.983675 0.960461 0.971929 diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 08919e2e8..03a56f1a7 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23 with: repository: Samsung/CredData - ref: d425c1b7600407ca5a82f2379fdc8627d194fb39 + ref: 7b65d7922b9dea9793d42bf7046ee687e96c22ae - name: Markup hashing run: | @@ -87,7 +87,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23 with: repository: Samsung/CredData - ref: d425c1b7600407ca5a82f2379fdc8627d194fb39 + ref: 7b65d7922b9dea9793d42bf7046ee687e96c22ae - name: Markup hashing run: | @@ -190,7 +190,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23 with: repository: Samsung/CredData - ref: d425c1b7600407ca5a82f2379fdc8627d194fb39 + ref: 7b65d7922b9dea9793d42bf7046ee687e96c22ae - name: Markup hashing run: | @@ -378,7 +378,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - 2024.10.23 with: repository: Samsung/CredData - ref: d425c1b7600407ca5a82f2379fdc8627d194fb39 + ref: 7b65d7922b9dea9793d42bf7046ee687e96c22ae - name: Markup hashing run: | diff --git a/credsweeper/filters/__init__.py b/credsweeper/filters/__init__.py index a602cdcf5..56f426cde 100644 --- a/credsweeper/filters/__init__.py +++ b/credsweeper/filters/__init__.py @@ -10,6 +10,7 @@ from 
credsweeper.filters.value_base64_encoded_pem_check import ValueBase64EncodedPem from credsweeper.filters.value_base64_key_check import ValueBase64KeyCheck from credsweeper.filters.value_base64_part_check import ValueBase64PartCheck +from credsweeper.filters.value_basic_auth_check import ValueBasicAuthCheck from credsweeper.filters.value_blocklist_check import ValueBlocklistCheck from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck diff --git a/credsweeper/filters/value_basic_auth_check.py b/credsweeper/filters/value_basic_auth_check.py new file mode 100644 index 000000000..ec28b95c7 --- /dev/null +++ b/credsweeper/filters/value_basic_auth_check.py @@ -0,0 +1,36 @@ +import contextlib + +from credsweeper.common.constants import DEFAULT_PATTERN_LEN, UTF_8 +from credsweeper.config.config import Config +from credsweeper.credentials.line_data import LineData +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.filters.filter import Filter +from credsweeper.utils.util import Util + + +class ValueBasicAuthCheck(Filter): + """Check that candidate have a known structure""" + + def __init__(self, config: Config = None) -> None: + pass + + def run(self, line_data: LineData, target: AnalysisTarget) -> bool: + """Run filter checks on received token which might be structured. 
+ + Args: + line_data: credential candidate data + target: multiline target from which line data was obtained + + Return: + True, if need to filter candidate and False if left + + """ + value = line_data.value + with contextlib.suppress(Exception): + # Basic encoding -> login:password + decoded = Util.decode_base64(value, padding_safe=True, urlsafe_detect=True) + delimiter_pos = decoded.find(b':') + # check whether the delimiter exists and all chars are decoded + if 0 < delimiter_pos < len(decoded) - DEFAULT_PATTERN_LEN and decoded.decode(UTF_8): + return False + return True diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index ba1e71b1a..6d8e4239d 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -1465,6 +1465,36 @@ - code - doc +- name: Basic Authorization + severity: medium + confidence: strong + type: pattern + values: + - (?P<variable>(?i:basic))(?P<separator>\s+)(?P<value>[=0-9A-Za-z_/+-]{8,8000})(?![0-9A-Za-z_/+-]) + min_line_len: 18 + filter_type: + - ValueBasicAuthCheck + required_substrings: + - basic + target: + - code + - doc + +- name: Bearer Authorization + severity: medium + confidence: moderate + type: pattern + values: + - (?P<variable>(?i:bearer|ntlm))(?P<separator>\s+)(?P<value>[.0-9A-Za-z_/+-]{32,8000}=*)(?![0-9A-Za-z_/+-]) + min_line_len: 37 + filter_type: GeneralKeyword + required_substrings: + - bearer + - ntlm + target: + - code + - doc + - name: API severity: low confidence: moderate diff --git a/docs/source/credsweeper.filters.rst b/docs/source/credsweeper.filters.rst index 22e990fe7..611e86dcb 100644 --- a/docs/source/credsweeper.filters.rst +++ b/docs/source/credsweeper.filters.rst @@ -116,6 +116,14 @@ credsweeper.filters.value\_base64\_part\_check module :undoc-members: :show-inheritance: +credsweeper.filters.value\_basic\_auth\_check module +---------------------------------------------------- + +.. 
automodule:: credsweeper.filters.value_basic_auth_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_blocklist\_check module -------------------------------------------------- diff --git a/tests/__init__.py b/tests/__init__.py index 714b25bec..1af959c36 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,7 +7,7 @@ NEGLIGIBLE_ML_THRESHOLD = 0.0001 # credentials count after scan with negligible ML threshold -SAMPLES_CRED_COUNT = 506 +SAMPLES_CRED_COUNT = 506 + 3 # Number of filtered credentials with ML ML_FILTERED = 22 @@ -16,7 +16,7 @@ SAMPLES_POST_CRED_COUNT = SAMPLES_CRED_COUNT - ML_FILTERED # with option --doc & NEGLIGIBLE_ML_THRESHOLD -SAMPLES_IN_DOC = 858 +SAMPLES_IN_DOC = 858 + 3 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 128 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 19c577651..2e380c931 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -408,6 +408,27 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "FILE:./tests/samples/auth.hs|RAW", + "variable": "NTLM", + "variable_start": 15, + "variable_end": 19, + "value": "TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "entropy": 4.00235 + } + ] + }, { "rule": "Auth", "severity": "medium", @@ -415,17 +436,38 @@ "ml_probability": 1.0, "line_data_list": [ { - "line": "curl -H \"Authorization: Basic WxhZGRpVuc2VzYW1lbjYp12vcG\" http://localhost:8080/.", + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", "line_num": 8, "path": "./tests/samples/auth_n.template", "info": "FILE:./tests/samples/auth_n.template|RAW", "variable": "Authorization", 
"variable_start": 9, "variable_end": 22, - "value": "WxhZGRpVuc2VzYW1lbjYp12vcG", + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", "value_start": 30, - "value_end": 56, - "entropy": 4.08506 + "value_end": 58, + "entropy": 4.20897 + } + ] + }, + { + "rule": "Basic Authorization", + "severity": "medium", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", + "line_num": 8, + "path": "./tests/samples/auth_n.template", + "info": "FILE:./tests/samples/auth_n.template|RAW", + "variable": "Basic", + "variable_start": 24, + "variable_end": 29, + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", + "value_start": 30, + "value_end": 58, + "entropy": 4.20897 } ] }, @@ -450,6 +492,27 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", + "line_num": 9, + "path": "./tests/samples/auth_n.template", + "info": "FILE:./tests/samples/auth_n.template|RAW", + "variable": "Bearer", + "variable_start": 24, + "variable_end": 30, + "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", + "value_start": 31, + "value_end": 65, + "entropy": 4.53585 + } + ] + }, { "rule": "AWS Client ID", "severity": "high", diff --git a/tests/data/doc.json b/tests/data/doc.json index 7df300d80..20699d40d 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -167,6 +167,69 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "FILE:./tests/samples/auth.hs|RAW", + "variable": "NTLM", + "variable_start": 15, + "variable_end": 19, + "value": 
"TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "entropy": 4.00235 + } + ] + }, + { + "rule": "Basic Authorization", + "severity": "medium", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", + "line_num": 8, + "path": "./tests/samples/auth_n.template", + "info": "FILE:./tests/samples/auth_n.template|RAW", + "variable": "Basic", + "variable_start": 24, + "variable_end": 29, + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", + "value_start": 30, + "value_end": 58, + "entropy": 4.20897 + } + ] + }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", + "line_num": 9, + "path": "./tests/samples/auth_n.template", + "info": "FILE:./tests/samples/auth_n.template|RAW", + "variable": "Bearer", + "variable_start": 24, + "variable_end": 30, + "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", + "value_start": 31, + "value_end": 65, + "entropy": 4.53585 + } + ] + }, { "rule": "AWS Client ID", "severity": "high", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index c1e79b706..ac197051d 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -272,6 +272,27 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "", + "variable": "NTLM", + "variable_start": 15, + "variable_end": 19, + "value": "TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "entropy": 4.00235 + } + ] + }, { "rule": "Auth", "severity": "medium", @@ -279,17 
+300,38 @@ "ml_probability": 1.0, "line_data_list": [ { - "line": "curl -H \"Authorization: Basic WxhZGRpVuc2VzYW1lbjYp12vcG\" http://localhost:8080/.", + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", "line_num": 8, "path": "./tests/samples/auth_n.template", "info": "", "variable": "Authorization", "variable_start": 9, "variable_end": 22, - "value": "WxhZGRpVuc2VzYW1lbjYp12vcG", + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", "value_start": 30, - "value_end": 56, - "entropy": 4.08506 + "value_end": 58, + "entropy": 4.20897 + } + ] + }, + { + "rule": "Basic Authorization", + "severity": "medium", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", + "line_num": 8, + "path": "./tests/samples/auth_n.template", + "info": "", + "variable": "Basic", + "variable_start": 24, + "variable_end": 29, + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", + "value_start": 30, + "value_end": 58, + "entropy": 4.20897 } ] }, @@ -314,6 +356,27 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", + "line_num": 9, + "path": "./tests/samples/auth_n.template", + "info": "", + "variable": "Bearer", + "variable_start": 24, + "variable_end": 30, + "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", + "value_start": 31, + "value_end": 65, + "entropy": 4.53585 + } + ] + }, { "rule": "AWS Client ID", "severity": "high", diff --git a/tests/data/output.json b/tests/data/output.json index 5f380b15d..682d6a029 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -272,6 +272,27 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + 
"line": "Authorization: NTLM TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "line_num": 4, + "path": "./tests/samples/auth.hs", + "info": "", + "variable": "NTLM", + "variable_start": 15, + "variable_end": 19, + "value": "TlRMTUAAABABoITVNIAAZI1AQBuOh4cSAQ8B1A=", + "value_start": 20, + "value_end": 59, + "entropy": 4.00235 + } + ] + }, { "rule": "Auth", "severity": "medium", @@ -279,17 +300,38 @@ "ml_probability": 1.0, "line_data_list": [ { - "line": "curl -H \"Authorization: Basic WxhZGRpVuc2VzYW1lbjYp12vcG\" http://localhost:8080/.", + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", "line_num": 8, "path": "./tests/samples/auth_n.template", "info": "", "variable": "Authorization", "variable_start": 9, "variable_end": 22, - "value": "WxhZGRpVuc2VzYW1lbjYp12vcG", + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", "value_start": 30, - "value_end": 56, - "entropy": 4.08506 + "value_end": 58, + "entropy": 4.20897 + } + ] + }, + { + "rule": "Basic Authorization", + "severity": "medium", + "confidence": "strong", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==\" http://localhost:8080/.", + "line_num": 8, + "path": "./tests/samples/auth_n.template", + "info": "", + "variable": "Basic", + "variable_start": 24, + "variable_end": 29, + "value": "R2hyZG5oYzpycWVpIGVuZ2xiZg==", + "value_start": 30, + "value_end": 58, + "entropy": 4.20897 } ] }, @@ -314,6 +356,27 @@ } ] }, + { + "rule": "Bearer Authorization", + "severity": "medium", + "confidence": "moderate", + "ml_probability": null, + "line_data_list": [ + { + "line": "curl -H \"Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj\" http://localhost:8080/.", + "line_num": 9, + "path": "./tests/samples/auth_n.template", + "info": "", + "variable": "Bearer", + "variable_start": 24, + "variable_end": 30, + "value": "eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj", + "value_start": 31, + "value_end": 65, + "entropy": 4.53585 
+ } + ] + }, { "rule": "AWS Client ID", "severity": "high", diff --git a/tests/filters/test_value_basic_auth_check.py b/tests/filters/test_value_basic_auth_check.py new file mode 100644 index 000000000..997200ce1 --- /dev/null +++ b/tests/filters/test_value_basic_auth_check.py @@ -0,0 +1,41 @@ +import re +import unittest + +from credsweeper.credentials.line_data import LineData +from credsweeper.filters.value_basic_auth_check import ValueBasicAuthCheck +from tests.filters.conftest import DUMMY_ANALYSIS_TARGET + + +class TestValueBasicAuthCheck(unittest.TestCase): + + def test_value_check_n(self) -> None: + for value in [ + "VGhlVXNlcjtUaGVQYXM1dzByZA==", # + "Programming_Language", # + "OjEyMzQ1Njc4", # + "MTIzNDU6Njc4", # + ]: + line_data = LineData(config=None, + path="dummy", + file_type="", + line=value, + info="", + line_num=1, + line_pos=0, + pattern=re.compile(fr"(?P<value>{value})")) + self.assertTrue(ValueBasicAuthCheck().run(line_data, DUMMY_ANALYSIS_TARGET), value) + + def test_value_check_p(self) -> None: + for value in [ + "MToxMjM0NTY3", # + "VGhlVXNlcjpUaGVQYXM1dzByZA==", # + ]: + line_data = LineData(config=None, + path="dummy", + file_type="", + line=value, + info="", + line_num=1, + line_pos=0, + pattern=re.compile(fr"(?P<value>{value})")) + self.assertFalse(ValueBasicAuthCheck().run(line_data, DUMMY_ANALYSIS_TARGET), value) diff --git a/tests/samples/auth_n.template b/tests/samples/auth_n.template index 7d2418f51..e95cf1aa9 100644 --- a/tests/samples/auth_n.template +++ b/tests/samples/auth_n.template @@ -5,5 +5,5 @@ authors_info = "Nobody John Doe" AUTH_TOKEN= -curl -H "Authorization: Basic WxhZGRpVuc2VzYW1lbjYp12vcG" http://localhost:8080/. +curl -H "Authorization: Basic R2hyZG5oYzpycWVpIGVuZ2xiZg==" http://localhost:8080/. curl -H "Authorization: Bearer eyJGRpVu1c2VzY2-823r_db32hbf4W1lbj" http://localhost:8080/.