Skip to content

Commit 92382c4

Browse files
authored
Dynamic length of filter pattern (#732)
* variable pattern length for the filter
* style
* BM
* BM fix
* Update benchmark.txt
* [skip actions] [kewordgo] 2025-07-12T10:37:15+03:00
* refactoring
* Optional config for filter but required for group
* style & linters fix
* BM scores fix
* ValueLengthCheck
* Config optimization in group
* Dropbox token fix https://www.dropbox.com/developers/documentation/http/documentation
* fix
1 parent 200ab5b commit 92382c4

File tree

77 files changed

+2807
-735
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+2807
-735
lines changed

.ci/benchmark.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ FileType FileNumber ValidLines Positives Negatives
233233
.zsh 6 872 12
234234
.zsh-theme 1 97 1
235235
TOTAL: 11478 16703140 16104 50313
236-
credsweeper result_cnt : 15725, lost_cnt : 0, true_cnt : 15469, false_cnt : 256
236+
credsweeper result_cnt : 15812, lost_cnt : 0, true_cnt : 15521, false_cnt : 291
237237
Rules Positives Negatives Reported TP FP TN FN FPR FNR ACC PRC RCL F1
238238
------------------------------ ----------- ----------- ---------- ----- ---- ----- ---- -------- -------- -------- -------- -------- --------
239239
API 246 3361 235 234 1 3360 12 0.000298 0.048780 0.996396 0.995745 0.951220 0.972973
@@ -242,7 +242,7 @@ AWS Multi 82 10 34 34
242242
AWS S3 Bucket 67 23 92 67 23 0 0 1.000000 0.000000 0.744444 0.744444 1.000000 0.853503
243243
Akamai Credentials 6 2 6 6 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
244244
Atlassian Old PAT token 5 8 11 5 6 2 0 0.750000 0.000000 0.538462 0.454545 1.000000 0.625000
245-
Auth 1094 2837 1064 1055 9 2828 39 0.003172 0.035649 0.987789 0.991541 0.964351 0.977757
245+
Auth 1094 2837 1081 1072 9 2828 22 0.003172 0.020110 0.992114 0.991674 0.979890 0.985747
246246
Azure Access Token 21 0 13 13 0 0 8 0.380952 0.619048 1.000000 0.619048 0.764706
247247
BASE64 Private Key 22 4 22 22 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
248248
BASE64 encoded PEM Private Key 12 0 12 12 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
@@ -251,7 +251,7 @@ Bearer Authorization 165 0 165 165
251251
Bitbucket Client ID 36 66 42 25 16 50 11 0.242424 0.305556 0.735294 0.609756 0.694444 0.649351
252252
Bitbucket Client Secret 38 105 86 27 58 47 11 0.552381 0.289474 0.517483 0.317647 0.710526 0.439024
253253
CMD ConvertTo-SecureString 13 4 13 13 0 4 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
254-
CMD Password 29 137 27 27 0 137 2 0.000000 0.068966 0.987952 1.000000 0.931034 0.964286
254+
CMD Password 29 137 29 29 0 137 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
255255
CMD Secret 1 17 1 1 0 17 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
256256
CMD Token 6 2 5 5 0 2 1 0.000000 0.166667 0.875000 1.000000 0.833333 0.909091
257257
Credential 99 498 100 99 1 497 0 0.002008 0.000000 0.998325 0.990000 1.000000 0.994975
@@ -270,21 +270,21 @@ JSON Web Token 148 61 141 141
270270
JWK 55 0 55 55 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
271271
Jira / Confluence PAT token 0 4 0 0 4 0 0.000000 1.000000
272272
Jira 2FA 36 2 31 30 1 1 6 0.500000 0.166667 0.815789 0.967742 0.833333 0.895522
273-
Key 4195 16294 4217 4154 63 16231 41 0.003866 0.009774 0.994924 0.985060 0.990226 0.987637
274-
Nonce 115 50 111 111 0 50 4 0.000000 0.034783 0.975758 1.000000 0.965217 0.982301
273+
Key 4195 16294 4257 4168 89 16205 27 0.005462 0.006436 0.994338 0.979093 0.993564 0.986275
274+
Nonce 115 50 113 113 0 50 2 0.000000 0.017391 0.987879 1.000000 0.982609 0.991228
275275
Other 9 7444 0 0 7444 9 0.000000 1.000000 0.998792 0.000000
276276
PEM Private Key 1142 76 1146 1142 4 72 0 0.052632 0.000000 0.996716 0.996510 1.000000 0.998252
277-
Password 2513 9954 2456 2432 24 9930 81 0.002411 0.032232 0.991578 0.990228 0.967768 0.978869
277+
Password 2513 9954 2462 2433 29 9925 80 0.002913 0.031834 0.991257 0.988221 0.968166 0.978090
278278
SQL Password 44 14 41 41 0 14 3 0.000000 0.068182 0.948276 1.000000 0.931818 0.964706
279279
Salesforce Credentials 6 0 5 5 0 0 1 0.166667 0.833333 1.000000 0.833333 0.909091
280280
Salt 83 75 80 80 0 75 3 0.000000 0.036145 0.981013 1.000000 0.963855 0.981595
281-
Secret 1501 2378 1497 1488 9 2369 13 0.003785 0.008661 0.994328 0.993988 0.991339 0.992662
281+
Secret 1501 2378 1500 1491 9 2369 10 0.003785 0.006662 0.995102 0.994000 0.993338 0.993669
282282
Seed 1 6 0 0 6 1 0.000000 1.000000 0.857143 0.000000
283283
Slack Token 4 1 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
284284
Stripe Credentials 2 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
285285
Tencent WeChat API App ID 47 0 47 47 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000
286-
Token 947 4640 862 859 3 4637 88 0.000647 0.092925 0.983712 0.996520 0.907075 0.949696
286+
Token 947 4640 879 872 7 4633 75 0.001509 0.079197 0.985323 0.992036 0.920803 0.955093
287287
Twilio Credentials 30 39 30 30 0 39 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
288288
URL Credentials 229 361 229 229 0 361 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000
289289
UUID 1866 265 1849 1848 1 264 18 0.003774 0.009646 0.991084 0.999459 0.990354 0.994886
290-
16104 50313 15734 15469 256 50057 635 0.005088 0.039431 0.986585 0.983720 0.960569 0.972007
290+
16104 50313 15821 15521 291 50022 583 0.005784 0.036202 0.986841 0.981596 0.963798 0.972616

credsweeper/common/keyword_pattern.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,22 @@
44
class KeywordPattern:
55
"""Pattern set of keyword types"""
66
directive = r"(?P<directive>(?:(?:[#%]define|%global)(?:\s|\\t)|\bset))?"
7-
key_left = r"(?:\\[nrt]|%[0-9a-f]{2}|\s)*" \
7+
key_left = r"(?:\\[nrt]|(\\\\*u00|%)[0-9a-f]{2}|\s)*" \
88
r"(?P<variable>(([`'\"]{1,8}[^:='\"`}<>\\/&?]*|[^:='\"`}<>\s()\\/&?;,%]*)" \
99
r"(?P<keyword>"
1010
# there will be inserted a keyword
1111
key_right = r")" \
1212
r"[^%:='\"`<>({?!&;\n]*" \
1313
r")" \
14-
r"(&(quot|apos);|%[0-9a-f]{2}|[`'\"])*" \
14+
r"(&(quot|apos);|(\\\\*u00|%)[0-9a-f]{2}|[`'\"])*" \
1515
r")" # <variable>
1616
separator = r"(?(directive)|(\s|\\{1,8}[tnr])*\]?(\s|\\{1,8}[tnr])*)" \
1717
r"(?P<separator>:(\s[a-z]{3,9}[?]?\s)?=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=" \
1818
r"|(?(directive)(\\t|\s|\((?!\))){1,80}|%3d))" \
1919
r"(\s|\\{1,8}[tnr])*"
2020
# might be curly, square or parenthesis with words before
2121
wrap = r"(?P<wrap>(" \
22-
r"(new(\s|\\{1,8}[tnr]|byte|char|string|\[\]){1,8})?" \
22+
r"((\s|\\{1,8}[tnr]|new|byte|char|string|\[\]){1,8})?" \
2323
r"(?P<get>([_a-z][0-9a-z_.\[\]]*\.)get|(os\.)?getenv)?" \
2424
r"([0-9a-z_.]|::|-(>|&gt;))*" \
2525
r"\s*" \

credsweeper/config/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def __init__(self, config: Dict[str, Any]) -> None:
3939
self.doc: bool = config["doc"]
4040
self.severity: Severity = Severity.get(config.get("severity"))
4141

42-
self.min_keyword_value_length: int = int(config["min_keyword_value_length"])
43-
self.min_pattern_value_length: int = int(config["min_pattern_value_length"])
42+
self.max_url_cred_value_length: int = int(config["max_url_cred_value_length"])
43+
self.max_password_value_length: int = int(config["max_password_value_length"])
4444

4545
# Trim exclude patterns from space like characters
4646
self.exclude_lines = set(line.strip() for line in self.exclude_lines)

credsweeper/filters/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from credsweeper.filters.value_camel_case_check import ValueCamelCaseCheck
1616
from credsweeper.filters.value_couple_keyword_check import ValueCoupleKeywordCheck
1717
from credsweeper.filters.value_dictionary_keyword_check import ValueDictionaryKeywordCheck
18-
from credsweeper.filters.value_dictionary_value_length_check import ValueDictionaryValueLengthCheck
1918
from credsweeper.filters.value_discord_bot_check import ValueDiscordBotCheck
2019
from credsweeper.filters.value_entropy_base32_check import ValueEntropyBase32Check
2120
from credsweeper.filters.value_entropy_base36_check import ValueEntropyBase36Check
@@ -29,6 +28,7 @@
2928
from credsweeper.filters.value_json_web_key_check import ValueJsonWebKeyCheck
3029
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
3130
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
31+
from credsweeper.filters.value_length_check import ValueLengthCheck
3232
from credsweeper.filters.value_method_check import ValueMethodCheck
3333
from credsweeper.filters.value_not_allowed_pattern_check import ValueNotAllowedPatternCheck
3434
from credsweeper.filters.value_not_part_encoded_check import ValueNotPartEncodedCheck

credsweeper/filters/filter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from abc import abstractmethod, ABC
2+
from typing import Optional
23

34
from credsweeper.config.config import Config
45
from credsweeper.credentials.line_data import LineData
@@ -9,7 +10,8 @@ class Filter(ABC):
910
"""Base class for all filters that operates on 'line_data' objects."""
1011

1112
@abstractmethod
12-
def __init__(self, config: Config, *args):
13+
def __init__(self, config: Optional[Config], *args):
14+
"""Config is optional for a filter"""
1315
raise NotImplementedError()
1416

1517
@abstractmethod

credsweeper/filters/group/group.py

Lines changed: 22 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,31 @@ class Group(ABC):
2424
"""Abstract Group class"""
2525

2626
def __init__(self, config: Config, rule_type: GroupType = GroupType.DEFAULT) -> None:
27+
"""Config is required for filter group"""
2728
if rule_type == GroupType.KEYWORD:
28-
self.filters: List[Filter] = self.get_keyword_base_filters(config)
29+
self.__filters = [ #
30+
ValueAllowlistCheck(), #
31+
ValueArrayDictionaryCheck(), #
32+
ValueBlocklistCheck(), #
33+
ValueCamelCaseCheck(), #
34+
ValueFilePathCheck(), #
35+
ValueHexNumberCheck(), #
36+
ValueLastWordCheck(), #
37+
ValueMethodCheck(), #
38+
ValueSimilarityCheck(), #
39+
ValueStringTypeCheck(check_for_literals=config.check_for_literals), #
40+
ValueTokenCheck(), #
41+
]
42+
if not config.doc:
43+
self.__filters.extend([ValuePatternCheck(), ValueNotAllowedPatternCheck()])
2944
elif rule_type == GroupType.PATTERN:
30-
self.filters: List[Filter] = self.get_pattern_base_filters(config)
45+
self.__filters = [ #
46+
LineSpecificKeyCheck(), #
47+
ValuePatternCheck(), #
48+
]
3149
else:
32-
self.filters: List[Filter] = []
50+
# GroupType.DEFAULT
51+
self.__filters = []
3352

3453
@property
3554
def filters(self) -> List[Filter]:
@@ -40,31 +59,3 @@ def filters(self) -> List[Filter]:
4059
def filters(self, filters: List[Filter]) -> None:
4160
"""property setter"""
4261
self.__filters = filters
43-
44-
@staticmethod
45-
def get_keyword_base_filters(config: Config) -> List[Filter]:
46-
"""returns base filters"""
47-
filters = [ #
48-
ValueAllowlistCheck(),
49-
ValueArrayDictionaryCheck(),
50-
ValueBlocklistCheck(),
51-
ValueCamelCaseCheck(),
52-
ValueFilePathCheck(),
53-
ValueHexNumberCheck(),
54-
ValueLastWordCheck(),
55-
ValueMethodCheck(),
56-
ValueSimilarityCheck(),
57-
ValueStringTypeCheck(config),
58-
ValueTokenCheck(),
59-
]
60-
if not config.doc:
61-
filters.extend([ValuePatternCheck(pattern_len=config.pattern_len), ValueNotAllowedPatternCheck()])
62-
return filters
63-
64-
@staticmethod
65-
def get_pattern_base_filters(config: Config) -> List[Filter]:
66-
"""return base filters for pattern"""
67-
return [ #
68-
LineSpecificKeyCheck(), #
69-
ValuePatternCheck(pattern_len=config.pattern_len), #
70-
]
credsweeper/filters/group/password_keyword.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from credsweeper.common.constants import GroupType
22
from credsweeper.config.config import Config
3-
from credsweeper.filters import ValueDictionaryValueLengthCheck, LineGitBinaryCheck
3+
from credsweeper.filters import ValueLengthCheck, LineGitBinaryCheck
44
from credsweeper.filters import ValueSplitKeywordCheck
55
from credsweeper.filters.group.group import Group
66
from credsweeper.filters.line_uue_part_check import LineUUEPartCheck
@@ -11,8 +11,9 @@ class PasswordKeyword(Group):
1111

1212
def __init__(self, config: Config) -> None:
1313
super().__init__(config, GroupType.KEYWORD)
14-
self.filters.extend(
15-
[ValueDictionaryValueLengthCheck(),
16-
ValueSplitKeywordCheck(),
17-
LineGitBinaryCheck(),
18-
LineUUEPartCheck()])
14+
self.filters.extend([
15+
ValueLengthCheck(max_len=config.max_password_value_length),
16+
ValueSplitKeywordCheck(),
17+
LineGitBinaryCheck(),
18+
LineUUEPartCheck()
19+
])

credsweeper/filters/group/token_pattern.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ def __init__(self, config: Config) -> None:
1313
ValueCoupleKeywordCheck(),
1414
ValueNumberCheck(),
1515
ValueCamelCaseCheck(),
16-
ValuePatternCheck(pattern_len=config.pattern_len)
16+
ValuePatternCheck(),
1717
]

credsweeper/filters/group/url_credentials_group.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from credsweeper.common.constants import GroupType
22
from credsweeper.config.config import Config
33
from credsweeper.filters import (ValueAllowlistCheck, ValueArrayDictionaryCheck, ValueBlocklistCheck,
4-
ValueCamelCaseCheck, ValueDictionaryValueLengthCheck, ValueFilePathCheck,
5-
ValueLastWordCheck, ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck,
6-
ValueStringTypeCheck, ValueTokenCheck)
4+
ValueCamelCaseCheck, ValueLengthCheck, ValueFilePathCheck, ValueLastWordCheck,
5+
ValueMethodCheck, ValueNotAllowedPatternCheck, ValuePatternCheck, ValueStringTypeCheck,
6+
ValueTokenCheck)
77
from credsweeper.filters.group.group import Group
88

99

@@ -25,9 +25,9 @@ def __init__(self, config: Config) -> None:
2525
ValueFilePathCheck(),
2626
ValueLastWordCheck(),
2727
ValueMethodCheck(),
28-
ValueStringTypeCheck(config),
28+
ValueStringTypeCheck(check_for_literals=config.check_for_literals),
2929
ValueNotAllowedPatternCheck(),
3030
ValueTokenCheck(),
31-
ValueDictionaryValueLengthCheck(min_len=4, max_len=80),
32-
ValuePatternCheck(pattern_len=config.pattern_len)
31+
ValueLengthCheck(max_len=config.max_url_cred_value_length),
32+
ValuePatternCheck()
3333
]

credsweeper/filters/group/weird_base36_token.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def __init__(self, config: Config) -> None:
1212
super().__init__(config, GroupType.DEFAULT)
1313
self.filters = [
1414
ValueCoupleKeywordCheck(),
15-
ValuePatternCheck(config),
15+
ValuePatternCheck(),
1616
ValueNumberCheck(),
1717
ValueTokenBase36Check(),
1818
ValueEntropyBase36Check()

0 commit comments

Comments
 (0)