Samsung
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
Lines changed: 1 addition & 1 deletion
diff --git a/credsweeper/__init__.py b/credsweeper/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/credsweeper/deep_scanner/png_scanner.py b/credsweeper/deep_scanner/png_scanner.py
Lines changed: 1 addition & 1 deletion
diff --git a/credsweeper/file_handler/data_content_provider.py b/credsweeper/file_handler/data_content_provider.py
Lines changed: 1 addition & 1 deletion
diff --git a/credsweeper/ml_model/features/entropy_evaluation.py b/credsweeper/ml_model/features/entropy_evaluation.py
Lines changed: 9 additions & 12 deletions
diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml
Lines changed: 27 additions & 4 deletions
diff --git a/credsweeper/scanner/scan_type/multi_pattern.py b/credsweeper/scanner/scan_type/multi_pattern.py
Lines changed: 3 additions & 2 deletions
diff --git a/credsweeper/scanner/scan_type/pem_key_pattern.py b/credsweeper/scanner/scan_type/pem_key_pattern.py
Lines changed: 3 additions & 2 deletions
diff --git a/credsweeper/scanner/scan_type/single_pattern.py b/credsweeper/scanner/scan_type/single_pattern.py
Lines changed: 5 additions & 0 deletions
diff --git a/credsweeper/utils/util.py b/credsweeper/utils/util.py
Lines changed: 5 additions & 5 deletions
@@ -92,7 +92,7 @@ jobs:
         run: |
           banner="$(python -m credsweeper --banner | head -1)"
           echo "banner = '${banner}'"
-          if [ "CredSweeper 1.14.5 crc32:da87b2ca" != "${banner}" ]; then
+          if [ "CredSweeper 1.14.6 crc32:765e27c6" != "${banner}" ]; then
             echo "Update the check for '${banner}'"
             exit 1
           fi
 
@@ -24,4 +24,4 @@
     "__version__"
 ]
 
-__version__ = "1.14.5"
+__version__ = "1.14.6"
@@ -57,7 +57,7 @@ def yield_png_chunks(data: bytes) -> Generator[Tuple[int, str, bytes], None, Non
                     else:
                         raise ValueError(f"Unsupported compression {repr(itxt_data[:2])}")
                     lang_tag, itxt_data = itxt_data[2:].split(b'\0', 1)
-                    trans_key, itxt_data = itxt_data[2:].split(b'\0', 1)
+                    trans_key, itxt_data = itxt_data.split(b'\0', 1)
                     yield (offset, f"PNG_ITXT_{'1' if compression else '0'}"
                            f":{keyword.decode(encoding=UTF_8)}"
                            f":{lang_tag.decode(encoding=UTF_8)}"
 
@@ -125,7 +125,7 @@ def represent_as_structure(self) -> Optional[bool]:
         # # # YAML - almost always recognized
         try:
             if ':' in self.text and (2 < self.text.count('\n') or 2 < self.text.count('\r')):
-                self.structure = yaml.load(self.text, Loader=yaml.FullLoader)
+                self.structure = yaml.safe_load(self.text)
                 logger.debug("CONVERTED from yaml")
             else:
                 logger.debug("Data do not contain colon mark - weak YAML")
 
@@ -20,26 +20,23 @@ class EntropyEvaluation(Feature):
 
     """
 
-    def __init__(self) -> None:
-        """Class initializer"""
-        super().__init__()
-        # Max size of ML analyzed value is ML_HUNK but value may be bigger
-        self.hunk_size = 4 * ML_HUNK
-        self.log2_cache: Dict[int, float] = {x: math.log2(x) for x in range(4, self.hunk_size + 1)}
-        self.char_sets: List[Set[str]] = [set(x.value) for x in Chars]
+    # Max size of ML analyzed value is ML_HUNK but value may be bigger
+    HUNK_SIZE = 4 * ML_HUNK
+    LOG2_CACHE: Dict[int, float] = {x: math.log2(x) for x in range(4, 4 * ML_HUNK + 1)}
+    CHAR_SET: List[Set[str]] = [set(x.value) for x in Chars]
+    RESULT_SIZE = 3 + len(Chars)
 
     def extract(self, candidate: Candidate) -> np.ndarray:
         """Returns real entropy and possible sets of characters"""
         # only head of value will be analyzed
-        result: np.ndarray = np.zeros(shape=3 + len(self.char_sets), dtype=np.float32)
-        value = candidate.line_data_list[0].value[:self.hunk_size]
+        result: np.ndarray = np.zeros(shape=EntropyEvaluation.RESULT_SIZE, dtype=np.float32)
+        value = candidate.line_data_list[0].value[:EntropyEvaluation.HUNK_SIZE]
         size = len(value)
         uniq, counts = np.unique(list(value), return_counts=True)
         if MIN_DATA_LEN <= size:
             # evaluate the entropy for a value of at least 4
             probabilities = counts / size
-            hartley_entropy = self.log2_cache.get(size, -1.0)
-            assert hartley_entropy, str(candidate)
+            hartley_entropy = EntropyEvaluation.LOG2_CACHE.get(size, -1.0)
 
             # renyi_entropy alpha=0.5
             sum_prob_05 = np.sum(probabilities**0.5)
@@ -59,7 +56,7 @@ def extract(self, candidate: Candidate) -> np.ndarray:
             # check charset for non-zero value
             # use the new variable to deal with mypy
             uniq_set = set(uniq)
-            for n, i in enumerate(self.char_sets, start=3):
+            for n, i in enumerate(EntropyEvaluation.CHAR_SET, start=3):
                 if not uniq_set.difference(i):
                     result[n] = 1.0
 
 
@@ -68,7 +68,7 @@
   confidence: moderate
   type: pattern
   values:
-    - (?P<variable>[\"'`]?(?i:token|secret|key|키|암호화?|토큰)[\"'`]?)((\s)*[=:](\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,80}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
+    - (?P<variable>[\"'`]?(?i:token|secret|key|키|암호화?|토큰)[\"'`]?)((\s)*(?P<separator>설정은|:=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=|%3[Dd])(\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,80}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
   filter_type:
     - ValueAllowlistCheck
     - ValuePatternCheck(4)
@@ -84,13 +84,14 @@
     - 토큰
   target:
     - doc
+  use_ml: true
 
 - name: PASSWD_PAIR
   severity: medium
   confidence: moderate
   type: pattern
   values:
-    - (?P<variable>[\"'`]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[\"'`]?)((\s)*[=:](\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
+    - (?P<variable>[\"'`]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[\"'`]?)((\s)*(?P<separator>설정은|:=|:(?!:)|=(>|&gt;|(\\\\*u00|%)26gt;)|!==|!=|===|==|=~|=|%3[Dd])(\s)*)(?P<quote>[\"'`(])?(?P<value>(?-i:(?P<a>[A-Z])|(?P<b>[a-z])|(?P<c>[0-9/_+=~!@#$%^&*;:?-])){8,64}(?(a)(?(b)(?(c)(\S|$)|(?!x)x)|(?!x)x)|(?!x)x))(?(quote)[)\"'`])
   filter_type:
     - ValueAllowlistCheck
     - ValuePatternCheck(4)
@@ -112,6 +113,7 @@
     - 암호
   target:
     - doc
+  use_ml: true
 
 - name: IP_ID_PASSWORD_TRIPLE
   severity: medium
@@ -128,6 +130,7 @@
     - "."
   target:
     - doc
+  use_ml: true
 
 - name: ID_PAIR_PASSWD_PAIR
   severity: medium
@@ -151,6 +154,7 @@
     - 암호
   target:
     - doc
+  use_ml: true
 
 - name: ID_PASSWD_PAIR
   severity: medium
@@ -173,6 +177,7 @@
     - 암호
   target:
     - doc
+  use_ml: true
 
 - name: UUID
   severity: info
@@ -204,14 +209,30 @@
     - code
     - doc
 
+- name: Amazon Bedrock API Key
+  severity: high
+  confidence: moderate
+  type: pattern
+  values:
+    - (?:^|/|[^\\0-9A-Za-z+_-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>(ABSK|bedrock-api-key-)[0-9A-Za-z/+]{28,800})(?![0-9A-Za-z/+])
+  filter_type: GeneralPattern
+  required_substrings:
+    - ABSK
+    - bedrock-api-key-
+  min_line_len: 44
+  target:
+    - code
+    - doc
+
 - name: AWS Client ID
   severity: high
   confidence: moderate
   type: pattern
   values:
-    - (?:^|/|[^\\0-9A-Za-z+_-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>(ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_+-])
+    - (?:^|/|[^\\0-9A-Za-z+_-]|\\[0abfnrtv]|(?:%|\\x)[0-9A-Fa-f]{2}|\\[0-7]{3}|\\[Uu][0-9A-Fa-f]{4}|\x1B\[[0-9;]{0,80}m)(?P<value>(A3T[0-9A-Z]|ABIA|ACCA|AGPA|AIDA|AIPA|AKIA|ANPA|ANVA|AROA|APKA|ASCA|ASIA)[0-9A-Z]{16,17})(?![0-9A-Za-z_+-])
   filter_type: GeneralPattern
   required_substrings:
+    - A3T
     - ABIA
     - ACCA
     - AGPA
@@ -1000,7 +1021,7 @@
   confidence: strong
   type: pattern
   values:
-    - (?P<value>(_gitlab_session=|GR1348941|gl(agent|soat|ffct|p[at]t|oas|cbt|imt|[dfr]t)-)[0-9A-Za-z_-]{20,64}(\.[0-9A-Za-z_-]{2,16}){0,2})(?![0-9A-Za-z_-])
+    - (?P<value>(_gitlab_session=|GR1348941|gl(agent|soat|ffct|p[at]t|oas|cbt|imt|rtr|[dfrw]t)-)[0-9A-Za-z_-]{20,64}(\.[0-9A-Za-z_-]{2,16}){0,2})(?![0-9A-Za-z_-])
   filter_type:
     - ValuePatternCheck
   min_line_len: 25
@@ -1018,6 +1039,8 @@
     - gldt-
     - glft-
     - glrt-
+    - glrtr-
+    - glwt-
   target:
     - code
     - doc
 
@@ -37,8 +37,9 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida
             Empty list (False) - otherwise.
 
         """
-        assert rule.rule_type == RuleType.MULTI, \
-            "Rules provided to MultiPattern.run should have pattern_type equal to MULTI_PATTERN"
+        if RuleType.MULTI != rule.rule_type:
+            raise ValueError(f"Rule `{rule}` provided to `{cls.__name__}`.run "
+                             f"should have pattern_type equal to `{RuleType.MULTI.value}`")
 
         candidates = cls._get_candidates(config, rule, target)
 
 
@@ -29,8 +29,9 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida
             and filters defined in rule do not remove current line. Empty list - otherwise
 
         """
-        assert rule.rule_type == RuleType.PEM_KEY, \
-            "Rules provided to PemKeyPattern.run should have pattern_type equal to PEM_KEY_PATTERN"
+        if RuleType.PEM_KEY != rule.rule_type:
+            raise ValueError(f"Rule `{rule}` provided to `{cls.__name__}`.run "
+                             f"should have pattern_type equal to `{RuleType.PEM_KEY.value}`")
         if candidates := cls._get_candidates(config, rule, target):
             candidate = candidates[0]
             if pem_lines := PemKeyDetector.detect_pem_key(config, target):
 
@@ -1,5 +1,6 @@
 from typing import List
 
+from credsweeper.common.constants import RuleType
 from credsweeper.config.config import Config
 from credsweeper.credentials.candidate import Candidate
 from credsweeper.file_handler.analysis_target import AnalysisTarget
@@ -25,4 +26,8 @@ def run(cls, config: Config, rule: Rule, target: AnalysisTarget) -> List[Candida
 
         """
 
+        if RuleType.PATTERN != rule.rule_type and RuleType.KEYWORD != rule.rule_type:
+            raise ValueError(f"Rule `{rule}` provided to `{cls.__name__}`.run "
+                             f"should have pattern_type equal to `{RuleType.PATTERN.value}`")
+
         return cls._get_candidates(config, rule, target)
@@ -352,7 +352,7 @@ def extract_element_data(element: Any, attr: str) -> str:
 
     @staticmethod
     def json_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> Any:
-        """Load dictionary from json file"""
+        """Load dictionary from JSON file"""
         try:
             with open(file_path, "r", encoding=encoding) as f:
                 return json.load(f)
@@ -362,7 +362,7 @@ def json_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> Any:
 
     @staticmethod
     def json_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_ENCODING, indent=4) -> None:
-        """Write dictionary to json file"""
+        """Write dictionary to JSON file"""
         try:
             with open(file_path, "w", encoding=encoding) as f:
                 json.dump(obj, f, indent=indent)
@@ -371,17 +371,17 @@ def json_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_ENCODING,
 
     @staticmethod
     def yaml_load(file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> Any:
-        """Load dictionary from yaml file"""
+        """Load dictionary from YAML file"""
         try:
             with open(file_path, "r", encoding=encoding) as f:
-                return yaml.load(f, Loader=yaml.FullLoader)
+                return yaml.safe_load(f)
         except Exception as exc:
             logger.error(f"Failed to read {file_path} {exc}")
         return None
 
     @staticmethod
     def yaml_dump(obj: Any, file_path: Union[str, Path], encoding=DEFAULT_ENCODING) -> None:
-        """Write dictionary to yaml file"""
+        """Write dictionary to YAML file"""
         try:
             with open(file_path, "w", encoding=encoding) as f:
                 yaml.dump(obj, f)
Original file line number	Diff line number	Diff line change
`@@ -24,4 +24,4 @@`
`24`	`24`	`"__version__"`
`25`	`25`	`]`
`26`	`26`
`27`		`-__version__ = "1.14.5"`
	`27`	`+__version__ = "1.14.6"`