diff --git a/examples/config/patterns.yml b/examples/config/patterns.yml index 866794e..7f166ce 100644 --- a/examples/config/patterns.yml +++ b/examples/config/patterns.yml @@ -3,7 +3,7 @@ name: Configuration Secrets patterns: - name: Django Secret Key - type: django_secret_key + type: test_django_secret_key_test regex: version: 0.1 # required @@ -38,5 +38,5 @@ patterns: # optional, defaults to -1 (the end of the file) end_offset: 48 - name: django_key.txt - start_offset: 49 + start_offset: 60 end_offset: 97 diff --git a/examples/update_custom_patterns_readme.sh b/examples/update_custom_patterns_readme.sh index 755b1d7..a14f869 100755 --- a/examples/update_custom_patterns_readme.sh +++ b/examples/update_custom_patterns_readme.sh @@ -1,11 +1,15 @@ #!/bin/bash -GITHUB_REPOSITORY="{$GITHUB_REPOSITORY:-advanced-security/secret-scanning-custom-patterns}" +SCRIPT_PATH="$(dirname -- "${BASH_SOURCE[0]}")" + +GITHUB_REPOSITORY="${GITHUB_REPOSITORY:-advanced-security/secret-scanning-custom-patterns}" CUSTOM_PATTERNS_PATH="${CUSTOM_PATTERNS_PATH:-$HOME/secret-scanning-custom-patterns}" +cd "${SCRIPT_PATH}"/.. || exit 1 pipenv run markdown --github-repository "${GITHUB_REPOSITORY}" -p "${CUSTOM_PATTERNS_PATH}" cd "${CUSTOM_PATTERNS_PATH}" || exit 1 find . 
-type f -name 'README.md' -exec git add {} \; git commit -S -m "Updated README.md" git push + diff --git a/secretscanning/combine.py b/secretscanning/combine.py index e762b6f..5154994 100755 --- a/secretscanning/combine.py +++ b/secretscanning/combine.py @@ -5,6 +5,7 @@ config files into one for eaasy upload using the Field browser extension """ +import fnmatch import yaml import json import logging @@ -12,7 +13,7 @@ import sys import argparse from pathlib import Path -from typing import Any +from typing import Any, Dict, List, Optional LOG = logging.getLogger(__name__) @@ -25,6 +26,28 @@ def add_args(parser: argparse.ArgumentParser) -> None: parser.add_argument( "input_dir", help="Directory with custom pattern config files in YAML format" ) + parser.add_argument( + "--exclude-type", type=str, nargs="+", help="Exclude patterns with a 'type' with these globs" + ) + parser.add_argument( + "--exclude-name", type=str, nargs="+", help="Exclude patterns with a 'name' with these globs" + ) + parser.add_argument( + "--include-type", type=str, nargs="+", help="Include patterns with a 'type' with these globs" + ) + parser.add_argument( + "--include-name", type=str, nargs="+", help="Include patterns with a 'name' with these globs" + ) + + +def glob_match(field: str, exclude: Optional[List[str]]) -> bool: + """Return True if field matches any glob in the list (include or exclude globs) via fnmatch; False for None or an empty list.""" + if not exclude: + return False + for pattern in exclude: + if fnmatch.fnmatch(field, pattern): + return True + return False def main() -> None: @@ -38,7 +61,9 @@ def main() -> None: if args.debug: LOG.setLevel(logging.DEBUG) - patterns = [] + LOG.debug(args.include_name) + + patterns: List[Dict[str, Any]] = [] # find patterns.yml in directory by walking it for root, dirs, filenames in os.walk(args.input_dir): @@ -51,7 +76,41 @@ def main() -> None: data = yaml.safe_load(f) if "patterns" in data: - patterns.extend(data["patterns"]) + for pattern in data["patterns"]: + include = True + if 
args.include_name is not None or args.include_type is not None: + include = False + if "name" in pattern and args.include_name is not None: + name = pattern.get("name", None) + if glob_match(name, args.include_name): + include = True + else: + LOG.debug("Excluding pattern named: %s", name) + if "type" in pattern and args.include_type is not None: + type_ = pattern.get("type", None) + if glob_match(type_, args.include_type): + include = True + else: + LOG.debug("Excluding pattern 'type': %s", type_) + if "type" in pattern and args.exclude_type is not None: + type_ = pattern.get("type", None) + if not glob_match(type_, args.exclude_type): + pass + else: + if include: + include = False + LOG.debug("Excluding pattern 'type': %s", type_) + if "name" in pattern and args.exclude_name is not None: + name = pattern.get("name", None) + if not glob_match(name, args.exclude_name): + pass + else: + if include: + include = False + LOG.debug("Excluding pattern 'name': %s", name) + if include: + patterns.append(pattern) + print(yaml.dump({"name": "Collection of custom patterns", "patterns": patterns})) diff --git a/secretscanning/test.py b/secretscanning/test.py index ec8b665..7726ed8 100755 --- a/secretscanning/test.py +++ b/secretscanning/test.py @@ -91,6 +91,7 @@ def __init__( if self.test_data["end_offset"] == -1: self.test_data["end_offset"] = len(str(self.test_data["data"])) self.test_data["name"] = None + self.test_data["data"] = self.test_data["data"].strip() def regex_string(self) -> bytes: """Concatenate and UTF-8 encode.""" @@ -547,19 +548,28 @@ def test_patterns( pattern_results = RESULTS.get(pattern.name, []) if len(pattern_results) > 1: + LOG.error("❌ matched more than once on test data on '%s' in '%s'", pattern.type, rel_dirpath) + for res in pattern_results: + LOG.error("%s%s%s", res['groups']['start'], Fore.RED + res['groups']['pattern'] + Style.RESET_ALL, res['groups']['end']) + ok_test = False + elif len(pattern_results) == 0: + LOG.error("❌ no matches on test 
data on '%s' in '%s'", pattern.type, rel_dirpath) ok_test = False else: - # did we match what we expected? - if len(pattern_results) == 0 or not path_offsets_match( - pattern.test_data, pattern_results[0].get("file", {}) - ): + result = pattern_results[0].get("file", {}) + if not path_offsets_match(pattern.test_data, result): LOG.error( - "❌ did not match test data for '%s': '%s':%d-%d ", + "❌ did not match test data for '%s': '%s':%d-%d", pattern.type, pattern.test_data["data"], pattern.test_data["start_offset"], pattern.test_data["end_offset"], ) + LOG.error( + "❌ matched: %d-%d", + result.get("start_offset", -1), + result.get("end_offset", -1), + ) ok_test = False # did we match anything unexpected? @@ -592,9 +602,11 @@ def test_patterns( result_data.get("start_offset", -1), result_data.get("end_offset", -1), ) + LOG.debug("❌ Matched unexpected test data for: %s", pattern.type) ok_test = False if not ok_test: + LOG.debug("❌ Test OK flag set to False for %s:", pattern.type) ret = False else: @@ -618,8 +630,12 @@ def test_patterns( for filename in [f for f in filenames if f not in FILENAME_EXCLUDES]: path = (Path(dirpath) / filename).relative_to(tests_path) with (Path(tests_path) / path).resolve().open("rb") as f: + LOG.debug("Scanning file %s/%s", tests_path, path) + content = f.read() + LOG.debug(content) + # sideffect: writes to global RESULTS scan( db, @@ -640,6 +656,7 @@ def test_patterns( if pattern.expected: for expected in pattern.expected: pattern_results = RESULTS.get(pattern.name, []) + LOG.debug("Pattern results: %s", pattern_results) if not any( [ path_offsets_match(expected, result.get("file", {})) @@ -731,12 +748,14 @@ def test_patterns( err, ) + LOG.debug("❌ Matched an unexpected result for %s", pattern.type) ok = False if ok and not quiet: LOG.info("✅ '%s' in '%s'", pattern.type, rel_dirpath) if not ok: + LOG.debug("❌ ok flag set to False for %s", pattern.type) ret = False else: @@ -1237,6 +1256,7 @@ def main() -> None: ) and not args.continue_on_fail ): + 
LOG.debug("Testing patterns returned False") exit(1) db, patterns = build_hyperscan_patterns(