diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 21f41ffd..afb7d9ae 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -20,4 +20,4 @@ jobs: python-version: "3.8" - run: pip install -r dev-requirements.txt - name: Run Pre-Commit - run: pre-commit run --all-files + run: pre-commit run --all-files --config .pre-commit-config-remote.yaml diff --git a/.pre-commit-config-remote.yaml b/.pre-commit-config-remote.yaml new file mode 100644 index 00000000..85afa870 --- /dev/null +++ b/.pre-commit-config-remote.yaml @@ -0,0 +1,36 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: check-added-large-files +- repo: local + hooks: + - id: nb-clean + name: nb-clean + description: "Clean Jupyter notebooks of outputs, metadata, and empty cells, with Git integration" + entry: tox -qqq run -e nb-clean -- clean + require_serial: true + language: python + types_or: [jupyter] + minimum_pre_commit_version: 2.9.2 + - id: ruff + name: ruff + description: "Run 'ruff' for extremely fast Python linting" + entry: tox -qqq run -e ruff -- check --force-exclude + language: python + types_or: [python, pyi, jupyter] + require_serial: true + additional_dependencies: [] + minimum_pre_commit_version: "2.9.2" + args: ["--fix", "--exit-non-zero-on-fix", "--exclude", "detect_azure_secrets.py"] + - id: black + name: black + description: "Black: The uncompromising Python code formatter" + minimum_pre_commit_version: 2.9.2 + require_serial: true + types_or: [python, pyi, jupyter] + entry: python + language: system + args: ["-m", "tox", "-qqq", "run", "-e", "black", "--"] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9002a2da..f310d1db 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: require_serial: true 
"""Pre-commit hook that scans files for hard-coded Azure configuration secrets.

The hook is run as ``python detect_azure_secrets.py <file> [<file> ...]``.
Files ending in ``.py``, ``.yaml``, ``.yml`` or ``.md`` are scanned line by
line; ``.ipynb`` files are parsed as JSON and the code cells are scanned.
Every offending line is printed and the process exits with status 1 when at
least one file fails the check.
"""
import json
import os
import re
import sys
from typing import Union


# Each pattern is applied to one line of text at a time with ``search``.
SECRET_PATTERNS = [
    re.compile(r'[\'"]?subscription_id[\'"]?\s*[:=]\s*[\'"][0-9a-f\-]{36}[\'"]', re.IGNORECASE),
    re.compile(r'[\'"]?resource_group_name[\'"]?\s*[:=]\s*[\'"][a-zA-Z0-9\-_]+[\'"]', re.IGNORECASE),
    re.compile(r'[\'"]?project_name[\'"]?\s*[:=]\s*[\'"][a-zA-Z0-9\-_]+[\'"]', re.IGNORECASE),
    re.compile(r'[\'"]?api_key[\'"]?\s*[:=]\s*[\'"][A-Za-z0-9\-_]{40,}[\'"]', re.IGNORECASE),
    re.compile(
        r'[\'"]?azure_endpoint[\'"]?\s*[:=]\s*[\'"]https:\/\/[a-zA-Z0-9\-\.]+\.azure\.com[\/a-zA-Z0-9\.\-]*[\'"]',
        re.IGNORECASE,
    ),
    re.compile(r'export\s+[A-Z_][A-Z0-9_]*\s*=\s*["\'][^"\']+["\']', re.IGNORECASE),
    re.compile(r'os\.environ\["\s*[A-Za-z0-9_]*(API_KEY|ENDPOINT)[A-Za-z0-9_]*\s*"\]', re.IGNORECASE),
]

# Suffixes of plain-text files that main() scans line by line.
_TEXT_SUFFIXES = (".py", ".yaml", ".yml", ".md")


def _line_has_secret(line: str) -> bool:
    """Return True when *line* matches at least one entry of SECRET_PATTERNS."""
    return any(pattern.search(line) for pattern in SECRET_PATTERNS)


def check_ipynb_for_secrets(filename: Union[str, os.PathLike]) -> bool:
    """Scan the code cells of a Jupyter notebook for secrets.

    The notebook is loaded as JSON and every line of every ``code`` cell is
    tested against ``SECRET_PATTERNS``. Each offending line is printed with
    its 1-based position inside its cell.

    Args:
        filename: Path of the ``.ipynb`` file to scan.

    Returns:
        True when a secret was found or the notebook could not be decoded or
        parsed; False when the notebook was scanned and is clean.
    """
    try:
        with open(filename, "r", encoding="utf-8") as file:
            notebook_data = json.load(file)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        # NOTE(review): the message says the check is skipped, yet True makes
        # the hook fail, whereas main() does not fail on undecodable text
        # files. Behaviour kept as-is; confirm which outcome is intended.
        print(f"Failed to read {filename}. Skipping secrets check. Error: {e}")
        return True

    failed = False
    for cell in notebook_data.get("cells", []):
        if cell.get("cell_type") != "code":
            continue
        source = cell.get("source", [])
        # The notebook format allows "source" to be one string instead of a
        # list of lines; iterating that string directly would yield single
        # characters and no pattern could ever match.
        if isinstance(source, str):
            source = source.splitlines()
        for line_number, line in enumerate(source, start=1):
            if _line_has_secret(line):
                print(f"Secret detected in {filename} on line {line_number}: {line.strip()}")
                failed = True
    return failed


def main():
    """Scan every file named on the command line and exit 1 on any finding.

    Paths are taken from ``sys.argv[1:]``. Files with an unrecognised suffix
    are ignored. Text files that cannot be decoded as UTF-8 are reported and
    skipped without failing the run.

    Raises:
        SystemExit: With code 1 when at least one file failed the check.
    """
    failed = False

    for filename in sys.argv[1:]:
        if filename.endswith(_TEXT_SUFFIXES):
            try:
                with open(filename, "r", encoding="utf-8") as file:
                    for line_number, line in enumerate(file, start=1):
                        if _line_has_secret(line):
                            print(f"Secret detected in {filename} on line {line_number}: {line.strip()}")
                            failed = True
            except UnicodeDecodeError:
                print(f"Failed to read {filename}. Skipping secrets check.")
        elif filename.endswith(".ipynb"):
            if check_ipynb_for_secrets(filename):
                failed = True

    if failed:
        sys.exit(1)


if __name__ == "__main__":
    main()