Skip to content

Commit 66dc620

Browse files
authored
Add pre-commit hook to check for secrets (#156)
* add hub files from Samples/azure-ai and update README * Add secrets check to pre-commit * Remove unrelated changes * Don't run secrets check on remote, only local check
1 parent 2542c01 commit 66dc620

File tree

4 files changed

+107
-2
lines changed

4 files changed

+107
-2
lines changed

.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ jobs:
2020
python-version: "3.8"
2121
- run: pip install -r dev-requirements.txt
2222
- name: Run Pre-Commit
23-
run: pre-commit run --all-files
23+
run: pre-commit run --all-files --config .pre-commit-config-remote.yaml

.pre-commit-config-remote.yaml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# See https://pre-commit.com for more information
2+
# See https://pre-commit.com/hooks.html for more hooks
3+
repos:
4+
- repo: https://github.com/pre-commit/pre-commit-hooks
5+
rev: v3.2.0
6+
hooks:
7+
- id: check-added-large-files
8+
- repo: local
9+
hooks:
10+
- id: nb-clean
11+
name: nb-clean
12+
description: "Clean Jupyter notebooks of outputs, metadata, and empty cells, with Git integration"
13+
entry: tox -qqq run -e nb-clean -- clean
14+
require_serial: true
15+
language: python
16+
types_or: [jupyter]
17+
minimum_pre_commit_version: 2.9.2
18+
- id: ruff
19+
name: ruff
20+
description: "Run 'ruff' for extremely fast Python linting"
21+
entry: tox -qqq run -e ruff -- check --force-exclude
22+
language: python
23+
types_or: [python, pyi, jupyter]
24+
require_serial: true
25+
additional_dependencies: []
26+
minimum_pre_commit_version: "2.9.2"
27+
args: ["--fix", "--exit-non-zero-on-fix", "--exclude", "detect_azure_secrets.py"]
28+
- id: black
29+
name: black
30+
description: "Black: The uncompromising Python code formatter"
31+
minimum_pre_commit_version: 2.9.2
32+
require_serial: true
33+
types_or: [python, pyi, jupyter]
34+
entry: python
35+
language: system
36+
args: ["-m", "tox", "-qqq", "run", "-e", "black", "--"]

.pre-commit-config.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ repos:
2424
require_serial: true
2525
additional_dependencies: []
2626
minimum_pre_commit_version: "2.9.2"
27-
args: ["--fix", "--exit-non-zero-on-fix"]
27+
args: ["--fix", "--exit-non-zero-on-fix", "--exclude", "detect_azure_secrets.py"]
2828
- id: black
2929
name: black
3030
description: "Black: The uncompromising Python code formatter"
@@ -34,3 +34,8 @@ repos:
3434
entry: python
3535
language: system
3636
args: ["-m", "tox", "-qqq", "run", "-e", "black", "--"]
37+
- id: detect-azure-secrets-custom
38+
name: Detect Azure Secrets
39+
entry: python detect_azure_secrets.py
40+
language: python
41+
types: [file]

detect_azure_secrets.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import re
2+
import sys
3+
import json
4+
import os
5+
from typing import Union
6+
7+
8+
# Compiled regular expressions, each describing one kind of line that the
# secrets check reports. All of them are case-insensitive and are applied to a
# single line of text at a time.
SECRET_PATTERNS = [
    # subscription_id assigned a quoted 36-character value made of hex digits and dashes.
    re.compile(r'[\'"]?subscription_id[\'"]?\s*[:=]\s*[\'"][0-9a-f\-]{36}[\'"]', re.IGNORECASE),
    # resource_group_name assigned any quoted identifier-like value.
    re.compile(r'[\'"]?resource_group_name[\'"]?\s*[:=]\s*[\'"][a-zA-Z0-9\-_]+[\'"]', re.IGNORECASE),
    # project_name assigned any quoted identifier-like value.
    re.compile(r'[\'"]?project_name[\'"]?\s*[:=]\s*[\'"][a-zA-Z0-9\-_]+[\'"]', re.IGNORECASE),
    # api_key assigned a quoted token of at least 40 characters.
    re.compile(r'[\'"]?api_key[\'"]?\s*[:=]\s*[\'"][A-Za-z0-9\-_]{40,}[\'"]', re.IGNORECASE),
    # azure_endpoint assigned a quoted https URL whose host ends in "azure.com".
    re.compile(
        r'[\'"]?azure_endpoint[\'"]?\s*[:=]\s*[\'"]https:\/\/[a-zA-Z0-9\-\.]+\.azure\.com[\/a-zA-Z0-9\.\-]*[\'"]',
        re.IGNORECASE,
    ),
    # Shell-style `export NAME="value"` with a non-empty quoted value.
    re.compile(r'export\s+[A-Z_][A-Z0-9_]*\s*=\s*["\'][^"\']+["\']', re.IGNORECASE),
    # os.environ[...] subscript whose key contains API_KEY or ENDPOINT. Either
    # quote style is accepted so that os.environ['X_API_KEY'] is caught just
    # like os.environ["X_API_KEY"].
    re.compile(r'os\.environ\[[\'"]\s*[A-Za-z0-9_]*(API_KEY|ENDPOINT)[A-Za-z0-9_]*\s*[\'"]\]', re.IGNORECASE),
]
20+
21+
22+
def check_ipynb_for_secrets(filename: Union[str, os.PathLike], patterns: Union[list, tuple, None] = None) -> bool:
    """Scan the code cells of a Jupyter notebook for secret-like lines.

    The notebook is loaded as JSON and every line of every ``code`` cell is
    searched with each pattern. Each hit is printed to stdout. The reported
    line number counts from the start of the containing cell, not from the
    start of the notebook file.

    Args:
        filename: Path of the ``.ipynb`` file to inspect.
        patterns: Compiled regular expressions to search with. When omitted,
            the module-level ``SECRET_PATTERNS`` is used.

    Returns:
        ``True`` if any pattern matched, or if the file could not be decoded
        as UTF-8 or parsed as JSON (an unreadable notebook fails the check);
        ``False`` otherwise.
    """
    if patterns is None:
        patterns = SECRET_PATTERNS

    # Only reading/parsing is guarded, so errors raised while scanning are not
    # mistaken for an unreadable file.
    try:
        with open(filename, "r", encoding="utf-8") as file:
            notebook_data = json.load(file)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        print(f"Failed to read {filename}. Skipping secrets check. Error: {e}")
        return True

    failed = False
    for cell in notebook_data.get("cells", []):
        if cell["cell_type"] != "code":
            continue
        source = cell["source"]
        # A cell's source may be stored as one multi-line string instead of a
        # list of lines. Iterating a str yields single characters, which no
        # pattern can match, so split it into lines first.
        if isinstance(source, str):
            source = source.splitlines()
        for line_number, line in enumerate(source, start=1):
            for pattern in patterns:
                if pattern.search(line):
                    print(f"Secret detected in {filename} on line {line_number}: {line.strip()}")
                    failed = True
    return failed
39+
40+
41+
def main():
    """Check every file named on the command line and exit 1 on any finding.

    Files ending in .py, .yaml, .yml or .md are scanned line by line against
    SECRET_PATTERNS; files ending in .ipynb are delegated to
    check_ipynb_for_secrets. Any other file is ignored. A text file that
    cannot be decoded as UTF-8 is reported and skipped without failing the run.
    """
    text_suffixes = (".py", ".yaml", ".yml", ".md")
    found_secret = False

    for path in sys.argv[1:]:
        if path.endswith(".ipynb"):
            # Notebooks are JSON documents and get their own scanner.
            found_secret = check_ipynb_for_secrets(path) or found_secret
            continue

        if not path.endswith(text_suffixes):
            continue

        try:
            with open(path, "r", encoding="utf-8") as handle:
                for lineno, text in enumerate(handle, start=1):
                    for regex in SECRET_PATTERNS:
                        if regex.search(text) is None:
                            continue
                        print(f"Secret detected in {path} on line {lineno}: {text.strip()}")
                        found_secret = True
        except UnicodeDecodeError:
            print(f"Failed to read {path}. Skipping secrets check.")

    # A non-zero exit status is only produced when something was detected.
    if found_secret:
        sys.exit(1)
61+
62+
63+
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)