@@ -30,23 +30,92 @@ repos:
30
30
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v5.0.0
  hooks:
    # Disabled: uncomment to also scan for credentials found in the local
    # AWS CLI credentials file.
    # - id: detect-aws-credentials
    #   name: 🔐 Detect AWS Credentials
    #   description: Detects *your* aws credentials from the aws cli credentials file.
    #   types: [text]

    # Scans text files for private-key material.
    - id: detect-private-key
      name: 🔐 Detect Private Key
      description: Detects the presence of private keys.
      types: [text]
42
42
43
- # - repo: https://github.com/Yelp/detect-secrets
44
- # rev: v1.5.0
45
- # hooks:
46
- # - id: detect-secrets
47
- # name: 🔐 Detect Secrets
48
- # description: Detects secrets within a repository.
49
- # args: ['--baseline', '.secrets.baseline']
43
+ # - repo: https://github.com/Yelp/detect-secrets
44
+ # rev: v1.5.0
45
+ # hooks:
46
+ # - id: detect-secrets
47
+ # name: 🔐 Detect Secrets
48
+ # description: Detects secrets within a repository.
49
+ # args: ['--baseline', '.secrets.baseline']
50
+
51
+ # -----------------------------------------------------------------------------
52
+ # ❌ Forbid Specific AI / LLM Patterns
53
+ # -----------------------------------------------------------------------------
54
+ # This local hook checks for patterns that should not be committed.
55
+ # It aims to detect and prevent the inclusion of AI-generated content by
56
+ # identifying common artifacts associated with large language models (LLMs).
57
+ #
58
+ # Patterns checked include:
59
+ # - `:contentReference`
60
+ # - `[oaicite:??<digits>]` (e.g., `[oaicite:??12345]`)
61
+ # - Common AI-generated phrases (e.g., "As an AI language model")
62
+ # - Placeholder citations (e.g., "(Author, 2023)")
63
+ # - Repetitive or generic phrases often produced by LLMs
64
+ # -----------------------------------------------------------------------------
65
- repo: local
  hooks:
    # Fails the commit when any staged text file contains a known LLM
    # artefact or boilerplate phrase (see the regex in `entry`).
    - id: forbid-specific-patterns
      name: ❌ Forbid Specific AI / LLM Patterns
      description: 'Prevents committing LLM artefacts like :contentReference, [oaicite], and common AI-generated phrases.'
      # How the entry works:
      #   * A literal block scalar (`|-`) keeps the script's newlines, so the
      #     shell comments inside it cannot swallow the commands that follow.
      #   * pre-commit appends the staged filenames after the entry. With
      #     `bash -c SCRIPT a b`, `a` becomes $0 and is NOT part of "$@"; the
      #     trailing `--` occupies $0 so every filename reaches grep.
      #   * `grep -E` uses POSIX ERE, which has no `\d`; digits are matched
      #     with `[0-9]`.
      # NOTE(review): the oaicite alternative requires two literal question
      # marks (`[oaicite:??12]`) - confirm this matches the artefacts you see.
      entry: |-
        bash -c '
          # Succeed immediately if no files are passed
          [ "$#" -eq 0 ] && exit 0

          # grep exits 0 when it finds a match; invert so a match fails the hook
          ! grep -rnE "(:contentReference|\[oaicite:\?\?[0-9]*\]|As an AI language model|I am an AI developed by|This response was generated by|\(Author, [0-9]{4}\)|\(Source: [^)]+\)|In conclusion,|To summarize,|It is important to note that|Remember that|Keep in mind that)" \
            --exclude-dir=.git \
            --exclude-dir=node_modules \
            --exclude-dir=.venv \
            --exclude-dir=dist \
            --exclude-dir=build \
            --exclude-dir=__pycache__ \
            --exclude=.pre-commit-config.yaml \
            "$@"
        ' --
      language: system
      pass_filenames: true
      types: [text]
89
+
90
+ # -----------------------------------------------------------------------------
91
+ # 🔤 Unicode Text Normalization (via texthooks)
92
+ # -----------------------------------------------------------------------------
93
+ # A collection of hooks to clean up problematic Unicode characters:
94
+ #
95
+ # 📝 fix-smartquotes: Converts curly quotes (" " ' ') to standard ASCII quotes.
96
+ # 🔡 fix-ligatures: Replaces typographic ligatures (fi, ff) with ASCII equivalents.
97
+ # ␣ fix-spaces: Normalizes non-breaking and exotic spaces to regular spaces.
98
+ # 🚫 forbid-bidi-controls: Prevents Unicode BiDi control characters used to
99
+ # obscure code logic or directionality.
100
+ #
101
+ # These prevent copy-paste artifacts, invisible formatting errors, and
102
+ # encoding bugs from creeping into the codebase.
103
+ # -----------------------------------------------------------------------------
104
- repo: https://github.com/sirosen/texthooks
  rev: 0.6.8
  hooks:
    # Rewrites smart/curly quotes as plain ASCII quotes.
    - id: fix-smartquotes
      name: 📝 Normalize Smart Quotes
      description: Replaces smart/curly quotes with standard ASCII quotes.

    # Expands typographic ligatures into their separate letters.
    - id: fix-ligatures
      name: 🔡 Normalize Ligatures
      description: Replaces typographic ligatures with standard characters.

    # Turns non-breaking and other unusual space characters into regular spaces.
    - id: fix-spaces
      name: ␣ Normalize Unicode Spaces
      description: Replaces non-breaking or exotic space characters with regular spaces.

    # Check-only hook: rejects files containing bidirectional control characters.
    - id: forbid-bidi-controls
      name: 🚫 Forbid BiDi Unicode Controls
      description: Prevents bidirectional control characters that can obscure code meaning.
50
119
51
120
# -----------------------------------------------------------------------------
52
121
# 🧹 Formatting Hooks (MODIFIES FILES)
@@ -73,7 +142,7 @@ repos:
73
142
74
143
- id : fix-encoding-pragma
75
144
name : 🧹 Fix Python Encoding Pragma
76
- description : ' Adds # -*- coding: utf-8 -*- to the top of python files.'
145
+ description : " Adds # -*- coding: utf-8 -*- to the top of python files."
77
146
types : [python]
78
147
79
148
- id : mixed-line-ending
@@ -91,12 +160,12 @@ repos:
91
160
name : 🧹 File Contents Sorter
92
161
description : Sorts the lines in specified files (defaults to alphabetical).
93
162
language : python
94
- files : ' ^$ '
163
+ files : " ^$ "
95
164
96
165
- id : sort-simple-yaml
97
166
name : 🧹 Sort Simple YAML Files
98
167
description : Sorts simple YAML files which consist only of top-level keys.
99
- files : ' ^$ '
168
+ files : " ^$ "
100
169
101
170
# Optional: Uncomment to enable Prettier formatting
102
171
# - repo: https://github.com/pre-commit/mirrors-prettier
@@ -158,7 +227,7 @@ repos:
158
227
name : ✅ Forbid Submodules
159
228
description : Forbids any submodules in the repository.
160
229
language : fail
161
- entry : ' submodules are not allowed in this repository:'
230
+ entry : " submodules are not allowed in this repository:"
162
231
types : [directory]
163
232
164
233
- id : check-vcs-permalinks
@@ -199,14 +268,14 @@ repos:
199
268
- id : yamllint
200
269
name : ✅ YAMLlint - YAML Linter
201
270
description : A linter for YAML files.
202
- args : [ -c, .yamllint ]
271
+ args : [-c, .yamllint]
203
272
204
- # - repo: https://github.com/igorshubovych/markdownlint-cli
205
- # rev: v0.45.0
206
- # hooks:
207
- # - id: markdownlint
208
- # name: ✅ Markdownlint - Markdown Linter
209
- # description: A tool to check markdown files and flag style issues.
273
+ # - repo: https://github.com/igorshubovych/markdownlint-cli
274
+ # rev: v0.45.0
275
+ # hooks:
276
+ # - id: markdownlint
277
+ # name: ✅ Markdownlint - Markdown Linter
278
+ # description: A tool to check markdown files and flag style issues.
210
279
211
280
# -----------------------------------------------------------------------------
212
281
# 🐍 Python Code Quality Hooks (LINTING ONLY)
@@ -246,7 +315,7 @@ repos:
246
315
description : Verifies test files in tests/ directories start with `test_`.
247
316
language : python
248
317
files : (^|/)tests/.+\.py$
249
- args : [--pytest-test-first] # `test_.*\.py`
318
+ args : [--pytest-test-first] # `test_.*\.py`
250
319
251
320
# - repo: https://github.com/pycqa/flake8
252
321
# rev: 7.2.0
0 commit comments