33Standalone script for checking files for prompt injection patterns.
44Can be used with pre-commit hooks or CI/CD pipelines.
55
6+ Specifically designed for USPTO Final Petition Decisions (FPD) MCP to detect:
7+ - Unicode steganography attacks (emoji-based hiding from Repello.ai article)
8+ - FPD-specific injection attempts (petition data extraction, API bypass)
9+ - Standard prompt injection patterns
10+
611Usage:
712 python check_prompt_injections.py file1.py file2.txt ...
8-
13+ python check_prompt_injections.py src/ tests/ *.md
14+
915Exit codes:
1016 0 - No prompt injections found
1117 1 - Prompt injections detected
1723from pathlib import Path
1824from typing import List , Tuple
1925
20- from fpd_prompt_injection_detector import FPDPromptInjectionDetector
26+ from prompt_injection_detector import PromptInjectionDetector
2127
2228
23- def check_file (filepath : Path , detector : FPDPromptInjectionDetector ) -> List [Tuple [int , str ]]:
29+ def check_file (filepath : Path , detector : PromptInjectionDetector ) -> List [Tuple [int , str ]]:
2430 """
2531 Check a single file for prompt injection patterns.
2632
@@ -32,10 +38,31 @@ def check_file(filepath: Path, detector: FPDPromptInjectionDetector) -> List[Tup
3238 if not filepath .is_file ():
3339 return []
3440
35- # Only check text-based files
36- text_extensions = {'.py' , '.txt' , '.md' , '.yml' , '.yaml' , '.json' , '.js' , '.ts' , '.html' , '.xml' , '.csv' }
41+ # Only check text-based files (source, docs, data, config, log, env, and shell/batch/PowerShell script extensions)
42+ text_extensions = {
43+ '.py' , '.txt' , '.md' , '.yml' , '.yaml' , '.json' , '.js' , '.ts' ,
44+ '.html' , '.xml' , '.csv' , '.rst' , '.cfg' , '.ini' , '.toml' ,
45+ '.log' , '.env' , '.sh' , '.bat' , '.ps1'
46+ }
3747 if filepath .suffix .lower () not in text_extensions and filepath .suffix :
3848 return []
49+
50+ # Skip files that are likely to contain legitimate security examples or documentation
51+ excluded_files = {
52+ # Security documentation and tools
53+ 'SECURITY_SCANNING.md' , 'SECURITY_GUIDELINES.md' , 'security_examples.py' , 'test_security.py' ,
54+ 'prompt_injection_detector.py' , 'check_prompt_injections.py' ,
55+ # Documentation files likely to contain examples
56+ 'README.md' , 'PROMPTS.md' , 'CLAUDE.md' ,
57+ # Deployment and configuration scripts
58+ 'linux_setup.sh' , 'windows_setup.ps1' , 'manage_api_keys.ps1' ,
59+ }
60+ if filepath .name in excluded_files :
61+ return []
62+
63+ # Skip any .py file whose name contains "prompt" (assumed to be prompt templates that legitimately use prompt-related keywords)
64+ if 'prompt' in filepath .name .lower () and filepath .suffix == '.py' :
65+ return []
3966
4067 with open (filepath , 'r' , encoding = 'utf-8' , errors = 'ignore' ) as f :
4168 content = f .read ()
@@ -59,39 +86,51 @@ def check_file(filepath: Path, detector: FPDPromptInjectionDetector) -> List[Tup
5986def main ():
6087 """Main function."""
6188 parser = argparse .ArgumentParser (
62- description = "Check files for prompt injection patterns" ,
89+ description = "Check files for prompt injection patterns (USPTO FPD MCP) " ,
6390 formatter_class = argparse .RawDescriptionHelpFormatter ,
6491 epilog = """
6592Examples:
6693 python check_prompt_injections.py src/**/*.py
6794 python check_prompt_injections.py README.md config.yml
68-
69- Common prompt injection patterns detected:
70- - Instruction override attempts ("ignore previous instructions")
71- - Prompt extraction ("show me your instructions")
95+ python check_prompt_injections.py --verbose src/ tests/
96+
97+ Detected attack categories:
98+ - Instruction override ("ignore previous instructions")
99+ - Prompt extraction ("show me your instructions")
72100- Persona switching ("you are now a different AI")
73101- Output format manipulation ("encode in hex")
74102- Social engineering ("we became friends")
75- - FPD-specific attacks ("extract petition data", "bypass CFR rules")
103+ - USPTO FPD specific ("extract all petition numbers")
104+ - Unicode steganography (emoji-based hiding)
105+
106+ Critical: Detects Unicode Variation Selector steganography
107+ from Repello.ai article where malicious prompts are hidden
108+ in invisible characters appended to innocent text like "Hello!".
76109"""
77110 )
78111
79112 parser .add_argument (
80113 'files' ,
81114 nargs = '*' ,
82- help = 'Files to check for prompt injections'
115+ help = 'Files and directories to check for prompt injections'
83116 )
84117
85118 parser .add_argument (
86119 '--verbose' , '-v' ,
87120 action = 'store_true' ,
88- help = 'Show detailed output'
121+ help = 'Show detailed output including full matches '
89122 )
90123
91124 parser .add_argument (
92125 '--quiet' , '-q' ,
93126 action = 'store_true' ,
94- help = 'Only show summary'
127+ help = 'Only show summary (suppress individual findings)'
128+ )
129+
130+ parser .add_argument (
131+ '--include-security-files' ,
132+ action = 'store_true' ,
133+ help = 'Check security documentation files (normally excluded)'
95134 )
96135
97136 args = parser .parse_args ()
@@ -100,60 +139,90 @@ def main():
100139 print ("No files specified. Use --help for usage." , file = sys .stderr )
101140 return 2
102141
103- detector = FPDPromptInjectionDetector ()
142+ detector = PromptInjectionDetector ()
104143 total_issues = 0
105144 total_files_checked = 0
106145 files_with_issues = []
146+ unicode_steganography_detected = False
107147
108148 for file_pattern in args .files :
109149 filepath = Path (file_pattern )
110150
111151 if filepath .is_file ():
112152 files_to_check = [filepath ]
153+ elif filepath .is_dir ():
154+ # Recursively collect files under the directory, limited to the extensions listed below
155+ files_to_check = []
156+ for ext in ['.py' , '.txt' , '.md' , '.yml' , '.yaml' , '.json' , '.js' , '.ts' , '.html' , '.xml' , '.csv' ]:
157+ files_to_check .extend (filepath .rglob (f"*{ ext } " ))
113158 else :
114159 # Handle glob patterns
115160 files_to_check = list (filepath .parent .glob (filepath .name )) if filepath .parent .exists () else []
116161
117162 for file_path in files_to_check :
118163 if not file_path .is_file ():
119164 continue
165+
166+ # Skip security files unless explicitly requested
167+ if not args .include_security_files and file_path .name in {
168+ 'SECURITY_SCANNING.md' , 'security_examples.py' , 'test_security.py' ,
169+ 'prompt_injection_detector.py' , 'check_prompt_injections.py'
170+ }:
171+ continue
120172
121173 total_files_checked += 1
122174 findings = check_file (file_path , detector )
123175
124176 if findings :
125177 files_with_issues .append (str (file_path ))
126178 total_issues += len (findings )
179+
180+ # Check for Unicode steganography specifically
181+ for _ , match in findings :
182+ if 'steganography' in match .lower () or 'variation selector' in match .lower ():
183+ unicode_steganography_detected = True
127184
128185 if not args .quiet :
129186 print (f"\n [!] Prompt injection patterns found in { file_path } :" )
130187 for line_num , match in findings :
131188 if args .verbose :
132- print (f" Line { line_num :4d} : { match } " )
189+ # Safe display of matches (handle Unicode characters)
190+ safe_match = match .encode ('ascii' , 'replace' ).decode ('ascii' )
191+ print (f" Line { line_num :4d} : { safe_match } " )
133192 else :
134- # Truncate long matches
135- display_match = match [:60 ] + "..." if len (match ) > 60 else match
193+ # Truncate long matches for readability and ensure safe display
194+ safe_match = match .encode ('ascii' , 'replace' ).decode ('ascii' )
195+ display_match = safe_match [:60 ] + "..." if len (safe_match ) > 60 else safe_match
136196 print (f" Line { line_num :4d} : { display_match } " )
137197
138198 # Summary
139199 if not args .quiet or total_issues > 0 :
140- print (f"\n { '=' * 60 } " )
200+ print (f"\n { '=' * 70 } " )
201+ print (f"USPTO FPD MCP Security Scan Results:" )
141202 print (f"Files checked: { total_files_checked } " )
142203 print (f"Files with issues: { len (files_with_issues )} " )
143204 print (f"Total issues found: { total_issues } " )
144-
205+
206+ if unicode_steganography_detected :
207+ print (f"\n [CRITICAL] Unicode steganography detected!" )
208+ print ("This indicates potential emoji-based prompt injection attacks" )
209+ print ("as described in the Repello.ai article. IMMEDIATE REVIEW REQUIRED." )
210+
145211 if total_issues > 0 :
146212 print (f"\n [WARNING] Prompt injection patterns detected!" )
147213 print ("These patterns may indicate attempts to:" )
148- print ("- Override system instructions" )
214+ print ("- Override system instructions" )
149215 print ("- Extract sensitive prompts" )
150- print ("- Change AI behavior" )
216+ print ("- Change AI behavior" )
151217 print ("- Bypass security controls" )
152- print ("- Extract petition data" )
153- print ("- Manipulate CFR rules " )
218+ print ("- Extract USPTO FPD petition data" )
219+ print ("- Hide malicious instructions in Unicode characters " )
154220 print ("\n Review these findings to ensure they are not malicious." )
221+ print ("For suspected Unicode steganography, use a Unicode analyzer" )
222+ print ("to examine invisible characters in the flagged content." )
155223 else :
156224 print ("[OK] No prompt injection patterns detected." )
225+ print ("System appears secure against known injection techniques." )
157226
158227 return 1 if total_issues > 0 else 0
159228
0 commit comments