@@ -6,7 +6,7 @@ use std::path::Path;
66lazy_static ! {
77 // Hallucinated API patterns - APIs that LLMs commonly generate but don't exist
88 pub static ref HALLUCINATED_API_REGEX : Regex = Regex :: new(
9- r"(?i)\.(authenticate|validateInput|sanitize|encryptData|hashPassword|secureRandom)\s*\(\s*\)"
9+ r"(?i)\.(authenticate|validateInput|sanitize|encryptData|hashPassword|secureRandom|generateToken|verifySignature|encodeBase64|decodeBase64|compressData|decompressData|validateEmail|validatePhone|formatCurrency|parseJson|serializeJson )\s*\(\s*\)"
1010 ) . unwrap( ) ;
1111
1212 pub static ref INCOMPLETE_API_REGEX : Regex = Regex :: new(
@@ -95,7 +95,42 @@ lazy_static! {
9595
9696 // LLM-specific comment patterns that indicate AI generation
9797 pub static ref LLM_GENERATED_COMMENTS_REGEX : Regex = Regex :: new(
98- r"(?i)//.*(?:ai generated|generated by|gpt|claude|chatgpt|copilot|based on|as an ai)"
98+ r"(?i)//.*(?:ai generated|generated by|gpt|claude|chatgpt|copilot|based on|as an ai|llm|machine learning|neural network|deep learning|transformer|attention mechanism)"
99+ ) . unwrap( ) ;
100+
101+ // AI model hallucinated patterns - common incorrect implementations
102+ pub static ref AI_MODEL_HALLUCINATION_REGEX : Regex = Regex :: new(
103+ r"(?i)(?:tensorflow\.keras|torch\.nn\.Module|sklearn\.model_selection\.GridSearchCV|transformers\.pipeline)\s*\(\s*['\x22][^'\x22]*['\x22]\s*\)\s*\.\s*(fit|predict|train|evaluate)\s*\(\s*\)"
104+ ) . unwrap( ) ;
105+
106+ // Incorrect async patterns commonly generated by LLMs
107+ pub static ref INCORRECT_ASYNC_REGEX : Regex = Regex :: new(
108+ r"(?:async\s+function\s+\w+\s*\([^)]*\)\s*\{\s*return\s+await\s+Promise\.resolve\([^;]*\);\s*\}|await\s+\w+\s*\([^)]*\)\s*;?\s*//.*blocking|Promise\.all\([^)]*\)\s*\.\s*then\s*\([^)]*\)\s*await)"
109+ ) . unwrap( ) ;
110+
111+ // Common LLM-generated security anti-patterns
112+ pub static ref LLM_SECURITY_ANTIPATTERN_REGEX : Regex = Regex :: new(
113+ r"(?i)(?:eval\s*\([^)]*req\.|Function\s*\([^)]*req\.|setTimeout\s*\([^)]*req\.|setInterval\s*\([^)]*req\.|innerHTML\s*=.*req\.|outerHTML\s*=.*req\.|document\.write\s*\([^)]*req\.|window\.location\s*=.*req\.|localStorage\.setItem\s*\([^,)]*,\s*req\.|sessionStorage\.setItem\s*\([^,)]*,\s*req\.)"
114+ ) . unwrap( ) ;
115+
116+ // LLM-generated database anti-patterns
117+ pub static ref LLM_DB_ANTIPATTERN_REGEX : Regex = Regex :: new(
118+ r"(?i)(?:SELECT\s+\*\s+FROM\s+\w+\s+WHERE\s+.*=.*\+|INSERT\s+INTO\s+\w+\s+VALUES\s*\([^)]*\+|UPDATE\s+\w+\s+SET\s+.*=.*\+|DELETE\s+FROM\s+\w+\s+WHERE\s+.*=.*\+)"
119+ ) . unwrap( ) ;
120+
121+ // Common LLM-generated error handling mistakes
122+ pub static ref LLM_ERROR_HANDLING_MISTAKES_REGEX : Regex = Regex :: new(
123+ r"(?:try\s*\{\s*[^}]*\}\s*catch\s*\([^)]*\)\s*\{\s*\}\s*//.*ignore|catch\s*\([^)]*\)\s*\{\s*console\.log\s*\([^)]*\)\s*\}\s*//.*log|throw\s+new\s+Error\s*\([^)]*\)\s*;?\s*//.*generic|\.catch\s*\([^)]*\)\s*=>\s*\{\s*\}\s*//.*empty)"
124+ ) . unwrap( ) ;
125+
126+ // LLM-generated performance issues
127+ pub static ref LLM_PERFORMANCE_MISTAKES_REGEX : Regex = Regex :: new(
128+ r"(?:for\s*\([^)]*\)\s*\{\s*[^}]*for\s*\([^)]*\)\s*\{\s*[^}]*for\s*\([^)]*\)\s*\{\s*[^}]*\}\s*\}\s*\}\s*//.*nested|Array\.from\s*\([^)]*\)\s*\.\s*map\s*\([^)]*\)\s*\.\s*filter\s*\([^)]*\)\s*\.\s*reduce\s*\([^)]*\)\s*//.*chain|\.sort\s*\([^)]*\)\s*\.\s*reverse\s*\([^)]*\)\s*//.*inefficient)"
129+ ) . unwrap( ) ;
130+
131+ // LLM-generated incorrect type handling
132+ pub static ref LLM_TYPE_MISTAKES_REGEX : Regex = Regex :: new(
133+ r"(?:let\s+\w+\s*:\s*any\s*=\s*[^;]*;?\s*//.*type|var\s+\w+\s*=\s*[^;]*;?\s*//.*untyped|const\s+\w+\s*=\s*null\s*;?\s*//.*nullable|function\s+\w+\s*\([^)]*\)\s*:\s*any\s*\{[^}]*\}\s*//.*return)"
99134 ) . unwrap( ) ;
100135}
101136
@@ -410,6 +445,104 @@ impl PatternDetector for LLMGeneratedCommentsDetector {
410445 }
411446}
412447
448+ /// Detector for AI model hallucinated patterns
449+ pub struct AIModelHallucinationDetector ;
450+
451+ impl PatternDetector for AIModelHallucinationDetector {
452+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
453+ detect_pattern_with_context (
454+ content,
455+ file_path,
456+ "LLM_AI_MODEL_HALLUCINATION" ,
457+ & AI_MODEL_HALLUCINATION_REGEX ,
458+ )
459+ }
460+ }
461+
462+ /// Detector for incorrect async patterns
463+ pub struct IncorrectAsyncDetector ;
464+
465+ impl PatternDetector for IncorrectAsyncDetector {
466+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
467+ detect_pattern_with_context (
468+ content,
469+ file_path,
470+ "LLM_INCORRECT_ASYNC" ,
471+ & INCORRECT_ASYNC_REGEX ,
472+ )
473+ }
474+ }
475+
476+ /// Detector for LLM-generated security anti-patterns
477+ pub struct LLMSecurityAntipatternDetector ;
478+
479+ impl PatternDetector for LLMSecurityAntipatternDetector {
480+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
481+ detect_pattern_with_context (
482+ content,
483+ file_path,
484+ "LLM_SECURITY_ANTIPATTERN" ,
485+ & LLM_SECURITY_ANTIPATTERN_REGEX ,
486+ )
487+ }
488+ }
489+
490+ /// Detector for LLM-generated database anti-patterns
491+ pub struct LLMDBAntipatternDetector ;
492+
493+ impl PatternDetector for LLMDBAntipatternDetector {
494+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
495+ detect_pattern_with_context (
496+ content,
497+ file_path,
498+ "LLM_DB_ANTIPATTERN" ,
499+ & LLM_DB_ANTIPATTERN_REGEX ,
500+ )
501+ }
502+ }
503+
504+ /// Detector for LLM-generated error handling mistakes
505+ pub struct LLMErrorHandlingMistakesDetector ;
506+
507+ impl PatternDetector for LLMErrorHandlingMistakesDetector {
508+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
509+ detect_pattern_with_context (
510+ content,
511+ file_path,
512+ "LLM_ERROR_HANDLING_MISTAKE" ,
513+ & LLM_ERROR_HANDLING_MISTAKES_REGEX ,
514+ )
515+ }
516+ }
517+
518+ /// Detector for LLM-generated performance mistakes
519+ pub struct LLMPerformanceMistakesDetector ;
520+
521+ impl PatternDetector for LLMPerformanceMistakesDetector {
522+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
523+ detect_pattern_with_context (
524+ content,
525+ file_path,
526+ "LLM_PERFORMANCE_MISTAKE" ,
527+ & LLM_PERFORMANCE_MISTAKES_REGEX ,
528+ )
529+ }
530+ }
531+
532+ /// Detector for LLM-generated type handling mistakes
533+ pub struct LLMTypeMistakesDetector ;
534+
535+ impl PatternDetector for LLMTypeMistakesDetector {
536+ fn detect ( & self , content : & str , file_path : & Path ) -> Vec < Match > {
537+ detect_pattern_with_context (
538+ content,
539+ file_path,
540+ "LLM_TYPE_MISTAKE" ,
541+ & LLM_TYPE_MISTAKES_REGEX ,
542+ )
543+ }
544+ }
545+
413546/// Comprehensive LLM vulnerability detector that combines multiple patterns
414547pub struct ComprehensiveLLMDetector {
415548 detectors : Vec < Box < dyn PatternDetector > > ,
@@ -436,6 +569,13 @@ impl ComprehensiveLLMDetector {
436569 Box :: new( ContextConfusionDetector ) ,
437570 Box :: new( DatabaseAntipatternDetector ) ,
438571 Box :: new( LLMGeneratedCommentsDetector ) ,
572+ Box :: new( AIModelHallucinationDetector ) ,
573+ Box :: new( IncorrectAsyncDetector ) ,
574+ Box :: new( LLMSecurityAntipatternDetector ) ,
575+ Box :: new( LLMDBAntipatternDetector ) ,
576+ Box :: new( LLMErrorHandlingMistakesDetector ) ,
577+ Box :: new( LLMPerformanceMistakesDetector ) ,
578+ Box :: new( LLMTypeMistakesDetector ) ,
439579 ] ;
440580
441581 Self { detectors }
0 commit comments