@@ -1069,22 +1069,80 @@ def check_hallucination_signals(output: str, context: dict) -> dict:
10691069 # --- 3. Internal self-contradictions on key technical claims ---
10701070 contradiction_pairs = [
10711071 (['stateless' , 'no session' , 'sessionless' ], ['stores session' , 'session state' , 'session management' ]),
1072- (['no authentication' , 'no auth' , 'unauthenticated' ], ['requires authentication' , 'auth required' , 'must authenticate' ]),
1072+ (['no authentication' , 'no auth' ], ['requires authentication' , 'auth required' , 'must authenticate' ]), # removed 'unauthenticated' to avoid HTTP 401 false positive
10731073 (['no database' , 'no db' , 'database-free' ], ['connects to database' , 'database stores' , 'db connection' ]),
10741074 (['synchronous' , 'sync only' , 'blocking' ], ['asynchronous' , 'async' , 'non-blocking' ]),
10751075 (['monolith' , 'single service' , 'monolithic' ], ['microservices' , 'micro-service' , 'separate services' ]),
1076- ([ 'read-only' , 'read only' , 'immutable' ], [ ' write', 'update' , 'modify' , 'mutate' ]),
1076+ # Removed 'read-only' vs ' write' pair - this is a common false positive for CRUD APIs with field-level permissions
10771077 ]
10781078 for side_a , side_b in contradiction_pairs :
10791079 has_a = any (term in output_lower for term in side_a )
10801080 has_b = any (term in output_lower for term in side_b )
10811081 if has_a and has_b :
1082- # Only flag if both appear in non-comparative contexts
1083- # (i.e. not "monolith vs microservices" comparison)
1082+ # Only flag if both appear in non-comparative/non-negative contexts
1083+ # (i.e. not "monolith vs microservices" comparison or "no need for microservices" )
10841084 comparison_markers = ['vs' , 'versus' , 'compared to' , 'instead of' , 'rather than' ,
10851085 'alternative' , 'trade-off' , 'tradeoff' , 'consider' ]
1086- nearby = any (m in output_lower for m in comparison_markers )
1087- if not nearby :
1086+ negative_markers = ['no need for' , 'avoid' , 'no ' , 'not ' , 'without ' ,
1087+ 'don\' t ' , 'doesn\' t ' , 'won\' t ' , 'can\' t ' ]
1088+
1089+ # Additional exclusion patterns for common false positives
1090+ exclusion_patterns = [
1091+ r'40[13]\s*\(?\s*unauthenticated' , # HTTP 401 status code
1092+ r'403\s*\(?\s*unauthorized' , # HTTP 403 status code
1093+ r'immutable\s+(logs?|audit|records?|data)' , # immutable logs/audit/records
1094+ r'(logs?|audit|records?)\s+(?:must|should|are|is)\s+(?:be\s+)?immutable' , # logs must be immutable
1095+ ]
1096+
1097+ has_exclusion = False
1098+ for pattern in exclusion_patterns :
1099+ if re .search (pattern , output_lower ):
1100+ has_exclusion = True
1101+ break
1102+
1103+ has_comparison = any (m in output_lower for m in comparison_markers )
1104+
1105+ # Check if the terms appear together in a question presenting alternatives (e.g., "A or B?")
1106+ # Build a pattern that checks if any term from side_a appears near any term from side_b with "or" between them
1107+ has_or_question = False
1108+ for term_a in side_a :
1109+ for term_b in side_b :
1110+ # Check for "term_a or term_b" pattern (within 50 chars) followed by "?" (within 200 chars)
1111+ or_pattern = rf'{ re .escape (term_a )} .{{0,50}}\bor\b.{{0,50}}{ re .escape (term_b )} '
1112+ if re .search (or_pattern , output_lower ):
1113+ # Check if there's a question mark within 200 chars after the first term
1114+ match = re .search (or_pattern , output_lower )
1115+ if match :
1116+ text_after = output_lower [match .start ():match .end () + 200 ]
1117+ if '?' in text_after :
1118+ has_or_question = True
1119+ break
1120+ # Also check reverse order: "term_b or term_a"
1121+ or_pattern_rev = rf'{ re .escape (term_b )} .{{0,50}}\bor\b.{{0,50}}{ re .escape (term_a )} '
1122+ if re .search (or_pattern_rev , output_lower ):
1123+ match = re .search (or_pattern_rev , output_lower )
1124+ if match :
1125+ text_after = output_lower [match .start ():match .end () + 200 ]
1126+ if '?' in text_after :
1127+ has_or_question = True
1128+ break
1129+ if has_or_question :
1130+ break
1131+
1132+ # Check if either side appears in negative context with wider window
1133+ has_negative = False
1134+ for term in side_a + side_b :
1135+ for neg_marker in negative_markers :
1136+ # Look for patterns like "no need for X", "avoid X", "no X"
1137+ # Use regex for more flexible matching
1138+ pattern = rf'{ re .escape (neg_marker )} \w*\s+{ re .escape (term )} '
1139+ if re .search (pattern , output_lower ):
1140+ has_negative = True
1141+ break
1142+ if has_negative :
1143+ break
1144+
1145+ if not has_comparison and not has_negative and not has_exclusion and not has_or_question :
10881146 findings .append (f'Possible contradiction: "{ side_a [0 ]} " vs "{ side_b [0 ]} "' )
10891147
10901148 # --- 4. Suspicious RFC/standard fabrication ---
@@ -1147,18 +1205,47 @@ def in_negative_context(term: str, text: str) -> bool:
11471205 return any (m in window for m in negative_markers )
11481206
11491207 # --- 1. Insecure cryptography recommended positively ---
1208+ # Helper: check if MD5 is used for file integrity (acceptable) vs security (unacceptable)
def md5_in_acceptable_context(text: str) -> bool:
    """Return True when MD5 is mentioned only for file-integrity purposes.

    MD5 is tolerated for checksums, ETags, deduplication and similar
    non-security uses, but not for passwords or other security purposes.

    Every occurrence of ``'md5'`` in *text* is examined, not just the
    first one, so a harmless checksum mention early in the text cannot
    mask a later recommendation to use MD5 for password hashing.

    Args:
        text: Text to inspect. Matching is case-sensitive against
            lower-case markers; the visible caller passes an already
            lower-cased string.

    Returns:
        ``False`` if ``'md5'`` does not occur, or if any occurrence has a
        security-related marker in its surrounding window. Otherwise
        ``True`` if at least one occurrence has a file-integrity marker
        in its window, else ``False``.
    """
    acceptable_markers = ('checksum', 'file integrity', 'file hash', 'etag', 'content hash',
                          'file verification', 'duplicate detection', 'deduplication')
    unacceptable_markers = ('password', 'authentication', 'secure', 'encryption', 'cryptographic')
    needle = 'md5'
    radius = 100  # window spans 100 chars before and 100 chars from the match start

    found_acceptable = False
    md5_idx = text.find(needle)
    while md5_idx != -1:
        window = text[max(0, md5_idx - radius):md5_idx + radius]
        # A single security-context mention is enough to reject the text.
        if any(m in window for m in unacceptable_markers):
            return False
        if not found_acceptable:
            found_acceptable = any(m in window for m in acceptable_markers)
        md5_idx = text.find(needle, md5_idx + len(needle))

    # Acceptable only if some mention is in a file-integrity context and
    # no mention is in a security context.
    return found_acceptable
1225+
11501226 bad_crypto = [
1151- ('md5' , 'MD5 is cryptographically broken; unsuitable for password hashing or integrity checks ' ),
1227+ ('md5' , 'MD5 is cryptographically broken; unsuitable for password hashing or security ' ),
11521228 ('sha-1' , 'SHA-1 is deprecated for security use' ),
11531229 ('sha1' , 'SHA-1 is deprecated for security use' ),
1154- ('des ' , 'DES is a broken cipher (56-bit key)' ),
1230+ (r'\bdes\b ' , 'DES is a broken cipher (56-bit key)' ),
11551231 ('3des' , '3DES is deprecated and slow' ),
11561232 ('ecb mode' , 'ECB mode leaks patterns; use CBC/GCM' ),
11571233 ('rc4' , 'RC4 is a broken stream cipher' ),
11581234 ]
11591235 for term , reason in bad_crypto :
1160- if term in output_lower and not in_negative_context (term , output_lower ):
1161- findings .append (f'Bad crypto: { reason } ' )
1236+ # Special handling for MD5 - allow for file integrity
1237+ if term == 'md5' :
1238+ if 'md5' in output_lower and not in_negative_context ('md5' , output_lower ):
1239+ # Only flag if NOT in acceptable file integrity context
1240+ if not md5_in_acceptable_context (output_lower ):
1241+ findings .append (f'Bad crypto: { reason } ' )
1242+ # Check if term is a regex pattern (starts with \b or contains regex special chars)
1243+ elif term .startswith (r'\b' ) or '\\ ' in term :
1244+ if re .search (term , output_lower ) and not in_negative_context (term .replace (r'\b' , '' ), output_lower ):
1245+ findings .append (f'Bad crypto: { reason } ' )
1246+ else :
1247+ if term in output_lower and not in_negative_context (term , output_lower ):
1248+ findings .append (f'Bad crypto: { reason } ' )
11621249
11631250 # --- 2. Insecure transport / protocol advice ---
11641251 insecure_transport = [
0 commit comments