44
55import re
66
7+ from .entropy import shannon_entropy
78from ..models import DetectionMatch , DetectorSource , Severity
89
910SUSPICIOUS_NAMES = {
1011 "password" ,
1112 "passwd" ,
1213 "secret" ,
1314 "token" ,
14- "key" ,
15- "api" ,
16- "apikey" ,
1715 "auth" ,
1816 "credential" ,
19- "private " ,
17+ "apikey " ,
2018}
2119
22- ASSIGNMENT_PATTERN = re .compile (
23- r"(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)\s*[:=]\s*(?P<quote>['\"]? )(?P<value>[^'\"\n#]{6,})(?P=quote)"
20+ QUOTED_ASSIGNMENT_PATTERN = re .compile (
21+ r"(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)\s*[:=]\s*(?P<quote>['\"])(?P<value>[^'\"\n#]{6,})(?P=quote)"
2422)
23+ UNQUOTED_ENV_PATTERN = re .compile (r"(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)\s*[:=]\s*(?P<value>[^\s#]{12,})\s*$" )
2524
2625CONNECTION_STRING_PATTERN = re .compile (
2726 r"(?i)\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql)://[^\s:/]+:[^\s@/]+@[^\s]+"
2827)
2928
30- AUTH_CONTEXT_PATTERN = re .compile (r"(?i)\b(auth|login|security|credential|config )\b" )
29+ AUTH_CONTEXT_PATTERN = re .compile (r"(?i)\b(auth|login|security|credential|bearer|jwt|oauth|session )\b" )
3130
3231PLACEHOLDERS = {"changeme" , "example" , "sample" , "test" , "dummy" , "password" , "secret" }
3332NAME_SPLIT_PATTERN = re .compile (r"[^A-Za-z0-9]+" )
@@ -41,6 +40,10 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
4140 del line_number
4241 hits : list [DetectionMatch ] = []
4342 lowered = line .lower ()
43+ stripped = line .strip ()
44+
45+ if re .match (r"^\s*(#|//|/\*|\*)" , stripped ):
46+ return []
4447
4548 for match in CONNECTION_STRING_PATTERN .finditer (line ):
4649 value = match .group (0 )
@@ -66,7 +69,7 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
6669 )
6770 )
6871
69- for match in ASSIGNMENT_PATTERN .finditer (line ):
72+ for match in QUOTED_ASSIGNMENT_PATTERN .finditer (line ):
7073 name = match .group ("name" )
7174 value = match .group ("value" ).strip ()
7275 if _should_skip_assignment_value (value ):
@@ -101,29 +104,64 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
101104 )
102105 )
103106
107+ if file_path .endswith (".env" ):
108+ for match in UNQUOTED_ENV_PATTERN .finditer (line ):
109+ name = match .group ("name" )
110+ value = match .group ("value" ).strip ()
111+ if _should_skip_assignment_value (value ):
112+ continue
113+ if not _is_suspicious_name (name ):
114+ continue
115+ if len (value ) < 16 :
116+ continue
117+
118+ hits .append (
119+ DetectionMatch (
120+ finding_type = "Suspicious Hardcoded Credential" ,
121+ value = value ,
122+ start = match .start ("value" ),
123+ end = match .end ("value" ),
124+ source = DetectorSource .CONTEXT ,
125+ confidence = 0.74 ,
126+ severity = Severity .HIGH ,
127+ risk = "Hardcoded credentials are often propagated to logs, forks, and artifacts." ,
128+ remediation = "Replace literal with an environment variable and rotate if already exposed." ,
129+ safer_alternative = (
130+ "Use os.getenv() in Python or process.env in Node with secret manager injection."
131+ ),
132+ autofix = _autofix_for_assignment (file_path , name ),
133+ )
134+ )
135+
104136 if AUTH_CONTEXT_PATTERN .search (lowered ):
105137 literals = _extract_string_literals (line )
106138 for value , start , end in literals :
107- if len (value ) < 12 or _is_placeholder (value ):
139+ if len (value ) < 16 or _is_placeholder (value ):
140+ continue
141+ if _should_skip_assignment_value (value ):
108142 continue
109- if _has_diverse_chars (value ):
110- hits .append (
111- DetectionMatch (
112- finding_type = "Auth Context Secret Literal" ,
113- value = value ,
114- start = start ,
115- end = end ,
116- source = DetectorSource .CONTEXT ,
117- confidence = 0.62 ,
118- severity = Severity .MEDIUM ,
119- risk = "Sensitive literals near auth/security code are likely real secrets." ,
120- remediation = "Move this literal to secrets storage and reference by env variable." ,
121- safer_alternative = (
122- "Use runtime secret injection and avoid embedding values in repository code."
123- ),
124- autofix = "Replace literal with os.getenv(\" AUTH_SECRET\" ) and define .env.example entry." ,
125- )
143+ if shannon_entropy (value ) < 3.4 :
144+ continue
145+ if not _looks_secret_like_literal (value ):
146+ continue
147+
148+ hits .append (
149+ DetectionMatch (
150+ finding_type = "Auth Context Secret Literal" ,
151+ value = value ,
152+ start = start ,
153+ end = end ,
154+ source = DetectorSource .CONTEXT ,
155+ confidence = 0.65 ,
156+ severity = Severity .MEDIUM ,
157+ risk = "Sensitive literals near auth/security code are likely real secrets." ,
158+ remediation = "Move this literal to secrets storage and reference by env variable." ,
159+ safer_alternative = (
160+ "Use runtime secret injection and avoid embedding values in repository code."
161+ ),
162+ autofix = "Replace literal with os.getenv(\" AUTH_SECRET\" ) and define .env.example entry." ,
126163 )
164+ )
127165
128166 return hits
129167
@@ -158,6 +196,12 @@ def _is_suspicious_name(name: str) -> bool:
158196 return True
159197 if {"private" , "key" }.issubset (token_set ):
160198 return True
199+ if {"access" , "key" }.issubset (token_set ):
200+ return True
201+ if {"secret" , "key" }.issubset (token_set ):
202+ return True
203+ if {"client" , "secret" }.issubset (token_set ):
204+ return True
161205
162206 return any (token in SUSPICIOUS_NAMES for token in token_set )
163207
@@ -178,6 +222,8 @@ def _should_skip_assignment_value(value: str) -> bool:
178222 return True
179223 if stripped .isdigit ():
180224 return True
225+ if "(" in stripped or ")" in stripped :
226+ return True
181227 if lowered .startswith (("http://" , "https://" , "file://" )):
182228 return True
183229 if lowered .startswith (("./" , "../" , "/" )):
@@ -187,6 +233,19 @@ def _should_skip_assignment_value(value: str) -> bool:
187233 return False
188234
189235
236+ def _looks_secret_like_literal (value : str ) -> bool :
237+ if len (value ) < 16 :
238+ return False
239+ if " " in value :
240+ return False
241+ has_upper = any (char .isupper () for char in value )
242+ has_lower = any (char .islower () for char in value )
243+ has_digit = any (char .isdigit () for char in value )
244+ has_symbol = any (not char .isalnum () for char in value )
245+ score = sum ([has_upper , has_lower , has_digit , has_symbol ])
246+ return score >= 2
247+
248+
190249def _to_snake_case (name : str ) -> str :
191250 with_boundaries = re .sub (r"([a-z0-9])([A-Z])" , r"\1_\2" , name )
192251 return with_boundaries .replace ("-" , "_" ).lower ()
0 commit comments