Skip to content

Commit 3c1cc98

Browse files
committed
Improve detector precision with contextual/entropy gates and reduce secret-scanner false positives
1 parent b0b3eb9 commit 3c1cc98

File tree

8 files changed

+275
-29
lines changed

.leaklensignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,4 @@
22
examples/generated/**
33
examples/vulnerable_repo/**
44
tests/**
5-
src/leaklens/rules.py
6-
src/leaklens/detectors/context.py
75
*.min.js

src/leaklens/detectors/context.py

Lines changed: 85 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,29 @@
44

55
import re
66

7+
from .entropy import shannon_entropy
78
from ..models import DetectionMatch, DetectorSource, Severity
89

910
SUSPICIOUS_NAMES = {
1011
"password",
1112
"passwd",
1213
"secret",
1314
"token",
14-
"key",
15-
"api",
16-
"apikey",
1715
"auth",
1816
"credential",
19-
"private",
17+
"apikey",
2018
}
2119

22-
ASSIGNMENT_PATTERN = re.compile(
23-
r"(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)\s*[:=]\s*(?P<quote>['\"]?)(?P<value>[^'\"\n#]{6,})(?P=quote)"
20+
QUOTED_ASSIGNMENT_PATTERN = re.compile(
21+
r"(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)\s*[:=]\s*(?P<quote>['\"])(?P<value>[^'\"\n#]{6,})(?P=quote)"
2422
)
23+
UNQUOTED_ENV_PATTERN = re.compile(r"(?P<name>[A-Za-z_][A-Za-z0-9_\-]*)\s*[:=]\s*(?P<value>[^\s#]{12,})\s*$")
2524

2625
CONNECTION_STRING_PATTERN = re.compile(
2726
r"(?i)\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql)://[^\s:/]+:[^\s@/]+@[^\s]+"
2827
)
2928

30-
AUTH_CONTEXT_PATTERN = re.compile(r"(?i)\b(auth|login|security|credential|config)\b")
29+
AUTH_CONTEXT_PATTERN = re.compile(r"(?i)\b(auth|login|security|credential|bearer|jwt|oauth|session)\b")
3130

3231
PLACEHOLDERS = {"changeme", "example", "sample", "test", "dummy", "password", "secret"}
3332
NAME_SPLIT_PATTERN = re.compile(r"[^A-Za-z0-9]+")
@@ -41,6 +40,10 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
4140
del line_number
4241
hits: list[DetectionMatch] = []
4342
lowered = line.lower()
43+
stripped = line.strip()
44+
45+
if re.match(r"^\s*(#|//|/\*|\*)", stripped):
46+
return []
4447

4548
for match in CONNECTION_STRING_PATTERN.finditer(line):
4649
value = match.group(0)
@@ -66,7 +69,7 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
6669
)
6770
)
6871

69-
for match in ASSIGNMENT_PATTERN.finditer(line):
72+
for match in QUOTED_ASSIGNMENT_PATTERN.finditer(line):
7073
name = match.group("name")
7174
value = match.group("value").strip()
7275
if _should_skip_assignment_value(value):
@@ -101,29 +104,64 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
101104
)
102105
)
103106

107+
if file_path.endswith(".env"):
108+
for match in UNQUOTED_ENV_PATTERN.finditer(line):
109+
name = match.group("name")
110+
value = match.group("value").strip()
111+
if _should_skip_assignment_value(value):
112+
continue
113+
if not _is_suspicious_name(name):
114+
continue
115+
if len(value) < 16:
116+
continue
117+
118+
hits.append(
119+
DetectionMatch(
120+
finding_type="Suspicious Hardcoded Credential",
121+
value=value,
122+
start=match.start("value"),
123+
end=match.end("value"),
124+
source=DetectorSource.CONTEXT,
125+
confidence=0.74,
126+
severity=Severity.HIGH,
127+
risk="Hardcoded credentials are often propagated to logs, forks, and artifacts.",
128+
remediation="Replace literal with an environment variable and rotate if already exposed.",
129+
safer_alternative=(
130+
"Use os.getenv() in Python or process.env in Node with secret manager injection."
131+
),
132+
autofix=_autofix_for_assignment(file_path, name),
133+
)
134+
)
135+
104136
if AUTH_CONTEXT_PATTERN.search(lowered):
105137
literals = _extract_string_literals(line)
106138
for value, start, end in literals:
107-
if len(value) < 12 or _is_placeholder(value):
139+
if len(value) < 16 or _is_placeholder(value):
140+
continue
141+
if _should_skip_assignment_value(value):
108142
continue
109-
if _has_diverse_chars(value):
110-
hits.append(
111-
DetectionMatch(
112-
finding_type="Auth Context Secret Literal",
113-
value=value,
114-
start=start,
115-
end=end,
116-
source=DetectorSource.CONTEXT,
117-
confidence=0.62,
118-
severity=Severity.MEDIUM,
119-
risk="Sensitive literals near auth/security code are likely real secrets.",
120-
remediation="Move this literal to secrets storage and reference by env variable.",
121-
safer_alternative=(
122-
"Use runtime secret injection and avoid embedding values in repository code."
123-
),
124-
autofix="Replace literal with os.getenv(\"AUTH_SECRET\") and define .env.example entry.",
125-
)
143+
if shannon_entropy(value) < 3.4:
144+
continue
145+
if not _looks_secret_like_literal(value):
146+
continue
147+
148+
hits.append(
149+
DetectionMatch(
150+
finding_type="Auth Context Secret Literal",
151+
value=value,
152+
start=start,
153+
end=end,
154+
source=DetectorSource.CONTEXT,
155+
confidence=0.65,
156+
severity=Severity.MEDIUM,
157+
risk="Sensitive literals near auth/security code are likely real secrets.",
158+
remediation="Move this literal to secrets storage and reference by env variable.",
159+
safer_alternative=(
160+
"Use runtime secret injection and avoid embedding values in repository code."
161+
),
162+
autofix="Replace literal with os.getenv(\"AUTH_SECRET\") and define .env.example entry.",
126163
)
164+
)
127165

128166
return hits
129167

@@ -158,6 +196,12 @@ def _is_suspicious_name(name: str) -> bool:
158196
return True
159197
if {"private", "key"}.issubset(token_set):
160198
return True
199+
if {"access", "key"}.issubset(token_set):
200+
return True
201+
if {"secret", "key"}.issubset(token_set):
202+
return True
203+
if {"client", "secret"}.issubset(token_set):
204+
return True
161205

162206
return any(token in SUSPICIOUS_NAMES for token in token_set)
163207

@@ -178,6 +222,8 @@ def _should_skip_assignment_value(value: str) -> bool:
178222
return True
179223
if stripped.isdigit():
180224
return True
225+
if "(" in stripped or ")" in stripped:
226+
return True
181227
if lowered.startswith(("http://", "https://", "file://")):
182228
return True
183229
if lowered.startswith(("./", "../", "/")):
@@ -187,6 +233,19 @@ def _should_skip_assignment_value(value: str) -> bool:
187233
return False
188234

189235

236+
def _looks_secret_like_literal(value: str) -> bool:
237+
if len(value) < 16:
238+
return False
239+
if " " in value:
240+
return False
241+
has_upper = any(char.isupper() for char in value)
242+
has_lower = any(char.islower() for char in value)
243+
has_digit = any(char.isdigit() for char in value)
244+
has_symbol = any(not char.isalnum() for char in value)
245+
score = sum([has_upper, has_lower, has_digit, has_symbol])
246+
return score >= 2
247+
248+
190249
def _to_snake_case(name: str) -> str:
191250
with_boundaries = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name)
192251
return with_boundaries.replace("-", "_").lower()

src/leaklens/detectors/entropy.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$", re.IGNORECASE
1818
)
1919
HEX_DIGEST_PATTERN = re.compile(r"^[0-9a-f]{32}$|^[0-9a-f]{40}$|^[0-9a-f]{64}$", re.IGNORECASE)
20+
SECRET_CONTEXT_PATTERN = re.compile(
21+
r"(?i)\b(password|passwd|secret|token|api[_-]?key|auth|credential|private[_-]?key|access[_-]?key|bearer|jwt)\b"
22+
)
2023

2124

2225
class EntropyDetector:
@@ -29,6 +32,7 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
2932
"""Evaluate high-entropy candidate tokens in one line."""
3033
del file_path, line_number
3134
hits: list[DetectionMatch] = []
35+
line_has_context = bool(SECRET_CONTEXT_PATTERN.search(line))
3236

3337
for value, start, end in _extract_candidates(line):
3438
if _skip_candidate(value):
@@ -37,6 +41,8 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
3741
entropy = shannon_entropy(value)
3842
if entropy < self.threshold:
3943
continue
44+
if not line_has_context and (len(value) < 32 or entropy < (self.threshold + 0.6)):
45+
continue
4046

4147
confidence = min(0.92, 0.48 + (entropy - self.threshold) * 0.18)
4248
severity = Severity.MEDIUM if confidence < 0.8 else Severity.HIGH

src/leaklens/detectors/regex.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
from __future__ import annotations
44

5+
import base64
6+
import json
57
import re
68
from dataclasses import dataclass
79
from pathlib import Path
810

11+
from .entropy import shannon_entropy
912
from ..models import DetectionMatch, DetectorSource, RuleSpec
1013

1114
SAFE_ALT = (
@@ -37,6 +40,7 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
3740
del line_number
3841
hits: list[DetectionMatch] = []
3942
file_name = Path(file_path).name
43+
line_lower = line.lower()
4044

4145
for compiled in self._rules:
4246
if compiled.spec.name == "dotenv_assignment" and not file_name.startswith(".env"):
@@ -47,6 +51,8 @@ def scan_line(self, file_path: str, line_number: int, line: str) -> list[Detecti
4751
continue
4852
if compiled.spec.name == "dotenv_assignment" and _is_placeholder_value(value):
4953
continue
54+
if not _passes_rule_heuristics(compiled.spec.name, value, line, line_lower):
55+
continue
5056
hits.append(
5157
DetectionMatch(
5258
finding_type=compiled.spec.secret_type,
@@ -104,3 +110,135 @@ def _is_placeholder_value(value: str) -> bool:
104110
"null",
105111
}
106112
return lowered in placeholders or lowered.startswith("example")
113+
114+
115+
def _passes_rule_heuristics(rule_name: str, value: str, line: str, line_lower: str) -> bool:
116+
normalized = value.strip()
117+
if not normalized:
118+
return False
119+
if _contains_placeholder_markers(normalized):
120+
return False
121+
122+
if rule_name == "aws_access_key":
123+
if len(normalized) != 20:
124+
return False
125+
if normalized.upper().endswith("EXAMPLE"):
126+
return False
127+
return _has_any_keyword(line_lower, {"aws", "access_key", "secret_key", "iam", "akia", "asia"})
128+
129+
if rule_name == "aws_secret_key":
130+
if len(normalized) != 40:
131+
return False
132+
return shannon_entropy(normalized) >= 3.4
133+
134+
if rule_name == "github_token":
135+
if len(normalized) < 40:
136+
return False
137+
if not _has_any_keyword(
138+
line_lower,
139+
{"github", "token", "authorization", "bearer", "ghp_", "gho_", "ghu_", "ghs_", "ghr_", "github_pat_"},
140+
):
141+
return False
142+
return shannon_entropy(_strip_prefix(normalized)) >= 3.2
143+
144+
if rule_name == "stripe_secret":
145+
if len(normalized) < 24:
146+
return False
147+
if not _has_any_keyword(line_lower, {"stripe", "sk_live_", "sk_test_", "secret", "token", "api_key"}):
148+
return False
149+
return shannon_entropy(_strip_prefix(normalized)) >= 3.1
150+
151+
if rule_name == "slack_token":
152+
if len(normalized) < 20 or normalized.count("-") < 2:
153+
return False
154+
if not _has_any_keyword(line_lower, {"slack", "xox", "token", "oauth", "bot", "webhook"}):
155+
return False
156+
return shannon_entropy(_strip_prefix(normalized)) >= 3.0
157+
158+
if rule_name == "jwt_token":
159+
if len(normalized) < 80:
160+
return False
161+
if not _has_any_keyword(line_lower, {"jwt", "bearer", "authorization", "id_token", "access_token", "token"}):
162+
return False
163+
if not _looks_like_jwt_header(normalized):
164+
return False
165+
parts = normalized.split(".")
166+
if len(parts) != 3:
167+
return False
168+
return shannon_entropy(parts[1] + parts[2]) >= 3.2
169+
170+
if rule_name == "ssh_private_key":
171+
marker = "-----BEGIN OPENSSH PRIVATE KEY-----"
172+
return _is_unquoted_marker_line(line, marker)
173+
174+
if rule_name == "rsa_private_key":
175+
marker = "-----BEGIN RSA PRIVATE KEY-----"
176+
return _is_unquoted_marker_line(line, marker)
177+
178+
return True
179+
180+
181+
def _strip_prefix(value: str) -> str:
182+
if "_" in value:
183+
return value.split("_", 1)[1]
184+
if "-" in value:
185+
return value.split("-", 1)[1]
186+
return value
187+
188+
189+
def _has_any_keyword(line_lower: str, keywords: set[str]) -> bool:
190+
return any(keyword in line_lower for keyword in keywords)
191+
192+
193+
def _contains_placeholder_markers(value: str) -> bool:
194+
lowered = value.lower()
195+
markers = {
196+
"example",
197+
"placeholder",
198+
"dummy",
199+
"changeme",
200+
"replace_me",
201+
"your_token_here",
202+
"your-key",
203+
"fake",
204+
"testtoken",
205+
"redacted",
206+
}
207+
return any(marker in lowered for marker in markers)
208+
209+
210+
def _is_unquoted_marker_line(line: str, marker: str) -> bool:
211+
stripped = line.strip()
212+
if not stripped.startswith(marker):
213+
return False
214+
if stripped != marker:
215+
return False
216+
return '"' not in stripped and "'" not in stripped
217+
218+
219+
def _looks_like_jwt_header(token: str) -> bool:
220+
parts = token.split(".")
221+
if len(parts) != 3:
222+
return False
223+
decoded = _decode_base64url(parts[0])
224+
if decoded is None:
225+
return False
226+
try:
227+
payload = json.loads(decoded)
228+
except json.JSONDecodeError:
229+
return False
230+
if not isinstance(payload, dict):
231+
return False
232+
return "alg" in payload or "typ" in payload
233+
234+
235+
def _decode_base64url(segment: str) -> str | None:
236+
padding = "=" * (-len(segment) % 4)
237+
try:
238+
raw = base64.urlsafe_b64decode(segment + padding)
239+
except (ValueError, TypeError):
240+
return None
241+
try:
242+
return raw.decode("utf-8")
243+
except UnicodeDecodeError:
244+
return None

src/leaklens/rules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
RuleSpec(
2828
name="github_token",
2929
secret_type="GitHub Token",
30-
pattern=r"\bgh[pousr]_[A-Za-z0-9]{36,255}\b",
30+
pattern=r"\b(?:gh[pousr]_[A-Za-z0-9]{36,255}|github_pat_[A-Za-z0-9_]{20,255})\b",
3131
severity=Severity.HIGH,
3232
confidence=0.95,
3333
risk="GitHub tokens can be used to access repositories, secrets, and workflows.",

0 commit comments

Comments
 (0)