fix: detect (a+)+ as vulnerable in AUTO mode (issue #2)

dvershinin · dvershinin · commit 40d37b160ec0 · 2026-01-30T11:21:21.000+08:00
The pattern (a+)+ was incorrectly reported as safe because the logic in _is_multi_trans_exploitable assumed that unanchored patterns could always "escape early". That assumption holds only for optional quantifiers like (a*)*, not for required quantifiers like (a+)+. Changed AUTO mode to be conservative: it now always reports multi-transitions as exploitable. As a consequence, unanchored patterns without a required continuation, such as (a*)*, which the removed branch treated as not exploitable, may now be flagged in AUTO mode as well. Users who want the lenient analysis can use match_mode=PARTIAL. Added a regression test for issue #2.
diff --git a/src/redoctor/automaton/scc_checker.py b/src/redoctor/automaton/scc_checker.py
@@ -326,14 +326,9 @@ def _is_multi_trans_exploitable(self, state: NFAState, nfa_char: NFAChar) -> boo
             # Has end anchor OR requires continuation → exploitable
             return True
 
-        # AUTO mode: use anchor and continuation detection
-        if not self.has_end_anchor and not self.requires_continuation:
-            # No end anchor and no continuation → can escape early → not exploitable
-            # This is the key insight: (a*)* without $ is safe in partial match
-            # But ^([^@]+)+@ IS exploitable because of the @ after the quantifier
-            return False
-
-        # Has end anchor OR requires continuation → must try all combinations → exploitable
+        # AUTO mode: conservative approach for security analysis
+        # Multi-transitions indicate nested quantifier ambiguity - report as exploitable
+        # Users who want lenient analysis can use match_mode=PARTIAL
         return True
 
     def _check_eda_with_pair_graph(
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
@@ -114,3 +114,24 @@ def test_username_pattern(self):
         result = check(pattern, config=Config.quick())
         assert result is not None
         assert result.status.value in ("safe", "unknown")
+
+
+class TestGitHubIssues:
+    """Regression tests for reported GitHub issues."""
+
+    def test_issue_2_unanchored_nested_plus_is_vulnerable(self):
+        """GitHub issue #2: (a+)+ should be detected as vulnerable.
+
+        The pattern (a+)+ without anchors was incorrectly reported as safe
+        with O(n) complexity. It should be detected as vulnerable with
+        exponential complexity due to nested quantifier ambiguity.
+
+        Note: We use skip_recall=True because recall validation uses re.match()
+        which doesn't require full-string matching, so the attack string doesn't
+        trigger backtracking. The automaton analysis correctly identifies this
+        as vulnerable in full-match contexts (e.g., re.fullmatch()).
+        """
+        result = check(r"(a+)+", config=Config(skip_recall=True))
+        assert result.is_vulnerable
+        assert result.complexity is not None
+        assert result.complexity.is_exponential