diff --git a/pdd/preprocess.py b/pdd/preprocess.py index 7c39e5cd2..2ea87fcf2 100644 --- a/pdd/preprocess.py +++ b/pdd/preprocess.py @@ -197,11 +197,30 @@ def replace_include(match): console.print(f"[bold red]Error processing include:[/bold red] {str(e)}") _dbg(f"Error processing backtick include {file_path}: {e}") return f"```[Error processing include: {file_path}]```" + _expanded_prior = set() + _expanded_current = set() + + def replace_include_tracked(match): + file_path = match.group(1).strip() + try: + full_path = get_file_path(file_path) + resolved = os.path.realpath(full_path) + if resolved in _expanded_prior: + raise ValueError(f"Circular include detected: {file_path} is already in the include chain") + _expanded_current.add(resolved) + except (FileNotFoundError, ValueError): + raise + except Exception: + pass + return replace_include(match) + prev_text = "" current_text = text while prev_text != current_text: prev_text = current_text - current_text = re.sub(pattern, replace_include, current_text, flags=re.DOTALL) + _expanded_current = set() + current_text = re.sub(pattern, replace_include_tracked, current_text, flags=re.DOTALL) + _expanded_prior.update(_expanded_current) return current_text def process_xml_tags(text: str, recursive: bool, _seen: Optional[set] = None) -> str: @@ -290,16 +309,35 @@ def replace_include(match): console.print(f"[bold red]Error processing include:[/bold red] {str(e)}") _dbg(f"Error processing XML include {file_path}: {e}") return f"[Error processing include: {file_path}]" + _expanded_prior = set() + _expanded_current = set() + + def replace_include_tracked(match): + file_path = match.group(1).strip() + try: + full_path = get_file_path(file_path) + resolved = os.path.realpath(full_path) + if resolved in _expanded_prior: + raise ValueError(f"Circular include detected: {file_path} is already in the include chain") + _expanded_current.add(resolved) + except (FileNotFoundError, ValueError): + raise + except Exception: + pass + return replace_include(match) + prev_text = "" current_text = text while prev_text != current_text: prev_text = current_text + _expanded_current = set() code_spans = _extract_code_spans(current_text) def replace_include_with_spans(match): if _intersects_any_span(match.start(), match.end(), code_spans): return match.group(0) - return replace_include(match) + return replace_include_tracked(match) current_text = re.sub(pattern, replace_include_with_spans, current_text, flags=re.DOTALL) + _expanded_prior.update(_expanded_current) return current_text def process_pdd_tags(text: str) -> str: diff --git a/tests/test_circular_includes.py b/tests/test_circular_includes.py index 789cd3c64..2fb5c917e 100644 --- a/tests/test_circular_includes.py +++ b/tests/test_circular_includes.py @@ -1,19 +1,42 @@ """ -Tests for circular detection in pdd/preprocess.py (Issue #521). +Tests for circular detection in pdd/preprocess.py (Issues #521, #553). -The preprocessor has no cycle detection — circular includes recurse ~82 times -until Python's recursion limit, then the broad `except Exception` swallows -the RecursionError and returns corrupted output with exit code 0. +Issue #521: Circular includes in the recursive path must be detected. +Issue #553: Circular includes in the non-recursive (default) path must also +be detected. The _seen set is only populated inside `if recursive:` blocks, +so the non-recursive convergence loop has no cycle guard and loops forever. These tests verify that circular includes produce an error (exception or error marker in output), NOT silently corrupted content. """ import os +import signal import pytest from unittest.mock import patch, mock_open, MagicMock from pdd.preprocess import preprocess + +def _timeout_handler(signum, frame): + raise TimeoutError("Test timed out — possible infinite loop") + + +def _with_timeout(seconds=10): + """Decorator that enforces a real timeout using SIGALRM (Unix only).""" + def decorator(func): + def wrapper(*args, **kwargs): + old_handler = signal.signal(signal.SIGALRM, _timeout_handler) + signal.alarm(seconds) + try: + return func(*args, **kwargs) + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, old_handler) + wrapper.__name__ = func.__name__ + wrapper.__doc__ = func.__doc__ + return wrapper + return decorator + # Store original so we can restore _original_pdd_path = os.environ.get('PDD_PATH') @@ -194,3 +217,135 @@ def test_diamond_includes_not_falsely_flagged(self): assert 'C' in result # D is included twice (via B and via C) — that's fine, not circular assert result.count('Shared') == 2 + + +class TestCircularIncludesNonRecursive: + """Issue #553: Circular includes must be detected in the non-recursive (default) path. + + PR #528 added cycle detection for recursive=True, but the non-recursive path + (which uses a while-loop for convergence) never populates the _seen set, + so circular includes cause an infinite loop instead of raising ValueError. + """ + + def setup_method(self): + set_pdd_path('/mock/path') + + def teardown_method(self): + if _original_pdd_path is not None: + os.environ['PDD_PATH'] = _original_pdd_path + elif 'PDD_PATH' in os.environ: + del os.environ['PDD_PATH'] + + @_with_timeout(10) + def test_circular_xml_includes_non_recursive_must_error(self): + """A→B→A circular XML include with recursive=False must raise ValueError, not loop forever.""" + file_map = { + 'a.txt': 'Hello\nb.txt', + 'b.txt': 'World\na.txt', + } + with patch('builtins.open', mock_open()) as m: + m.side_effect = _make_mock_open(file_map) + + # Buggy code: infinite loop in the while-loop → timeout → test fails + # Fixed code: ValueError("Circular include detected: ...") → test passes + with pytest.raises(ValueError, match="Circular include detected"): + preprocess( + 'a.txt', + recursive=False, + double_curly_brackets=False, + ) + + @_with_timeout(10) + def test_circular_backtick_includes_non_recursive_must_error(self): + """A→B→A circular backtick include with recursive=False must raise ValueError.""" + file_map = { + 'x.txt': 'Foo\n``````', + 'y.txt': 'Bar\n``````', + } + with patch('builtins.open', mock_open()) as m: + m.side_effect = _make_mock_open(file_map) + + # Buggy code: infinite loop in process_backtick_includes while-loop + # Fixed code: ValueError raised + with pytest.raises(ValueError, match="Circular include detected"): + preprocess( + '``````', + recursive=False, + double_curly_brackets=False, + ) + + @_with_timeout(10) + def test_self_referencing_include_non_recursive_must_error(self): + """A file that includes itself must be detected in non-recursive mode.""" + file_map = { + 'self.txt': 'Content\nself.txt', + } + with patch('builtins.open', mock_open()) as m: + m.side_effect = _make_mock_open(file_map) + + with pytest.raises(ValueError, match="Circular include detected"): + preprocess( + 'self.txt', + recursive=False, + double_curly_brackets=False, + ) + + @_with_timeout(10) + def test_three_file_cycle_non_recursive_must_error(self): + """A→B→C→A three-file cycle must be detected in non-recursive mode.""" + file_map = { + 'a.txt': 'AAA\nb.txt', + 'b.txt': 'BBB\nc.txt', + 'c.txt': 'CCC\na.txt', + } + with patch('builtins.open', mock_open()) as m: + m.side_effect = _make_mock_open(file_map) + + with pytest.raises(ValueError, match="Circular include detected"): + preprocess( + 'a.txt', + recursive=False, + double_curly_brackets=False, + ) + + def test_non_circular_includes_non_recursive_still_work(self): + """Non-circular includes (A→B→C, no cycle) must still work in non-recursive mode.""" + file_map = { + 'top.txt': 'Top\nmid.txt', + 'mid.txt': 'Mid\nleaf.txt', + 'leaf.txt': 'Leaf', + } + with patch('builtins.open', mock_open()) as m: + m.side_effect = _make_mock_open(file_map) + + result = preprocess( + 'top.txt', + recursive=False, + double_curly_brackets=False, + ) + + assert 'Top' in result + assert 'Mid' in result + assert 'Leaf' in result + + def test_diamond_includes_non_recursive_not_falsely_flagged(self): + """Diamond pattern (A→B, A→C, B→D, C→D) is NOT circular and must work.""" + file_map = { + 'a.txt': 'b.txt\nc.txt', + 'b.txt': 'B\nd.txt', + 'c.txt': 'C\nd.txt', + 'd.txt': 'Shared', + } + with patch('builtins.open', mock_open()) as m: + m.side_effect = _make_mock_open(file_map) + + result = preprocess( + 'a.txt', + recursive=False, + double_curly_brackets=False, + ) + + assert 'B' in result + assert 'C' in result + # D is included twice (via B and via C) — that's fine, not circular + assert result.count('Shared') == 2 diff --git a/tests/test_e2e_issue_553_circular_includes_non_recursive.py b/tests/test_e2e_issue_553_circular_includes_non_recursive.py new file mode 100644 index 000000000..8810c1459 --- /dev/null +++ b/tests/test_e2e_issue_553_circular_includes_non_recursive.py @@ -0,0 +1,194 @@ +""" +E2E Test for Issue #553: Circular tags cause infinite loop in non-recursive preprocess. + +This test exercises the full CLI path: `pdd preprocess` (default, non-recursive) with +real circular prompt files on disk. No mocking of the preprocessor. This verifies the +user-facing behavior: running `pdd preprocess` on circular includes must produce an +error exit code within a reasonable time, not hang forever. + +Issue #521 (fixed by PR #528) added cycle detection for --recursive mode. +Issue #553 reports that the default (non-recursive) path was not covered by that fix. +""" + +import os +import subprocess +import sys +import pytest +from pathlib import Path + + +# Timeout in seconds for subprocess — buggy code loops forever, so this must be finite. +# 10 seconds is generous; the fixed code should exit in < 1 second. +CLI_TIMEOUT = 10 + + +def _get_project_root() -> Path: + """Get the project root directory.""" + current = Path(__file__).parent + while current != current.parent: + if (current / "pdd").is_dir() and (current / "pyproject.toml").exists(): + return current + current = current.parent + raise RuntimeError("Could not find project root with pdd/ directory") + + +def _run_pdd_preprocess(prompt_file: str, cwd: str, extra_args: list = None): + """Run `pdd --force preprocess ` via subprocess with timeout. + + Returns (returncode, stdout, stderr) or raises subprocess.TimeoutExpired + if the process hangs (indicating the infinite loop bug). + """ + cmd = [sys.executable, "-m", "pdd.cli", "--force", "preprocess", prompt_file] + if extra_args: + cmd.extend(extra_args) + env = os.environ.copy() + env["PYTHONPATH"] = str(_get_project_root()) + return subprocess.run( + cmd, + capture_output=True, + text=True, + cwd=cwd, + env=env, + timeout=CLI_TIMEOUT, + ) + + +class TestIssue553CircularIncludesNonRecursiveE2E: + """E2E: `pdd preprocess` (non-recursive) on circular includes must error, not loop forever.""" + + def test_mutual_circular_includes_default_mode(self, tmp_path): + """Reproduce the exact bug: A includes B, B includes A, default (non-recursive) mode. + + Bug behavior: process hangs forever, spamming "Processing XML include" messages. + Expected: non-zero exit code with "Circular include detected" in output, within seconds. + """ + a_file = tmp_path / "a_python.prompt" + b_file = tmp_path / "b_python.prompt" + + a_file.write_text("Hello\nb_python.prompt\n") + b_file.write_text("World\na_python.prompt\n") + + try: + result = _run_pdd_preprocess(str(a_file), cwd=str(tmp_path)) + except subprocess.TimeoutExpired: + pytest.fail( + f"pdd preprocess hung for {CLI_TIMEOUT}s — infinite loop detected (bug #553). " + f"Non-recursive circular include cycle detection is missing." + ) + + # Process completed within timeout. Verify it errored out properly. + assert result.returncode != 0, ( + f"pdd preprocess should have exited with non-zero code for circular includes, " + f"got exit code {result.returncode}.\n" + f"stdout: {result.stdout[:500]}\nstderr: {result.stderr[:500]}" + ) + + combined_output = result.stdout + result.stderr + assert "circular include" in combined_output.lower(), ( + f"Expected 'Circular include detected' in output, got:\n" + f"stdout: {result.stdout[:500]}\nstderr: {result.stderr[:500]}" + ) + + def test_self_referencing_include_default_mode(self, tmp_path): + """A file that includes itself must not loop forever in non-recursive mode.""" + self_file = tmp_path / "self_ref_python.prompt" + self_file.write_text("Content\nself_ref_python.prompt\n") + + try: + result = _run_pdd_preprocess(str(self_file), cwd=str(tmp_path)) + except subprocess.TimeoutExpired: + pytest.fail( + f"pdd preprocess hung for {CLI_TIMEOUT}s — self-referencing infinite loop (bug #553)." + ) + + assert result.returncode != 0, ( + f"Self-referencing include should error, got exit code {result.returncode}." + ) + + def test_three_file_cycle_default_mode(self, tmp_path): + """A→B→C→A cycle via CLI must not loop forever in non-recursive mode.""" + (tmp_path / "a_python.prompt").write_text("AAA\nb_python.prompt\n") + (tmp_path / "b_python.prompt").write_text("BBB\nc_python.prompt\n") + (tmp_path / "c_python.prompt").write_text("CCC\na_python.prompt\n") + + try: + result = _run_pdd_preprocess( + str(tmp_path / "a_python.prompt"), cwd=str(tmp_path) + ) + except subprocess.TimeoutExpired: + pytest.fail( + f"pdd preprocess hung for {CLI_TIMEOUT}s — 3-file cycle infinite loop (bug #553)." + ) + + assert result.returncode != 0, ( + f"Three-file cycle should error, got exit code {result.returncode}." + ) + + def test_backtick_circular_includes_default_mode(self, tmp_path): + """Backtick-style circular includes must also be detected in non-recursive mode.""" + x_file = tmp_path / "x_python.prompt" + y_file = tmp_path / "y_python.prompt" + + x_file.write_text("Foo\n``````\n") + y_file.write_text("Bar\n``````\n") + + try: + result = _run_pdd_preprocess(str(x_file), cwd=str(tmp_path)) + except subprocess.TimeoutExpired: + pytest.fail( + f"pdd preprocess hung for {CLI_TIMEOUT}s — backtick circular include loop (bug #553)." + ) + + assert result.returncode != 0, ( + f"Backtick circular include should error, got exit code {result.returncode}." + ) + + def test_non_circular_includes_still_work(self, tmp_path): + """Non-circular chain (A→B→C) must still preprocess successfully in default mode.""" + (tmp_path / "top_python.prompt").write_text("Top\nmid.txt\n") + (tmp_path / "mid.txt").write_text("Mid\nleaf.txt\n") + (tmp_path / "leaf.txt").write_text("Leaf\n") + + output_file = tmp_path / "output.txt" + + try: + result = _run_pdd_preprocess( + str(tmp_path / "top_python.prompt"), + cwd=str(tmp_path), + extra_args=["--output", str(output_file)], + ) + except subprocess.TimeoutExpired: + pytest.fail("Non-circular preprocess should not hang.") + + assert result.returncode == 0, ( + f"Non-circular include should succeed, got exit code {result.returncode}.\n" + f"stderr: {result.stderr[:500]}" + ) + + assert output_file.exists(), ( + f"Expected output file {output_file} to be created by --output flag.\n" + f"stdout: {result.stdout[:500]}" + ) + content = output_file.read_text() + assert "Top" in content + assert "Mid" in content + assert "Leaf" in content + + def test_recursive_mode_still_detects_cycle(self, tmp_path): + """Sanity check: --recursive mode still catches circular includes (regression guard).""" + a_file = tmp_path / "a_python.prompt" + b_file = tmp_path / "b_python.prompt" + + a_file.write_text("Hello\nb_python.prompt\n") + b_file.write_text("World\na_python.prompt\n") + + try: + result = _run_pdd_preprocess( + str(a_file), cwd=str(tmp_path), extra_args=["--recursive"] + ) + except subprocess.TimeoutExpired: + pytest.fail("--recursive mode should not hang on circular includes.") + + assert result.returncode != 0, ( + f"--recursive mode should error on circular includes, got exit code {result.returncode}." + )