Skip to content

Commit fc1c7cb

Browse files
authored
Merge pull request #528 from Serhan-Asad/fix/issue-521
Add failing tests for circular include cycle detection (#521)
2 parents 3f7d5e3 + 0d5e658 commit fc1c7cb

File tree

3 files changed

+368
-9
lines changed

3 files changed

+368
-9
lines changed

pdd/preprocess.py

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,18 +110,20 @@ def _scan_risky_placeholders(text: str) -> Tuple[List[Tuple[int, str]], List[Tup
110110
pass
111111
return single_brace, template_brace
112112

113-
def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool = True, exclude_keys: Optional[List[str]] = None) -> str:
113+
def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool = True, exclude_keys: Optional[List[str]] = None, _seen: Optional[set] = None) -> str:
114114
try:
115115
if not prompt:
116116
console.print("[bold red]Error:[/bold red] Empty prompt provided")
117117
return ""
118+
if _seen is None:
119+
_seen = set()
118120
_DEBUG_EVENTS.clear()
119121
_dbg(f"Start preprocess(recursive={recursive}, double_curly={double_curly_brackets}, exclude_keys={exclude_keys})")
120122
_dbg(f"Initial length: {len(prompt)} characters")
121123
console.print(Panel("Starting prompt preprocessing", style="bold blue"))
122-
prompt = process_backtick_includes(prompt, recursive)
124+
prompt = process_backtick_includes(prompt, recursive, _seen=_seen)
123125
_dbg("After backtick includes processed")
124-
prompt = process_xml_tags(prompt, recursive)
126+
prompt = process_xml_tags(prompt, recursive, _seen=_seen)
125127
_dbg("After XML-like tags processed")
126128
if double_curly_brackets:
127129
prompt = double_curly(prompt, exclude_keys)
@@ -141,6 +143,10 @@ def preprocess(prompt: str, recursive: bool = False, double_curly_brackets: bool
141143
_dbg(f"Final length: {len(prompt)} characters")
142144
_write_debug_report()
143145
return prompt
146+
except (RecursionError, ValueError) as e:
147+
if "Circular include" in str(e) or isinstance(e, RecursionError):
148+
raise
149+
raise
144150
except Exception as e:
145151
console.print(f"[bold red]Error during preprocessing:[/bold red] {str(e)}")
146152
console.print(Panel(traceback.format_exc(), title="Error Details", style="red"))
@@ -155,18 +161,24 @@ def get_file_path(file_name: str) -> str:
155161
return os.path.join("./", file_name)
156162
return str(resolved)
157163

158-
def process_backtick_includes(text: str, recursive: bool) -> str:
164+
def process_backtick_includes(text: str, recursive: bool, _seen: Optional[set] = None) -> str:
165+
if _seen is None:
166+
_seen = set()
159167
# More specific pattern that doesn't match nested > characters
160168
pattern = r"```<([^>]*?)>```"
161169
def replace_include(match):
162170
file_path = match.group(1).strip()
163171
try:
164172
full_path = get_file_path(file_path)
173+
resolved = os.path.realpath(full_path)
174+
if resolved in _seen:
175+
raise ValueError(f"Circular include detected: {file_path} is already in the include chain")
165176
console.print(f"Processing backtick include: [cyan]{full_path}[/cyan]")
166177
with open(full_path, 'r', encoding='utf-8') as file:
167178
content = file.read()
168179
if recursive:
169-
content = preprocess(content, recursive=True, double_curly_brackets=False)
180+
child_seen = _seen | {resolved}
181+
content = preprocess(content, recursive=True, double_curly_brackets=False, _seen=child_seen)
170182
_dbg(f"Included via backticks: {file_path} (len={len(content)})")
171183
return f"```{content}```"
172184
except FileNotFoundError:
@@ -175,6 +187,12 @@ def replace_include(match):
175187
# First pass (recursive=True): leave the tag so a later env expansion can resolve it
176188
# Second pass (recursive=False): replace with a visible placeholder
177189
return match.group(0) if recursive else f"```[File not found: {file_path}]```"
190+
except ValueError as e:
191+
if "Circular include" in str(e):
192+
raise
193+
console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
194+
_dbg(f"Error processing backtick include {file_path}: {e}")
195+
return f"```[Error processing include: {file_path}]```"
178196
except Exception as e:
179197
console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
180198
_dbg(f"Error processing backtick include {file_path}: {e}")
@@ -186,20 +204,27 @@ def replace_include(match):
186204
current_text = re.sub(pattern, replace_include, current_text, flags=re.DOTALL)
187205
return current_text
188206

189-
def process_xml_tags(text: str, recursive: bool) -> str:
207+
def process_xml_tags(text: str, recursive: bool, _seen: Optional[set] = None) -> str:
208+
if _seen is None:
209+
_seen = set()
190210
text = process_pdd_tags(text)
191-
text = process_include_tags(text, recursive)
211+
text = process_include_tags(text, recursive, _seen=_seen)
192212
text = process_include_many_tags(text, recursive)
193213
text = process_shell_tags(text, recursive)
194214
text = process_web_tags(text, recursive)
195215
return text
196216

197-
def process_include_tags(text: str, recursive: bool) -> str:
217+
def process_include_tags(text: str, recursive: bool, _seen: Optional[set] = None) -> str:
218+
if _seen is None:
219+
_seen = set()
198220
pattern = r'<include>(.*?)</include>'
199221
def replace_include(match):
200222
file_path = match.group(1).strip()
201223
try:
202224
full_path = get_file_path(file_path)
225+
resolved = os.path.realpath(full_path)
226+
if resolved in _seen:
227+
raise ValueError(f"Circular include detected: {file_path} is already in the include chain")
203228
ext = os.path.splitext(file_path)[1].lower()
204229
image_extensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.heic']
205230

@@ -245,7 +270,8 @@ def replace_include(match):
245270
with open(full_path, 'r', encoding='utf-8') as file:
246271
content = file.read()
247272
if recursive:
248-
content = preprocess(content, recursive=True, double_curly_brackets=False)
273+
child_seen = _seen | {resolved}
274+
content = preprocess(content, recursive=True, double_curly_brackets=False, _seen=child_seen)
249275
_dbg(f"Included via XML tag: {file_path} (len={len(content)})")
250276
return content
251277
except FileNotFoundError:
@@ -254,6 +280,12 @@ def replace_include(match):
254280
# First pass (recursive=True): leave the tag so a later env expansion can resolve it
255281
# Second pass (recursive=False): replace with a visible placeholder
256282
return match.group(0) if recursive else f"[File not found: {file_path}]"
283+
except ValueError as e:
284+
if "Circular include" in str(e):
285+
raise
286+
console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
287+
_dbg(f"Error processing XML include {file_path}: {e}")
288+
return f"[Error processing include: {file_path}]"
257289
except Exception as e:
258290
console.print(f"[bold red]Error processing include:[/bold red] {str(e)}")
259291
_dbg(f"Error processing XML include {file_path}: {e}")

tests/test_circular_includes.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""
2+
Tests for circular <include> detection in pdd/preprocess.py (Issue #521).
3+
4+
The preprocessor has no cycle detection — circular includes recurse ~82 times
5+
until Python's recursion limit, then the broad `except Exception` swallows
6+
the RecursionError and returns corrupted output with exit code 0.
7+
8+
These tests verify that circular includes produce an error (exception or
9+
error marker in output), NOT silently corrupted content.
10+
"""
11+
12+
import os
13+
import pytest
14+
from unittest.mock import patch, mock_open, MagicMock
15+
from pdd.preprocess import preprocess
16+
17+
# Store original so we can restore
18+
_original_pdd_path = os.environ.get('PDD_PATH')
19+
20+
21+
def set_pdd_path(path: str) -> None:
22+
os.environ['PDD_PATH'] = path
23+
24+
25+
def _make_mock_open(file_map: dict):
26+
"""Create a mock open that returns content based on filename."""
27+
def side_effect(file_name, *args, **kwargs):
28+
mock_file = MagicMock()
29+
for key, content in file_map.items():
30+
if key in str(file_name):
31+
mock_file.read.return_value = content
32+
mock_file.__enter__ = lambda s: s
33+
mock_file.__exit__ = MagicMock(return_value=False)
34+
return mock_file
35+
raise FileNotFoundError(f"No mock for {file_name}")
36+
return side_effect
37+
38+
39+
class TestCircularIncludes:
40+
"""Issue #521: Circular <include> tags must be detected, not silently expanded."""
41+
42+
def setup_method(self):
43+
set_pdd_path('/mock/path')
44+
45+
def teardown_method(self):
46+
if _original_pdd_path is not None:
47+
os.environ['PDD_PATH'] = _original_pdd_path
48+
elif 'PDD_PATH' in os.environ:
49+
del os.environ['PDD_PATH']
50+
51+
def test_circular_xml_includes_must_error(self):
52+
"""A→B→A circular include must raise or return error, not silently corrupt."""
53+
file_map = {
54+
'a.txt': 'Hello\n<include>b.txt</include>',
55+
'b.txt': 'World\n<include>a.txt</include>',
56+
}
57+
with patch('builtins.open', mock_open()) as m:
58+
m.side_effect = _make_mock_open(file_map)
59+
60+
# The bug: preprocess silently returns corrupted output with
61+
# "Hello" repeated ~82 times. A correct implementation must
62+
# either raise an error OR return output containing an error marker.
63+
try:
64+
result = preprocess(
65+
'<include>a.txt</include>',
66+
recursive=True,
67+
double_curly_brackets=False,
68+
)
69+
except (RecursionError, ValueError, RuntimeError):
70+
# Any of these exceptions is acceptable — cycle was detected
71+
return
72+
73+
# If no exception, the output must NOT contain duplicated content.
74+
# The buggy behavior produces "Hello" 82+ times.
75+
hello_count = result.count('Hello')
76+
world_count = result.count('World')
77+
assert hello_count <= 2 and world_count <= 2, (
78+
f"Circular include silently produced corrupted output: "
79+
f"'Hello' appeared {hello_count} times, 'World' appeared {world_count} times. "
80+
f"Expected an error or at most 2 occurrences each."
81+
)
82+
83+
def test_circular_backtick_includes_must_error(self):
84+
"""Circular backtick-style includes must also be detected."""
85+
file_map = {
86+
'x.txt': 'Foo\n```<y.txt>```',
87+
'y.txt': 'Bar\n```<x.txt>```',
88+
}
89+
with patch('builtins.open', mock_open()) as m:
90+
m.side_effect = _make_mock_open(file_map)
91+
92+
try:
93+
result = preprocess(
94+
'```<x.txt>```',
95+
recursive=True,
96+
double_curly_brackets=False,
97+
)
98+
except (RecursionError, ValueError, RuntimeError):
99+
return
100+
101+
foo_count = result.count('Foo')
102+
bar_count = result.count('Bar')
103+
assert foo_count <= 2 and bar_count <= 2, (
104+
f"Circular backtick include silently produced corrupted output: "
105+
f"'Foo' appeared {foo_count} times, 'Bar' appeared {bar_count} times."
106+
)
107+
108+
def test_self_referencing_include_must_error(self):
109+
"""A file that includes itself must be detected as circular."""
110+
file_map = {
111+
'self.txt': 'Content\n<include>self.txt</include>',
112+
}
113+
with patch('builtins.open', mock_open()) as m:
114+
m.side_effect = _make_mock_open(file_map)
115+
116+
try:
117+
result = preprocess(
118+
'<include>self.txt</include>',
119+
recursive=True,
120+
double_curly_brackets=False,
121+
)
122+
except (RecursionError, ValueError, RuntimeError):
123+
return
124+
125+
content_count = result.count('Content')
126+
assert content_count <= 2, (
127+
f"Self-referencing include silently produced corrupted output: "
128+
f"'Content' appeared {content_count} times."
129+
)
130+
131+
def test_three_file_cycle_must_error(self):
132+
"""A→B→C→A three-file cycle must be detected."""
133+
file_map = {
134+
'a.txt': 'AAA\n<include>b.txt</include>',
135+
'b.txt': 'BBB\n<include>c.txt</include>',
136+
'c.txt': 'CCC\n<include>a.txt</include>',
137+
}
138+
with patch('builtins.open', mock_open()) as m:
139+
m.side_effect = _make_mock_open(file_map)
140+
141+
try:
142+
result = preprocess(
143+
'<include>a.txt</include>',
144+
recursive=True,
145+
double_curly_brackets=False,
146+
)
147+
except (RecursionError, ValueError, RuntimeError):
148+
return
149+
150+
aaa_count = result.count('AAA')
151+
assert aaa_count <= 2, (
152+
f"Three-file circular include silently produced corrupted output: "
153+
f"'AAA' appeared {aaa_count} times."
154+
)
155+
156+
def test_non_circular_recursive_includes_still_work(self):
157+
"""Non-circular recursive includes (A→B→C, no cycle) must still work."""
158+
file_map = {
159+
'top.txt': 'Top\n<include>mid.txt</include>',
160+
'mid.txt': 'Mid\n<include>leaf.txt</include>',
161+
'leaf.txt': 'Leaf',
162+
}
163+
with patch('builtins.open', mock_open()) as m:
164+
m.side_effect = _make_mock_open(file_map)
165+
166+
result = preprocess(
167+
'<include>top.txt</include>',
168+
recursive=True,
169+
double_curly_brackets=False,
170+
)
171+
172+
assert 'Top' in result
173+
assert 'Mid' in result
174+
assert 'Leaf' in result
175+
176+
def test_diamond_includes_not_falsely_flagged(self):
177+
"""Diamond pattern (A→B, A→C, B→D, C→D) is NOT circular and must work."""
178+
file_map = {
179+
'a.txt': '<include>b.txt</include>\n<include>c.txt</include>',
180+
'b.txt': 'B\n<include>d.txt</include>',
181+
'c.txt': 'C\n<include>d.txt</include>',
182+
'd.txt': 'Shared',
183+
}
184+
with patch('builtins.open', mock_open()) as m:
185+
m.side_effect = _make_mock_open(file_map)
186+
187+
result = preprocess(
188+
'<include>a.txt</include>',
189+
recursive=True,
190+
double_curly_brackets=False,
191+
)
192+
193+
assert 'B' in result
194+
assert 'C' in result
195+
# D is included twice (via B and via C) — that's fine, not circular
196+
assert result.count('Shared') == 2

0 commit comments

Comments
 (0)