Skip to content

Commit 421c2e8

Browse files
authored
Refactor budget enforcement to use exception-based control flow (#19)
* Fix budget enforcement * Fix budget enforcement tests for file path cost accounting - Update test budgets to account for file path length being debited - Fix FileWindow validation to require line_count >= 1 (not >= 0) - Adjust budget calculations in tests to include both path and content costs - All 145 tests now pass with proper budget enforcement
1 parent 9728cbf commit 421c2e8

File tree

8 files changed

+88
-207
lines changed

8 files changed

+88
-207
lines changed

src/glob_grep_glance/_defaults.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pydantic import BaseModel
1111

12-
from ._budget import OutputBudget
12+
from ._budget import BudgetExceeded, OutputBudget
1313
from ._sandbox import Sandbox
1414
from .common import FileContent, FileReadResult, FileWindow, GlobPattern, RegexPattern
1515

@@ -76,6 +76,7 @@ def read_window(
7676

7777
try:
7878
# Open file with UTF-8 encoding and ignore errors for binary files
79+
budget.debit(len(file.as_posix())) # Debit budget for file path length
7980
with open(file, "r", encoding="utf-8", errors="ignore") as f:
8081
# Skip to the starting line offset
8182
current_line = 0
@@ -91,13 +92,6 @@ def read_window(
9192
line = f.readline()
9293
if not line: # EOF reached
9394
break
94-
95-
# Check if this line would exceed budget
96-
if len(line) > budget.remaining:
97-
truncated = True
98-
break
99-
100-
# Line fits in budget, debit and add to contents
10195
budget.debit(len(line))
10296
contents += line
10397
lines_read += 1
@@ -106,6 +100,9 @@ def read_window(
106100
# Re-raise file system errors (like file not found)
107101
raise e
108102

103+
except BudgetExceeded:
104+
truncated = True
105+
109106
return FileReadResult(contents=contents, truncated=truncated)
110107

111108

@@ -118,7 +115,6 @@ def iter_matches(
118115
self,
119116
file: Path,
120117
search_regex: RegexPattern,
121-
budget: OutputBudget,
122118
) -> Iterable[FileContent]:
123119
"""Yield regex matches from file, one per matching line."""
124120
# Validate file access through sandbox
@@ -133,13 +129,6 @@ def iter_matches(
133129
for line_number, line in enumerate(f):
134130
# Check if line matches pattern
135131
if compiled_pattern.search(line):
136-
# Check if this line would exceed budget
137-
if len(line) > budget.remaining:
138-
# Skip this line and continue to next
139-
continue
140-
141-
# Line fits in budget, debit and yield match
142-
budget.debit(len(line))
143132
yield FileContent(
144133
path=file,
145134
contents=line,

src/glob_grep_glance/_protocols.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def iter_matches(
3535
self,
3636
file: Path,
3737
search_regex: RegexPattern,
38-
budget: OutputBudget,
3938
) -> Iterable[FileContent]:
4039
"""Yield regex matches from file, respecting budget constraints."""
4140
...

src/glob_grep_glance/glob.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from pydantic import BaseModel, Field
77

8-
from ._budget import OutputBudget
8+
from ._budget import BudgetExceeded, OutputBudget
99
from ._defaults import FilesystemPathEnumerator
1010
from ._protocols import PathEnumerator
1111
from ._sandbox import Sandbox
@@ -38,13 +38,11 @@ def glob(
3838
truncated = False
3939

4040
for path in self.path_enum.iter_paths(glob_patterns):
41-
# Check if we have budget remaining
42-
if budget.remaining <= 0:
41+
try:
42+
budget.debit(len(path.as_posix())) # debit budget by path length
43+
paths.append(path)
44+
except BudgetExceeded:
4345
truncated = True
4446
break
4547

46-
# Count each path as 1 unit towards budget
47-
budget.debit(1)
48-
paths.append(path)
49-
5048
return GlobOutput(paths=paths, truncated=truncated)

src/glob_grep_glance/grep.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from pydantic import BaseModel, Field
66

7-
from ._budget import OutputBudget
7+
from ._budget import BudgetExceeded, OutputBudget
88
from ._defaults import FilesystemPathEnumerator, StreamingRegexSearcher
99
from ._protocols import PathEnumerator, RegexSearcher
1010
from ._sandbox import Sandbox
@@ -49,22 +49,17 @@ def grep(
4949
for file_path in self.path_enum.iter_paths(glob_patterns):
5050
try:
5151
# Search for matches in this file
52-
for match in self.regex_searcher.iter_matches(
53-
file_path, search_regex, budget
54-
):
52+
for match in self.regex_searcher.iter_matches(file_path, search_regex):
53+
budget.debit(len(match.model_dump_json()))
5554
matches.append(match)
5655

57-
# Check if budget is exhausted
58-
if budget.remaining <= 0:
59-
truncated = True
60-
break
56+
except BudgetExceeded:
57+
# If budget exceeded, set truncated flag and break
58+
truncated = True
59+
break
6160

6261
except Exception:
6362
# Continue to next file if this one fails
6463
continue
6564

66-
# Break out of file loop if budget exhausted
67-
if truncated:
68-
break
69-
7065
return GrepOutput(matches=matches, truncated=truncated)

tests/test_file_reader.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,16 @@ def test_read_simple_file(
4646
test_content = "line 1\nline 2\nline 3\n"
4747
test_file.write_text(test_content, encoding="utf-8")
4848

49-
budget = OutputBudget(limit=100)
49+
budget = OutputBudget(limit=200) # Increased to account for file path cost
5050
window = FileWindow(line_offset=0, line_count=3)
5151

5252
result = reader.read_window(test_file, window, budget)
5353

5454
assert result.contents == test_content
5555
assert not result.truncated
56-
assert budget.remaining == 100 - len(test_content)
56+
# Budget remaining = initial - file_path_length - content_length
57+
expected_remaining = 200 - len(test_file.as_posix()) - len(test_content)
58+
assert budget.remaining == expected_remaining
5759

5860
def test_read_file_with_window_offset(
5961
self, temp_sandbox: tuple[Path, Sandbox], reader: StreamingFileReader
@@ -131,7 +133,12 @@ def test_budget_constraint_enforcement(
131133
"short\nmedium line\nvery long line that exceeds budget\n", encoding="utf-8"
132134
)
133135

134-
budget = OutputBudget(limit=20) # Small budget
136+
# Calculate budget to allow file path + first two lines but not the third
137+
first_two_lines = "short\nmedium line\n"
138+
budget_needed = (
139+
len(test_file.as_posix()) + len(first_two_lines) + 5
140+
) # +5 buffer
141+
budget = OutputBudget(limit=budget_needed)
135142
window = FileWindow(line_offset=0, line_count=3)
136143

137144
result = reader.read_window(test_file, window, budget)
@@ -328,7 +335,10 @@ def test_line_by_line_budget_debit(
328335
test_file = sandbox_dir / "test.txt"
329336
test_file.write_text("line1\nline2\nline3\n", encoding="utf-8")
330337

331-
budget = OutputBudget(limit=12) # Exactly enough for first two lines (6+6)
338+
# Calculate budget: file path + first two lines exactly
339+
first_two_lines = "line1\nline2\n"
340+
budget_limit = len(test_file.as_posix()) + len(first_two_lines)
341+
budget = OutputBudget(limit=budget_limit)
332342
window = FileWindow(line_offset=0, line_count=3)
333343

334344
result = reader.read_window(test_file, window, budget)
@@ -347,7 +357,8 @@ def test_utf8_with_special_characters(
347357
test_content = "héllo wørld 🌍\nünicode tëst ñoño\n"
348358
test_file.write_text(test_content, encoding="utf-8")
349359

350-
budget = OutputBudget(limit=100)
360+
# Increase budget to account for file path + UTF-8 content
361+
budget = OutputBudget(limit=200)
351362
window = FileWindow(line_offset=0, line_count=2)
352363

353364
result = reader.read_window(test_file, window, budget)
@@ -364,7 +375,8 @@ def test_malformed_encoding_graceful_handling(
364375
# Write some invalid UTF-8 sequences
365376
test_file.write_bytes(b"valid text\n\xff\xfe invalid utf8\n more text\n")
366377

367-
budget = OutputBudget(limit=100)
378+
# Increase budget to account for file path + content
379+
budget = OutputBudget(limit=200)
368380
window = FileWindow(line_offset=0, line_count=3)
369381

370382
# Should not crash due to encoding errors (errors="ignore")

tests/test_protocols.py

Lines changed: 8 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,10 @@ def test_protocol_compliance(self) -> None:
121121

122122
file_path = Path("test.txt")
123123
search_regex: RegexPattern = r"match"
124-
budget = OutputBudget(limit=1000)
125-
126-
result = list(mock_searcher.iter_matches(file_path, search_regex, budget))
124+
result = list(mock_searcher.iter_matches(file_path, search_regex))
127125

128126
assert result == expected_content
129-
mock_searcher.iter_matches.assert_called_once_with(
130-
file_path, search_regex, budget
131-
)
127+
mock_searcher.iter_matches.assert_called_once_with(file_path, search_regex)
132128

133129
def test_regex_pattern_type_safety(self) -> None:
134130
"""Test RegexSearcher uses validated RegexPattern type."""
@@ -137,39 +133,10 @@ def test_regex_pattern_type_safety(self) -> None:
137133

138134
file_path = Path("test.txt")
139135
search_regex: RegexPattern = r"\d+" # Valid regex pattern
140-
budget = OutputBudget(limit=1000)
141-
142-
result = list(mock_searcher.iter_matches(file_path, search_regex, budget))
136+
result = list(mock_searcher.iter_matches(file_path, search_regex))
143137

144138
assert result == []
145-
mock_searcher.iter_matches.assert_called_once_with(
146-
file_path, search_regex, budget
147-
)
148-
149-
def test_budget_aware_streaming(self) -> None:
150-
"""Test RegexSearcher respects budget constraints during streaming."""
151-
mock_searcher = Mock(spec=RegexSearcher)
152-
# Simulate budget-limited results
153-
limited_content = [
154-
FileContent(
155-
path=Path("test.txt"),
156-
contents="first match",
157-
window=FileWindow(line_offset=0, line_count=1),
158-
)
159-
]
160-
mock_searcher.iter_matches = Mock(return_value=iter(limited_content))
161-
162-
file_path = Path("test.txt")
163-
search_regex: RegexPattern = r"match"
164-
budget = OutputBudget(limit=50) # Limited budget
165-
166-
result = list(mock_searcher.iter_matches(file_path, search_regex, budget))
167-
168-
assert len(result) == 1
169-
assert result[0].contents == "first match"
170-
mock_searcher.iter_matches.assert_called_once_with(
171-
file_path, search_regex, budget
172-
)
139+
mock_searcher.iter_matches.assert_called_once_with(file_path, search_regex)
173140

174141
def test_consistent_parameter_naming(self) -> None:
175142
"""Test RegexSearcher uses consistent parameter naming (search_regex)."""
@@ -178,13 +145,9 @@ def test_consistent_parameter_naming(self) -> None:
178145

179146
file_path = Path("test.txt")
180147
search_regex: RegexPattern = r"pattern"
181-
budget = OutputBudget(limit=1000)
182-
183148
# This test ensures the parameter is named 'search_regex', not 'pattern' or 'regex'
184-
mock_searcher.iter_matches(file_path, search_regex, budget)
185-
mock_searcher.iter_matches.assert_called_once_with(
186-
file_path, search_regex, budget
187-
)
149+
mock_searcher.iter_matches(file_path, search_regex)
150+
mock_searcher.iter_matches.assert_called_once_with(file_path, search_regex)
188151

189152

190153
class TestProtocolIntegration:
@@ -204,8 +167,7 @@ def test_all_protocols_use_validated_types(self) -> None:
204167
mock_searcher.iter_matches = Mock(return_value=iter([]))
205168

206169
regex_pattern: RegexPattern = r"\w+"
207-
budget = OutputBudget(limit=1000)
208-
mock_searcher.iter_matches(Path("test.txt"), regex_pattern, budget)
170+
mock_searcher.iter_matches(Path("test.txt"), regex_pattern)
209171

210172
# Verify type-safe calls completed without error
211173
assert True
@@ -226,7 +188,7 @@ def test_budget_consistency_across_protocols(self) -> None:
226188
# RegexSearcher uses OutputBudget
227189
mock_searcher = Mock(spec=RegexSearcher)
228190
mock_searcher.iter_matches = Mock(return_value=iter([]))
229-
mock_searcher.iter_matches(Path("test.txt"), r"pattern", budget)
191+
mock_searcher.iter_matches(Path("test.txt"), r"pattern")
230192

231193
# Verify both protocols accept the same OutputBudget instance
232194
assert True

tests/test_public_api_integration.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,24 @@ def test_globber_multiple_patterns(
113113

114114
def test_globber_budget_truncation(self, sandbox: Sandbox) -> None:
115115
"""Test that Globber respects budget limits."""
116-
small_budget = OutputBudget(limit=2)
116+
# First get all paths to understand what we're working with
117+
large_budget = OutputBudget(limit=10000)
117118
globber = Globber.from_sandbox(sandbox)
118-
result = globber.glob(["**/*"], small_budget)
119-
120-
assert isinstance(result, GlobOutput)
121-
assert len(result.paths) == 2 # Limited by budget
122-
assert result.truncated
119+
all_result = globber.glob(["**/*"], large_budget)
120+
121+
# Now test with a smaller budget that should allow some but not all paths
122+
if all_result.paths:
123+
# Use a budget that allows approximately half the paths
124+
first_path_len = len(all_result.paths[0].as_posix())
125+
small_budget = OutputBudget(
126+
limit=first_path_len + 10
127+
) # Should allow 1-2 paths
128+
result = globber.glob(["**/*"], small_budget)
129+
130+
assert isinstance(result, GlobOutput)
131+
assert len(result.paths) >= 1 # Should get at least one path
132+
assert len(result.paths) < len(all_result.paths) # Should be fewer than all
133+
assert result.truncated # Should be truncated due to budget limit
123134

124135
def test_grepper_finds_matches(
125136
self, sandbox: Sandbox, budget: OutputBudget

0 commit comments

Comments
 (0)