Refactor budget enforcement to use exception-based control flow (#19)

rbharvs · web-flow · commit 421c2e882046 · 2025-06-20T19:34:23.000-04:00
* Fix budget enforcement

* Fix budget enforcement tests for file path cost accounting

- Update test budgets to account for file path length being debited
- Fix FileWindow validation to require line_count >= 1 (not >= 0)
- Adjust budget calculations in tests to include both path and content costs
- All 145 tests now pass with proper budget enforcement
diff --git a/src/glob_grep_glance/_defaults.py b/src/glob_grep_glance/_defaults.py
@@ -9,7 +9,7 @@
 
 from pydantic import BaseModel
 
-from ._budget import OutputBudget
+from ._budget import BudgetExceeded, OutputBudget
 from ._sandbox import Sandbox
 from .common import FileContent, FileReadResult, FileWindow, GlobPattern, RegexPattern
 
@@ -76,6 +76,7 @@ def read_window(
 
         try:
             # Open file with UTF-8 encoding and ignore errors for binary files
+            budget.debit(len(file.as_posix()))  # Debit budget for file path length
             with open(file, "r", encoding="utf-8", errors="ignore") as f:
                 # Skip to the starting line offset
                 current_line = 0
@@ -91,13 +92,6 @@ def read_window(
                     line = f.readline()
                     if not line:  # EOF reached
                         break
-
-                    # Check if this line would exceed budget
-                    if len(line) > budget.remaining:
-                        truncated = True
-                        break
-
-                    # Line fits in budget, debit and add to contents
                     budget.debit(len(line))
                     contents += line
                     lines_read += 1
@@ -106,6 +100,9 @@ def read_window(
             # Re-raise file system errors (like file not found)
             raise e
 
+        except BudgetExceeded:
+            truncated = True
+
         return FileReadResult(contents=contents, truncated=truncated)
 
 
@@ -118,7 +115,6 @@ def iter_matches(
         self,
         file: Path,
         search_regex: RegexPattern,
-        budget: OutputBudget,
     ) -> Iterable[FileContent]:
         """Yield regex matches from file, one per matching line."""
         # Validate file access through sandbox
@@ -133,13 +129,6 @@ def iter_matches(
                 for line_number, line in enumerate(f):
                     # Check if line matches pattern
                     if compiled_pattern.search(line):
-                        # Check if this line would exceed budget
-                        if len(line) > budget.remaining:
-                            # Skip this line and continue to next
-                            continue
-
-                        # Line fits in budget, debit and yield match
-                        budget.debit(len(line))
                         yield FileContent(
                             path=file,
                             contents=line,
diff --git a/src/glob_grep_glance/_protocols.py b/src/glob_grep_glance/_protocols.py
@@ -35,7 +35,6 @@ def iter_matches(
         self,
         file: Path,
         search_regex: RegexPattern,
-        budget: OutputBudget,
     ) -> Iterable[FileContent]:
         """Yield regex matches from file, respecting budget constraints."""
         ...
diff --git a/src/glob_grep_glance/glob.py b/src/glob_grep_glance/glob.py
@@ -5,7 +5,7 @@
 
 from pydantic import BaseModel, Field
 
-from ._budget import OutputBudget
+from ._budget import BudgetExceeded, OutputBudget
 from ._defaults import FilesystemPathEnumerator
 from ._protocols import PathEnumerator
 from ._sandbox import Sandbox
@@ -38,13 +38,11 @@ def glob(
         truncated = False
 
         for path in self.path_enum.iter_paths(glob_patterns):
-            # Check if we have budget remaining
-            if budget.remaining <= 0:
+            try:
+                budget.debit(len(path.as_posix()))  # debit budget by path length
+                paths.append(path)
+            except BudgetExceeded:
                 truncated = True
                 break
 
-            # Count each path as 1 unit towards budget
-            budget.debit(1)
-            paths.append(path)
-
         return GlobOutput(paths=paths, truncated=truncated)
diff --git a/src/glob_grep_glance/grep.py b/src/glob_grep_glance/grep.py
@@ -4,7 +4,7 @@
 
 from pydantic import BaseModel, Field
 
-from ._budget import OutputBudget
+from ._budget import BudgetExceeded, OutputBudget
 from ._defaults import FilesystemPathEnumerator, StreamingRegexSearcher
 from ._protocols import PathEnumerator, RegexSearcher
 from ._sandbox import Sandbox
@@ -49,22 +49,17 @@ def grep(
         for file_path in self.path_enum.iter_paths(glob_patterns):
             try:
                 # Search for matches in this file
-                for match in self.regex_searcher.iter_matches(
-                    file_path, search_regex, budget
-                ):
+                for match in self.regex_searcher.iter_matches(file_path, search_regex):
+                    budget.debit(len(match.model_dump_json()))
                     matches.append(match)
 
-                    # Check if budget is exhausted
-                    if budget.remaining <= 0:
-                        truncated = True
-                        break
+            except BudgetExceeded:
+                # If budget exceeded, set truncated flag and break
+                truncated = True
+                break
 
             except Exception:
                 # Continue to next file if this one fails
                 continue
 
-            # Break out of file loop if budget exhausted
-            if truncated:
-                break
-
         return GrepOutput(matches=matches, truncated=truncated)
diff --git a/tests/test_file_reader.py b/tests/test_file_reader.py
@@ -46,14 +46,16 @@ def test_read_simple_file(
         test_content = "line 1\nline 2\nline 3\n"
         test_file.write_text(test_content, encoding="utf-8")
 
-        budget = OutputBudget(limit=100)
+        budget = OutputBudget(limit=200)  # Increased to account for file path cost
         window = FileWindow(line_offset=0, line_count=3)
 
         result = reader.read_window(test_file, window, budget)
 
         assert result.contents == test_content
         assert not result.truncated
-        assert budget.remaining == 100 - len(test_content)
+        # Budget remaining = initial - file_path_length - content_length
+        expected_remaining = 200 - len(test_file.as_posix()) - len(test_content)
+        assert budget.remaining == expected_remaining
 
     def test_read_file_with_window_offset(
         self, temp_sandbox: tuple[Path, Sandbox], reader: StreamingFileReader
@@ -131,7 +133,12 @@ def test_budget_constraint_enforcement(
             "short\nmedium line\nvery long line that exceeds budget\n", encoding="utf-8"
         )
 
-        budget = OutputBudget(limit=20)  # Small budget
+        # Calculate budget to allow file path + first two lines but not the third
+        first_two_lines = "short\nmedium line\n"
+        budget_needed = (
+            len(test_file.as_posix()) + len(first_two_lines) + 5
+        )  # +5 buffer
+        budget = OutputBudget(limit=budget_needed)
         window = FileWindow(line_offset=0, line_count=3)
 
         result = reader.read_window(test_file, window, budget)
@@ -328,7 +335,10 @@ def test_line_by_line_budget_debit(
         test_file = sandbox_dir / "test.txt"
         test_file.write_text("line1\nline2\nline3\n", encoding="utf-8")
 
-        budget = OutputBudget(limit=12)  # Exactly enough for first two lines (6+6)
+        # Calculate budget: file path + first two lines exactly
+        first_two_lines = "line1\nline2\n"
+        budget_limit = len(test_file.as_posix()) + len(first_two_lines)
+        budget = OutputBudget(limit=budget_limit)
         window = FileWindow(line_offset=0, line_count=3)
 
         result = reader.read_window(test_file, window, budget)
@@ -347,7 +357,8 @@ def test_utf8_with_special_characters(
         test_content = "héllo wørld 🌍\nünicode tëst ñoño\n"
         test_file.write_text(test_content, encoding="utf-8")
 
-        budget = OutputBudget(limit=100)
+        # Increase budget to account for file path + UTF-8 content
+        budget = OutputBudget(limit=200)
         window = FileWindow(line_offset=0, line_count=2)
 
         result = reader.read_window(test_file, window, budget)
@@ -364,7 +375,8 @@ def test_malformed_encoding_graceful_handling(
         # Write some invalid UTF-8 sequences
         test_file.write_bytes(b"valid text\n\xff\xfe invalid utf8\n more text\n")
 
-        budget = OutputBudget(limit=100)
+        # Increase budget to account for file path + content
+        budget = OutputBudget(limit=200)
         window = FileWindow(line_offset=0, line_count=3)
 
         # Should not crash due to encoding errors (errors="ignore")
diff --git a/tests/test_protocols.py b/tests/test_protocols.py
@@ -121,14 +121,10 @@ def test_protocol_compliance(self) -> None:
 
         file_path = Path("test.txt")
         search_regex: RegexPattern = r"match"
-        budget = OutputBudget(limit=1000)
-
-        result = list(mock_searcher.iter_matches(file_path, search_regex, budget))
+        result = list(mock_searcher.iter_matches(file_path, search_regex))
 
         assert result == expected_content
-        mock_searcher.iter_matches.assert_called_once_with(
-            file_path, search_regex, budget
-        )
+        mock_searcher.iter_matches.assert_called_once_with(file_path, search_regex)
 
     def test_regex_pattern_type_safety(self) -> None:
         """Test RegexSearcher uses validated RegexPattern type."""
@@ -137,39 +133,10 @@ def test_regex_pattern_type_safety(self) -> None:
 
         file_path = Path("test.txt")
         search_regex: RegexPattern = r"\d+"  # Valid regex pattern
-        budget = OutputBudget(limit=1000)
-
-        result = list(mock_searcher.iter_matches(file_path, search_regex, budget))
+        result = list(mock_searcher.iter_matches(file_path, search_regex))
 
         assert result == []
-        mock_searcher.iter_matches.assert_called_once_with(
-            file_path, search_regex, budget
-        )
-
-    def test_budget_aware_streaming(self) -> None:
-        """Test RegexSearcher respects budget constraints during streaming."""
-        mock_searcher = Mock(spec=RegexSearcher)
-        # Simulate budget-limited results
-        limited_content = [
-            FileContent(
-                path=Path("test.txt"),
-                contents="first match",
-                window=FileWindow(line_offset=0, line_count=1),
-            )
-        ]
-        mock_searcher.iter_matches = Mock(return_value=iter(limited_content))
-
-        file_path = Path("test.txt")
-        search_regex: RegexPattern = r"match"
-        budget = OutputBudget(limit=50)  # Limited budget
-
-        result = list(mock_searcher.iter_matches(file_path, search_regex, budget))
-
-        assert len(result) == 1
-        assert result[0].contents == "first match"
-        mock_searcher.iter_matches.assert_called_once_with(
-            file_path, search_regex, budget
-        )
+        mock_searcher.iter_matches.assert_called_once_with(file_path, search_regex)
 
     def test_consistent_parameter_naming(self) -> None:
         """Test RegexSearcher uses consistent parameter naming (search_regex)."""
@@ -178,13 +145,9 @@ def test_consistent_parameter_naming(self) -> None:
 
         file_path = Path("test.txt")
         search_regex: RegexPattern = r"pattern"
-        budget = OutputBudget(limit=1000)
-
         # This test ensures the parameter is named 'search_regex', not 'pattern' or 'regex'
-        mock_searcher.iter_matches(file_path, search_regex, budget)
-        mock_searcher.iter_matches.assert_called_once_with(
-            file_path, search_regex, budget
-        )
+        mock_searcher.iter_matches(file_path, search_regex)
+        mock_searcher.iter_matches.assert_called_once_with(file_path, search_regex)
 
 
 class TestProtocolIntegration:
@@ -204,8 +167,7 @@ def test_all_protocols_use_validated_types(self) -> None:
         mock_searcher.iter_matches = Mock(return_value=iter([]))
 
         regex_pattern: RegexPattern = r"\w+"
-        budget = OutputBudget(limit=1000)
-        mock_searcher.iter_matches(Path("test.txt"), regex_pattern, budget)
+        mock_searcher.iter_matches(Path("test.txt"), regex_pattern)
 
         # Verify type-safe calls completed without error
         assert True
@@ -226,7 +188,7 @@ def test_budget_consistency_across_protocols(self) -> None:
         # RegexSearcher uses OutputBudget
         mock_searcher = Mock(spec=RegexSearcher)
         mock_searcher.iter_matches = Mock(return_value=iter([]))
-        mock_searcher.iter_matches(Path("test.txt"), r"pattern", budget)
+        mock_searcher.iter_matches(Path("test.txt"), r"pattern")
 
         # Verify both protocols accept the same OutputBudget instance
         assert True
diff --git a/tests/test_public_api_integration.py b/tests/test_public_api_integration.py
@@ -113,13 +113,24 @@ def test_globber_multiple_patterns(
 
     def test_globber_budget_truncation(self, sandbox: Sandbox) -> None:
         """Test that Globber respects budget limits."""
-        small_budget = OutputBudget(limit=2)
+        # First get all paths to understand what we're working with
+        large_budget = OutputBudget(limit=10000)
         globber = Globber.from_sandbox(sandbox)
-        result = globber.glob(["**/*"], small_budget)
-
-        assert isinstance(result, GlobOutput)
-        assert len(result.paths) == 2  # Limited by budget
-        assert result.truncated
+        all_result = globber.glob(["**/*"], large_budget)
+
+        # Now test with a smaller budget that should allow some but not all paths
+        if all_result.paths:
+            # Use a budget that allows approximately half the paths
+            first_path_len = len(all_result.paths[0].as_posix())
+            small_budget = OutputBudget(
+                limit=first_path_len + 10
+            )  # Should allow 1-2 paths
+            result = globber.glob(["**/*"], small_budget)
+
+            assert isinstance(result, GlobOutput)
+            assert len(result.paths) >= 1  # Should get at least one path
+            assert len(result.paths) < len(all_result.paths)  # Should be fewer than all
+            assert result.truncated  # Should be truncated due to budget limit
 
     def test_grepper_finds_matches(
         self, sandbox: Sandbox, budget: OutputBudget
diff --git a/tests/test_regex_searcher.py b/tests/test_regex_searcher.py