Skip to content

Commit 57f8af0

Browse files
test multifile replacement
1 parent f48c77d commit 57f8af0

File tree

2 files changed

+170
-2
lines changed

2 files changed

+170
-2
lines changed
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
from pathlib import Path
2+
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
3+
from codeflash.models.models import CodeOptimizationContext, get_code_block_splitter
4+
from codeflash.optimization.function_optimizer import FunctionOptimizer
5+
from codeflash.verification.verification_utils import TestConfig
6+
7+
8+
class Args:
    # Minimal stand-in for the CLI args namespace assigned to
    # func_optimizer.args below — presumably the optimizer consults these
    # two flags during code replacement (TODO confirm against FunctionOptimizer).
    disable_imports_sorting = True  # skip import re-sorting when writing code back
    formatter_cmds = ["disabled"]  # do not invoke any external formatter
11+
12+
def test_multi_file_replcement01() -> None:
    """Multi-file replacement: only the helper file should be rewritten.

    Creates a temporary helper module and a temporary main module, then feeds
    the optimizer an "optimized" blob containing two sections (delimited by
    ``get_code_block_splitter`` markers): a rewritten helper and a main
    section that is byte-identical to the original. After
    ``replace_function_and_helpers_with_optimized_code`` runs, the main file
    must be unchanged and the helper file must contain the new code.

    NOTE(review): "replcement" in the test name is a typo, kept as-is so the
    public pytest id does not change.
    """
    root_dir = Path(__file__).parent.parent.resolve()
    helper_file = (root_dir / "code_to_optimize/temp_helper.py").resolve()

    # Original helper: regex-based token estimator.
    helper_file.write_text("""import re
from collections.abc import Sequence

from pydantic_ai_slim.pydantic_ai.messages import BinaryContent, UserContent

def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
    if not content:
        return 0

    if isinstance(content, str):
        return len(_TOKEN_SPLIT_RE.split(content.strip()))

    tokens = 0
    for part in content:
        if isinstance(part, str):
            tokens += len(_TOKEN_SPLIT_RE.split(part.strip()))
        elif isinstance(part, BinaryContent):
            tokens += len(part.data)
        # TODO(Marcelo): We need to study how we can estimate the tokens for AudioUrl or ImageUrl.

    return tokens


_TOKEN_SPLIT_RE = re.compile(r'[\\s",.:]+')
""", encoding="utf-8")

    main_file = (root_dir / "code_to_optimize/temp_main.py").resolve()

    original_main = """from temp_helper import _estimate_string_tokens
from pydantic_ai_slim.pydantic_ai.usage import Usage

def _get_string_usage(text: str) -> Usage:
    response_tokens = _estimate_string_tokens(text)
    return Usage(response_tokens=response_tokens, total_tokens=response_tokens)
"""
    main_file.write_text(original_main, encoding="utf-8")

    # Optimized blob: the helper section is rewritten (translate/split fast
    # path instead of regex); the main section is byte-identical to
    # original_main, so the optimizer must leave main_file untouched.
    optimized_code = f"""{get_code_block_splitter(helper_file.relative_to(root_dir))}
import re
from collections.abc import Sequence

from pydantic_ai_slim.pydantic_ai.messages import BinaryContent, UserContent

# Compile regex once, as in original
_TOKEN_SPLIT_RE = re.compile(r'[\\s",.:]+')

# Precompute translation table for fast token splitting for string input
# This covers the chars: whitespace (\\x09-\\x0d, space), " (0x22), , (0x2c),
# Map those codepoints to ' '
_translate_table = {{ord(c): ord(' ') for c in ' \\t\\n\\r\\x0b\\x0c",.:'}}

def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
    if not content:
        return 0

    if isinstance(content, str):
        # Fast path using translate and split instead of regex when separat
        s = content.strip()
        if s:
            s = s.translate(_translate_table)
            # Split on whitespace (default). This handles multiple consecut
            return len(s.split())
        return 0

    tokens = 0
    for part in content:
        if isinstance(part, str):
            s = part.strip()
            if s:
                s = s.translate(_translate_table)
                tokens += len(s.split())
        elif isinstance(part, BinaryContent):
            tokens += len(part.data)

    return tokens

{get_code_block_splitter(main_file.relative_to(root_dir))}
from temp_helper import _estimate_string_tokens
from pydantic_ai_slim.pydantic_ai.usage import Usage

def _get_string_usage(text: str) -> Usage:
    response_tokens = _estimate_string_tokens(text)
    return Usage(response_tokens=response_tokens, total_tokens=response_tokens)
"""

    try:
        func = FunctionToOptimize(function_name="_get_string_usage", parents=[], file_path=main_file)
        test_config = TestConfig(
            tests_root=root_dir / "tests/pytest",
            tests_project_rootdir=root_dir,
            project_root_path=root_dir,
            test_framework="pytest",
            pytest_cmd="pytest",
        )
        func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config)
        code_context: CodeOptimizationContext = func_optimizer.get_code_optimization_context().unwrap()

        # Snapshot the current contents of every helper file the context
        # discovered, keyed by path — the replacement API needs the originals.
        original_helper_code: dict[Path, str] = {}
        helper_function_paths = {hf.file_path for hf in code_context.helper_functions}
        for helper_function_path in helper_function_paths:
            with helper_function_path.open(encoding="utf8") as f:
                helper_code = f.read()
            original_helper_code[helper_function_path] = helper_code

        func_optimizer.args = Args()
        func_optimizer.replace_function_and_helpers_with_optimized_code(
            code_context=code_context, optimized_code=optimized_code, original_helper_code=original_helper_code
        )
        new_code = main_file.read_text(encoding="utf-8")
        new_helper_code = helper_file.read_text(encoding="utf-8")

        # What the helper file should look like after replacement: the
        # optimized implementation, with module-level assignments appended.
        expected_helper = """import re
from collections.abc import Sequence

from pydantic_ai_slim.pydantic_ai.messages import BinaryContent, UserContent

def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
    if not content:
        return 0

    if isinstance(content, str):
        # Fast path using translate and split instead of regex when separat
        s = content.strip()
        if s:
            s = s.translate(_translate_table)
            # Split on whitespace (default). This handles multiple consecut
            return len(s.split())
        return 0

    tokens = 0
    for part in content:
        if isinstance(part, str):
            s = part.strip()
            if s:
                s = s.translate(_translate_table)
                tokens += len(s.split())
        elif isinstance(part, BinaryContent):
            tokens += len(part.data)

    return tokens


_TOKEN_SPLIT_RE = re.compile(r'[\\s",.:]+')

_translate_table = {ord(c): ord(' ') for c in ' \\t\\n\\r\\x0b\\x0c",.:'}
"""

        assert new_code.rstrip() == original_main.rstrip()  # No Change
        assert new_helper_code.rstrip() == expected_helper.rstrip()
    finally:
        # Clean up the temp modules even when the optimizer or an assertion
        # fails, so a failing run does not leave stray files in code_to_optimize/.
        helper_file.unlink(missing_ok=True)
        main_file.unlink(missing_ok=True)

tests/test_unused_helper_revert.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,6 @@ def entrypoint_function(n):
316316
\"\"\"Optimized function that only calls one helper.\"\"\"
317317
result1 = helper_function_1(n)
318318
return result1 + n * 3 # Inlined helper_function_2
319-
320-
{get_code_block_splitter("helpers.py")}
321319
"""
322320

323321
# Create test config

0 commit comments

Comments
 (0)