from pathlib import Path

from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.models.models import CodeOptimizationContext, get_code_block_splitter
from codeflash.optimization.function_optimizer import FunctionOptimizer
from codeflash.verification.verification_utils import TestConfig


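# Minimal stand-in for the CLI args consumed by FunctionOptimizer: formatting and
# import sorting are disabled so the rewritten files can be compared verbatim.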
class Args:
    disable_imports_sorting = True
    formatter_cmds = ["disabled"]


def test_multi_file_replacement01() -> None:
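    """End-to-end check of multi-file code replacement.

    A temporary helper module and a temporary main module are written to disk, an
    optimized candidate containing one code block per file is applied, and the test
    asserts that only the helper file changes while the main file is left untouched.
    """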
    root_dir = Path(__file__).parent.parent.resolve()
    helper_file = (root_dir / "code_to_optimize/temp_helper.py").resolve()

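    # Original helper: token estimation that leans on a module-level compiled regex
    # defined *after* the function body.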
    helper_file.write_text("""import re
from collections.abc import Sequence

from pydantic_ai_slim.pydantic_ai.messages import BinaryContent, UserContent

def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
    if not content:
        return 0

    if isinstance(content, str):
        return len(_TOKEN_SPLIT_RE.split(content.strip()))

    tokens = 0
    for part in content:
        if isinstance(part, str):
            tokens += len(_TOKEN_SPLIT_RE.split(part.strip()))
        elif isinstance(part, BinaryContent):
            tokens += len(part.data)
        # TODO(Marcelo): We need to study how we can estimate the tokens for AudioUrl or ImageUrl.

    return tokens


_TOKEN_SPLIT_RE = re.compile(r'[\\s",.:]+')
""", encoding="utf-8")

    main_file = (root_dir / "code_to_optimize/temp_main.py").resolve()

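    # Original main: a thin wrapper around the helper. The optimized candidate leaves
    # this block identical, so the file on disk should not change.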
    original_main = """from temp_helper import _estimate_string_tokens
from pydantic_ai_slim.pydantic_ai.usage import Usage

def _get_string_usage(text: str) -> Usage:
    response_tokens = _estimate_string_tokens(text)
    return Usage(response_tokens=response_tokens, total_tokens=response_tokens)
"""
    main_file.write_text(original_main, encoding="utf-8")

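    # The optimized candidate concatenates one code block per file, each introduced by
    # a marker line from get_code_block_splitter() keyed on the file's relative path.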
    optimized_code = f"""{get_code_block_splitter(helper_file.relative_to(root_dir))}
import re
from collections.abc import Sequence

from pydantic_ai_slim.pydantic_ai.messages import BinaryContent, UserContent

# Compile regex once, as in original
_TOKEN_SPLIT_RE = re.compile(r'[\\s",.:]+')

# Precompute translation table for fast token splitting for string input.
# This covers the chars: whitespace (\\x09-\\x0d, space), " (0x22), , (0x2c), . (0x2e), : (0x3a).
# Map those codepoints to ' '
_translate_table = {{ord(c): ord(' ') for c in ' \\t\\n\\r\\x0b\\x0c",.:'}}

def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
    if not content:
        return 0

    if isinstance(content, str):
        # Fast path using translate and split instead of regex when separators are simple
        s = content.strip()
        if s:
            s = s.translate(_translate_table)
            # Split on whitespace (default). This handles multiple consecutive separators
            return len(s.split())
        return 0

    tokens = 0
    for part in content:
        if isinstance(part, str):
            s = part.strip()
            if s:
                s = s.translate(_translate_table)
                tokens += len(s.split())
        elif isinstance(part, BinaryContent):
            tokens += len(part.data)

    return tokens

{get_code_block_splitter(main_file.relative_to(root_dir))}
from temp_helper import _estimate_string_tokens
from pydantic_ai_slim.pydantic_ai.usage import Usage

def _get_string_usage(text: str) -> Usage:
    response_tokens = _estimate_string_tokens(text)
    return Usage(response_tokens=response_tokens, total_tokens=response_tokens)
"""

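    # Locate the target function and build a minimal pytest-based TestConfig so the
    # optimizer can resolve the code context (the target plus its helper functions).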
    func = FunctionToOptimize(function_name="_get_string_usage", parents=[], file_path=main_file)
    test_config = TestConfig(
        tests_root=root_dir / "tests/pytest",
        tests_project_rootdir=root_dir,
        project_root_path=root_dir,
        test_framework="pytest",
        pytest_cmd="pytest",
    )
    func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config)
    code_context: CodeOptimizationContext = func_optimizer.get_code_optimization_context().unwrap()

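    # Snapshot the original source of every helper file before replacement;
    # replace_function_and_helpers_with_optimized_code takes these as a baseline.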
    original_helper_code: dict[Path, str] = {}
    helper_function_paths = {hf.file_path for hf in code_context.helper_functions}
    for helper_function_path in helper_function_paths:
        with helper_function_path.open(encoding="utf8") as f:
            helper_code = f.read()
            original_helper_code[helper_function_path] = helper_code

    func_optimizer.args = Args()
    func_optimizer.replace_function_and_helpers_with_optimized_code(
        code_context=code_context, optimized_code=optimized_code, original_helper_code=original_helper_code
    )
    new_code = main_file.read_text(encoding="utf-8")
    new_helper_code = helper_file.read_text(encoding="utf-8")

    helper_file.unlink(missing_ok=True)
    main_file.unlink(missing_ok=True)

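    # Expected result: the helper now holds the optimized implementation. Note that the
    # optimizer keeps the existing module-level _TOKEN_SPLIT_RE and appends the new
    # _translate_table global after it, rather than reproducing the candidate's layout.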
    expected_helper = """import re
from collections.abc import Sequence

from pydantic_ai_slim.pydantic_ai.messages import BinaryContent, UserContent

def _estimate_string_tokens(content: str | Sequence[UserContent]) -> int:
    if not content:
        return 0

    if isinstance(content, str):
        # Fast path using translate and split instead of regex when separators are simple
        s = content.strip()
        if s:
            s = s.translate(_translate_table)
            # Split on whitespace (default). This handles multiple consecutive separators
            return len(s.split())
        return 0

    tokens = 0
    for part in content:
        if isinstance(part, str):
            s = part.strip()
            if s:
                s = s.translate(_translate_table)
                tokens += len(s.split())
        elif isinstance(part, BinaryContent):
            tokens += len(part.data)

    return tokens


_TOKEN_SPLIT_RE = re.compile(r'[\\s",.:]+')

_translate_table = {ord(c): ord(' ') for c in ' \\t\\n\\r\\x0b\\x0c",.:'}
"""

    assert new_code.rstrip() == original_main.rstrip()  # main file must be unchanged
    assert new_helper_code.rstrip() == expected_helper.rstrip()  # helper was rewritten