Skip to content

Commit 19fc557

Browse files
committed
cleanup strategies
1 parent b7faf81 commit 19fc557

File tree

4 files changed

+29
-31
lines changed

4 files changed

+29
-31
lines changed

codeflash/optimization/function_optimizer.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ def __init__(
240240
self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {}
241241
self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {}
242242
self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None
243+
self.hypothesis_tests_dir: Path | None = None
243244
self.generate_and_instrument_tests_results: (
244245
tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None
245246
) = None
@@ -1147,7 +1148,11 @@ def generate_tests_and_optimizations(
11471148
generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
11481149
)
11491150
future_hypothesis_tests = self.executor.submit(
1150-
generate_hypothesis_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
1151+
generate_hypothesis_tests,
1152+
self.test_cfg,
1153+
self.args,
1154+
self.function_to_optimize,
1155+
self.function_to_optimize_ast,
11511156
)
11521157
futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_hypothesis_tests]
11531158
if run_experiment:
@@ -1201,7 +1206,8 @@ def generate_tests_and_optimizations(
12011206
logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
12021207
return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
12031208
function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
1204-
function_to_hypothesis_tests, hypothesis_test_str = future_hypothesis_tests.result()
1209+
function_to_hypothesis_tests, hypothesis_test_str, hypothesis_test_suite_dir = future_hypothesis_tests.result()
1210+
self.hypothesis_tests_dir = hypothesis_test_suite_dir
12051211

12061212
count_tests = len(tests)
12071213
if concolic_test_str:
@@ -2051,7 +2057,11 @@ def cleanup_generated_files(self) -> None:
20512057
paths_to_cleanup.append(test_file.instrumented_behavior_file_path)
20522058
paths_to_cleanup.append(test_file.benchmarking_file_path)
20532059

2060+
if self.hypothesis_tests_dir and self.hypothesis_tests_dir.exists():
2061+
paths_to_cleanup.append(self.hypothesis_tests_dir)
2062+
20542063
cleanup_paths(paths_to_cleanup)
2064+
self.hypothesis_tests_dir = None
20552065

20562066
def get_test_env(
20572067
self, codeflash_loop_index: int, codeflash_test_iteration: int, codeflash_tracer_disable: int = 1

codeflash/optimization/optimizer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def __init__(self, args: Namespace) -> None:
5353
self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None)
5454
self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None
5555
self.replay_tests_dir = None
56+
self.hypothesis_tests_dirs: list[Path] = [] # Track all hypothesis test directories
5657
self.functions_checkpoint: CodeflashRunCheckpoint | None = None
5758
self.current_function_being_optimized: FunctionToOptimize | None = None # current only for the LSP
5859
self.current_function_optimizer: FunctionOptimizer | None = None
@@ -337,6 +338,8 @@ def run(self) -> None:
337338
function_optimizer # needed to clean up from the outside of this function
338339
)
339340
best_optimization = function_optimizer.optimize_function()
341+
if function_optimizer.hypothesis_tests_dir:
342+
self.hypothesis_tests_dirs.append(function_optimizer.hypothesis_tests_dir)
340343
if self.functions_checkpoint:
341344
self.functions_checkpoint.add_function_to_checkpoint(
342345
function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root)
@@ -430,7 +433,11 @@ def cleanup_temporary_paths(self) -> None:
430433

431434
if self.current_function_optimizer:
432435
self.current_function_optimizer.cleanup_generated_files()
433-
cleanup_paths([self.test_cfg.concolic_test_root_dir, self.replay_tests_dir])
436+
437+
paths_to_cleanup = [self.test_cfg.concolic_test_root_dir, self.replay_tests_dir]
438+
paths_to_cleanup.extend(self.hypothesis_tests_dirs)
439+
cleanup_paths(paths_to_cleanup)
440+
self.hypothesis_tests_dirs.clear()
434441

435442
def worktree_mode(self) -> None:
436443
if self.current_worktree:

codeflash/verification/equivalence.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -167,16 +167,6 @@ def get_test_key(test_result: FunctionTestInvocation) -> tuple[str, str, str, st
167167
f"Candidate={len(candidate_by_func)} test functions ({cand_total_examples} examples)"
168168
)
169169

170-
# Check if all test functions in original are present in candidate
171-
missing_funcs = set(original_by_func.keys()) - set(candidate_by_func.keys())
172-
if missing_funcs:
173-
logger.warning(
174-
f"Hypothesis test functions missing in candidate: {len(missing_funcs)} functions. "
175-
f"First missing: {missing_funcs.__iter__().__next__()}"
176-
)
177-
return False
178-
179-
# Compare each test function's results
180170
for test_key in original_by_func:
181171
if test_key not in candidate_by_func:
182172
continue # No candidate results for this test function; skip the comparison
@@ -196,12 +186,4 @@ def get_test_key(test_result: FunctionTestInvocation) -> tuple[str, str, str, st
196186
f"(original_failed={orig_had_failure}, candidate_failed={cand_had_failure})"
197187
)
198188
return False
199-
200-
if abs(len(orig_examples) - len(cand_examples)) > 10:
201-
logger.info(
202-
f"Hypothesis test '{test_key[2]}': example counts differ "
203-
f"(original={len(orig_examples)}, candidate={len(cand_examples)}). "
204-
f"This is expected when code performance changes."
205-
)
206-
207189
return True

codeflash/verification/hypothesis_testing.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def visit_Call(self, node: ast.Call) -> ast.Call:
182182

183183
def generate_hypothesis_tests(
184184
test_cfg: TestConfig, args: Namespace, function_to_optimize: FunctionToOptimize, function_to_optimize_ast: ast.AST
185-
) -> tuple[dict[str, list[FunctionCalledInTest]], str]:
185+
) -> tuple[dict[str, list[FunctionCalledInTest]], str, Path | None]:
186186
"""Generate property-based tests using Hypothesis ghostwriter.
187187
188188
This function:
@@ -193,12 +193,14 @@ def generate_hypothesis_tests(
193193
5. Formats the tests with the project formatter
194194
195195
Returns:
196-
Tuple of (function_to_tests_map, test_suite_code)
196+
Tuple of (function_to_tests_map, test_suite_code, hypothesis_test_suite_dir)
197+
The hypothesis_test_suite_dir is None if no tests were generated.
197198
198199
"""
199200
start_time = time.perf_counter()
200201
function_to_hypothesis_tests: dict[str, list[FunctionCalledInTest]] = {}
201202
hypothesis_test_suite_code: str = ""
203+
hypothesis_test_suite_dir: Path | None = None
202204

203205
if (
204206
test_cfg.project_root_path
@@ -212,8 +214,6 @@ def generate_hypothesis_tests(
212214
qualified_function_path = get_qualified_function_path(
213215
function_to_optimize.file_path, args.project_root, function_to_optimize.qualified_name
214216
)
215-
logger.info(f"command: hypothesis write {qualified_function_path}")
216-
217217
hypothesis_result = subprocess.run(
218218
["hypothesis", "write", qualified_function_path],
219219
capture_output=True,
@@ -226,11 +226,11 @@ def generate_hypothesis_tests(
226226
logger.debug("Hypothesis test generation timed out")
227227
end_time = time.perf_counter()
228228
logger.debug(f"Hypothesis test generation completed in {end_time - start_time:.2f} seconds")
229-
return function_to_hypothesis_tests, hypothesis_test_suite_code
229+
return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir
230230

231231
if hypothesis_result.returncode == 0:
232232
hypothesis_test_suite_code = hypothesis_result.stdout
233-
hypothesis_test_suite_dir = Path(tempfile.mkdtemp(dir=test_cfg.tests_root))
233+
hypothesis_test_suite_dir = Path(tempfile.mkdtemp(prefix="codeflash_hypothesis_", dir=test_cfg.tests_root))
234234
hypothesis_path = hypothesis_test_suite_dir / "test_hypothesis.py"
235235
hypothesis_path.write_text(hypothesis_test_suite_code, encoding="utf8")
236236

@@ -250,12 +250,11 @@ def generate_hypothesis_tests(
250250

251251
unparsed = filter_hypothesis_tests_by_function_name(original_code, function_to_optimize.function_name)
252252

253-
console.print(f"modified src: {unparsed}")
254-
255253
hypothesis_test_suite_code = format_code(
256254
args.formatter_cmds,
257255
hypothesis_path,
258256
optimized_code=make_hypothesis_tests_deterministic(remove_functions_with_only_any_type(unparsed)),
257+
print_status=False,
259258
)
260259
with hypothesis_path.open("w", encoding="utf-8") as f:
261260
f.write(hypothesis_test_suite_code)
@@ -269,7 +268,7 @@ def generate_hypothesis_tests(
269268
console.rule()
270269
end_time = time.perf_counter()
271270
logger.debug(f"Generated hypothesis tests in {end_time - start_time:.2f} seconds")
272-
return function_to_hypothesis_tests, hypothesis_test_suite_code
271+
return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir
273272

274273
logger.debug(
275274
f"Error running hypothesis write {': ' + hypothesis_result.stderr if hypothesis_result.stderr else '.'}"
@@ -278,4 +277,4 @@ def generate_hypothesis_tests(
278277

279278
end_time = time.perf_counter()
280279
logger.debug(f"Hypothesis test generation completed in {end_time - start_time:.2f} seconds")
281-
return function_to_hypothesis_tests, hypothesis_test_suite_code
280+
return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir

0 commit comments

Comments
 (0)