Skip to content

Commit dfb3927

Browse files
committed
fix: track and clean up hypothesis test temp directories
- Modified generate_hypothesis_tests() to return the temp directory Path
- Added hypothesis_tests_dir tracking in FunctionOptimizer
- Extended cleanup_generated_files() to remove hypothesis test directories
- Added hypothesis_tests_dirs list in Optimizer to track all directories
- Updated cleanup_temporary_paths() to clean up hypothesis test directories
- Ensures cleanup on success, errors, and KeyboardInterrupt
- Changed temp dir prefix to 'codeflash_hypothesis_' for clarity
1 parent b7faf81 commit dfb3927

File tree

3 files changed

+31
-9
lines changed

3 files changed

+31
-9
lines changed

codeflash/optimization/function_optimizer.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ def __init__(
240240
self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {}
241241
self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {}
242242
self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None
243+
self.hypothesis_tests_dir: Path | None = None
243244
self.generate_and_instrument_tests_results: (
244245
tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None
245246
) = None
@@ -1147,7 +1148,11 @@ def generate_tests_and_optimizations(
11471148
generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
11481149
)
11491150
future_hypothesis_tests = self.executor.submit(
1150-
generate_hypothesis_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
1151+
generate_hypothesis_tests,
1152+
self.test_cfg,
1153+
self.args,
1154+
self.function_to_optimize,
1155+
self.function_to_optimize_ast,
11511156
)
11521157
futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_hypothesis_tests]
11531158
if run_experiment:
@@ -1201,7 +1206,8 @@ def generate_tests_and_optimizations(
12011206
logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
12021207
return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
12031208
function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
1204-
function_to_hypothesis_tests, hypothesis_test_str = future_hypothesis_tests.result()
1209+
function_to_hypothesis_tests, hypothesis_test_str, hypothesis_test_suite_dir = future_hypothesis_tests.result()
1210+
self.hypothesis_tests_dir = hypothesis_test_suite_dir
12051211

12061212
count_tests = len(tests)
12071213
if concolic_test_str:
@@ -2051,7 +2057,12 @@ def cleanup_generated_files(self) -> None:
20512057
paths_to_cleanup.append(test_file.instrumented_behavior_file_path)
20522058
paths_to_cleanup.append(test_file.benchmarking_file_path)
20532059

2060+
# Add hypothesis test directory to cleanup
2061+
if self.hypothesis_tests_dir and self.hypothesis_tests_dir.exists():
2062+
paths_to_cleanup.append(self.hypothesis_tests_dir)
2063+
20542064
cleanup_paths(paths_to_cleanup)
2065+
self.hypothesis_tests_dir = None
20552066

20562067
def get_test_env(
20572068
self, codeflash_loop_index: int, codeflash_test_iteration: int, codeflash_tracer_disable: int = 1

codeflash/optimization/optimizer.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def __init__(self, args: Namespace) -> None:
5353
self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None)
5454
self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None
5555
self.replay_tests_dir = None
56+
self.hypothesis_tests_dirs: list[Path] = [] # Track all hypothesis test directories
5657
self.functions_checkpoint: CodeflashRunCheckpoint | None = None
5758
self.current_function_being_optimized: FunctionToOptimize | None = None # current only for the LSP
5859
self.current_function_optimizer: FunctionOptimizer | None = None
@@ -337,6 +338,9 @@ def run(self) -> None:
337338
function_optimizer # needed to clean up from the outside of this function
338339
)
339340
best_optimization = function_optimizer.optimize_function()
341+
# Track hypothesis test directory for cleanup
342+
if function_optimizer.hypothesis_tests_dir:
343+
self.hypothesis_tests_dirs.append(function_optimizer.hypothesis_tests_dir)
340344
if self.functions_checkpoint:
341345
self.functions_checkpoint.add_function_to_checkpoint(
342346
function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root)
@@ -430,7 +434,12 @@ def cleanup_temporary_paths(self) -> None:
430434

431435
if self.current_function_optimizer:
432436
self.current_function_optimizer.cleanup_generated_files()
433-
cleanup_paths([self.test_cfg.concolic_test_root_dir, self.replay_tests_dir])
437+
438+
# Cleanup all temporary test directories
439+
paths_to_cleanup = [self.test_cfg.concolic_test_root_dir, self.replay_tests_dir]
440+
paths_to_cleanup.extend(self.hypothesis_tests_dirs)
441+
cleanup_paths(paths_to_cleanup)
442+
self.hypothesis_tests_dirs.clear()
434443

435444
def worktree_mode(self) -> None:
436445
if self.current_worktree:

codeflash/verification/hypothesis_testing.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def visit_Call(self, node: ast.Call) -> ast.Call:
182182

183183
def generate_hypothesis_tests(
184184
test_cfg: TestConfig, args: Namespace, function_to_optimize: FunctionToOptimize, function_to_optimize_ast: ast.AST
185-
) -> tuple[dict[str, list[FunctionCalledInTest]], str]:
185+
) -> tuple[dict[str, list[FunctionCalledInTest]], str, Path | None]:
186186
"""Generate property-based tests using Hypothesis ghostwriter.
187187
188188
This function:
@@ -193,12 +193,14 @@ def generate_hypothesis_tests(
193193
5. Formats the tests with the project formatter
194194
195195
Returns:
196-
Tuple of (function_to_tests_map, test_suite_code)
196+
Tuple of (function_to_tests_map, test_suite_code, hypothesis_test_suite_dir)
197+
The hypothesis_test_suite_dir is None if no tests were generated.
197198
198199
"""
199200
start_time = time.perf_counter()
200201
function_to_hypothesis_tests: dict[str, list[FunctionCalledInTest]] = {}
201202
hypothesis_test_suite_code: str = ""
203+
hypothesis_test_suite_dir: Path | None = None
202204

203205
if (
204206
test_cfg.project_root_path
@@ -226,11 +228,11 @@ def generate_hypothesis_tests(
226228
logger.debug("Hypothesis test generation timed out")
227229
end_time = time.perf_counter()
228230
logger.debug(f"Hypothesis test generation completed in {end_time - start_time:.2f} seconds")
229-
return function_to_hypothesis_tests, hypothesis_test_suite_code
231+
return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir
230232

231233
if hypothesis_result.returncode == 0:
232234
hypothesis_test_suite_code = hypothesis_result.stdout
233-
hypothesis_test_suite_dir = Path(tempfile.mkdtemp(dir=test_cfg.tests_root))
235+
hypothesis_test_suite_dir = Path(tempfile.mkdtemp(prefix="codeflash_hypothesis_", dir=test_cfg.tests_root))
234236
hypothesis_path = hypothesis_test_suite_dir / "test_hypothesis.py"
235237
hypothesis_path.write_text(hypothesis_test_suite_code, encoding="utf8")
236238

@@ -269,7 +271,7 @@ def generate_hypothesis_tests(
269271
console.rule()
270272
end_time = time.perf_counter()
271273
logger.debug(f"Generated hypothesis tests in {end_time - start_time:.2f} seconds")
272-
return function_to_hypothesis_tests, hypothesis_test_suite_code
274+
return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir
273275

274276
logger.debug(
275277
f"Error running hypothesis write {': ' + hypothesis_result.stderr if hypothesis_result.stderr else '.'}"
@@ -278,4 +280,4 @@ def generate_hypothesis_tests(
278280

279281
end_time = time.perf_counter()
280282
logger.debug(f"Hypothesis test generation completed in {end_time - start_time:.2f} seconds")
281-
return function_to_hypothesis_tests, hypothesis_test_suite_code
283+
return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir

0 commit comments

Comments
 (0)