Skip to content

Commit 389b32c

Browse files
committed
first pass at hypothesis integration
1 parent 0598150 commit 389b32c

File tree

8 files changed

+336
-24
lines changed

8 files changed

+336
-24
lines changed

codeflash/code_utils/code_utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,11 @@ def module_name_from_file_path(file_path: Path, project_root_path: Path, *, trav
254254
raise ValueError(msg) # noqa: B904
255255

256256

257+
def get_qualified_function_path(file_path: Path, project_root_path: Path, qualified_name: str) -> str:
    """Return the dotted path of a function: its module path followed by its qualified name.

    The module path is ``file_path`` made relative to ``project_root_path``, with the
    file suffix removed and the path separators replaced by dots.

    Args:
        file_path: Path of the source file that defines the function.
        project_root_path: Directory that ``file_path`` is made relative to.
        qualified_name: Name of the function inside the module (e.g. ``Class.method``).

    Returns:
        A string of the form ``<dotted.module.path>.<qualified_name>``.

    Raises:
        ValueError: Propagated from ``Path.relative_to`` when ``file_path`` is not
            located under ``project_root_path``.
    """
    # as_posix() normalises separators to "/" first, so the replace works on every platform.
    module_path = file_path.relative_to(project_root_path).with_suffix("").as_posix().replace("/", ".")
    return f"{module_path}.{qualified_name}"
260+
261+
257262
def file_path_from_module_name(module_name: str, project_root_path: Path) -> Path:
    """Get file path from module path.

    Args:
        module_name: Dotted module name, e.g. ``pkg.sub.mod``.
        project_root_path: Directory the resulting path is joined onto.

    Returns:
        ``project_root_path`` joined with the module name, where each dot has been
        replaced by the OS path separator and ``.py`` has been appended.
    """
    return project_root_path / (module_name.replace(".", os.sep) + ".py")

codeflash/discovery/discover_unit_tests.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,8 @@ def discover_tests_pytest(
497497
test_type = TestType.REPLAY_TEST
498498
elif "test_concolic_coverage" in test["test_file"]:
499499
test_type = TestType.CONCOLIC_COVERAGE_TEST
500+
elif "test_hypothesis" in test["test_file"]:
501+
test_type = TestType.HYPOTHESIS_TEST
500502
else:
501503
test_type = TestType.EXISTING_UNIT_TEST
502504

@@ -540,6 +542,8 @@ def get_test_details(_test: unittest.TestCase) -> TestsInFile | None:
540542
test_type = TestType.REPLAY_TEST
541543
elif "test_concolic_coverage" in str(_test_module_path):
542544
test_type = TestType.CONCOLIC_COVERAGE_TEST
545+
elif "test_hypothesis" in str(_test_module_path):
546+
test_type = TestType.HYPOTHESIS_TEST
543547
else:
544548
test_type = TestType.EXISTING_UNIT_TEST
545549
return TestsInFile(

codeflash/models/test_type.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ class TestType(Enum):
88
REPLAY_TEST = 4
99
CONCOLIC_COVERAGE_TEST = 5
1010
INIT_STATE_TEST = 6
11+
HYPOTHESIS_TEST = 7
1112

1213
def to_name(self) -> str:
1314
if self is TestType.INIT_STATE_TEST:
@@ -18,5 +19,6 @@ def to_name(self) -> str:
1819
TestType.GENERATED_REGRESSION: "🌀 Generated Regression Tests",
1920
TestType.REPLAY_TEST: "⏪ Replay Tests",
2021
TestType.CONCOLIC_COVERAGE_TEST: "🔎 Concolic Coverage Tests",
22+
TestType.HYPOTHESIS_TEST: "🔮 Hypothesis Tests",
2123
}
2224
return names[self]

codeflash/optimization/function_optimizer.py

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
from codeflash.telemetry.posthog_cf import ph
9696
from codeflash.verification.concolic_testing import generate_concolic_tests
9797
from codeflash.verification.equivalence import compare_test_results
98+
from codeflash.verification.hypothesis_testing import generate_hypothesis_tests
9899
from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture
99100
from codeflash.verification.parse_line_profile_test_output import parse_line_profile_results
100101
from codeflash.verification.parse_test_output import calculate_function_throughput_from_test_results, parse_test_results
@@ -281,6 +282,8 @@ def generate_and_instrument_tests(
281282
GeneratedTestsList,
282283
dict[str, set[FunctionCalledInTest]],
283284
str,
285+
dict[str, set[FunctionCalledInTest]],
286+
str,
284287
OptimizationSet,
285288
list[Path],
286289
list[Path],
@@ -323,9 +326,15 @@ def generate_and_instrument_tests(
323326

324327
generated_tests: GeneratedTestsList
325328
optimizations_set: OptimizationSet
326-
count_tests, generated_tests, function_to_concolic_tests, concolic_test_str, optimizations_set = (
327-
generated_results.unwrap()
328-
)
329+
(
330+
count_tests,
331+
generated_tests,
332+
function_to_concolic_tests,
333+
concolic_test_str,
334+
function_to_hypothesis_tests,
335+
hypothesis_test_str,
336+
optimizations_set,
337+
) = generated_results.unwrap()
329338

330339
for i, generated_test in enumerate(generated_tests.generated_tests):
331340
with generated_test.behavior_file_path.open("w", encoding="utf8") as f:
@@ -345,12 +354,19 @@ def generate_and_instrument_tests(
345354
logger.info(f"Generated test {i + 1}/{count_tests}:")
346355
code_print(generated_test.generated_original_test_source, file_name=f"test_{i + 1}.py")
347356
if concolic_test_str:
348-
logger.info(f"Generated test {count_tests}/{count_tests}:")
357+
logger.info(f"Generated test {count_tests - (1 if hypothesis_test_str else 0)}/{count_tests}:")
349358
code_print(concolic_test_str)
359+
if hypothesis_test_str:
360+
logger.info(f"Generated test {count_tests}/{count_tests}:")
361+
code_print(hypothesis_test_str)
350362

351363
function_to_all_tests = {
352-
key: self.function_to_tests.get(key, set()) | function_to_concolic_tests.get(key, set())
353-
for key in set(self.function_to_tests) | set(function_to_concolic_tests)
364+
key: (
365+
self.function_to_tests.get(key, set())
366+
| function_to_concolic_tests.get(key, set())
367+
| function_to_hypothesis_tests.get(key, set())
368+
)
369+
for key in set(self.function_to_tests) | set(function_to_concolic_tests) | set(function_to_hypothesis_tests)
354370
}
355371
instrumented_unittests_created_for_function = self.instrument_existing_tests(function_to_all_tests)
356372

@@ -366,6 +382,8 @@ def generate_and_instrument_tests(
366382
generated_tests,
367383
function_to_concolic_tests,
368384
concolic_test_str,
385+
function_to_hypothesis_tests,
386+
hypothesis_test_str,
369387
optimizations_set,
370388
generated_test_paths,
371389
generated_perf_test_paths,
@@ -398,6 +416,8 @@ def optimize_function(self) -> Result[BestOptimization, str]:
398416
generated_tests,
399417
function_to_concolic_tests,
400418
concolic_test_str,
419+
function_to_hypothesis_tests,
420+
_hypothesis_test_str,
401421
optimizations_set,
402422
generated_test_paths,
403423
generated_perf_test_paths,
@@ -409,6 +429,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
409429
code_context=code_context,
410430
original_helper_code=original_helper_code,
411431
function_to_concolic_tests=function_to_concolic_tests,
432+
function_to_hypothesis_tests=function_to_hypothesis_tests,
412433
generated_test_paths=generated_test_paths,
413434
generated_perf_test_paths=generated_perf_test_paths,
414435
instrumented_unittests_created_for_function=instrumented_unittests_created_for_function,
@@ -991,6 +1012,7 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio
9911012
existing_test_files_count = 0
9921013
replay_test_files_count = 0
9931014
concolic_coverage_test_files_count = 0
1015+
hypothesis_test_files_count = 0
9941016
unique_instrumented_test_files = set()
9951017

9961018
func_qualname = self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root)
@@ -1011,6 +1033,8 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio
10111033
replay_test_files_count += 1
10121034
elif test_type == TestType.CONCOLIC_COVERAGE_TEST:
10131035
concolic_coverage_test_files_count += 1
1036+
elif test_type == TestType.HYPOTHESIS_TEST:
1037+
hypothesis_test_files_count += 1
10141038
else:
10151039
msg = f"Unexpected test type: {test_type}"
10161040
raise ValueError(msg)
@@ -1069,9 +1093,11 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio
10691093
logger.info(
10701094
f"Discovered {existing_test_files_count} existing unit test file"
10711095
f"{'s' if existing_test_files_count != 1 else ''}, {replay_test_files_count} replay test file"
1072-
f"{'s' if replay_test_files_count != 1 else ''}, and "
1096+
f"{'s' if replay_test_files_count != 1 else ''}, "
10731097
f"{concolic_coverage_test_files_count} concolic coverage test file"
1074-
f"{'s' if concolic_coverage_test_files_count != 1 else ''} for {func_qualname}"
1098+
f"{'s' if concolic_coverage_test_files_count != 1 else ''}, and "
1099+
f"{hypothesis_test_files_count} hypothesis test file"
1100+
f"{'s' if hypothesis_test_files_count != 1 else ''} for {func_qualname}"
10751101
)
10761102
console.rule()
10771103
return unique_instrumented_test_files
@@ -1085,7 +1111,15 @@ def generate_tests_and_optimizations(
10851111
generated_test_paths: list[Path],
10861112
generated_perf_test_paths: list[Path],
10871113
run_experiment: bool = False, # noqa: FBT001, FBT002
1088-
) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str]:
1114+
) -> Result[
1115+
tuple[
1116+
GeneratedTestsList,
1117+
dict[str, set[FunctionCalledInTest]],
1118+
dict[str, set[FunctionCalledInTest]],
1119+
OptimizationSet,
1120+
],
1121+
str,
1122+
]:
10891123
n_tests = N_TESTS_TO_GENERATE_EFFECTIVE
10901124
assert len(generated_test_paths) == n_tests
10911125
console.rule()
@@ -1112,7 +1146,10 @@ def generate_tests_and_optimizations(
11121146
future_concolic_tests = self.executor.submit(
11131147
generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
11141148
)
1115-
futures = [*future_tests, future_optimization_candidates, future_concolic_tests]
1149+
future_hypothesis_tests = self.executor.submit(
1150+
generate_hypothesis_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast
1151+
)
1152+
futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_hypothesis_tests]
11161153
if run_experiment:
11171154
future_candidates_exp = self.executor.submit(
11181155
self.local_aiservice_client.optimize_python_code,
@@ -1164,29 +1201,35 @@ def generate_tests_and_optimizations(
11641201
logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
11651202
return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
11661203
function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
1204+
function_to_hypothesis_tests, hypothesis_test_str = future_hypothesis_tests.result()
11671205

11681206
count_tests = len(tests)
11691207
if concolic_test_str:
11701208
count_tests += 1
1209+
if hypothesis_test_str:
1210+
count_tests += 1
11711211

11721212
logger.info(f"Generated '{count_tests}' tests for {self.function_to_optimize.function_name}")
11731213
console.rule()
11741214
generated_tests = GeneratedTestsList(generated_tests=tests)
1175-
result = (
1215+
1216+
self.generate_and_instrument_tests_results = (
11761217
count_tests,
11771218
generated_tests,
11781219
function_to_concolic_tests,
11791220
concolic_test_str,
1221+
function_to_hypothesis_tests,
1222+
hypothesis_test_str,
11801223
OptimizationSet(control=candidates, experiment=candidates_experiment),
11811224
)
1182-
self.generate_and_instrument_tests_results = result
1183-
return Success(result)
1225+
return Success(self.generate_and_instrument_tests_results)
11841226

11851227
def setup_and_establish_baseline(
11861228
self,
11871229
code_context: CodeOptimizationContext,
11881230
original_helper_code: dict[Path, str],
11891231
function_to_concolic_tests: dict[str, set[FunctionCalledInTest]],
1232+
function_to_hypothesis_tests: dict[str, set[FunctionCalledInTest]],
11901233
generated_test_paths: list[Path],
11911234
generated_perf_test_paths: list[Path],
11921235
instrumented_unittests_created_for_function: set[Path],
@@ -1197,8 +1240,12 @@ def setup_and_establish_baseline(
11971240
"""Set up baseline context and establish original code baseline."""
11981241
function_to_optimize_qualified_name = self.function_to_optimize.qualified_name
11991242
function_to_all_tests = {
1200-
key: self.function_to_tests.get(key, set()) | function_to_concolic_tests.get(key, set())
1201-
for key in set(self.function_to_tests) | set(function_to_concolic_tests)
1243+
key: (
1244+
self.function_to_tests.get(key, set())
1245+
| function_to_concolic_tests.get(key, set())
1246+
| function_to_hypothesis_tests.get(key, set())
1247+
)
1248+
for key in set(self.function_to_tests) | set(function_to_concolic_tests) | set(function_to_hypothesis_tests)
12021249
}
12031250

12041251
# Get a dict of file_path_to_classes of fto and helpers_of_fto

codeflash/verification/concolic_testing.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from typing import TYPE_CHECKING
99

1010
from codeflash.cli_cmds.console import console, logger
11+
from codeflash.code_utils.code_utils import get_qualified_function_path
1112
from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE
1213
from codeflash.code_utils.concolic_utils import clean_concolic_tests
1314
from codeflash.code_utils.static_analysis import has_typed_parameters
@@ -42,6 +43,9 @@ def generate_concolic_tests(
4243
logger.info("Generating concolic opcode coverage tests for the original code…")
4344
console.rule()
4445
try:
46+
qualified_function_path = get_qualified_function_path(
47+
function_to_optimize.file_path, args.project_root, function_to_optimize.qualified_name
48+
)
4549
cover_result = subprocess.run(
4650
[
4751
SAFE_SYS_EXECUTABLE,
@@ -50,15 +54,7 @@ def generate_concolic_tests(
5054
"cover",
5155
"--example_output_format=pytest",
5256
"--per_condition_timeout=20",
53-
".".join(
54-
[
55-
function_to_optimize.file_path.relative_to(args.project_root)
56-
.with_suffix("")
57-
.as_posix()
58-
.replace("/", "."),
59-
function_to_optimize.qualified_name,
60-
]
61-
),
57+
qualified_function_path,
6258
],
6359
capture_output=True,
6460
text=True,

0 commit comments

Comments
 (0)