from codeflash.models.models import FunctionTestInvocation, InvocationId, TestResults, TestType
from codeflash.verification.parse_test_output import merge_test_results


def generate_test_invocations(count=100):
    """Generate paired XML and binary test invocations for benchmarking the merge."""
    test_results_xml = TestResults()
    test_results_bin = TestResults()

    # Generate matching pairs of test invocations in a loop
    for i in range(count):
        iteration_id = str(i * 3 + 5)  # Generate unique iteration IDs

        # XML results - every third invocation has no recorded runtime
        test_results_xml.add(
            FunctionTestInvocation(
                id=InvocationId(
                    test_module_path="code_to_optimize.tests.unittest.test_bubble_sort",
                    test_class_name="TestPigLatin",
                    test_function_name="test_sort",
                    function_getting_tested="sorter",
                    iteration_id=iteration_id,
                ),
                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
                did_pass=True,
                runtime=None if i % 3 == 0 else i * 100,  # Vary runtime values
                test_framework="unittest",
                test_type=TestType.EXISTING_UNIT_TEST,
                return_value=None,
                timed_out=False,
                loop_index=i,
            )
        )

        # Binary results - same invocation IDs, with actual runtime values
        test_results_bin.add(
            FunctionTestInvocation(
                id=InvocationId(
                    test_module_path="code_to_optimize.tests.unittest.test_bubble_sort",
                    test_class_name="TestPigLatin",
                    test_function_name="test_sort",
                    function_getting_tested="sorter",
                    iteration_id=iteration_id,
                ),
                file_name="/tmp/tests/unittest/test_bubble_sort__perfinstrumented.py",
                did_pass=True,
                runtime=500 + i * 20,  # Generate varying runtime values
                test_framework="unittest",
                test_type=TestType.EXISTING_UNIT_TEST,
                return_value=None,
                timed_out=False,
                loop_index=i,
            )
        )

    return test_results_xml, test_results_bin


def run_merge_benchmark(count=100):
    """Generate `count` invocation pairs and merge them; generation is included in the timed call."""
    test_results_xml, test_results_bin = generate_test_invocations(count)

    # Perform the merge operation that will be benchmarked
    merge_test_results(
        xml_test_results=test_results_xml,
        bin_test_results=test_results_bin,
        test_framework="unittest",
    )


def test_benchmark_merge_test_results(benchmark):
    benchmark(run_merge_benchmark, 1000)  # Benchmark merging 1,000 generated test invocations
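

# Because run_merge_benchmark builds its inputs inside the timed call, the
# benchmark above measures data generation as well as the merge. A minimal
# sketch of timing only merge_test_results, using pytest-benchmark's pedantic
# mode with a per-round setup callable; this variant is an illustrative
# assumption, not part of the original suite:
def test_benchmark_merge_only(benchmark):
    def setup():
        # Build fresh inputs each round so only the merge itself is measured.
        test_results_xml, test_results_bin = generate_test_invocations(1000)
        return (), {
            "xml_test_results": test_results_xml,
            "bin_test_results": test_results_bin,
            "test_framework": "unittest",
        }

    # pedantic passes setup's (args, kwargs) to merge_test_results each round.
    benchmark.pedantic(merge_test_results, setup=setup, rounds=10)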