Commit 7697ad5

allow for multiple calls to the benchmark fixture within a test
1 parent 1a53bed commit 7697ad5

File tree: 3 files changed, 70 additions and 74 deletions

codeflash/benchmarking/plugin/plugin.py
codeflash/benchmarking/utils.py
codeflash/discovery/functions_to_optimize.py

codeflash/benchmarking/plugin/plugin.py

Lines changed: 37 additions & 55 deletions

@@ -5,10 +5,12 @@
 import sys
 import time
 from pathlib import Path
+from typing import Any, Callable
 
 import pytest
 
 from codeflash.benchmarking.codeflash_trace import codeflash_trace
+from codeflash.cli_cmds.cli import logger
 from codeflash.code_utils.code_utils import module_name_from_file_path
 from codeflash.models.models import BenchmarkKey
 
@@ -22,7 +24,6 @@ def __init__(self) -> None:
 
     def setup(self, trace_path: str, project_root: str) -> None:
         try:
-            # Open connection
             self.project_root = project_root
             self._trace_path = trace_path
             self._connection = sqlite3.connect(self._trace_path)
@@ -35,12 +36,10 @@ def setup(self, trace_path: str, project_root: str) -> None:
                 "benchmark_time_ns INTEGER)"
             )
             self._connection.commit()
-            self.close()  # Reopen only at the end of pytest session
+            self.close()
         except Exception as e:
-            print(f"Database setup error: {e}")
-            if self._connection:
-                self._connection.close()
-            self._connection = None
+            logger.error(f"Database setup error: {e}")
+            self.close()
             raise
 
     def write_benchmark_timings(self) -> None:
@@ -52,15 +51,14 @@ def write_benchmark_timings(self) -> None:
 
         try:
             cur = self._connection.cursor()
-            # Insert data into the benchmark_timings table
             cur.executemany(
                 "INSERT INTO benchmark_timings (benchmark_module_path, benchmark_function_name, benchmark_line_number, benchmark_time_ns) VALUES (?, ?, ?, ?)",
                 self.benchmark_timings,
             )
             self._connection.commit()
-            self.benchmark_timings = []  # Clear the benchmark timings list
+            self.benchmark_timings.clear()
         except Exception as e:
-            print(f"Error writing to benchmark timings database: {e}")
+            logger.error(f"Error writing to benchmark timings database: {e}")
            self._connection.rollback()
            raise
 
@@ -83,22 +81,18 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
         - Values are function timing in milliseconds
 
         """
-        # Initialize the result dictionary
         result = {}
 
-        # Connect to the SQLite database
         connection = sqlite3.connect(trace_path)
         cursor = connection.cursor()
 
         try:
-            # Query the function_calls table for all function calls
             cursor.execute(
                 "SELECT module_name, class_name, function_name, "
                 "benchmark_module_path, benchmark_function_name, benchmark_line_number, function_time_ns "
                 "FROM benchmark_function_timings"
             )
 
-            # Process each row
             for row in cursor.fetchall():
                 module_name, class_name, function_name, benchmark_file, benchmark_func, benchmark_line, time_ns = row
 
@@ -110,7 +104,6 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
 
                 # Create the benchmark key (file::function::line)
                 benchmark_key = BenchmarkKey(module_path=benchmark_file, function_name=benchmark_func)
-                # Initialize the inner dictionary if needed
                 if qualified_name not in result:
                     result[qualified_name] = {}
 
@@ -122,7 +115,6 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
                 result[qualified_name][benchmark_key] = time_ns
 
         finally:
-            # Close the connection
             connection.close()
 
         return result
@@ -140,11 +132,9 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
         - Values are total benchmark timing in milliseconds (with overhead subtracted)
 
         """
-        # Initialize the result dictionary
         result = {}
         overhead_by_benchmark = {}
 
-        # Connect to the SQLite database
         connection = sqlite3.connect(trace_path)
         cursor = connection.cursor()
 
@@ -156,7 +146,6 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
                 "GROUP BY benchmark_module_path, benchmark_function_name, benchmark_line_number"
             )
 
-            # Process overhead information
             for row in cursor.fetchall():
                 benchmark_file, benchmark_func, benchmark_line, total_overhead_ns = row
                 benchmark_key = BenchmarkKey(module_path=benchmark_file, function_name=benchmark_func)
@@ -168,52 +157,48 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
                 "FROM benchmark_timings"
             )
 
-            # Process each row and subtract overhead
             for row in cursor.fetchall():
                 benchmark_file, benchmark_func, benchmark_line, time_ns = row
 
-                # Create the benchmark key (file::function::line)
-                benchmark_key = BenchmarkKey(module_path=benchmark_file, function_name=benchmark_func)
+                benchmark_key = BenchmarkKey(
+                    module_path=benchmark_file, function_name=benchmark_func
+                )  # (file::function::line)
                 # Subtract overhead from total time
                 overhead = overhead_by_benchmark.get(benchmark_key, 0)
                 result[benchmark_key] = time_ns - overhead
 
         finally:
-            # Close the connection
             connection.close()
 
         return result
 
-    # Pytest hooks
     @pytest.hookimpl
-    def pytest_sessionfinish(self, session, exitstatus):
+    def pytest_sessionfinish(self, session: pytest.Session, exitstatus: int) -> None:  # noqa: ARG002
         """Execute after whole test run is completed."""
-        # Write any remaining benchmark timings to the database
         codeflash_trace.close()
         if self.benchmark_timings:
             self.write_benchmark_timings()
-        # Close the database connection
         self.close()
 
     @staticmethod
-    def pytest_addoption(parser):
+    def pytest_addoption(parser: pytest.Parser) -> None:
         parser.addoption("--codeflash-trace", action="store_true", default=False, help="Enable CodeFlash tracing")
 
     @staticmethod
-    def pytest_plugin_registered(plugin, manager):
+    def pytest_plugin_registered(plugin: Any, manager: Any) -> None:  # noqa: ANN401
         # Not necessary since run with -p no:benchmark, but just in case
         if hasattr(plugin, "name") and plugin.name == "pytest-benchmark":
             manager.unregister(plugin)
 
     @staticmethod
-    def pytest_configure(config):
+    def pytest_configure(config: pytest.Config) -> None:
         """Register the benchmark marker."""
         config.addinivalue_line(
             "markers", "benchmark: mark test as a benchmark that should be run with codeflash tracing"
         )
 
     @staticmethod
-    def pytest_collection_modifyitems(config, items):
+    def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None:
         # Skip tests that don't have the benchmark fixture
         if not config.getoption("--codeflash-trace"):
             return
@@ -236,54 +221,51 @@ def pytest_collection_modifyitems(config, items):
 
     # Benchmark fixture
     class Benchmark:
-        def __init__(self, request):
-            self.request = request
-
-        def __call__(self, func, *args, **kwargs):
-            """Handle both direct function calls and decorator usage."""
-            if args or kwargs:
-                # Used as benchmark(func, *args, **kwargs)
-                return self._run_benchmark(func, *args, **kwargs)
+        """Benchmark fixture class for running and timing benchmarked functions."""
 
-            # Used as @benchmark decorator
-            def wrapped_func(*inner_args, **inner_kwargs):
-                return self._run_benchmark(func, *inner_args, **inner_kwargs)
+        def __init__(self, request: pytest.FixtureRequest) -> None:
+            self.request = request
+            self._call_count = 0
 
-            return wrapped_func
+        def __call__(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
+            benchmark_name_suffix = kwargs.pop("benchmark_name_suffix", None)
+            return self._run_benchmark(func, args, kwargs, benchmark_name_suffix)
 
-        def _run_benchmark(self, func, *args, **kwargs):
-            """Actual benchmark implementation."""
+        def _run_benchmark(
+            self, func: Callable, args: tuple, kwargs: dict, benchmark_name_suffix: str | None = None
+        ) -> Any:  # noqa: ANN401
             benchmark_module_path = module_name_from_file_path(
                 Path(str(self.request.node.fspath)), Path(codeflash_benchmark_plugin.project_root)
             )
             benchmark_function_name = self.request.node.name
             line_number = int(str(sys._getframe(2).f_lineno))  # 2 frames up in the call stack
-            # Set env vars
-            os.environ["CODEFLASH_BENCHMARK_FUNCTION_NAME"] = benchmark_function_name
+            self._call_count += 1
+            if benchmark_name_suffix:
+                call_identifier = f"{benchmark_function_name}::{benchmark_name_suffix}"
+            else:
+                call_identifier = f"{benchmark_function_name}::call_{self._call_count}"
+
+            os.environ["CODEFLASH_BENCHMARKING"] = "True"
+            os.environ["CODEFLASH_BENCHMARK_FUNCTION_NAME"] = call_identifier
             os.environ["CODEFLASH_BENCHMARK_MODULE_PATH"] = benchmark_module_path
             os.environ["CODEFLASH_BENCHMARK_LINE_NUMBER"] = str(line_number)
             os.environ["CODEFLASH_BENCHMARKING"] = "True"
-            # Run the function
-            start = time.time_ns()
+            start = time.perf_counter_ns()
             result = func(*args, **kwargs)
-            end = time.time_ns()
-            # Reset the environment variable
+            end = time.perf_counter_ns()
             os.environ["CODEFLASH_BENCHMARKING"] = "False"
 
-            # Write function calls
             codeflash_trace.write_function_timings()
-            # Reset function call count
             codeflash_trace.function_call_count = 0
-            # Add to the benchmark timings buffer
             codeflash_benchmark_plugin.benchmark_timings.append(
-                (benchmark_module_path, benchmark_function_name, line_number, end - start)
+                (benchmark_module_path, call_identifier, line_number, end - start)
            )
 
            return result
 
    @staticmethod
    @pytest.fixture
-    def benchmark(request):
+    def benchmark(request: pytest.FixtureRequest) -> CodeFlashBenchmarkPlugin.Benchmark | None:
        if not request.config.getoption("--codeflash-trace"):
            return None
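
The fixture change above is what enables the commit's headline behavior: each call is now recorded under its own identifier, either an automatic ::call_N counter or a caller-supplied benchmark_name_suffix keyword. A minimal usage sketch, assuming a run with --codeflash-trace so the fixture is active; the test and the function under test are hypothetical:

import pytest


def parse_csv_line(line: str) -> list[str]:  # hypothetical function under test
    return [field.strip() for field in line.split(",")]


@pytest.mark.benchmark
def test_parse_csv_line(benchmark):
    # Recorded as "test_parse_csv_line::call_1"
    assert benchmark(parse_csv_line, "a,b,c") == ["a", "b", "c"]
    # Recorded as "test_parse_csv_line::padded" via the new keyword
    assert benchmark(parse_csv_line, " a , b , c ", benchmark_name_suffix="padded") == ["a", "b", "c"]

Note that the decorator-style usage handled by the old __call__ is dropped here; after this commit the fixture is always invoked directly with the callable and its arguments.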

codeflash/benchmarking/utils.py

Lines changed: 32 additions & 19 deletions

@@ -1,12 +1,10 @@
 from __future__ import annotations
 
-import shutil
 from typing import TYPE_CHECKING, Optional
 
-from rich.console import Console
 from rich.table import Table
 
-from codeflash.cli_cmds.console import logger
+from codeflash.cli_cmds.console import console, logger
 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.models.models import BenchmarkDetail, ProcessedBenchmarkInfo
 from codeflash.result.critic import performance_gain
@@ -42,36 +40,51 @@ def validate_and_format_benchmark_table(
 
 
 def print_benchmark_table(function_to_results: dict[str, list[tuple[BenchmarkKey, float, float, float]]]) -> None:
-    try:
-        terminal_width = int(shutil.get_terminal_size().columns * 0.9)
-    except Exception:
-        terminal_width = 120  # Fallback width
-    console = Console(width=terminal_width)
     for func_path, sorted_tests in function_to_results.items():
         console.print()
         function_name = func_path.split(":")[-1]
 
-        # Create a table for this function
-        table = Table(title=f"Function: {function_name}", width=terminal_width, border_style="blue", show_lines=True)
-        benchmark_col_width = max(int(terminal_width * 0.4), 40)
-        # Add columns - split the benchmark test into two columns
-        table.add_column("Benchmark Module Path", width=benchmark_col_width, style="cyan", overflow="fold")
+        table = Table(title=f"Function: {function_name}", border_style="blue", show_lines=True)
+        table.add_column("Benchmark Module Path", style="cyan", overflow="fold")
         table.add_column("Test Function", style="magenta", overflow="fold")
         table.add_column("Total Time (ms)", justify="right", style="green")
         table.add_column("Function Time (ms)", justify="right", style="yellow")
         table.add_column("Percentage (%)", justify="right", style="red")
 
-        for benchmark_key, total_time, func_time, percentage in sorted_tests:
-            # Split the benchmark test into module path and function name
-            module_path = benchmark_key.module_path
+        multi_call_bases = set()
+        call_1_tests = []
+
+        for i, (benchmark_key, _, _, _) in enumerate(sorted_tests):
             test_function = benchmark_key.function_name
+            module_path = benchmark_key.module_path
+            if "::call_" in test_function:
+                try:
+                    base_name, call_part = test_function.rsplit("::call_", 1)
+                    call_num = int(call_part)
+                    if call_num == 1:
+                        call_1_tests.append((i, base_name, module_path))
+                    elif call_num > 1:
+                        multi_call_bases.add((base_name, module_path))
+                except ValueError:
+                    pass
+
+        tests_to_modify = {
+            index: base_name
+            for index, base_name, module_path in call_1_tests
+            if (base_name, module_path) not in multi_call_bases
+        }
+
+        for i, (benchmark_key, total_time, func_time, percentage) in enumerate(sorted_tests):
+            module_path = benchmark_key.module_path
+            test_function_display = tests_to_modify.get(i, benchmark_key.function_name)
 
             if total_time == 0.0:
-                table.add_row(module_path, test_function, "N/A", "N/A", "N/A")
+                table.add_row(module_path, test_function_display, "N/A", "N/A", "N/A")
             else:
-                table.add_row(module_path, test_function, f"{total_time:.3f}", f"{func_time:.3f}", f"{percentage:.2f}")
+                table.add_row(
+                    module_path, test_function_display, f"{total_time:.3f}", f"{func_time:.3f}", f"{percentage:.2f}"
                )
 
-        # Print the table
        console.print(table)

codeflash/discovery/functions_to_optimize.py

Lines changed: 1 addition & 0 deletions

@@ -201,6 +201,7 @@ def get_functions_to_optimize(
         functions, test_cfg.tests_root, ignore_paths, project_root, module_root
     )
     logger.info(f"Found {functions_count} function{'s' if functions_count > 1 else ''} to optimize")
+    console.rule()
     return filtered_modified_functions, functions_count
