refactored get_function_benchmark_timings and get_benchmark_timings into BenchmarkDatabaseUtils class

alvin-r · alvin-r · commit 6180c9de6515 · 2025-03-25T09:51:12.000-07:00
diff --git a/codeflash/benchmarking/benchmark_database_utils.py b/codeflash/benchmarking/benchmark_database_utils.py
@@ -177,3 +177,120 @@ def close(self) -> None:
             self.connection.close()
             self.connection = None
 
+
+    @staticmethod
+    def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[str, int]]:
+        """Process the trace file and extract timing data for all functions.
+
+        Args:
+            trace_path: Path to the trace file
+
+        Returns:
+            A nested dictionary where:
+            - Outer keys are module_name.qualified_name (module.class.function)
+            - Inner keys are benchmark filename :: benchmark test function :: line number
+            - Values are function timing in nanoseconds (time_ns summed per benchmark)
+
+        """
+        # Initialize the result dictionary
+        result = {}
+
+        # Connect to the SQLite database
+        connection = sqlite3.connect(trace_path)
+        cursor = connection.cursor()
+
+        try:
+            # Query the function_calls table for all function calls
+            cursor.execute(
+                "SELECT module_name, class_name, function_name, "
+                "benchmark_file_name, benchmark_function_name, benchmark_line_number, time_ns "
+                "FROM function_calls"
+            )
+
+            # Process each row
+            for row in cursor.fetchall():
+                module_name, class_name, function_name, benchmark_file, benchmark_func, benchmark_line, time_ns = row
+
+                # Create the function key (module_name.class_name.function_name)
+                if class_name:
+                    qualified_name = f"{module_name}.{class_name}.{function_name}"
+                else:
+                    qualified_name = f"{module_name}.{function_name}"
+
+                # Create the benchmark key (file::function::line)
+                benchmark_key = f"{benchmark_file}::{benchmark_func}::{benchmark_line}"
+
+                # Initialize the inner dictionary if needed
+                if qualified_name not in result:
+                    result[qualified_name] = {}
+
+                # If multiple calls to the same function in the same benchmark,
+                # add the times together
+                if benchmark_key in result[qualified_name]:
+                    result[qualified_name][benchmark_key] += time_ns
+                else:
+                    result[qualified_name][benchmark_key] = time_ns
+
+        finally:
+            # Close the connection
+            connection.close()
+
+        return result
+
+    @staticmethod
+    def get_benchmark_timings(trace_path: Path) -> dict[str, int]:
+        """Extract total benchmark timings from trace files.
+
+        Args:
+            trace_path: Path to the trace file
+
+        Returns:
+            A dictionary mapping where:
+            - Keys are benchmark filename :: benchmark test function :: line number
+            - Values are total benchmark timing in nanoseconds (with overhead subtracted)
+
+        """
+        # Initialize the result dictionary
+        result = {}
+        overhead_by_benchmark = {}
+
+        # Connect to the SQLite database
+        connection = sqlite3.connect(trace_path)
+        cursor = connection.cursor()
+
+        try:
+            # Query the function_calls table to get total overhead for each benchmark
+            cursor.execute(
+                "SELECT benchmark_file_name, benchmark_function_name, benchmark_line_number, SUM(overhead_time_ns) "
+                "FROM function_calls "
+                "GROUP BY benchmark_file_name, benchmark_function_name, benchmark_line_number"
+            )
+
+            # Process overhead information
+            for row in cursor.fetchall():
+                benchmark_file, benchmark_func, benchmark_line, total_overhead_ns = row
+                benchmark_key = f"{benchmark_file}::{benchmark_func}::{benchmark_line}"
+                overhead_by_benchmark[benchmark_key] = total_overhead_ns or 0  # Handle NULL sum case
+
+            # Query the benchmark_timings table for total times
+            cursor.execute(
+                "SELECT benchmark_file_name, benchmark_function_name, benchmark_line_number, time_ns "
+                "FROM benchmark_timings"
+            )
+
+            # Process each row and subtract overhead
+            for row in cursor.fetchall():
+                benchmark_file, benchmark_func, benchmark_line, time_ns = row
+
+                # Create the benchmark key (file::function::line)
+                benchmark_key = f"{benchmark_file}::{benchmark_func}::{benchmark_line}"
+
+                # Subtract overhead from total time
+                overhead = overhead_by_benchmark.get(benchmark_key, 0)
+                result[benchmark_key] = time_ns - overhead
+
+        finally:
+            # Close the connection
+            connection.close()
+
+        return result
diff --git a/codeflash/benchmarking/get_trace_info.py b/codeflash/benchmarking/get_trace_info.py
diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
@@ -8,6 +8,7 @@
 from typing import TYPE_CHECKING
 
 from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient
+from codeflash.benchmarking.benchmark_database_utils import BenchmarkDatabaseUtils
 from codeflash.benchmarking.replay_test import generate_replay_test
 from codeflash.benchmarking.trace_benchmarks import trace_benchmarks_pytest
 from codeflash.benchmarking.utils import print_benchmark_table, validate_and_format_benchmark_table
@@ -24,7 +25,6 @@
 from codeflash.telemetry.posthog_cf import ph
 from codeflash.verification.test_results import TestType
 from codeflash.verification.verification_utils import TestConfig
-from codeflash.benchmarking.get_trace_info import get_function_benchmark_timings, get_benchmark_timings
 from codeflash.benchmarking.utils import print_benchmark_table
 from codeflash.benchmarking.instrument_codeflash_trace import instrument_codeflash_trace_decorator
 
@@ -119,8 +119,8 @@ def run(self) -> None:
                     if replay_count == 0:
                         logger.info(f"No valid benchmarks found in {self.args.benchmarks_root} for functions to optimize, continuing optimization")
                     else:
-                        function_benchmark_timings = get_function_benchmark_timings(trace_file)
-                        total_benchmark_timings = get_benchmark_timings(trace_file)
+                        function_benchmark_timings = BenchmarkDatabaseUtils.get_function_benchmark_timings(trace_file)
+                        total_benchmark_timings = BenchmarkDatabaseUtils.get_benchmark_timings(trace_file)
                         function_to_results = validate_and_format_benchmark_table(function_benchmark_timings, total_benchmark_timings)
                         print_benchmark_table(function_to_results)
                         logger.info("Finished tracing existing benchmarks")
diff --git a/tests/test_trace_benchmarks.py b/tests/test_trace_benchmarks.py
@@ -1,13 +1,11 @@
 import sqlite3
 
-from codeflash.benchmarking.codeflash_trace import codeflash_trace
-from codeflash.benchmarking.get_trace_info import get_function_benchmark_timings, get_benchmark_timings
+from codeflash.benchmarking.benchmark_database_utils import BenchmarkDatabaseUtils
 from codeflash.benchmarking.trace_benchmarks import trace_benchmarks_pytest
 from codeflash.benchmarking.replay_test import generate_replay_test
 from pathlib import Path
 
 from codeflash.benchmarking.utils import print_benchmark_table, validate_and_format_benchmark_table
-from codeflash.code_utils.code_utils import get_run_tmp_file
 import shutil
 
 
@@ -180,9 +178,8 @@ def test_trace_multithreaded_benchmark() -> None:
 
         # Assert the length of function calls
         assert len(function_calls) == 10, f"Expected 10 function calls, but got {len(function_calls)}"
-        function_benchmark_timings = get_function_benchmark_timings(output_file)
-        total_benchmark_timings = get_benchmark_timings(output_file)
-        # This will throw an error if summed function timings exceed total benchmark timing
+        function_benchmark_timings = BenchmarkDatabaseUtils.get_function_benchmark_timings(output_file)
+        total_benchmark_timings = BenchmarkDatabaseUtils.get_benchmark_timings(output_file)
         function_to_results = validate_and_format_benchmark_table(function_benchmark_timings, total_benchmark_timings)
         assert "code_to_optimize.bubble_sort_codeflash_trace.sorter" in function_to_results