
Commit f4be9be

modified PR info

1 parent 67d3f19 commit f4be9be

6 files changed: +132 additions, -32 deletions


codeflash/benchmarking/utils.py

Lines changed: 60 additions & 1 deletion
@@ -1,7 +1,12 @@
+from __future__ import annotations
+from typing import Optional
+
 from rich.console import Console
 from rich.table import Table

 from codeflash.cli_cmds.console import logger
+from codeflash.code_utils.time_utils import humanize_runtime
+from codeflash.models.models import ProcessedBenchmarkInfo, BenchmarkDetail


 def validate_and_format_benchmark_table(function_benchmark_timings: dict[str, dict[str, int]],
@@ -61,4 +66,58 @@ def print_benchmark_table(function_to_results: dict[str, list[tuple[str, float,
 )

 # Print the table
-console.print(table)
+console.print(table)
+
+
+def process_benchmark_data(
+    replay_performance_gain: float,
+    fto_benchmark_timings: dict[str, int],
+    total_benchmark_timings: dict[str, int]
+) -> Optional[ProcessedBenchmarkInfo]:
+    """Process benchmark data and generate detailed benchmark information.
+
+    Args:
+        replay_performance_gain: The performance gain from replay
+        fto_benchmark_timings: Function to optimize benchmark timings
+        total_benchmark_timings: Total benchmark timings
+
+    Returns:
+        ProcessedBenchmarkInfo containing processed benchmark details
+
+    """
+    if not replay_performance_gain or not fto_benchmark_timings or not total_benchmark_timings:
+        return None
+
+    benchmark_details = []
+
+    for benchmark_key, og_benchmark_timing in fto_benchmark_timings.items():
+        try:
+            benchmark_file_name, benchmark_test_function, line_number = benchmark_key.split("::")
+        except ValueError:
+            continue  # Skip malformed benchmark keys
+
+        total_benchmark_timing = total_benchmark_timings.get(benchmark_key, 0)
+
+        if total_benchmark_timing == 0:
+            continue  # Skip benchmarks with zero timing
+
+        # Calculate expected new benchmark timing
+        expected_new_benchmark_timing = total_benchmark_timing - og_benchmark_timing + (
+            1 / (replay_performance_gain + 1)
+        ) * og_benchmark_timing
+
+        # Calculate speedup
+        benchmark_speedup_ratio = total_benchmark_timing / expected_new_benchmark_timing
+        benchmark_speedup_percent = (benchmark_speedup_ratio - 1) * 100
+
+        benchmark_details.append(
+            BenchmarkDetail(
+                benchmark_name=benchmark_file_name,
+                test_function=benchmark_test_function,
+                original_timing=humanize_runtime(int(total_benchmark_timing)),
+                expected_new_timing=humanize_runtime(int(expected_new_benchmark_timing)),
+                speedup_percent=benchmark_speedup_percent
+            )
+        )
+
+    return ProcessedBenchmarkInfo(benchmark_details=benchmark_details)
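
Taken together, process_benchmark_data turns raw replay timings into presentation-ready speedup estimates. A minimal usage sketch (the benchmark key and nanosecond timings below are hypothetical, not from this PR):

# Hypothetical input: keys follow the "file::test_function::line" format the
# parser expects; timings are in nanoseconds.
from codeflash.benchmarking.utils import process_benchmark_data

fto_timings = {"test_bench.py::test_sort::12": 40_000_000}      # time spent in the optimized function
total_timings = {"test_bench.py::test_sort::12": 100_000_000}   # whole-benchmark runtime

# replay_performance_gain=1.0 means the optimized function ran 2x faster, so
# the 40 ms attributable to it shrinks to 40 / (1.0 + 1) = 20 ms. Expected
# total: 100 - 40 + 20 = 80 ms, i.e. a 100/80 - 1 = 25% benchmark speedup.
info = process_benchmark_data(
    replay_performance_gain=1.0,
    fto_benchmark_timings=fto_timings,
    total_benchmark_timings=total_timings,
)
print(info.to_string())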

codeflash/github/PrComment.py

Lines changed: 12 additions & 3 deletions
@@ -1,9 +1,11 @@
-from typing import Union
+from __future__ import annotations
+from typing import Union, Optional

 from pydantic import BaseModel
 from pydantic.dataclasses import dataclass

 from codeflash.code_utils.time_utils import humanize_runtime
+from codeflash.models.models import BenchmarkDetail
 from codeflash.verification.test_results import TestResults


@@ -18,15 +20,16 @@ class PrComment:
     speedup_pct: str
     winning_behavioral_test_results: TestResults
     winning_benchmarking_test_results: TestResults
+    benchmark_details: Optional[list[BenchmarkDetail]] = None

-    def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str]]:
+    def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Optional[list[dict[str, any]]]]]:
         report_table = {
             test_type.to_name(): result
             for test_type, result in self.winning_behavioral_test_results.get_test_pass_fail_report_by_type().items()
             if test_type.to_name()
         }

-        return {
+        result = {
             "optimization_explanation": self.optimization_explanation,
             "best_runtime": humanize_runtime(self.best_runtime),
             "original_runtime": humanize_runtime(self.original_runtime),
@@ -38,6 +41,12 @@ def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str]]:
             "report_table": report_table,
         }

+        # Add benchmark details if available
+        if self.benchmark_details:
+            result["benchmark_details"] = self.benchmark_details
+
+        return result
+

 class FileDiffContent(BaseModel):
     oldContent: str
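
Note that to_json() stores the BenchmarkDetail objects themselves under "benchmark_details", so a JSON encoder downstream would presumably need their dict form via BenchmarkDetail.to_dict() (added in models.py below). A hedged sketch, with pr_comment standing in for a hypothetical PrComment instance:

import json

payload = pr_comment.to_json()  # pr_comment: hypothetical PrComment instance
if payload.get("benchmark_details"):
    # Convert the dataclass instances to plain dicts before serializing
    payload["benchmark_details"] = [d.to_dict() for d in payload["benchmark_details"]]
print(json.dumps(payload, indent=2))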

codeflash/models/models.py

Lines changed: 41 additions & 0 deletions
@@ -23,6 +23,7 @@
     generate_candidates,
 )
 from codeflash.code_utils.env_utils import is_end_to_end
+from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.verification.test_results import TestResults, TestType

 # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
@@ -77,7 +78,47 @@ class BestOptimization(BaseModel):
     winning_benchmarking_test_results: TestResults
     winning_replay_benchmarking_test_results : Optional[TestResults] = None

+@dataclass
+class BenchmarkDetail:
+    benchmark_name: str
+    test_function: str
+    original_timing: str
+    expected_new_timing: str
+    speedup_percent: float
+
+    def to_string(self) -> str:
+        return (
+            f"Original timing for {self.benchmark_name}::{self.test_function}: {self.original_timing}\n"
+            f"Expected new timing for {self.benchmark_name}::{self.test_function}: {self.expected_new_timing}\n"
+            f"Benchmark speedup for {self.benchmark_name}::{self.test_function}: {self.speedup_percent:.2f}%\n"
+        )
+
+    def to_dict(self) -> dict[str, any]:
+        return {
+            "benchmark_name": self.benchmark_name,
+            "test_function": self.test_function,
+            "original_timing": self.original_timing,
+            "expected_new_timing": self.expected_new_timing,
+            "speedup_percent": self.speedup_percent
+        }

+@dataclass
+class ProcessedBenchmarkInfo:
+    benchmark_details: list[BenchmarkDetail]
+
+    def to_string(self) -> str:
+        if not self.benchmark_details:
+            return ""
+
+        result = "Benchmark Performance Details:\n"
+        for detail in self.benchmark_details:
+            result += detail.to_string() + "\n"
+        return result
+
+    def to_dict(self) -> dict[str, list[dict[str, any]]]:
+        return {
+            "benchmark_details": [detail.to_dict() for detail in self.benchmark_details]
+        }
 class CodeString(BaseModel):
     code: Annotated[str, AfterValidator(validate_python_code)]
     file_path: Optional[Path] = None
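
The two dataclasses are plain containers plus formatting helpers. One nit worth flagging: the dict[str, any] annotations presumably intend typing.Any; lowercase any is the builtin function, so the code still runs, but type checkers will reject it. An illustrative construction, with made-up values:

from codeflash.models.models import BenchmarkDetail, ProcessedBenchmarkInfo

# Values are illustrative only.
detail = BenchmarkDetail(
    benchmark_name="test_bench.py",
    test_function="test_sort",
    original_timing="100ms",
    expected_new_timing="80.0ms",
    speedup_percent=25.0,
)
info = ProcessedBenchmarkInfo(benchmark_details=[detail])
print(info.to_string())
# Benchmark Performance Details:
# Original timing for test_bench.py::test_sort: 100ms
# Expected new timing for test_bench.py::test_sort: 80.0ms
# Benchmark speedup for test_bench.py::test_sort: 25.00%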

codeflash/optimization/function_optimizer.py

Lines changed: 9 additions & 3 deletions
@@ -19,6 +19,7 @@
 from rich.tree import Tree

 from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient
+from codeflash.benchmarking.utils import process_benchmark_data
 from codeflash.cli_cmds.console import code_print, console, logger, progress_bar
 from codeflash.code_utils import env_utils
 from codeflash.code_utils.code_replacer import replace_function_definitions_in_module
@@ -263,6 +264,13 @@ def optimize_function(self) -> Result[BestOptimization, str]:
                 best_optimization.candidate.explanation, title="Best Candidate Explanation", border_style="blue"
             )
         )
+        processed_benchmark_info = None
+        if self.args.benchmark:
+            processed_benchmark_info = process_benchmark_data(
+                replay_performance_gain=best_optimization.replay_performance_gain,
+                fto_benchmark_timings=self.function_benchmark_timings,
+                total_benchmark_timings=self.total_benchmark_timings
+            )
         explanation = Explanation(
             raw_explanation_message=best_optimization.candidate.explanation,
             winning_behavioral_test_results=best_optimization.winning_behavioral_test_results,
@@ -271,9 +279,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
             best_runtime_ns=best_optimization.runtime,
             function_name=function_to_optimize_qualified_name,
             file_path=self.function_to_optimize.file_path,
-            replay_performance_gain=best_optimization.replay_performance_gain if self.args.benchmark else None,
-            fto_benchmark_timings = self.function_benchmark_timings if self.args.benchmark else None,
-            total_benchmark_timings = self.total_benchmark_timings if self.args.benchmark else None,
+            benchmark_details=processed_benchmark_info.benchmark_details if processed_benchmark_info else None
         )

         self.log_successful_optimization(explanation, generated_tests)

codeflash/result/create_pr.py

Lines changed: 2 additions & 0 deletions
@@ -77,6 +77,7 @@ def check_create_pr(
                 speedup_pct=explanation.speedup_pct,
                 winning_behavioral_test_results=explanation.winning_behavioral_test_results,
                 winning_benchmarking_test_results=explanation.winning_benchmarking_test_results,
+                benchmark_details=explanation.benchmark_details
             ),
             existing_tests=existing_tests_source,
             generated_tests=generated_original_test_source,
@@ -123,6 +124,7 @@ def check_create_pr(
                 speedup_pct=explanation.speedup_pct,
                 winning_behavioral_test_results=explanation.winning_behavioral_test_results,
                 winning_benchmarking_test_results=explanation.winning_benchmarking_test_results,
+                benchmark_details=explanation.benchmark_details
             ),
             existing_tests=existing_tests_source,
             generated_tests=generated_original_test_source,

codeflash/result/explanation.py

Lines changed: 8 additions & 25 deletions
@@ -5,6 +5,7 @@
 from pydantic.dataclasses import dataclass

 from codeflash.code_utils.time_utils import humanize_runtime
+from codeflash.models.models import BenchmarkDetail
 from codeflash.verification.test_results import TestResults


@@ -17,9 +18,7 @@ class Explanation:
     best_runtime_ns: int
     function_name: str
     file_path: Path
-    replay_performance_gain: Optional[float]
-    fto_benchmark_timings: Optional[dict[str, int]]
-    total_benchmark_timings: Optional[dict[str, int]]
+    benchmark_details: Optional[list[BenchmarkDetail]] = None

     @property
     def perf_improvement_line(self) -> str:
@@ -43,29 +42,13 @@ def to_console_string(self) -> str:
         original_runtime_human = humanize_runtime(self.original_runtime_ns)
         best_runtime_human = humanize_runtime(self.best_runtime_ns)
         benchmark_info = ""
-        if self.replay_performance_gain and self.fto_benchmark_timings and self.total_benchmark_timings:
-            benchmark_info += "Benchmark Performance Details:\n"
-            for benchmark_key, og_benchmark_timing in self.fto_benchmark_timings.items():
-                # benchmark key is benchmark filename :: benchmark test function :: line number
-                try:
-                    benchmark_file_name, benchmark_test_function, line_number = benchmark_key.split("::")
-                except ValueError:
-                    benchmark_info += f"Benchmark key {benchmark_key} is not in the expected format.\n"
-                    continue

-                total_benchmark_timing = self.total_benchmark_timings[benchmark_key]
-                if total_benchmark_timing == 0:
-                    benchmark_info += f"Benchmark timing for {benchmark_file_name}::{benchmark_test_function} was improved, but the speedup cannot be estimated.\n"
-                else:
-                    # find out expected new benchmark timing, then calculate how much total benchmark was sped up. print out intermediate values
-                    benchmark_info += f"Original timing for {benchmark_file_name}::{benchmark_test_function}: {humanize_runtime(total_benchmark_timing)}\n"
-                    replay_speedup = self.replay_performance_gain
-                    expected_new_benchmark_timing = total_benchmark_timing - og_benchmark_timing + 1 / (
-                        replay_speedup + 1) * og_benchmark_timing
-                    benchmark_info += f"Expected new timing for {benchmark_file_name}::{benchmark_test_function}: {humanize_runtime(int(expected_new_benchmark_timing))}\n"
-                    benchmark_speedup_ratio = total_benchmark_timing / expected_new_benchmark_timing
-                    benchmark_speedup_percent = (benchmark_speedup_ratio - 1) * 100
-                    benchmark_info += f"Benchmark speedup for {benchmark_file_name}::{benchmark_test_function}: {benchmark_speedup_percent:.2f}%\n\n"
+        if self.benchmark_details:
+            benchmark_info += "Benchmark Performance Details:\n"
+            for detail in self.benchmark_details:
+                benchmark_info += f"Original timing for {detail.benchmark_name}::{detail.test_function}: {detail.original_timing}\n"
+                benchmark_info += f"Expected new timing for {detail.benchmark_name}::{detail.test_function}: {detail.expected_new_timing}\n"
+                benchmark_info += f"Benchmark speedup for {detail.benchmark_name}::{detail.test_function}: {detail.speedup_percent:.2f}%\n\n"

         return (
             f"Optimized {self.function_name} in {self.file_path}\n"
