
Commit f4be9be

modified PR info

1 parent 67d3f19 commit f4be9be

6 files changed: +132 additions, -32 deletions


codeflash/benchmarking/utils.py

Lines changed: 60 additions & 1 deletion
@@ -1,7 +1,12 @@
+from __future__ import annotations
+from typing import Optional
+
 from rich.console import Console
 from rich.table import Table

 from codeflash.cli_cmds.console import logger
+from codeflash.code_utils.time_utils import humanize_runtime
+from codeflash.models.models import ProcessedBenchmarkInfo, BenchmarkDetail


 def validate_and_format_benchmark_table(function_benchmark_timings: dict[str, dict[str, int]],
@@ -61,4 +66,58 @@ def print_benchmark_table(function_to_results: dict[str, list[tuple[str, float,
 )

 # Print the table
-console.print(table)
+console.print(table)
+
+
+def process_benchmark_data(
+    replay_performance_gain: float,
+    fto_benchmark_timings: dict[str, int],
+    total_benchmark_timings: dict[str, int]
+) -> Optional[ProcessedBenchmarkInfo]:
+    """Process benchmark data and generate detailed benchmark information.
+
+    Args:
+        replay_performance_gain: The performance gain from replay
+        fto_benchmark_timings: Function to optimize benchmark timings
+        total_benchmark_timings: Total benchmark timings
+
+    Returns:
+        ProcessedBenchmarkInfo containing processed benchmark details
+
+    """
+    if not replay_performance_gain or not fto_benchmark_timings or not total_benchmark_timings:
+        return None
+
+    benchmark_details = []
+
+    for benchmark_key, og_benchmark_timing in fto_benchmark_timings.items():
+        try:
+            benchmark_file_name, benchmark_test_function, line_number = benchmark_key.split("::")
+        except ValueError:
+            continue  # Skip malformed benchmark keys
+
+        total_benchmark_timing = total_benchmark_timings.get(benchmark_key, 0)
+
+        if total_benchmark_timing == 0:
+            continue  # Skip benchmarks with zero timing
+
+        # Calculate expected new benchmark timing
+        expected_new_benchmark_timing = total_benchmark_timing - og_benchmark_timing + (
+            1 / (replay_performance_gain + 1)
+        ) * og_benchmark_timing
+
+        # Calculate speedup
+        benchmark_speedup_ratio = total_benchmark_timing / expected_new_benchmark_timing
+        benchmark_speedup_percent = (benchmark_speedup_ratio - 1) * 100
+
+        benchmark_details.append(
+            BenchmarkDetail(
+                benchmark_name=benchmark_file_name,
+                test_function=benchmark_test_function,
+                original_timing=humanize_runtime(int(total_benchmark_timing)),
+                expected_new_timing=humanize_runtime(int(expected_new_benchmark_timing)),
+                speedup_percent=benchmark_speedup_percent
+            )
+        )
+
+    return ProcessedBenchmarkInfo(benchmark_details=benchmark_details)
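
Taken together, process_benchmark_data turns raw replay timings into presentation-ready speedup estimates. A minimal usage sketch (the benchmark key and nanosecond timings below are hypothetical, not from this PR):

# Hypothetical input: keys follow the "file::test_function::line" format the
# parser expects; timings are in nanoseconds.
from codeflash.benchmarking.utils import process_benchmark_data

fto_timings = {"test_bench.py::test_sort::12": 40_000_000}      # time spent in the optimized function
total_timings = {"test_bench.py::test_sort::12": 100_000_000}   # whole-benchmark runtime

# replay_performance_gain=1.0 means the optimized function ran 2x faster, so
# the 40 ms attributable to it shrinks to 40 / (1.0 + 1) = 20 ms. Expected
# total: 100 - 40 + 20 = 80 ms, i.e. a 100/80 - 1 = 25% benchmark speedup.
info = process_benchmark_data(
    replay_performance_gain=1.0,
    fto_benchmark_timings=fto_timings,
    total_benchmark_timings=total_timings,
)
print(info.to_string())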

codeflash/github/PrComment.py

Lines changed: 12 additions & 3 deletions
@@ -1,9 +1,11 @@
-from typing import Union
+from __future__ import annotations
+from typing import Union, Optional

 from pydantic import BaseModel
 from pydantic.dataclasses import dataclass

 from codeflash.code_utils.time_utils import humanize_runtime
+from codeflash.models.models import BenchmarkDetail
 from codeflash.verification.test_results import TestResults


@@ -18,15 +20,16 @@ class PrComment:
     speedup_pct: str
     winning_behavioral_test_results: TestResults
     winning_benchmarking_test_results: TestResults
+    benchmark_details: Optional[list[BenchmarkDetail]] = None

-    def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str]]:
+    def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str, Optional[list[dict[str, any]]]]]:
         report_table = {
             test_type.to_name(): result
             for test_type, result in self.winning_behavioral_test_results.get_test_pass_fail_report_by_type().items()
             if test_type.to_name()
         }

-        return {
+        result = {
             "optimization_explanation": self.optimization_explanation,
             "best_runtime": humanize_runtime(self.best_runtime),
             "original_runtime": humanize_runtime(self.original_runtime),
@@ -38,6 +41,12 @@ def to_json(self) -> dict[str, Union[dict[str, dict[str, int]], int, str]]:
             "report_table": report_table,
         }

+        # Add benchmark details if available
+        if self.benchmark_details:
+            result["benchmark_details"] = self.benchmark_details
+
+        return result
+

 class FileDiffContent(BaseModel):
     oldContent: str
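
Note that to_json() stores the BenchmarkDetail objects themselves under "benchmark_details", so a JSON encoder downstream would presumably need their dict form via BenchmarkDetail.to_dict() (added in models.py below). A hedged sketch, with pr_comment standing in for a hypothetical PrComment instance:

import json

payload = pr_comment.to_json()  # pr_comment: hypothetical PrComment instance
if payload.get("benchmark_details"):
    # Convert the dataclass instances to plain dicts before serializing
    payload["benchmark_details"] = [d.to_dict() for d in payload["benchmark_details"]]
print(json.dumps(payload, indent=2))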

codeflash/models/models.py

Lines changed: 41 additions & 0 deletions
@@ -23,6 +23,7 @@
     generate_candidates,
 )
 from codeflash.code_utils.env_utils import is_end_to_end
+from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.verification.test_results import TestResults, TestType

 # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
@@ -77,7 +78,47 @@ class BestOptimization(BaseModel):
     winning_benchmarking_test_results: TestResults
     winning_replay_benchmarking_test_results : Optional[TestResults] = None

+@dataclass
+class BenchmarkDetail:
+    benchmark_name: str
+    test_function: str
+    original_timing: str
+    expected_new_timing: str
+    speedup_percent: float
+
+    def to_string(self) -> str:
+        return (
+            f"Original timing for {self.benchmark_name}::{self.test_function}: {self.original_timing}\n"
+            f"Expected new timing for {self.benchmark_name}::{self.test_function}: {self.expected_new_timing}\n"
+            f"Benchmark speedup for {self.benchmark_name}::{self.test_function}: {self.speedup_percent:.2f}%\n"
+        )
+
+    def to_dict(self) -> dict[str, any]:
+        return {
+            "benchmark_name": self.benchmark_name,
+            "test_function": self.test_function,
+            "original_timing": self.original_timing,
+            "expected_new_timing": self.expected_new_timing,
+            "speedup_percent": self.speedup_percent
+        }

+@dataclass
+class ProcessedBenchmarkInfo:
+    benchmark_details: list[BenchmarkDetail]
+
+    def to_string(self) -> str:
+        if not self.benchmark_details:
+            return ""
+
+        result = "Benchmark Performance Details:\n"
+        for detail in self.benchmark_details:
+            result += detail.to_string() + "\n"
+        return result
+
+    def to_dict(self) -> dict[str, list[dict[str, any]]]:
+        return {
+            "benchmark_details": [detail.to_dict() for detail in self.benchmark_details]
+        }
 class CodeString(BaseModel):
     code: Annotated[str, AfterValidator(validate_python_code)]
     file_path: Optional[Path] = None
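
The two dataclasses are plain containers plus formatting helpers. One nit worth flagging: the dict[str, any] annotations presumably intend typing.Any; lowercase any is the builtin function, so the code still runs, but type checkers will reject it. An illustrative construction, with made-up values:

from codeflash.models.models import BenchmarkDetail, ProcessedBenchmarkInfo

# Values are illustrative only.
detail = BenchmarkDetail(
    benchmark_name="test_bench.py",
    test_function="test_sort",
    original_timing="100ms",
    expected_new_timing="80.0ms",
    speedup_percent=25.0,
)
info = ProcessedBenchmarkInfo(benchmark_details=[detail])
print(info.to_string())
# Benchmark Performance Details:
# Original timing for test_bench.py::test_sort: 100ms
# Expected new timing for test_bench.py::test_sort: 80.0ms
# Benchmark speedup for test_bench.py::test_sort: 25.00%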

codeflash/optimization/function_optimizer.py

Lines changed: 9 additions & 3 deletions
@@ -19,6 +19,7 @@
 from rich.tree import Tree

 from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient
+from codeflash.benchmarking.utils import process_benchmark_data
 from codeflash.cli_cmds.console import code_print, console, logger, progress_bar
 from codeflash.code_utils import env_utils
 from codeflash.code_utils.code_replacer import replace_function_definitions_in_module
@@ -263,6 +264,13 @@ def optimize_function(self) -> Result[BestOptimization, str]:
                 best_optimization.candidate.explanation, title="Best Candidate Explanation", border_style="blue"
             )
         )
+        processed_benchmark_info = None
+        if self.args.benchmark:
+            processed_benchmark_info = process_benchmark_data(
+                replay_performance_gain=best_optimization.replay_performance_gain,
+                fto_benchmark_timings=self.function_benchmark_timings,
+                total_benchmark_timings=self.total_benchmark_timings
+            )
         explanation = Explanation(
             raw_explanation_message=best_optimization.candidate.explanation,
             winning_behavioral_test_results=best_optimization.winning_behavioral_test_results,
@@ -271,9 +279,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
             best_runtime_ns=best_optimization.runtime,
             function_name=function_to_optimize_qualified_name,
             file_path=self.function_to_optimize.file_path,
-            replay_performance_gain=best_optimization.replay_performance_gain if self.args.benchmark else None,
-            fto_benchmark_timings = self.function_benchmark_timings if self.args.benchmark else None,
-            total_benchmark_timings = self.total_benchmark_timings if self.args.benchmark else None,
+            benchmark_details=processed_benchmark_info.benchmark_details if processed_benchmark_info else None
         )

         self.log_successful_optimization(explanation, generated_tests)

codeflash/result/create_pr.py

Lines changed: 2 additions & 0 deletions
@@ -77,6 +77,7 @@ def check_create_pr(
                 speedup_pct=explanation.speedup_pct,
                 winning_behavioral_test_results=explanation.winning_behavioral_test_results,
                 winning_benchmarking_test_results=explanation.winning_benchmarking_test_results,
+                benchmark_details=explanation.benchmark_details
             ),
             existing_tests=existing_tests_source,
             generated_tests=generated_original_test_source,
@@ -123,6 +124,7 @@ def check_create_pr(
                 speedup_pct=explanation.speedup_pct,
                 winning_behavioral_test_results=explanation.winning_behavioral_test_results,
                 winning_benchmarking_test_results=explanation.winning_benchmarking_test_results,
+                benchmark_details=explanation.benchmark_details
             ),
             existing_tests=existing_tests_source,
             generated_tests=generated_original_test_source,

codeflash/result/explanation.py

Lines changed: 8 additions & 25 deletions
@@ -5,6 +5,7 @@
 from pydantic.dataclasses import dataclass

 from codeflash.code_utils.time_utils import humanize_runtime
+from codeflash.models.models import BenchmarkDetail
 from codeflash.verification.test_results import TestResults


@@ -17,9 +18,7 @@ class Explanation:
     best_runtime_ns: int
     function_name: str
     file_path: Path
-    replay_performance_gain: Optional[float]
-    fto_benchmark_timings: Optional[dict[str, int]]
-    total_benchmark_timings: Optional[dict[str, int]]
+    benchmark_details: Optional[list[BenchmarkDetail]] = None

     @property
     def perf_improvement_line(self) -> str:
@@ -43,29 +42,13 @@ def to_console_string(self) -> str:
         original_runtime_human = humanize_runtime(self.original_runtime_ns)
         best_runtime_human = humanize_runtime(self.best_runtime_ns)
         benchmark_info = ""
-        if self.replay_performance_gain and self.fto_benchmark_timings and self.total_benchmark_timings:
-            benchmark_info += "Benchmark Performance Details:\n"
-            for benchmark_key, og_benchmark_timing in self.fto_benchmark_timings.items():
-                # benchmark key is benchmark filename :: benchmark test function :: line number
-                try:
-                    benchmark_file_name, benchmark_test_function, line_number = benchmark_key.split("::")
-                except ValueError:
-                    benchmark_info += f"Benchmark key {benchmark_key} is not in the expected format.\n"
-                    continue

-                total_benchmark_timing = self.total_benchmark_timings[benchmark_key]
-                if total_benchmark_timing == 0:
-                    benchmark_info += f"Benchmark timing for {benchmark_file_name}::{benchmark_test_function} was improved, but the speedup cannot be estimated.\n"
-                else:
-                    # find out expected new benchmark timing, then calculate how much total benchmark was sped up. print out intermediate values
-                    benchmark_info += f"Original timing for {benchmark_file_name}::{benchmark_test_function}: {humanize_runtime(total_benchmark_timing)}\n"
-                    replay_speedup = self.replay_performance_gain
-                    expected_new_benchmark_timing = total_benchmark_timing - og_benchmark_timing + 1 / (
-                        replay_speedup + 1) * og_benchmark_timing
-                    benchmark_info += f"Expected new timing for {benchmark_file_name}::{benchmark_test_function}: {humanize_runtime(int(expected_new_benchmark_timing))}\n"
-                    benchmark_speedup_ratio = total_benchmark_timing / expected_new_benchmark_timing
-                    benchmark_speedup_percent = (benchmark_speedup_ratio - 1) * 100
-                    benchmark_info += f"Benchmark speedup for {benchmark_file_name}::{benchmark_test_function}: {benchmark_speedup_percent:.2f}%\n\n"
+        if self.benchmark_details:
+            benchmark_info += "Benchmark Performance Details:\n"
+            for detail in self.benchmark_details:
+                benchmark_info += f"Original timing for {detail.benchmark_name}::{detail.test_function}: {detail.original_timing}\n"
+                benchmark_info += f"Expected new timing for {detail.benchmark_name}::{detail.test_function}: {detail.expected_new_timing}\n"
+                benchmark_info += f"Benchmark speedup for {detail.benchmark_name}::{detail.test_function}: {detail.speedup_percent:.2f}%\n\n"

         return (
             f"Optimized {self.function_name} in {self.file_path}\n"
