Skip to content

Commit 669e22a

Browse files
committed
Complete async throughput measurement support
- Add async throughput fields to Explanation dataclass
- Implement throughput-based performance improvement calculation
- Add MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD configuration constant
- Update explanation logic to prefer throughput metrics for async functions
- Restore LSP compatibility with conditional test result display
1 parent 759fdb1 commit 669e22a

File tree

3 files changed

+99
-13
lines changed

3 files changed

+99
-13
lines changed

codeflash/code_utils/config_consts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
MAX_FUNCTION_TEST_SECONDS = 60
44
N_CANDIDATES = 5
55
MIN_IMPROVEMENT_THRESHOLD = 0.05
6+
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput
67
MAX_TEST_FUNCTION_RUNS = 50
78
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
89
N_TESTS_TO_GENERATE = 2

codeflash/result/critic.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
COVERAGE_THRESHOLD,
99
MIN_IMPROVEMENT_THRESHOLD,
1010
MIN_TESTCASE_PASSED_THRESHOLD,
11+
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD,
1112
)
12-
from codeflash.models.test_type import TestType
13+
from codeflash.models.models import TestType
1314

1415
if TYPE_CHECKING:
1516
from codeflash.models.models import CoverageData, OptimizedCandidateResult, OriginalCodeBaseline
@@ -25,31 +26,73 @@ def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) ->
2526
return (original_runtime_ns - optimized_runtime_ns) / optimized_runtime_ns
2627

2728

29+
def throughput_gain(*, original_throughput: int, optimized_throughput: int) -> float:
    """Calculate the throughput gain of an optimized code over the original code.

    This value multiplied by 100 gives the percentage improvement in throughput.
    For throughput, higher values are better (more executions per time period).

    Args:
        original_throughput: Throughput measured for the original code.
        optimized_throughput: Throughput measured for the optimized code.

    Returns:
        The relative change ``(optimized - original) / original``. It is positive
        when the optimized code has the higher throughput and negative when it
        regressed. Returns ``0.0`` when the baseline is not positive, because no
        meaningful ratio can be computed against it.
    """
    # A zero baseline would divide by zero, and a negative one would flip the sign
    # of the result, so both are reported as "no measurable gain".
    if original_throughput <= 0:
        return 0.0
    return (optimized_throughput - original_throughput) / original_throughput
38+
39+
2840
def speedup_critic(
    candidate_result: OptimizedCandidateResult,
    original_code_runtime: int,
    best_runtime_until_now: int | None,
    *,
    disable_gh_action_noise: bool = False,
    original_async_throughput: int | None = None,
    best_throughput_until_now: int | None = None,
) -> bool:
    """Take in a correct optimized Test Result and decide if the optimization should actually be surfaced to the user.

    Evaluates both runtime performance and async throughput improvements.

    For runtime performance:
    - Ensures the optimization is actually faster than the original code, above the noise floor.
    - The noise floor is a function of the original code runtime. Currently, the noise floor is
      3xMIN_IMPROVEMENT_THRESHOLD when the original runtime is less than 10 microseconds, and becomes
      MIN_IMPROVEMENT_THRESHOLD for any higher runtime.
    - The noise floor is doubled when running in CI (unless ``disable_gh_action_noise`` is set).

    For async throughput (when available):
    - Evaluates throughput improvements using MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD.
    - Throughput is only considered when both the original and the candidate throughput are known
      and the original throughput is positive; otherwise the decision is based on runtime alone.

    Args:
        candidate_result: Result of the candidate; ``best_test_runtime`` and ``async_throughput`` are read.
        original_code_runtime: Runtime of the original code in nanoseconds.
        best_runtime_until_now: Best candidate runtime seen so far, or None if there is none yet.
        disable_gh_action_noise: When True, the CI doubling of the noise floor is skipped.
        original_async_throughput: Throughput of the original code, or None if not measured.
        best_throughput_until_now: Best candidate throughput seen so far, or None if there is none yet.

    Returns:
        True if the candidate beats the original by more than the applicable threshold and is also
        better than the best candidate so far, on runtime or (when usable) on throughput.
    """
    # Runtime performance evaluation
    noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
    if not disable_gh_action_noise and env_utils.is_ci():
        noise_floor = noise_floor * 2  # Increase the noise floor in GitHub Actions mode

    perf_gain = performance_gain(
        original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
    )
    runtime_improved = perf_gain > noise_floor
    runtime_is_best = best_runtime_until_now is None or candidate_result.best_test_runtime < best_runtime_until_now
    runtime_acceptance = bool(runtime_improved and runtime_is_best)

    # Without throughput measurements on both sides, only runtime can be judged.
    if original_async_throughput is None or candidate_result.async_throughput is None:
        return runtime_acceptance

    # A non-positive baseline gives no meaningful ratio. Treating it as an automatic
    # throughput win would surface candidates with no measured gain, so fall back to runtime.
    if original_async_throughput <= 0:
        return runtime_acceptance

    throughput_gain_value = throughput_gain(
        original_throughput=original_async_throughput, optimized_throughput=candidate_result.async_throughput
    )
    throughput_improved = throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD
    throughput_is_best = (
        best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now
    )

    # When throughput data is available, accept if EITHER throughput OR runtime improves significantly
    throughput_acceptance = bool(throughput_improved and throughput_is_best)
    return throughput_acceptance or runtime_acceptance
5396

5497

5598
def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult | OriginalCodeBaseline) -> bool:

codeflash/result/explanation.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from codeflash.code_utils.time_utils import humanize_runtime
1313
from codeflash.lsp.helpers import is_LSP_enabled
1414
from codeflash.models.models import BenchmarkDetail, TestResults
15+
from codeflash.result.critic import performance_gain, throughput_gain
1516

1617

1718
@dataclass(frozen=True, config={"arbitrary_types_allowed": True})
@@ -24,9 +25,28 @@ class Explanation:
2425
function_name: str
2526
file_path: Path
2627
benchmark_details: Optional[list[BenchmarkDetail]] = None
28+
original_async_throughput: Optional[int] = None
29+
best_async_throughput: Optional[int] = None
2730

2831
@property
def perf_improvement_line(self) -> str:
    """Return the one-line improvement summary shown in the explanation.

    The line reports the async throughput gain when throughput figures are
    present (with a positive original throughput) and either that gain exceeds
    the runtime speedup or the runtime speedup is not positive. In every other
    case it reports the runtime speedup.
    """
    runtime_gain = self.speedup
    baseline = self.original_async_throughput
    best = self.best_async_throughput

    # Guard clause: without a usable pair of throughput figures, report runtime.
    if baseline is None or best is None or baseline <= 0:
        return f"{self.speedup_pct} improvement ({self.speedup_x} faster)."

    gain = throughput_gain(original_throughput=baseline, optimized_throughput=best)

    # Use throughput metrics if throughput improvement is better or runtime got worse
    if gain > runtime_gain or runtime_gain <= 0:
        gain_pct = f"{gain * 100:,.0f}%"
        gain_x = f"{gain + 1:,.2f}x"
        return f"{gain_pct} improvement ({gain_x} faster)."

    return f"{self.speedup_pct} improvement ({self.speedup_x} faster)."
3151

3252
@property
@@ -46,6 +66,23 @@ def __str__(self) -> str:
4666
# TODO: Sometimes the explanation says something similar to "This is the code that was optimized", remove such parts
4767
original_runtime_human = humanize_runtime(self.original_runtime_ns)
4868
best_runtime_human = humanize_runtime(self.best_runtime_ns)
69+
70+
# Determine if we're showing throughput or runtime improvements
71+
runtime_improvement = self.speedup
72+
is_using_throughput_metric = False
73+
74+
if (
75+
self.original_async_throughput is not None
76+
and self.best_async_throughput is not None
77+
and self.original_async_throughput > 0
78+
):
79+
throughput_improvement = throughput_gain(
80+
original_throughput=self.original_async_throughput, optimized_throughput=self.best_async_throughput
81+
)
82+
83+
if throughput_improvement > runtime_improvement or runtime_improvement <= 0:
84+
is_using_throughput_metric = True
85+
4986
benchmark_info = ""
5087

5188
if self.benchmark_details:
@@ -86,13 +123,18 @@ def __str__(self) -> str:
86123
console.print(table)
87124
benchmark_info = cast("StringIO", console.file).getvalue() + "\n" # Cast for mypy
88125

89-
test_report = self.winning_behavior_test_results.get_test_pass_fail_report_by_type()
90-
test_report_str = TestResults.report_to_string(test_report)
126+
if is_using_throughput_metric:
127+
performance_description = (
128+
f"Throughput improved from {self.original_async_throughput} to {self.best_async_throughput} operations/second "
129+
f"(runtime: {original_runtime_human}{best_runtime_human})\n\n"
130+
)
131+
else:
132+
performance_description = f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n"
91133

92134
return (
93135
f"Optimized {self.function_name} in {self.file_path}\n"
94136
f"{self.perf_improvement_line}\n"
95-
f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n"
137+
+ performance_description
96138
+ (benchmark_info if benchmark_info else "")
97139
+ self.raw_explanation_message
98140
+ " \n\n"
@@ -101,7 +143,7 @@ def __str__(self) -> str:
101143
""
102144
if is_LSP_enabled()
103145
else "The new optimized code was tested for correctness. The results are listed below.\n"
104-
+ test_report_str
146+
+ f"{TestResults.report_to_string(self.winning_behavior_test_results.get_test_pass_fail_report_by_type())}\n"
105147
)
106148
)
107149

0 commit comments

Comments
 (0)