1212from codeflash .models .models import TestType
1313
1414if TYPE_CHECKING :
15- from codeflash .models .models import CoverageData , OptimizedCandidateResult , OriginalCodeBaseline
15+ from codeflash .discovery .functions_to_optimize import FunctionToOptimize
16+ from codeflash .models .models import CoverageData , OptimizedCandidateResult , OriginalCodeBaseline , TestResults
1617
1718
1819def performance_gain (* , original_runtime_ns : int , optimized_runtime_ns : int ) -> float :
@@ -29,16 +30,29 @@ def speedup_critic(
2930 candidate_result : OptimizedCandidateResult ,
3031 original_code_runtime : int ,
3132 best_runtime_until_now : int | None ,
33+ function_to_optimize : FunctionToOptimize ,
3234 * ,
3335 disable_gh_action_noise : bool = False ,
36+ original_baseline_results : OriginalCodeBaseline | None = None ,
3437) -> bool :
3538 """Take in a correct optimized Test Result and decide if the optimization should actually be surfaced to the user.
3639
40+ For async functions, dispatches to async_speedup_critic for specialized evaluation.
41+ For sync functions, uses traditional runtime-only evaluation.
42+
3743 Ensure that the optimization is actually faster than the original code, above the noise floor.
3945 The noise floor is a function of the original code runtime. Currently, the noise floor is 3xMIN_IMPROVEMENT_THRESHOLD
3945 when the original runtime is less than 10 microseconds, and becomes MIN_IMPROVEMENT_THRESHOLD for any higher runtime.
4046 The noise floor is doubled when benchmarking on a (noisy) GitHub Action virtual instance, also we want to be more confident there.
4147 """
48+ if function_to_optimize .is_async and original_baseline_results :
49+ return async_speedup_critic (
50+ candidate_result = candidate_result ,
51+ original_baseline_results = original_baseline_results ,
52+ best_runtime_until_now = best_runtime_until_now ,
53+ disable_gh_action_noise = disable_gh_action_noise ,
54+ )
55+
4256 noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
4357 if not disable_gh_action_noise and env_utils .is_ci ():
4458 noise_floor = noise_floor * 2 # Increase the noise floor in GitHub Actions mode
@@ -47,11 +61,62 @@ def speedup_critic(
4761 original_runtime_ns = original_code_runtime , optimized_runtime_ns = candidate_result .best_test_runtime
4862 )
4963 if best_runtime_until_now is None :
50- # collect all optimizations with this
5164 return bool (perf_gain > noise_floor )
5265 return bool (perf_gain > noise_floor and candidate_result .best_test_runtime < best_runtime_until_now )
5366
5467
def async_speedup_critic(
    candidate_result: OptimizedCandidateResult,
    original_baseline_results: OriginalCodeBaseline,
    best_runtime_until_now: int | None,
    *,
    disable_gh_action_noise: bool = False,
) -> bool:
    """Decide whether an optimization of an async function should be surfaced.

    Throughput is evaluated first, with runtime as the fallback:

    1. If both the baseline and the candidate yield an average throughput and the
       relative throughput gain exceeds the noise floor, the candidate is accepted,
       provided it is also faster than ``best_runtime_until_now`` when one is given.
    2. Otherwise the relative runtime gain (via ``performance_gain``) must exceed the
       noise floor, and the candidate must be faster than ``best_runtime_until_now``
       when one is given.

    The noise floor is ``3 * MIN_IMPROVEMENT_THRESHOLD`` when the baseline runtime is
    below 10000 ns and ``MIN_IMPROVEMENT_THRESHOLD`` otherwise. It is doubled when
    ``env_utils.is_ci()`` reports a CI run, unless ``disable_gh_action_noise`` is set.

    Args:
        candidate_result: Results of the optimized candidate.
        original_baseline_results: Results of the original code.
        best_runtime_until_now: Best candidate runtime seen so far, or ``None`` if
            there is no previous accepted candidate to beat.
        disable_gh_action_noise: When true, skip the CI doubling of the noise floor.

    Returns:
        ``True`` if the candidate clears the noise floor (by throughput or runtime)
        and is not slower than the best candidate so far; ``False`` otherwise.

    """
    # Same noise-floor rule as the sync critic, keyed on the baseline runtime.
    noise_floor = (
        3 * MIN_IMPROVEMENT_THRESHOLD if original_baseline_results.runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
    )
    if not disable_gh_action_noise and env_utils.is_ci():
        noise_floor = noise_floor * 2  # Increase the noise floor in GitHub Actions mode

    candidate_throughput = _calculate_average_throughput(candidate_result.benchmarking_test_results)
    original_throughput = _calculate_average_throughput(original_baseline_results.benchmarking_test_results)

    # The baseline must be a positive number to serve as the ratio's denominator; a
    # missing (None) or zero candidate throughput falls through to the runtime check.
    if original_throughput is not None and original_throughput > 0 and candidate_throughput:
        throughput_gain = (candidate_throughput - original_throughput) / original_throughput
        if throughput_gain > noise_floor:
            # Throughput improved above the noise floor; still require the candidate
            # to beat the best runtime seen so far, if there is one.
            return bool(
                best_runtime_until_now is None or candidate_result.best_test_runtime < best_runtime_until_now
            )

    # Fall back to traditional runtime evaluation.
    perf_gain = performance_gain(
        original_runtime_ns=original_baseline_results.runtime, optimized_runtime_ns=candidate_result.best_test_runtime
    )
    return bool(
        perf_gain > noise_floor
        and (best_runtime_until_now is None or candidate_result.best_test_runtime < best_runtime_until_now)
    )
106+
107+
def _calculate_average_throughput(test_results: TestResults) -> float | None:
    """Return the mean throughput of the passing results that report one.

    Entries of ``test_results.test_results`` are ignored when their ``throughput``
    is ``None`` or their ``did_pass`` is falsy.

    Returns:
        The arithmetic mean of the remaining throughput values, or ``None`` when no
        entry qualifies (so callers can tell "no data" apart from a zero average).

    """
    throughput_values = [
        result.throughput
        for result in test_results.test_results
        if result.throughput is not None and result.did_pass
    ]

    if not throughput_values:
        return None

    return sum(throughput_values) / len(throughput_values)
118+
119+
55120def quantity_of_tests_critic (candidate_result : OptimizedCandidateResult | OriginalCodeBaseline ) -> bool :
56121 test_results = candidate_result .behavior_test_results
57122 report = test_results .get_test_pass_fail_report_by_type ()
0 commit comments