Complete async throughput measurement support

KRRT7 · KRRT7 · commit 669e22ab026e · 2025-09-26T14:32:37.000-07:00
- Add async throughput fields to Explanation dataclass
- Implement throughput-based performance improvement calculation
- Add MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD configuration constant
- Update explanation logic to prefer throughput metrics for async functions
- Restore LSP compatibility with conditional test result display
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
@@ -3,6 +3,7 @@
 MAX_FUNCTION_TEST_SECONDS = 60
 N_CANDIDATES = 5
 MIN_IMPROVEMENT_THRESHOLD = 0.05
+MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10  # minimum relative async throughput gain (0.10 = 10%) to count as improved
 MAX_TEST_FUNCTION_RUNS = 50
 MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6  # 100ms
 N_TESTS_TO_GENERATE = 2
diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py
@@ -8,8 +8,9 @@
     COVERAGE_THRESHOLD,
     MIN_IMPROVEMENT_THRESHOLD,
     MIN_TESTCASE_PASSED_THRESHOLD,
+    MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD,
 )
-from codeflash.models.test_type import TestType
+from codeflash.models.models import TestType
 
 if TYPE_CHECKING:
     from codeflash.models.models import CoverageData, OptimizedCandidateResult, OriginalCodeBaseline
@@ -25,31 +26,73 @@ def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) ->
     return (original_runtime_ns - optimized_runtime_ns) / optimized_runtime_ns
 
 
+def throughput_gain(*, original_throughput: int, optimized_throughput: int) -> float:
+    """Return the relative throughput change of the optimized code versus the original code.
+
+    Multiply the result by 100 to get a percentage. Higher throughput is better, so a positive
+    value means the optimized code is an improvement. A zero baseline yields 0.0.
+    """
+    if original_throughput != 0:
+        return (optimized_throughput - original_throughput) / original_throughput
+    return 0.0
+
+
 def speedup_critic(
     candidate_result: OptimizedCandidateResult,
     original_code_runtime: int,
     best_runtime_until_now: int | None,
     *,
     disable_gh_action_noise: bool = False,
+    original_async_throughput: int | None = None,
+    best_throughput_until_now: int | None = None,
 ) -> bool:
     """Take in a correct optimized Test Result and decide if the optimization should actually be surfaced to the user.
 
-    Ensure that the optimization is actually faster than the original code, above the noise floor.
-    The noise floor is a function of the original code runtime. Currently, the noise floor is 2xMIN_IMPROVEMENT_THRESHOLD
-    when the original runtime is less than 10 microseconds, and becomes MIN_IMPROVEMENT_THRESHOLD for any higher runtime.
-    The noise floor is doubled when benchmarking on a (noisy) GitHub Action virtual instance, also we want to be more confident there.
+    Evaluates both runtime performance and async throughput improvements.
+
+    For runtime performance:
+    - Ensures the optimization is actually faster than the original code, above the noise floor.
+    - The noise floor is a function of the original code runtime. Currently, the noise floor is 3xMIN_IMPROVEMENT_THRESHOLD
+      when the original runtime is less than 10 microseconds, and becomes MIN_IMPROVEMENT_THRESHOLD for any higher runtime.
+    - The noise floor is doubled when env_utils.is_ci() reports a CI run, unless disable_gh_action_noise is set.
+    - The candidate must also be faster than best_runtime_until_now, when one is given.
+
+    For async throughput (only when both original_async_throughput and candidate_result.async_throughput are set):
+    - The gain must exceed MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD; a non-positive baseline never counts as a gain.
+    - The candidate must also beat best_throughput_until_now, when one is given.
+    - The candidate is accepted if EITHER the throughput criteria OR the runtime criteria are met.
+
+    Without throughput data, only the runtime criteria decide.
     """
+    # Runtime performance evaluation
     noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
     if not disable_gh_action_noise and env_utils.is_ci():
         noise_floor = noise_floor * 2  # Increase the noise floor in GitHub Actions mode
 
     perf_gain = performance_gain(
         original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
     )
-    if best_runtime_until_now is None:
-        # collect all optimizations with this
-        return bool(perf_gain > noise_floor)
-    return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now)
+    runtime_improved = perf_gain > noise_floor
+    # Only candidates that beat the best runtime seen so far (if any) count as a runtime win
+    runtime_is_best = best_runtime_until_now is None or candidate_result.best_test_runtime < best_runtime_until_now
+    runtime_acceptance = bool(runtime_improved and runtime_is_best)
+
+    candidate_throughput = candidate_result.async_throughput
+    if original_async_throughput is None or candidate_throughput is None:
+        # Throughput data is missing on at least one side: decide on runtime alone
+        return runtime_acceptance
+
+    # A non-positive baseline gives no meaningful ratio, so it must not be treated as an improvement
+    throughput_gain_value = throughput_gain(
+        original_throughput=original_async_throughput, optimized_throughput=candidate_throughput
+    )
+    throughput_improved = (
+        original_async_throughput > 0 and throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD
+    )
+    throughput_is_best = best_throughput_until_now is None or candidate_throughput > best_throughput_until_now
+
+    # When throughput data is available, accept if EITHER throughput OR runtime improves significantly
+    return bool(throughput_improved and throughput_is_best) or runtime_acceptance
 
 
 def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult | OriginalCodeBaseline) -> bool:
diff --git a/codeflash/result/explanation.py b/codeflash/result/explanation.py
@@ -12,6 +12,7 @@
 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.lsp.helpers import is_LSP_enabled
 from codeflash.models.models import BenchmarkDetail, TestResults
+from codeflash.result.critic import performance_gain, throughput_gain
 
 
 @dataclass(frozen=True, config={"arbitrary_types_allowed": True})
@@ -24,9 +25,28 @@ class Explanation:
     function_name: str
     file_path: Path
     benchmark_details: Optional[list[BenchmarkDetail]] = None
+    original_async_throughput: Optional[int] = None
+    best_async_throughput: Optional[int] = None
 
     @property
     def perf_improvement_line(self) -> str:
+        """Return the one-line headline describing how much the optimization helped.
+
+        Throughput figures are reported when async throughput was recorded for both versions
+        (with a positive baseline) and they either beat the runtime speedup or the runtime did
+        not improve; otherwise the runtime speedup is reported.
+        """
+        runtime_gain = self.speedup
+        baseline, best = self.original_async_throughput, self.best_async_throughput
+
+        if baseline is not None and best is not None and baseline > 0:
+            gain = throughput_gain(original_throughput=baseline, optimized_throughput=best)
+            # Throughput wins when it improved more than runtime did, or when runtime did not improve at all
+            if runtime_gain <= 0 or gain > runtime_gain:
+                pct, ratio = f"{gain * 100:,.0f}%", f"{gain + 1:,.2f}x"
+                return f"{pct} improvement ({ratio} faster)."
+
+        # Fall back to the runtime speedup
         return f"{self.speedup_pct} improvement ({self.speedup_x} faster)."
 
     @property
@@ -46,6 +66,23 @@ def __str__(self) -> str:
         # TODO: Sometimes the explanation says something similar to "This is the code that was optimized", remove such parts
         original_runtime_human = humanize_runtime(self.original_runtime_ns)
         best_runtime_human = humanize_runtime(self.best_runtime_ns)
+
+        # Determine if we're showing throughput or runtime improvements
+        runtime_improvement = self.speedup
+        is_using_throughput_metric = False
+
+        if (
+            self.original_async_throughput is not None
+            and self.best_async_throughput is not None
+            and self.original_async_throughput > 0
+        ):
+            throughput_improvement = throughput_gain(
+                original_throughput=self.original_async_throughput, optimized_throughput=self.best_async_throughput
+            )
+
+            if throughput_improvement > runtime_improvement or runtime_improvement <= 0:
+                is_using_throughput_metric = True
+
         benchmark_info = ""
 
         if self.benchmark_details:
@@ -86,13 +123,18 @@ def __str__(self) -> str:
             console.print(table)
             benchmark_info = cast("StringIO", console.file).getvalue() + "\n"  # Cast for mypy
 
-        test_report = self.winning_behavior_test_results.get_test_pass_fail_report_by_type()
-        test_report_str = TestResults.report_to_string(test_report)
+        if is_using_throughput_metric:
+            performance_description = (
+                f"Throughput improved from {self.original_async_throughput} to {self.best_async_throughput} operations/second "
+                f"(runtime: {original_runtime_human} → {best_runtime_human})\n\n"
+            )
+        else:
+            performance_description = f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n"
 
         return (
             f"Optimized {self.function_name} in {self.file_path}\n"
             f"{self.perf_improvement_line}\n"
-            f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n"
+            + performance_description
             + (benchmark_info if benchmark_info else "")
             + self.raw_explanation_message
             + " \n\n"
@@ -101,7 +143,7 @@ def __str__(self) -> str:
                 ""
                 if is_LSP_enabled()
                 else "The new optimized code was tested for correctness. The results are listed below.\n"
-                + test_report_str
+                + f"{TestResults.report_to_string(self.winning_behavior_test_results.get_test_pass_fail_report_by_type())}\n"
             )
         )