Revert "basic async critic - WIP"

KRRT7 · KRRT7 · commit ce421d43ceaa · 2025-09-08T17:11:32.000-07:00
This reverts commit a364075.
diff --git a/codeflash/code_utils/codeflash_wrap_decorator.py b/codeflash/code_utils/codeflash_wrap_decorator.py
@@ -299,12 +299,6 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
         else:
             async_wrapper.index[test_id] = 0
 
-        # Initialize cumulative throughput tracking
-        if not hasattr(async_wrapper, "start_time"):
-            async_wrapper.start_time = time.perf_counter()
-        if not hasattr(async_wrapper, "total_operations"):
-            async_wrapper.total_operations = 0
-
         codeflash_test_index = async_wrapper.index[test_id]
         invocation_id = f"{line_id}_{codeflash_test_index}"
         test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}"
@@ -325,12 +319,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
         finally:
             gc.enable()
 
-        # Update cumulative throughput tracking
-        async_wrapper.total_operations += 1
-        elapsed_time = time.perf_counter() - async_wrapper.start_time
-        throughput = async_wrapper.total_operations / elapsed_time if elapsed_time > 0 else 0
-
-        print(f"!######{test_stdout_tag}:{codeflash_duration}:throughput_{throughput:.2f}_ops_per_sec######!")
+        print(f"!######{test_stdout_tag}:{codeflash_duration}######!")
 
         if exception:
             raise exception
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -552,7 +552,6 @@ class FunctionTestInvocation:
     timed_out: Optional[bool]
     verification_type: Optional[str] = VerificationType.FUNCTION_CALL
     stdout: Optional[str] = None
-    throughput: Optional[float] = None  # Operations per second
 
     @property
     def unique_invocation_loop_id(self) -> str:
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -566,11 +566,7 @@ def determine_best_candidate(
                     tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
                     benchmark_tree = None
                     if speedup_critic(
-                        candidate_result, 
-                        original_code_baseline.runtime, 
-                        None, 
-                        self.function_to_optimize,
-                        original_baseline_results=original_code_baseline
+                        candidate_result, original_code_baseline.runtime, best_runtime_until_now=None
                     ) and quantity_of_tests_critic(candidate_result):
                         tree.add("This candidate is faster than the original code. 🚀")  # TODO: Change this description
                         tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}")
diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py
@@ -12,8 +12,7 @@
 from codeflash.models.models import TestType
 
 if TYPE_CHECKING:
-    from codeflash.discovery.functions_to_optimize import FunctionToOptimize
-    from codeflash.models.models import CoverageData, OptimizedCandidateResult, OriginalCodeBaseline, TestResults
+    from codeflash.models.models import CoverageData, OptimizedCandidateResult, OriginalCodeBaseline
 
 
 def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
@@ -30,29 +29,16 @@ def speedup_critic(
     candidate_result: OptimizedCandidateResult,
     original_code_runtime: int,
     best_runtime_until_now: int | None,
-    function_to_optimize: FunctionToOptimize,
     *,
     disable_gh_action_noise: bool = False,
-    original_baseline_results: OriginalCodeBaseline | None = None,
 ) -> bool:
     """Take in a correct optimized Test Result and decide if the optimization should actually be surfaced to the user.
 
-    For async functions, dispatches to async_speedup_critic for specialized evaluation.
-    For sync functions, uses traditional runtime-only evaluation.
-
     Ensure that the optimization is actually faster than the original code, above the noise floor.
     The noise floor is a function of the original code runtime. Currently, the noise floor is 2xMIN_IMPROVEMENT_THRESHOLD
     when the original runtime is less than 10 microseconds, and becomes MIN_IMPROVEMENT_THRESHOLD for any higher runtime.
     The noise floor is doubled when benchmarking on a (noisy) GitHub Action virtual instance, also we want to be more confident there.
     """
-    if function_to_optimize.is_async and original_baseline_results:
-        return async_speedup_critic(
-            candidate_result=candidate_result,
-            original_baseline_results=original_baseline_results,
-            best_runtime_until_now=best_runtime_until_now,
-            disable_gh_action_noise=disable_gh_action_noise,
-        )
-
     noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
     if not disable_gh_action_noise and env_utils.is_ci():
         noise_floor = noise_floor * 2  # Increase the noise floor in GitHub Actions mode
@@ -61,62 +47,11 @@ def speedup_critic(
         original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
     )
     if best_runtime_until_now is None:
+        # collect all optimizations with this
         return bool(perf_gain > noise_floor)
     return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now)
 
 
-def async_speedup_critic(
-    candidate_result: OptimizedCandidateResult,
-    original_baseline_results: OriginalCodeBaseline,
-    best_runtime_until_now: int | None,
-    *,
-    disable_gh_action_noise: bool = False,
-) -> bool:
-    """Simplified speedup evaluation for async functions with throughput-first approach.
-
-    For async functions:
-    1. If throughput data exists and shows improvement, accept the optimization
-    2. Otherwise, fall back to traditional runtime evaluation
-    """
-    # Calculate noise floor with same logic as sync functions
-    noise_floor = (
-        3 * MIN_IMPROVEMENT_THRESHOLD if original_baseline_results.runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
-    )
-    if not disable_gh_action_noise and env_utils.is_ci():
-        noise_floor = noise_floor * 2  # Increase the noise floor in GitHub Actions mode
-
-    # Check for throughput improvement first
-    candidate_throughput = _calculate_average_throughput(candidate_result.benchmarking_test_results)
-    original_throughput = _calculate_average_throughput(original_baseline_results.benchmarking_test_results)
-
-    if original_throughput and original_throughput > 0 and candidate_throughput:
-        throughput_gain = (candidate_throughput - original_throughput) / original_throughput
-        if throughput_gain > noise_floor:
-            # Throughput improved above noise floor - accept optimization
-            return True if best_runtime_until_now is None else candidate_result.best_test_runtime < best_runtime_until_now
-
-    # Fall back to traditional runtime evaluation
-    perf_gain = performance_gain(
-        original_runtime_ns=original_baseline_results.runtime, optimized_runtime_ns=candidate_result.best_test_runtime
-    )
-
-    if best_runtime_until_now is None:
-        return bool(perf_gain > noise_floor)
-    return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now)
-
-
-def _calculate_average_throughput(test_results: TestResults) -> float | None:
-    """Calculate average throughput from test results that have throughput data."""
-    throughput_values = [
-        result.throughput for result in test_results.test_results if result.throughput is not None and result.did_pass
-    ]
-
-    if not throughput_values:
-        return None
-
-    return sum(throughput_values) / len(throughput_values)
-
-
 def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult | OriginalCodeBaseline) -> bool:
     test_results = candidate_result.behavior_test_results
     report = test_results.get_test_pass_fail_report_by_type()
diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py
@@ -37,7 +37,7 @@ def parse_func(file_path: Path) -> XMLParser:
 
 
 matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n")
-matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)(?::throughput_([\d\.]+)_ops_per_sec)?######!")
+matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
 
 
 def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:
@@ -93,7 +93,6 @@ def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, tes
                         return_value=test_pickle,
                         timed_out=False,
                         verification_type=VerificationType.FUNCTION_CALL,
-                        throughput=None,
                     )
                 )
         except Exception as e:
@@ -161,7 +160,6 @@ def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, tes
                     return_value=ret_val,
                     timed_out=False,
                     verification_type=VerificationType(verification_type) if verification_type else None,
-                    throughput=None,
                 )
             )
         except Exception:
@@ -295,27 +293,22 @@ def parse_test_xml(
                         return_value=None,
                         timed_out=timed_out,
                         stdout="",
-                        throughput=None,
                     )
                 )
 
             else:
                 for match_index, match in enumerate(begin_matches):
                     groups = match.groups()
                     end_match = end_matches.get(groups)
-                    iteration_id, runtime, throughput = groups[5], None, None
+                    iteration_id, runtime = groups[5], None
                     if end_match:
                         stdout = sys_stdout[match.end() : end_match.start()]
-                        end_groups = end_match.groups()
-                        split_val = end_groups[5].split(":")
+                        split_val = end_match.groups()[5].split(":")
                         if len(split_val) > 1:
                             iteration_id = split_val[0]
                             runtime = int(split_val[1])
                         else:
                             iteration_id, runtime = split_val[0], None
-                        # Extract throughput if present (group 6 is the throughput capture group)
-                        if len(end_groups) > 6 and end_groups[6] is not None:
-                            throughput = float(end_groups[6])
                     elif match_index == len(begin_matches) - 1:
                         stdout = sys_stdout[match.end() :]
                     else:
@@ -339,7 +332,6 @@ def parse_test_xml(
                             return_value=None,
                             timed_out=timed_out,
                             stdout=stdout,
-                            throughput=throughput,
                         )
                     )
 
@@ -428,7 +420,6 @@ def merge_test_results(
                         if result_bin.verification_type
                         else None,
                         stdout=xml_result.stdout,
-                        throughput=None,
                     )
                 )
         elif xml_results.test_results[0].id.iteration_id is not None:
@@ -459,7 +450,6 @@ def merge_test_results(
                         if bin_result.verification_type
                         else None,
                         stdout=xml_result.stdout,
-                        throughput=None,
                     )
                 )
         else:
@@ -487,7 +477,6 @@ def merge_test_results(
                         if bin_result.verification_type
                         else None,
                         stdout=xml_result.stdout,
-                        throughput=None,
                     )
                 )
 

Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ def parse_func(file_path: Path) -> XMLParser:`
`37`	`37`
`38`	`38`
`39`	`39`	`matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n")`
`40`		`-matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)(?::throughput_([\d\.]+)_ops_per_sec)?######!")`
	`40`	`+matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")`
`41`	`41`
`42`	`42`
`43`	`43`	`def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults:`
`@@ -93,7 +93,6 @@ def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, tes`
`93`	`93`	`return_value=test_pickle,`
`94`	`94`	`timed_out=False,`
`95`	`95`	`verification_type=VerificationType.FUNCTION_CALL,`
`96`		`- throughput=None,`
`97`	`96`	`)`
`98`	`97`	`)`
`99`	`98`	`except Exception as e:`
`@@ -161,7 +160,6 @@ def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, tes`
`161`	`160`	`return_value=ret_val,`
`162`	`161`	`timed_out=False,`
`163`	`162`	`verification_type=VerificationType(verification_type) if verification_type else None,`
`164`		`- throughput=None,`
`165`	`163`	`)`
`166`	`164`	`)`
`167`	`165`	`except Exception:`
`@@ -295,27 +293,22 @@ def parse_test_xml(`
`295`	`293`	`return_value=None,`
`296`	`294`	`timed_out=timed_out,`
`297`	`295`	`stdout="",`
`298`		`- throughput=None,`
`299`	`296`	`)`
`300`	`297`	`)`
`301`	`298`
`302`	`299`	`else:`
`303`	`300`	`for match_index, match in enumerate(begin_matches):`
`304`	`301`	`groups = match.groups()`
`305`	`302`	`end_match = end_matches.get(groups)`
`306`		`- iteration_id, runtime, throughput = groups[5], None, None`
	`303`	`+ iteration_id, runtime = groups[5], None`
`307`	`304`	`if end_match:`
`308`	`305`	`stdout = sys_stdout[match.end() : end_match.start()]`
`309`		`- end_groups = end_match.groups()`
`310`		`- split_val = end_groups[5].split(":")`
	`306`	`+ split_val = end_match.groups()[5].split(":")`
`311`	`307`	`if len(split_val) > 1:`
`312`	`308`	`iteration_id = split_val[0]`
`313`	`309`	`runtime = int(split_val[1])`
`314`	`310`	`else:`
`315`	`311`	`iteration_id, runtime = split_val[0], None`
`316`		`- # Extract throughput if present (group 6 is the throughput capture group)`
`317`		`- if len(end_groups) > 6 and end_groups[6] is not None:`
`318`		`- throughput = float(end_groups[6])`
`319`	`312`	`elif match_index == len(begin_matches) - 1:`
`320`	`313`	`stdout = sys_stdout[match.end() :]`
`321`	`314`	`else:`
`@@ -339,7 +332,6 @@ def parse_test_xml(`
`339`	`332`	`return_value=None,`
`340`	`333`	`timed_out=timed_out,`
`341`	`334`	`stdout=stdout,`
`342`		`- throughput=throughput,`
`343`	`335`	`)`
`344`	`336`	`)`
`345`	`337`
`@@ -428,7 +420,6 @@ def merge_test_results(`
`428`	`420`	`if result_bin.verification_type`
`429`	`421`	`else None,`
`430`	`422`	`stdout=xml_result.stdout,`
`431`		`- throughput=None,`
`432`	`423`	`)`
`433`	`424`	`)`
`434`	`425`	`elif xml_results.test_results[0].id.iteration_id is not None:`
`@@ -459,7 +450,6 @@ def merge_test_results(`
`459`	`450`	`if bin_result.verification_type`
`460`	`451`	`else None,`
`461`	`452`	`stdout=xml_result.stdout,`
`462`		`- throughput=None,`
`463`	`453`	`)`
`464`	`454`	`)`
`465`	`455`	`else:`
`@@ -487,7 +477,6 @@ def merge_test_results(`
`487`	`477`	`if bin_result.verification_type`
`488`	`478`	`else None,`
`489`	`479`	`stdout=xml_result.stdout,`
`490`		`- throughput=None,`
`491`	`480`	`)`
`492`	`481`	`)`
`493`	`482`