@@ -3,9 +3,10 @@
 from collections import defaultdict
 from typing import TYPE_CHECKING
 
+from pydantic import BaseModel
 from rich.tree import Tree
 
-from codeflash.cli_cmds.console import DEBUG_MODE
+from codeflash.cli_cmds.console import DEBUG_MODE, logger
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
@@ -19,10 +20,10 @@
 from typing import Annotated, Optional, cast
 
 from jedi.api.classes import Name
-from pydantic import AfterValidator, BaseModel, ConfigDict, Field
+from pydantic import AfterValidator, ConfigDict, Field
 from pydantic.dataclasses import dataclass
 
-from codeflash.cli_cmds.console import console, logger
+from codeflash.cli_cmds.console import console
 from codeflash.code_utils.code_utils import module_name_from_file_path, validate_python_code
 from codeflash.code_utils.env_utils import is_end_to_end
 from codeflash.verification.comparator import comparator
@@ -59,24 +60,29 @@ class FunctionSource:
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, FunctionSource):
             return False
-        return (self.file_path == other.file_path and
-                self.qualified_name == other.qualified_name and
-                self.fully_qualified_name == other.fully_qualified_name and
-                self.only_function_name == other.only_function_name and
-                self.source_code == other.source_code)
+        return (
+            self.file_path == other.file_path
+            and self.qualified_name == other.qualified_name
+            and self.fully_qualified_name == other.fully_qualified_name
+            and self.only_function_name == other.only_function_name
+            and self.source_code == other.source_code
+        )
 
     def __hash__(self) -> int:
-        return hash((self.file_path, self.qualified_name, self.fully_qualified_name,
-                     self.only_function_name, self.source_code))
+        return hash(
+            (self.file_path, self.qualified_name, self.fully_qualified_name, self.only_function_name, self.source_code)
+        )
+
 
 class BestOptimization(BaseModel):
     candidate: OptimizedCandidate
     helper_functions: list[FunctionSource]
     runtime: int
-    replay_performance_gain: Optional[dict[BenchmarkKey,float]] = None
+    replay_performance_gain: Optional[dict[BenchmarkKey, float]] = None
     winning_behavioral_test_results: TestResults
     winning_benchmarking_test_results: TestResults
-    winning_replay_benchmarking_test_results: Optional[TestResults] = None
+    winning_replay_benchmarking_test_results: Optional[TestResults] = None
+
 
 @dataclass(frozen=True)
 class BenchmarkKey:
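
A note on the pairing above: because `__eq__` is overridden, `__hash__` must be defined over the same fields so that equal FunctionSource values collapse to a single entry in sets and dict keys. A minimal, hypothetical sketch of that contract (a stand-in class, not the real FunctionSource, which has additional fields):

from dataclasses import dataclass
from pathlib import Path


@dataclass
class MiniSource:  # hypothetical stand-in for FunctionSource
    file_path: Path
    qualified_name: str
    source_code: str

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, MiniSource):
            return False
        # Compare the identity-defining fields, mirroring the diff above.
        return (
            self.file_path == other.file_path
            and self.qualified_name == other.qualified_name
            and self.source_code == other.source_code
        )

    def __hash__(self) -> int:
        # Hash the same field tuple used by __eq__ so equal objects deduplicate.
        return hash((self.file_path, self.qualified_name, self.source_code))


a = MiniSource(Path("m.py"), "f", "def f(): ...")
b = MiniSource(Path("m.py"), "f", "def f(): ...")
assert a == b and len({a, b}) == 1
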
@@ -86,6 +92,7 @@ class BenchmarkKey:
     def __str__(self) -> str:
         return f"{self.module_path}::{self.function_name}"
 
+
 @dataclass
 class BenchmarkDetail:
     benchmark_name: str
@@ -107,9 +114,10 @@ def to_dict(self) -> dict[str, any]:
107114 "test_function" : self .test_function ,
108115 "original_timing" : self .original_timing ,
109116 "expected_new_timing" : self .expected_new_timing ,
110- "speedup_percent" : self .speedup_percent
117+ "speedup_percent" : self .speedup_percent ,
111118 }
112119
120+
113121@dataclass
114122class ProcessedBenchmarkInfo :
115123 benchmark_details : list [BenchmarkDetail ]
@@ -124,9 +132,9 @@ def to_string(self) -> str:
         return result
 
     def to_dict(self) -> dict[str, list[dict[str, any]]]:
-        return {
-            "benchmark_details": [detail.to_dict() for detail in self.benchmark_details]
-        }
+        return {"benchmark_details": [detail.to_dict() for detail in self.benchmark_details]}
+
+
 class CodeString(BaseModel):
     code: Annotated[str, AfterValidator(validate_python_code)]
     file_path: Optional[Path] = None
@@ -151,7 +159,8 @@ class CodeOptimizationContext(BaseModel):
     read_writable_code: str = Field(min_length=1)
     read_only_context_code: str = ""
     helper_functions: list[FunctionSource]
-    preexisting_objects: set[tuple[str, tuple[FunctionParent,...]]]
+    preexisting_objects: set[tuple[str, tuple[FunctionParent, ...]]]
+
 
 class CodeContextType(str, Enum):
     READ_WRITABLE = "READ_WRITABLE"
@@ -347,6 +356,7 @@ def create_empty(cls, file_path: Path, function_name: str, code_context: CodeOpt
             status=CoverageStatus.NOT_FOUND,
         )
 
+
 @dataclass
 class FunctionCoverage:
     """Represents the coverage data for a specific function in a source file."""
@@ -364,7 +374,8 @@ class TestingMode(enum.Enum):
     PERFORMANCE = "performance"
     LINE_PROFILE = "line_profile"
 
-#TODO this class is duplicated in codeflash_capture
+
+# TODO this class is duplicated in codeflash_capture
 class VerificationType(str, Enum):
     FUNCTION_CALL = (
         "function_call"  # Correctness verification for a test function, checks input values and output values)
@@ -473,14 +484,20 @@ def merge(self, other: TestResults) -> None:
                 raise ValueError(msg)
             self.test_result_idx[k] = v + original_len
 
-    def group_by_benchmarks(self, benchmark_keys:list[BenchmarkKey], benchmark_replay_test_dir: Path, project_root: Path) -> dict[BenchmarkKey, TestResults]:
+    def group_by_benchmarks(
+        self, benchmark_keys: list[BenchmarkKey], benchmark_replay_test_dir: Path, project_root: Path
+    ) -> dict[BenchmarkKey, TestResults]:
         """Group TestResults by benchmark for calculating improvements for each benchmark."""
         test_results_by_benchmark = defaultdict(TestResults)
         benchmark_module_path = {}
         for benchmark_key in benchmark_keys:
-            benchmark_module_path[benchmark_key] = module_name_from_file_path(benchmark_replay_test_dir.resolve() / f"test_{benchmark_key.module_path.replace('.', '_')}__replay_test_", project_root)
+            benchmark_module_path[benchmark_key] = module_name_from_file_path(
+                benchmark_replay_test_dir.resolve()
+                / f"test_{benchmark_key.module_path.replace('.', '_')}__replay_test_",
+                project_root,
+            )
         for test_result in self.test_results:
-            if (test_result.test_type == TestType.REPLAY_TEST):
+            if test_result.test_type == TestType.REPLAY_TEST:
                 for benchmark_key, module_path in benchmark_module_path.items():
                     if test_result.id.test_module_path.startswith(module_path):
                         test_results_by_benchmark[benchmark_key].add(test_result)
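
Context on the grouping above: each benchmark key is mapped to the module-path prefix of its generated replay tests, and a replay test result is attributed to the benchmark whose prefix its test module path starts with. A simplified, self-contained sketch of that prefix-matching idea, using plain strings in place of the real BenchmarkKey/TestResults types (all names below are hypothetical):

from collections import defaultdict

# Hypothetical per-benchmark replay-test module prefixes.
benchmark_prefixes = {
    "bench_sort::test_sort_large": "tests.replay.test_bench_sort__replay_test_",
    "bench_load::test_load_csv": "tests.replay.test_bench_load__replay_test_",
}

# Hypothetical replay test results as (test_module_path, runtime_ns) pairs.
replay_results = [
    ("tests.replay.test_bench_sort__replay_test_0", 1200),
    ("tests.replay.test_bench_load__replay_test_0", 3400),
    ("tests.replay.test_bench_sort__replay_test_1", 1100),
]

grouped: dict[str, list[int]] = defaultdict(list)
for module_path, runtime in replay_results:
    for benchmark, prefix in benchmark_prefixes.items():
        if module_path.startswith(prefix):
            grouped[benchmark].append(runtime)

# grouped now maps each benchmark to the runtimes of its own replay tests.
assert grouped["bench_sort::test_sort_large"] == [1200, 1100]
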
@@ -537,22 +554,20 @@ def report_to_tree(report: dict[TestType, dict[str, int]], title: str) -> Tree:
         return tree
 
     def usable_runtime_data_by_test_case(self) -> dict[InvocationId, list[int]]:
+        # Efficient single traversal, directly accumulating into a dict.
+        by_id: dict[InvocationId, list[int]] = {}
         for result in self.test_results:
-            if result.did_pass and not result.runtime:
-                msg = (
-                    f"Ignoring test case that passed but had no runtime -> {result.id}, "
-                    f"Loop # {result.loop_index}, Test Type: {result.test_type}, "
-                    f"Verification Type: {result.verification_type}"
-                )
-                logger.debug(msg)
-
-        usable_runtimes = [
-            (result.id, result.runtime) for result in self.test_results if result.did_pass and result.runtime
-        ]
-        return {
-            usable_id: [runtime[1] for runtime in usable_runtimes if runtime[0] == usable_id]
-            for usable_id in {runtime[0] for runtime in usable_runtimes}
-        }
+            if result.did_pass:
+                if result.runtime:
+                    by_id.setdefault(result.id, []).append(result.runtime)
+                else:
+                    msg = (
+                        f"Ignoring test case that passed but had no runtime -> {result.id}, "
+                        f"Loop # {result.loop_index}, Test Type: {result.test_type}, "
+                        f"Verification Type: {result.verification_type}"
+                    )
+                    logger.debug(msg)
+        return by_id
 
     def total_passed_runtime(self) -> int:
         """Calculate the sum of runtimes of all test cases that passed.
@@ -561,10 +576,9 @@ def total_passed_runtime(self) -> int:
 
         :return: The runtime in nanoseconds.
         """
-        #TODO this doesn't look at the intersection of tests of baseline and original
-        return sum(
-            [min(usable_runtime_data) for _, usable_runtime_data in self.usable_runtime_data_by_test_case().items()]
-        )
+        # TODO this doesn't look at the intersection of tests of baseline and original
+        runtime_data = self.usable_runtime_data_by_test_case()
+        return sum(min(times) for times in runtime_data.values() if times)
 
     def __iter__(self) -> Iterator[FunctionTestInvocation]:
         return iter(self.test_results)
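
To make the two rewrites above concrete: passing results that have a runtime are accumulated into a dict keyed by invocation id in a single pass, and the total runtime is the sum of the per-invocation minimums. A standalone sketch with hypothetical (invocation_id, did_pass, runtime_ns) tuples standing in for FunctionTestInvocation objects:

# Hypothetical measurements; a runtime of 0 means "no usable timing was captured".
results = [
    ("test_a", True, 105),
    ("test_a", True, 98),   # same invocation measured in another loop -> minimum wins
    ("test_b", True, 0),    # passed but no usable runtime -> skipped (and logged in the real code)
    ("test_c", False, 50),  # failed -> ignored entirely
]

by_id: dict[str, list[int]] = {}
for invocation_id, did_pass, runtime in results:
    if did_pass and runtime:
        by_id.setdefault(invocation_id, []).append(runtime)

# Sum the best (minimum) runtime observed per invocation id.
total = sum(min(times) for times in by_id.values() if times)
assert by_id == {"test_a": [105, 98]} and total == 98
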
@@ -591,7 +605,7 @@ def __eq__(self, other: object) -> bool:
         if len(self) != len(other):
             return False
         original_recursion_limit = sys.getrecursionlimit()
-        cast(TestResults, other)
+        cast("TestResults", other)
         for test_result in self:
             other_test_result = other.get_by_unique_invocation_loop_id(test_result.unique_invocation_loop_id)
             if other_test_result is None: