codeflash-ai
diff --git a/‎codeflash/code_utils/instrument_existing_tests.py‎
Lines changed: 1 addition & 2 deletions b/‎codeflash/code_utils/instrument_existing_tests.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎codeflash/discovery/discover_unit_tests.py‎
Lines changed: 1 addition & 2 deletions b/‎codeflash/discovery/discover_unit_tests.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎codeflash/github/PrComment.py‎
Lines changed: 1 addition & 1 deletion b/‎codeflash/github/PrComment.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎codeflash/models/models.py‎
Lines changed: 244 additions & 2 deletions b/‎codeflash/models/models.py‎
Lines changed: 244 additions & 2 deletions
diff --git a/‎codeflash/optimization/function_optimizer.py‎
Lines changed: 2 additions & 1 deletion b/‎codeflash/optimization/function_optimizer.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎codeflash/optimization/optimizer.py‎
Lines changed: 1 addition & 2 deletions b/‎codeflash/optimization/optimizer.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎codeflash/result/critic.py‎
Lines changed: 1 addition & 3 deletions b/‎codeflash/result/critic.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎codeflash/result/explanation.py‎
Lines changed: 1 addition & 1 deletion b/‎codeflash/result/explanation.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎codeflash/verification/codeflash_capture.py‎
Lines changed: 1 addition & 1 deletion b/‎codeflash/verification/codeflash_capture.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎codeflash/verification/equivalence.py‎
Lines changed: 1 addition & 1 deletion b/‎codeflash/verification/equivalence.py‎
Lines changed: 1 addition & 1 deletion
@@ -9,8 +9,7 @@
 from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.code_utils import get_run_tmp_file, module_name_from_file_path
 from codeflash.discovery.functions_to_optimize import FunctionToOptimize
-from codeflash.models.models import FunctionParent, TestingMode
-from codeflash.verification.test_results import VerificationType
+from codeflash.models.models import FunctionParent, TestingMode, VerificationType
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
 
@@ -16,8 +16,7 @@
 from codeflash.cli_cmds.console import console, logger
 from codeflash.code_utils.code_utils import get_run_tmp_file, module_name_from_file_path
 from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE
-from codeflash.models.models import CodePosition, FunctionCalledInTest, TestsInFile
-from codeflash.verification.test_results import TestType
+from codeflash.models.models import CodePosition, FunctionCalledInTest, TestsInFile, TestType
 
 if TYPE_CHECKING:
     from codeflash.verification.verification_utils import TestConfig
 
@@ -4,7 +4,7 @@
 from pydantic.dataclasses import dataclass
 
 from codeflash.code_utils.time_utils import humanize_runtime
-from codeflash.verification.test_results import TestResults
+from codeflash.models.models import TestResults
 
 
 @dataclass(frozen=True, config={"arbitrary_types_allowed": True})
 
@@ -1,13 +1,22 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, Optional, cast
+
+from rich.tree import Tree
+
+from codeflash.cli_cmds.console import DEBUG_MODE, logger
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
 import enum
 import json
 import re
+import sys
 from collections.abc import Collection, Iterator
 from enum import Enum, IntEnum
 from pathlib import Path
 from re import Pattern
-from typing import Annotated, Any, Optional, Union
+from typing import Annotated, Any, Optional, Union, cast
 
 import sentry_sdk
 from coverage.exceptions import NoDataError
@@ -23,7 +32,7 @@
     generate_candidates,
 )
 from codeflash.code_utils.env_utils import is_end_to_end
-from codeflash.verification.test_results import TestResults, TestType
+from codeflash.verification.comparator import comparator
 
 # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
 # qualified name of the method is foo.eggs.Ham.spam, its qualified name is Ham.spam, and its name is spam. The full name
@@ -511,3 +520,236 @@ class FunctionCoverage:
 class TestingMode(enum.Enum):
+    # Enum whose two members carry the string values "behavior" and "performance".
+    # NOTE(review): presumably selects between behavioral and benchmarking test runs - confirm against callers.
     BEHAVIOR = "behavior"
     PERFORMANCE = "performance"
+
+
+class VerificationType(str, Enum):
+    """Kind of correctness verification a recorded result belongs to (members are also ``str`` values)."""
+
+    # Correctness verification for a test function: checks input values and output values.
+    FUNCTION_CALL = "function_call"
+    # Correctness verification for fto class instance attributes after init.
+    INIT_STATE_FTO = "init_state_fto"
+    # Correctness verification for helper class instance attributes after init.
+    INIT_STATE_HELPER = "init_state_helper"
+
+
+class TestType(Enum):
+    """Category of test that produced a result, with a display label per category."""
+
+    EXISTING_UNIT_TEST = 1
+    INSPIRED_REGRESSION = 2
+    GENERATED_REGRESSION = 3
+    REPLAY_TEST = 4
+    CONCOLIC_COVERAGE_TEST = 5
+    INIT_STATE_TEST = 6
+
+    def to_name(self) -> str:
+        """Return the display label for this test type.
+
+        ``INIT_STATE_TEST`` has no label of its own and yields the empty string.
+        """
+        labels = {
+            TestType.EXISTING_UNIT_TEST: "⚙️ Existing Unit Tests",
+            TestType.INSPIRED_REGRESSION: "🎨 Inspired Regression Tests",
+            TestType.GENERATED_REGRESSION: "🌀 Generated Regression Tests",
+            TestType.REPLAY_TEST: "⏪ Replay Tests",
+            TestType.CONCOLIC_COVERAGE_TEST: "🔎 Concolic Coverage Tests",
+        }
+        # Every member except INIT_STATE_TEST is in the table, so the fallback only applies to it.
+        return labels.get(self, "")
+
+
+@dataclass(frozen=True)
+class InvocationId:
+    """Identifies one invocation of a tested function from inside a test."""
+
+    # The fully qualified name of the test module
+    test_module_path: str
+    # The name of the class where the test is defined
+    test_class_name: Optional[str]
+    # The name of the test_function. Does not include the components of the file_name
+    test_function_name: Optional[str]
+    function_getting_tested: str
+    iteration_id: Optional[str]
+
+    def id(self) -> str:
+        """Render the id as ``test_module_path:TestSuiteClass.test_function_name:function_tested:iteration_id``.
+
+        The ``TestSuiteClass.`` prefix is left out when ``test_class_name`` is empty or ``None``.
+        """
+        scoped_test = (
+            f"{self.test_class_name}.{self.test_function_name}" if self.test_class_name else f"{self.test_function_name}"
+        )
+        return f"{self.test_module_path}:{scoped_test}:{self.function_getting_tested}:{self.iteration_id}"
+
+    @staticmethod
+    def from_str_id(string_id: str, iteration_id: Optional[str] = None) -> InvocationId:
+        """Parse a string in the layout produced by ``id()`` back into an ``InvocationId``.
+
+        :param string_id: Four colon-separated components; the second is ``Class.function`` or just ``function``.
+        :param iteration_id: When truthy, used instead of the iteration id parsed from ``string_id``.
+        :return: The reconstructed ``InvocationId``.
+        """
+        parts = string_id.split(":")
+        assert len(parts) == 4
+        name_pieces = parts[1].split(".")
+        if len(name_pieces) == 1:
+            # No dot: a free test function without an enclosing class.
+            class_name, function_name = None, name_pieces[0]
+        else:
+            # Only the first two dotted pieces are used; any further pieces are ignored.
+            class_name, function_name = name_pieces[0], name_pieces[1]
+        return InvocationId(
+            test_module_path=parts[0],
+            test_class_name=class_name,
+            test_function_name=function_name,
+            function_getting_tested=parts[2],
+            iteration_id=iteration_id or parts[3],
+        )
+
+
+@dataclass(frozen=True)
+class FunctionTestInvocation:
+    """Record of a single invocation of the tested function captured during a test run."""
+
+    # The loop index of the function invocation, starts at 1
+    loop_index: int
+    # The fully qualified name of the function invocation (id)
+    id: InvocationId
+    # The file where the test is defined
+    file_name: Path
+    # Whether the test this function invocation was part of, passed or failed
+    did_pass: bool
+    # Time in nanoseconds
+    runtime: Optional[int]
+    # unittest or pytest
+    test_framework: str
+    test_type: TestType
+    # The return value of the function invocation
+    return_value: Optional[object]
+    timed_out: Optional[bool]
+    verification_type: Optional[str] = VerificationType.FUNCTION_CALL
+    stdout: Optional[str] = None
+
+    @property
+    def unique_invocation_loop_id(self) -> str:
+        """Return the loop index and the invocation's ``id()`` string joined by a colon."""
+        invocation_key = self.id.id()
+        return f"{self.loop_index}:{invocation_key}"
+
+
+class TestResults(BaseModel):
+    """Collection of ``FunctionTestInvocation`` records indexed by their unique invocation-loop id.
+
+    The list and the index must stay in sync: add entries only through ``add`` or ``merge``.
+    Deleting elements is not supported.
+    """
+
+    # don't modify these directly, use the add method
+    # also we don't support deletion of test results elements - caution is advised
+    test_results: list[FunctionTestInvocation] = []
+    # Maps unique_invocation_loop_id -> position of that record in test_results.
+    test_result_idx: dict[str, int] = {}
+
+    def add(self, function_test_invocation: FunctionTestInvocation) -> None:
+        """Append a record unless one with the same unique invocation-loop id is already stored.
+
+        Duplicates are skipped; a warning is logged for them only when ``DEBUG_MODE`` is set.
+        """
+        unique_id = function_test_invocation.unique_invocation_loop_id
+        if unique_id in self.test_result_idx:
+            if DEBUG_MODE:
+                logger.warning(f"Test result with id {unique_id} already exists. SKIPPING")
+            return
+        self.test_result_idx[unique_id] = len(self.test_results)
+        self.test_results.append(function_test_invocation)
+
+    def merge(self, other: TestResults) -> None:
+        """Append every record of ``other`` to this collection.
+
+        :raises ValueError: If any id of ``other`` is already present here. The check runs before
+            anything is mutated, so a failed merge leaves this collection unchanged.
+        """
+        duplicate = next((key for key in other.test_result_idx if key in self.test_result_idx), None)
+        if duplicate is not None:
+            msg = f"Test result with id {duplicate} already exists."
+            raise ValueError(msg)
+        # Positions recorded in other's index are shifted by the number of records already stored here.
+        offset = len(self.test_results)
+        self.test_results.extend(other.test_results)
+        for key, position in other.test_result_idx.items():
+            self.test_result_idx[key] = position + offset
+
+    def get_by_unique_invocation_loop_id(self, unique_invocation_loop_id: str) -> FunctionTestInvocation | None:
+        """Return the record stored under the given id, or ``None`` when it is not found."""
+        try:
+            return self.test_results[self.test_result_idx[unique_invocation_loop_id]]
+        except (IndexError, KeyError):
+            return None
+
+    def get_all_ids(self) -> set[InvocationId]:
+        """Return the set of ``InvocationId`` values across all stored records."""
+        return {test_result.id for test_result in self.test_results}
+
+    def get_all_unique_invocation_loop_ids(self) -> set[str]:
+        """Return the set of unique invocation-loop id strings across all stored records."""
+        return {test_result.unique_invocation_loop_id for test_result in self.test_results}
+
+    def number_of_loops(self) -> int:
+        """Return the highest ``loop_index`` among the stored records, or 0 when there are none."""
+        if not self.test_results:
+            return 0
+        return max(test_result.loop_index for test_result in self.test_results)
+
+    def get_test_pass_fail_report_by_type(self) -> dict[TestType, dict[str, int]]:
+        """Count passed and failed records per ``TestType``, looking only at records with ``loop_index == 1``.
+
+        :return: A mapping with an entry for every ``TestType``, each holding ``"passed"`` and ``"failed"`` counts.
+        """
+        report = {test_type: {"passed": 0, "failed": 0} for test_type in TestType}
+        for test_result in self.test_results:
+            if test_result.loop_index != 1:
+                continue
+            outcome = "passed" if test_result.did_pass else "failed"
+            report[test_result.test_type][outcome] += 1
+        return report
+
+    @staticmethod
+    def report_to_string(report: dict[TestType, dict[str, int]]) -> str:
+        """Format a pass/fail report as one space-separated line with an entry for every ``TestType``."""
+        return " ".join(
+            f"{test_type.to_name()}- (Passed: {report[test_type]['passed']}, Failed: {report[test_type]['failed']})"
+            for test_type in TestType
+        )
+
+    @staticmethod
+    def report_to_tree(report: dict[TestType, dict[str, int]], title: str) -> Tree:
+        """Build a rich ``Tree`` titled ``title`` with one node per ``TestType`` except ``INIT_STATE_TEST``."""
+        tree = Tree(title)
+        for test_type in TestType:
+            if test_type is TestType.INIT_STATE_TEST:
+                continue
+            tree.add(
+                f"{test_type.to_name()} - Passed: {report[test_type]['passed']}, Failed: {report[test_type]['failed']}"
+            )
+        return tree
+
+    def usable_runtime_data_by_test_case(self) -> dict[InvocationId, list[int]]:
+        """Group the runtimes of passing records by ``InvocationId``.
+
+        Passing records whose runtime is falsy (``None`` or 0) are logged at debug level and left out.
+        Failing records are ignored. Runtimes within a group keep the order of ``test_results``.
+        """
+        grouped: dict[InvocationId, list[int]] = {}
+        # Single pass: each record is either skipped, logged as unusable, or appended to its group.
+        for result in self.test_results:
+            if not result.did_pass:
+                continue
+            if not result.runtime:
+                msg = (
+                    f"Ignoring test case that passed but had no runtime -> {result.id}, "
+                    f"Loop # {result.loop_index}, Test Type: {result.test_type}, "
+                    f"Verification Type: {result.verification_type}"
+                )
+                logger.debug(msg)
+                continue
+            grouped.setdefault(result.id, []).append(result.runtime)
+        return grouped
+
+    def total_passed_runtime(self) -> int:
+        """Calculate the sum of runtimes of all test cases that passed.
+
+        A testcase runtime is the minimum value of all looped execution runtimes.
+
+        :return: The runtime in nanoseconds.
+        """
+        return sum(min(runtimes) for runtimes in self.usable_runtime_data_by_test_case().values())
+
+    def __iter__(self) -> Iterator[FunctionTestInvocation]:
+        """Iterate over the stored records in insertion order."""
+        return iter(self.test_results)
+
+    def __len__(self) -> int:
+        """Return the number of stored records."""
+        return len(self.test_results)
+
+    def __getitem__(self, index: int) -> FunctionTestInvocation:
+        """Return the record at position ``index``."""
+        return self.test_results[index]
+
+    def __setitem__(self, index: int, value: FunctionTestInvocation) -> None:
+        """Replace the record at position ``index``; ``test_result_idx`` is not updated."""
+        self.test_results[index] = value
+
+    def __contains__(self, value: FunctionTestInvocation) -> bool:
+        """Return whether ``value`` is one of the stored records."""
+        return value in self.test_results
+
+    def __bool__(self) -> bool:
+        """Return ``True`` when at least one record is stored."""
+        return bool(self.test_results)
+
+    def __eq__(self, other: object) -> bool:
+        """Compare two collections record by record, ignoring order.
+
+        Records are paired by unique invocation-loop id. A pair matches when file name, pass status,
+        runtime, test framework and test type are equal and ``comparator`` accepts the two return values.
+        """
+        # Unordered comparison
+        if type(self) is not type(other):
+            return False
+        other_results = cast(TestResults, other)
+        if len(self) != len(other_results):
+            return False
+        original_recursion_limit = sys.getrecursionlimit()
+        # Raise the limit once for the whole comparison when it is below 5000.
+        if self.test_results and original_recursion_limit < 5000:
+            sys.setrecursionlimit(5000)
+        try:
+            for test_result in self:
+                other_test_result = other_results.get_by_unique_invocation_loop_id(
+                    test_result.unique_invocation_loop_id
+                )
+                if other_test_result is None:
+                    return False
+                if (
+                    test_result.file_name != other_test_result.file_name
+                    or test_result.did_pass != other_test_result.did_pass
+                    or test_result.runtime != other_test_result.runtime
+                    or test_result.test_framework != other_test_result.test_framework
+                    or test_result.test_type != other_test_result.test_type
+                    or not comparator(test_result.return_value, other_test_result.return_value)
+                ):
+                    return False
+            return True
+        finally:
+            # Always restore the caller's limit, including when comparator raises.
+            sys.setrecursionlimit(original_recursion_limit)
@@ -56,6 +56,8 @@
     TestFile,
     TestFiles,
     TestingMode,
+    TestResults,
+    TestType
 )
 from codeflash.result.create_pr import check_create_pr, existing_tests_source_for
 from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic
@@ -65,7 +67,6 @@
 from codeflash.verification.equivalence import compare_test_results
 from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture
 from codeflash.verification.parse_test_output import parse_test_results
-from codeflash.verification.test_results import TestResults, TestType
 from codeflash.verification.test_runner import run_behavioral_tests, run_benchmarking_tests
 from codeflash.verification.verification_utils import get_test_file_path
 from codeflash.verification.verifier import generate_tests
 
@@ -16,10 +16,9 @@
 from codeflash.discovery.discover_unit_tests import discover_unit_tests
 from codeflash.discovery.functions_to_optimize import get_functions_to_optimize
 from codeflash.either import is_successful
-from codeflash.models.models import ValidCode
+from codeflash.models.models import ValidCode, TestType
 from codeflash.optimization.function_optimizer import FunctionOptimizer
 from codeflash.telemetry.posthog_cf import ph
-from codeflash.verification.test_results import TestType
 from codeflash.verification.verification_utils import TestConfig
 
 if TYPE_CHECKING:
 
@@ -3,9 +3,7 @@
 from codeflash.cli_cmds.console import logger
 from codeflash.code_utils import env_utils
 from codeflash.code_utils.config_consts import COVERAGE_THRESHOLD, MIN_IMPROVEMENT_THRESHOLD
-from codeflash.models.models import CoverageData, OptimizedCandidateResult
-from codeflash.verification.test_results import TestType
-
+from codeflash.models.models import CoverageData, OptimizedCandidateResult, TestType
 
 def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
     """Calculate the performance gain of an optimized code over the original code.
 
@@ -3,7 +3,7 @@
 from pydantic.dataclasses import dataclass
 
 from codeflash.code_utils.time_utils import humanize_runtime
-from codeflash.verification.test_results import TestResults
+from codeflash.models.models import TestResults
 
 
 @dataclass(frozen=True, config={"arbitrary_types_allowed": True})
 
@@ -10,7 +10,7 @@
 
 import dill as pickle
 
-from codeflash.verification.test_results import VerificationType
+from codeflash.models.models import VerificationType
 
 
 def get_test_info_from_stack(tests_root: str) -> tuple[str, str | None, str, str]:
 
@@ -3,7 +3,7 @@
 
 from codeflash.cli_cmds.console import console, logger
 from codeflash.verification.comparator import comparator
-from codeflash.verification.test_results import TestResults, TestType, VerificationType
+from codeflash.models.models import TestResults, TestType, VerificationType
 
 INCREASED_RECURSION_LIMIT = 5000