From ab500c194b728d517cc96988c7d55772dcb44459 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Sep 2025 17:37:30 -0700 Subject: [PATCH 01/40] Update codeflash.code-workspace --- codeflash.code-workspace | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/codeflash.code-workspace b/codeflash.code-workspace index 5d86915cc..e84ca9084 100644 --- a/codeflash.code-workspace +++ b/codeflash.code-workspace @@ -70,7 +70,11 @@ "request": "launch", "program": "${workspaceFolder:codeflash}/codeflash/main.py", "args": [ - "--file", "src/async_examples/shocker.py", "--verbose" + "--file", + "src/async_examples/concurrency.py", + "--function", + "retry_with_backoff", + "--verbose" ], "cwd": "${input:chooseCwd}", "console": "integratedTerminal", From f98dae65029baa2486f56793c8cb8ffa82e92c1b Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 15 Sep 2025 17:22:03 -0700 Subject: [PATCH 02/40] Update parse_test_output.py --- codeflash/verification/parse_test_output.py | 57 ++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 4af1eec50..654139971 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -26,6 +26,7 @@ if TYPE_CHECKING: import subprocess + from codeflash.discovery.functions_to_optimize import FunctionToOptimize from codeflash.models.models import CodeOptimizationContext, CoverageData, TestFiles from codeflash.verification.verification_utils import TestConfig @@ -40,6 +41,44 @@ def parse_func(file_path: Path) -> XMLParser: matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") +def calculate_async_throughput_from_stdout(stdout: str, async_function_names: set[str]) -> dict[str, int]: + if not stdout or not async_function_names: + return {} + + throughput_counts = {} + + # Find all complete performance tag pairs (start + end) + begin_matches = list(matches_re_start.finditer(stdout)) + end_matches = set() + + for match in matches_re_end.finditer(stdout): + groups = match.groups() + # Remove timing info from the last group to match start tags + # End format: 'iteration_id:timing_info', Start format: 'iteration_id' + # We need to remove only the last ':timing_info' part + last_group = groups[5] + split_parts = last_group.split(":") + if len(split_parts) > 2: # Has timing info (format: prefix:suffix:timing) + # Reconstruct without the timing info (last part) + iteration_id = ":".join(split_parts[:-1]) + normalized_groups = (*groups[:5], iteration_id) + else: + normalized_groups = groups + end_matches.add(normalized_groups) + + # Count complete tags for async functions only + for begin_match in begin_matches: + groups = begin_match.groups() + function_getting_tested = groups[4] + + if function_getting_tested in async_function_names and groups in end_matches: + if function_getting_tested not in throughput_counts: + throughput_counts[function_getting_tested] = 0 + throughput_counts[function_getting_tested] += 1 + + return throughput_counts + + def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults: test_results = TestResults() if not file_location.exists(): @@ -495,7 +534,10 @@ def parse_test_results( code_context: CodeOptimizationContext | None = None, run_result: subprocess.CompletedProcess | None = None, unittest_loop_index: int | None = None, -) -> tuple[TestResults, CoverageData | None]: + 
function_to_optimize: FunctionToOptimize | None = None, + *, + calculate_throughput: bool = False, +) -> tuple[TestResults, CoverageData | None, dict[str, int]]: test_results_xml = parse_test_xml( test_xml_path, test_files=test_files, @@ -532,6 +574,18 @@ def parse_test_results( get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.sqlite")).unlink(missing_ok=True) results = merge_test_results(test_results_xml, test_results_bin_file, test_config.test_framework) + # Calculate throughput for async functions only when requested (during performance testing) + throughput_counts = {} + if calculate_throughput and function_to_optimize and function_to_optimize.is_async: + logger.info(f"Calculating throughput for async function: {function_to_optimize.function_name}") + all_stdout = "" + for result in results.test_results: + if result.stdout: + all_stdout += result.stdout + + async_function_names = {function_to_optimize.function_name} + throughput_counts = calculate_async_throughput_from_stdout(all_stdout, async_function_names) + all_args = False if coverage_database_file and source_file and code_context and function_name: all_args = True @@ -543,4 +597,5 @@ def parse_test_results( function_name=function_name, ) coverage.log_coverage() + # return results, coverage if all_args else None, throughput_counts return results, coverage if all_args else None From 96211b8985e76bf44cb0882070cd21354182411a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 15 Sep 2025 17:26:48 -0700 Subject: [PATCH 03/40] add tests_project_root to the wrappers --- .../code_utils/codeflash_wrap_decorator.py | 282 +++++++++--------- .../code_utils/instrument_existing_tests.py | 77 +++-- codeflash/optimization/function_optimizer.py | 8 +- tests/test_async_run_and_parse_tests.py | 96 ++---- tests/test_async_wrapper_sqlite_validation.py | 14 +- tests/test_extract_test_context_from_frame.py | 15 +- tests/test_instrument_async_tests.py | 27 +- 7 files changed, 272 insertions(+), 247 deletions(-) diff --git a/codeflash/code_utils/codeflash_wrap_decorator.py b/codeflash/code_utils/codeflash_wrap_decorator.py index cb4da64a0..f49c5e5a4 100644 --- a/codeflash/code_utils/codeflash_wrap_decorator.py +++ b/codeflash/code_utils/codeflash_wrap_decorator.py @@ -6,9 +6,8 @@ import inspect import os import sqlite3 -import time from enum import Enum -from functools import wraps +from functools import lru_cache, wraps from pathlib import Path from tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Any, Callable, TypeVar @@ -36,6 +35,25 @@ def get_run_tmp_file(file_path: Path) -> Path: # moved from codeflash/code_util return Path(get_run_tmp_file.tmpdir.name) / file_path +def module_name_from_file_path( + file_path: Path, project_root_path: Path, *, traverse_up: bool = False +) -> str: # moved from codeflash/code_utils/code_utils.py + try: + relative_path = file_path.relative_to(project_root_path) + return relative_path.with_suffix("").as_posix().replace("/", ".") + except ValueError: + if traverse_up: + parent = file_path.parent + while parent not in (project_root_path, parent.parent): + try: + relative_path = file_path.relative_to(parent) + return relative_path.with_suffix("").as_posix().replace("/", ".") + except ValueError: + parent = parent.parent + msg = f"File {file_path} is not within the project root {project_root_path}." 
+ raise ValueError(msg) # noqa: B904 + + def _extract_class_name_tracer(frame_locals: dict[str, Any]) -> str | None: try: self_arg = frame_locals.get("self") @@ -73,9 +91,9 @@ def _get_module_name_cf_tracer(frame: FrameType | None) -> str: return "unknown_module" -def extract_test_context_from_frame() -> tuple[str, str | None, str]: +@lru_cache(maxsize=32) +def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, str]: frame = inspect.currentframe() - # optimize? try: frames_info = [] potential_tests = [] @@ -90,7 +108,7 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: filename = frame.f_code.co_filename filename_path = Path(filename) frame_locals = frame.f_locals - test_module_name = _get_module_name_cf_tracer(frame) + test_module_name = module_name_from_file_path(filename_path, tests_project_root) class_name = _extract_class_name_tracer(frame_locals) frames_info.append( @@ -108,7 +126,6 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: continue frame = frame.f_back - # Second pass: analyze frames with full context test_class_candidates = [] for frame_info in frames_info: @@ -122,7 +139,7 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: # Keep track of test classes if class_name and ( class_name.startswith("Test") or class_name.endswith("Test") or "test" in class_name.lower() - ): + ) and not class_name.startswith(("Pytest", "_Pytest")): test_class_candidates.append((class_name, test_module_name)) # Now process frames again looking for test functions with full candidates list @@ -138,22 +155,15 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: # Collect test functions if function_name.startswith("test_"): - test_class_name = class_name - - # If no class found in current frame, check if we have any test class candidates - # Prefer the innermost (first) test class candidate which is more specific - if test_class_name is None and test_class_candidates: + test_class_name = class_name or None + if not test_class_name and test_class_candidates: test_class_name = test_class_candidates[0][0] - test_functions.append((test_module_name, test_class_name, function_name)) - # Prioritize test functions with class context, then innermost if test_functions: - # First prefer test functions with class context for test_func in test_functions: - if test_func[1] is not None: # has class_name + if test_func[1]: # has non-empty class_name return test_func - # If no test function has class context, return the outermost (most likely the actual test method) return test_functions[-1] # If no direct test functions found, look for other test patterns @@ -173,14 +183,12 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: ): if class_name and ( class_name.startswith("Test") or class_name.endswith("Test") or "test" in class_name.lower() - ): + ) and not class_name.startswith(("Pytest", "_Pytest")): potential_tests.append((test_module_name, class_name, function_name)) elif "test" in test_module_name or filename_path.stem.startswith("test_"): - # For functions without class context, try to find the most recent test class best_class = test_class_candidates[0][0] if test_class_candidates else None potential_tests.append((test_module_name, best_class, function_name)) - # Framework integration detection if ( ( function_name in ["runTest", "_runTest", "run", "_testMethodName"] @@ -189,6 +197,7 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: ) and class_name and 
(class_name.startswith("Test") or "test" in class_name.lower()) + and not class_name.startswith(("Pytest", "_Pytest")) ): test_method = function_name if "self" in frame_locals: @@ -207,125 +216,132 @@ def extract_test_context_from_frame() -> tuple[str, str | None, str]: del frame -def codeflash_behavior_async(func: F) -> F: - @wraps(func) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 - loop = asyncio.get_running_loop() - function_name = func.__name__ - line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" - loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - test_module_name, test_class_name, test_name = extract_test_context_from_frame() +def codeflash_behavior_async(*, tests_project_root: Path) -> Callable[[F], F]: + def decorator(func: F) -> F: + @wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 + loop = asyncio.get_running_loop() + function_name = func.__name__ + line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" + loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" + test_module_name, test_class_name, test_name = extract_test_context_from_frame(tests_project_root) - if not hasattr(async_wrapper, "index"): - async_wrapper.index = {} - if test_id in async_wrapper.index: - async_wrapper.index[test_id] += 1 - else: - async_wrapper.index[test_id] = 0 - - codeflash_test_index = async_wrapper.index[test_id] - invocation_id = f"{line_id}_{codeflash_test_index}" - test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" - - print(f"!$######{test_stdout_tag}######$!") - - iteration = os.environ.get("CODEFLASH_TEST_ITERATION", "0") - db_path = get_run_tmp_file(Path(f"test_return_values_{iteration}.sqlite")) - codeflash_con = sqlite3.connect(db_path) - codeflash_cur = codeflash_con.cursor() - - codeflash_cur.execute( - "CREATE TABLE IF NOT EXISTS test_results (test_module_path TEXT, test_class_name TEXT, " - "test_function_name TEXT, function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " - "runtime INTEGER, return_value BLOB, verification_type TEXT)" - ) - - exception = None - counter = loop.time() - gc.disable() - try: - ret = func(*args, **kwargs) # coroutine creation has some overhead, though it is very small - counter = loop.time() - return_value = await ret # let's measure the actual execution time of the code - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - except Exception as e: - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - exception = e - finally: - gc.enable() - - print(f"!######{test_stdout_tag}######!") - - pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps((args, kwargs, return_value)) - codeflash_cur.execute( - "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", - ( - test_module_name, - test_class_name, - test_name, - function_name, - loop_index, - invocation_id, - codeflash_duration, - pickled_return_value, - VerificationType.FUNCTION_CALL.value, - ), - ) - codeflash_con.commit() - codeflash_con.close() - - if exception: - raise exception - return return_value - - return async_wrapper - - -def codeflash_performance_async(func: F) -> F: - @wraps(func) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 - loop = asyncio.get_running_loop() - function_name = func.__name__ - line_id = 
f"{func.__name__}_{func.__code__.co_firstlineno}" - loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - - test_module_name, test_class_name, test_name = extract_test_context_from_frame() - - test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" - - if not hasattr(async_wrapper, "index"): - async_wrapper.index = {} - if test_id in async_wrapper.index: - async_wrapper.index[test_id] += 1 - else: - async_wrapper.index[test_id] = 0 + test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" + + if not hasattr(async_wrapper, "index"): + async_wrapper.index = {} + if test_id in async_wrapper.index: + async_wrapper.index[test_id] += 1 + else: + async_wrapper.index[test_id] = 0 + + codeflash_test_index = async_wrapper.index[test_id] + invocation_id = f"{line_id}_{codeflash_test_index}" + test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" - codeflash_test_index = async_wrapper.index[test_id] - invocation_id = f"{line_id}_{codeflash_test_index}" - test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" + print(f"!$######{test_stdout_tag}######$!") - print(f"!$######{test_stdout_tag}######$!") + iteration = os.environ.get("CODEFLASH_TEST_ITERATION", "0") + db_path = get_run_tmp_file(Path(f"test_return_values_{iteration}.sqlite")) + codeflash_con = sqlite3.connect(db_path) + codeflash_cur = codeflash_con.cursor() - exception = None - counter = loop.time() - gc.disable() - try: - ret = func(*args, **kwargs) + codeflash_cur.execute( + "CREATE TABLE IF NOT EXISTS test_results (test_module_path TEXT, test_class_name TEXT, " + "test_function_name TEXT, function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " + "runtime INTEGER, return_value BLOB, verification_type TEXT)" + ) + + exception = None counter = loop.time() - return_value = await ret - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - except Exception as e: - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - exception = e - finally: - gc.enable() + gc.disable() + try: + ret = func(*args, **kwargs) # coroutine creation has some overhead, though it is very small + counter = loop.time() + return_value = await ret # let's measure the actual execution time of the code + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + except Exception as e: + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + exception = e + finally: + gc.enable() + + print(f"!######{test_stdout_tag}######!") + + pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps((args, kwargs, return_value)) + codeflash_cur.execute( + "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + test_module_name, + test_class_name, + test_name, + function_name, + loop_index, + invocation_id, + codeflash_duration, + pickled_return_value, + VerificationType.FUNCTION_CALL.value, + ), + ) + codeflash_con.commit() + codeflash_con.close() + + if exception: + raise exception + return return_value + + return async_wrapper + + return decorator + + +def codeflash_performance_async(*, tests_project_root: Path) -> Callable[[F], F]: + def decorator(func: F) -> F: + @wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 + loop = asyncio.get_running_loop() + function_name = func.__name__ + line_id = 
f"{func.__name__}_{func.__code__.co_firstlineno}" + loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) + + test_module_name, test_class_name, test_name = extract_test_context_from_frame(tests_project_root) + + test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" + + if not hasattr(async_wrapper, "index"): + async_wrapper.index = {} + if test_id in async_wrapper.index: + async_wrapper.index[test_id] += 1 + else: + async_wrapper.index[test_id] = 0 + + codeflash_test_index = async_wrapper.index[test_id] + invocation_id = f"{line_id}_{codeflash_test_index}" + test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" + + print(f"!$######{test_stdout_tag}######$!") + + exception = None + counter = loop.time() + gc.disable() + try: + ret = func(*args, **kwargs) + counter = loop.time() + return_value = await ret + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + except Exception as e: + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + exception = e + finally: + gc.enable() + + print(f"!######{test_stdout_tag}:{codeflash_duration}######!") - print(f"!######{test_stdout_tag}:{codeflash_duration}######!") + if exception: + raise exception + return return_value - if exception: - raise exception - return return_value + return async_wrapper - return async_wrapper + return decorator diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index be75eac85..a65a8f27b 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -332,7 +332,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: def instrument_source_module_with_async_decorators( - source_path: Path, function_to_optimize: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR + source_path: Path, function_to_optimize: FunctionToOptimize, tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR ) -> tuple[bool, str | None]: if not function_to_optimize.is_async: return False, None @@ -341,7 +341,7 @@ def instrument_source_module_with_async_decorators( with source_path.open(encoding="utf8") as f: source_code = f.read() - modified_code, decorator_added = add_async_decorator_to_function(source_code, function_to_optimize, mode) + modified_code, decorator_added = add_async_decorator_to_function(source_code, function_to_optimize, tests_project_root, mode) if decorator_added: return True, modified_code @@ -770,18 +770,20 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun class AsyncDecoratorAdder(cst.CSTTransformer): """Transformer that adds async decorator to async function definitions.""" - def __init__(self, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR) -> None: + def __init__(self, function: FunctionToOptimize, tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR) -> None: """Initialize the transformer. Args: ---- function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. + tests_project_root: The root path for tests, used to compute relative module names. 
""" super().__init__() self.function = function self.mode = mode + self.tests_project_root = tests_project_root self.qualified_name_parts = function.qualified_name.split(".") self.context_stack = [] self.added_decorator = False @@ -814,7 +816,25 @@ def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.Fu # Only add the decorator if it's not already there if not has_decorator: - new_decorator = cst.Decorator(decorator=cst.Name(value=self.decorator_name)) + # Always create parameterized decorator with tests_project_root (required) + if self.tests_project_root is None: + raise ValueError("tests_project_root is required for async decorators") + + decorator_call = cst.Call( + func=cst.Name(value=self.decorator_name), + args=[ + cst.Arg( + keyword=cst.Name("tests_project_root"), + value=cst.Call( + func=cst.Name("Path"), + args=[ + cst.Arg(value=cst.SimpleString(f'r"{self.tests_project_root}"')) + ] + ) + ) + ] + ) + new_decorator = cst.Decorator(decorator=decorator_call) # Add our new decorator to the existing decorators updated_decorators = [new_decorator, *list(updated_node.decorators)] @@ -848,6 +868,7 @@ class AsyncDecoratorImportAdder(cst.CSTTransformer): def __init__(self, mode: TestingMode = TestingMode.BEHAVIOR) -> None: self.mode = mode self.has_import = False + self.has_path_import = False def visit_ImportFrom(self, node: cst.ImportFrom) -> None: # Check if the async decorator import is already present @@ -866,26 +887,41 @@ def visit_ImportFrom(self, node: cst.ImportFrom) -> None: for import_alias in node.names: if import_alias.name.value == decorator_name: self.has_import = True + + # Check if Path is already imported from pathlib + if ( + isinstance(node.module, cst.Name) + and node.module.value == "pathlib" + and not isinstance(node.names, cst.ImportStar) + ): + for import_alias in node.names: + if import_alias.name.value == "Path": + self.has_path_import = True def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> cst.Module: # noqa: ARG002 - # If the import is already there, don't add it again - if self.has_import: - return updated_node - - # Choose import based on mode - decorator_name = ( - "codeflash_behavior_async" if self.mode == TestingMode.BEHAVIOR else "codeflash_performance_async" - ) - - # Parse the import statement into a CST node - import_node = cst.parse_statement(f"from codeflash.code_utils.codeflash_wrap_decorator import {decorator_name}") - - # Add the import to the module's body - return updated_node.with_changes(body=[import_node, *list(updated_node.body)]) + new_imports = [] + + # Add decorator import if not present + if not self.has_import: + decorator_name = ( + "codeflash_behavior_async" if self.mode == TestingMode.BEHAVIOR else "codeflash_performance_async" + ) + decorator_import = cst.parse_statement(f"from codeflash.code_utils.codeflash_wrap_decorator import {decorator_name}") + new_imports.append(decorator_import) + + # Always add Path import if not present (required for parameterized decorators) + if not self.has_path_import: + path_import = cst.parse_statement("from pathlib import Path") + new_imports.append(path_import) + + if new_imports: + return updated_node.with_changes(body=[*new_imports, *list(updated_node.body)]) + + return updated_node def add_async_decorator_to_function( - source_code: str, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR + source_code: str, function: FunctionToOptimize, tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR ) -> tuple[str, 
bool]: """Add async decorator to an async function definition. @@ -894,6 +930,7 @@ def add_async_decorator_to_function( source_code: The source code to modify. function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. + tests_project_root: The root path for tests, used to compute relative module names. Returns: ------- @@ -907,7 +944,7 @@ def add_async_decorator_to_function( module = cst.parse_module(source_code) # Add the decorator to the function - decorator_transformer = AsyncDecoratorAdder(function, mode) + decorator_transformer = AsyncDecoratorAdder(function, tests_project_root, mode) module = module.visit(decorator_transformer) # Add the import if decorator was added diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 99f6d42f0..46190c544 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1376,7 +1376,7 @@ def establish_original_code_baseline( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR + self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.BEHAVIOR ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: @@ -1421,7 +1421,7 @@ def establish_original_code_baseline( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE + self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.PERFORMANCE ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: @@ -1551,7 +1551,7 @@ def run_optimized_candidate( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR + self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.BEHAVIOR ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: @@ -1600,7 +1600,7 @@ def run_optimized_candidate( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE + self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.PERFORMANCE ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index 1c5ddae63..44b4beaed 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -48,12 +48,16 @@ async def test_async_sort(): # For async functions, instrument the source module directly with decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, TestingMode.BEHAVIOR + fto_path, func, tests_root, TestingMode.BEHAVIOR ) assert source_success assert instrumented_source is not None - assert '''import asyncio\nfrom typing import List, Union\n\nfrom codeflash.code_utils.codeflash_wrap_decorator import \\\n 
codeflash_behavior_async\n\n\n@codeflash_behavior_async\nasync def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation for testing.\n """\n print("codeflash stdout: Async sorting list")\n \n await asyncio.sleep(0.01)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n print(f"result: {result}")\n return result\n\n\nclass AsyncBubbleSorter:\n """Class with async sorting method for testing."""\n \n async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation within a class.\n """\n print("codeflash stdout: AsyncBubbleSorter.sorter() called")\n \n # Add some async delay\n await asyncio.sleep(0.005)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n return result\n''' in instrumented_source + # Check that the decorator was applied with tests_project_root parameter + assert '@codeflash_behavior_async(tests_project_root = Path(' in instrumented_source + assert 'from pathlib import Path' in instrumented_source + assert 'from codeflash.code_utils.codeflash_wrap_decorator import' in instrumented_source + assert 'async def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:' in instrumented_source # Write the instrumented source back fto_path.write_text(instrumented_source, "utf-8") @@ -107,7 +111,8 @@ async def test_async_sort(): results_list = test_results.test_results assert results_list[0].id.function_getting_tested == "async_sorter" - assert results_list[0].id.test_class_name == "PytestPluginManager" + assert results_list[0].id.test_module_path == "test_async_bubble_sort_temp" + assert results_list[0].id.test_class_name is None assert results_list[0].id.test_function_name == "test_async_sort" assert results_list[0].did_pass assert results_list[0].runtime is None or results_list[0].runtime >= 0 @@ -117,6 +122,8 @@ async def test_async_sort(): assert results_list[1].id.function_getting_tested == "async_sorter" + assert results_list[1].id.test_module_path == "test_async_bubble_sort_temp" + assert results_list[1].id.test_class_name is None assert results_list[1].id.test_function_name == "test_async_sort" assert results_list[1].did_pass @@ -171,7 +178,7 @@ async def test_async_class_sort(): ) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, TestingMode.BEHAVIOR + fto_path, func, tests_root, TestingMode.BEHAVIOR ) assert source_success @@ -233,7 +240,8 @@ async def test_async_class_sort(): assert sorter_result.id.function_getting_tested == "sorter" - assert sorter_result.id.test_class_name == "PytestPluginManager" + assert sorter_result.id.test_module_path == "test_async_class_bubble_sort_temp" + assert sorter_result.id.test_class_name is None assert sorter_result.id.test_function_name == "test_async_class_sort" assert sorter_result.did_pass assert sorter_result.runtime is None or sorter_result.runtime >= 0 @@ -280,12 +288,16 @@ async def test_async_perf(): # Instrument the source module with async performance decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, TestingMode.PERFORMANCE + fto_path, func, tests_root, TestingMode.PERFORMANCE ) assert source_success assert instrumented_source is not None - assert '''import 
asyncio\nfrom typing import List, Union\n\nfrom codeflash.code_utils.codeflash_wrap_decorator import \\\n codeflash_performance_async\n\n\n@codeflash_performance_async\nasync def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation for testing.\n """\n print("codeflash stdout: Async sorting list")\n \n await asyncio.sleep(0.01)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n print(f"result: {result}")\n return result\n\n\nclass AsyncBubbleSorter:\n """Class with async sorting method for testing."""\n \n async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation within a class.\n """\n print("codeflash stdout: AsyncBubbleSorter.sorter() called")\n \n # Add some async delay\n await asyncio.sleep(0.005)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n return result\n''' == instrumented_source + # Check that the performance decorator was applied with tests_project_root parameter + assert '@codeflash_performance_async(tests_project_root = Path(' in instrumented_source + assert 'from pathlib import Path' in instrumented_source + assert 'from codeflash.code_utils.codeflash_wrap_decorator import' in instrumented_source + assert 'async def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:' in instrumented_source fto_path.write_text(instrumented_source, "utf-8") @@ -379,67 +391,17 @@ async def async_error_function(lst): func = FunctionToOptimize(function_name="async_error_function", parents=[], file_path=Path(fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, TestingMode.BEHAVIOR + fto_path, func, tests_root, TestingMode.BEHAVIOR ) assert source_success assert instrumented_source is not None - expected_instrumented_source = """import asyncio -from typing import List, Union - -from codeflash.code_utils.codeflash_wrap_decorator import \\ - codeflash_behavior_async - - -async def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]: - \"\"\" - Async bubble sort implementation for testing. - \"\"\" - print("codeflash stdout: Async sorting list") - - await asyncio.sleep(0.01) - - n = len(lst) - for i in range(n): - for j in range(0, n - i - 1): - if lst[j] > lst[j + 1]: - lst[j], lst[j + 1] = lst[j + 1], lst[j] - - result = lst.copy() - print(f"result: {result}") - return result - - -class AsyncBubbleSorter: - \"\"\"Class with async sorting method for testing.\"\"\" - - async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]: - \"\"\" - Async bubble sort implementation within a class. 
- \"\"\" - print("codeflash stdout: AsyncBubbleSorter.sorter() called") - - # Add some async delay - await asyncio.sleep(0.005) - - n = len(lst) - for i in range(n): - for j in range(0, n - i - 1): - if lst[j] > lst[j + 1]: - lst[j], lst[j + 1] = lst[j + 1], lst[j] - - result = lst.copy() - return result - - -@codeflash_behavior_async -async def async_error_function(lst): - \"\"\"Async function that raises an error for testing.\"\"\" - await asyncio.sleep(0.001) # Small delay - raise ValueError("Test error") -""" - assert expected_instrumented_source == instrumented_source + # Check that the behavior decorator was applied with tests_project_root parameter + assert '@codeflash_behavior_async(tests_project_root = Path(' in instrumented_source + assert 'from pathlib import Path' in instrumented_source + assert 'from codeflash.code_utils.codeflash_wrap_decorator import' in instrumented_source + assert 'async def async_error_function(lst):' in instrumented_source fto_path.write_text(instrumented_source, "utf-8") instrument_codeflash_capture(func, {}, tests_root) @@ -531,7 +493,7 @@ async def test_async_multi(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, TestingMode.BEHAVIOR + fto_path, func, tests_root, TestingMode.BEHAVIOR ) assert source_success @@ -642,7 +604,7 @@ async def test_async_edge_cases(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, TestingMode.BEHAVIOR + fto_path, func, tests_root, TestingMode.BEHAVIOR ) assert source_success @@ -826,6 +788,7 @@ def test_sync_sort(): results_list = test_results.test_results assert results_list[0].id.function_getting_tested == "sync_sorter" assert results_list[0].id.iteration_id == "1_0" + assert results_list[0].id.test_module_path == "code_to_optimize.tests.pytest.test_sync_in_async_temp" assert results_list[0].id.test_class_name is None assert results_list[0].id.test_function_name == "test_sync_sort" assert results_list[0].did_pass @@ -837,6 +800,7 @@ def test_sync_sort(): if len(results_list) > 1: assert results_list[1].id.function_getting_tested == "sync_sorter" assert results_list[1].id.iteration_id == "4_0" + assert results_list[1].id.test_module_path == "code_to_optimize.tests.pytest.test_sync_in_async_temp" assert results_list[1].id.test_function_name == "test_sync_sort" assert results_list[1].did_pass @@ -935,7 +899,7 @@ async def test_mixed_sorting(): async_func = FunctionToOptimize(function_name="async_merge_sort", parents=[], file_path=Path(mixed_fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - mixed_fto_path, async_func, TestingMode.BEHAVIOR + mixed_fto_path, async_func, tests_root, TestingMode.BEHAVIOR ) assert source_success diff --git a/tests/test_async_wrapper_sqlite_validation.py b/tests/test_async_wrapper_sqlite_validation.py index 4386ba5ab..0ffed7805 100644 --- a/tests/test_async_wrapper_sqlite_validation.py +++ b/tests/test_async_wrapper_sqlite_validation.py @@ -52,7 +52,7 @@ def temp_db_path(self, test_env_setup): @pytest.mark.asyncio async def test_behavior_async_basic_function(self, test_env_setup, temp_db_path): - @codeflash_behavior_async + @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) async def 
simple_async_add(a: int, b: int) -> int: await asyncio.sleep(0.001) return a + b @@ -100,7 +100,7 @@ async def simple_async_add(a: int, b: int) -> int: @pytest.mark.asyncio async def test_behavior_async_exception_handling(self, test_env_setup, temp_db_path): - @codeflash_behavior_async + @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) async def async_divide(a: int, b: int) -> float: await asyncio.sleep(0.001) if b == 0: @@ -138,7 +138,7 @@ async def async_divide(a: int, b: int) -> float: async def test_performance_async_no_database_storage(self, test_env_setup, temp_db_path, capsys): """Test performance async decorator doesn't store to database.""" - @codeflash_performance_async + @codeflash_performance_async(tests_project_root=Path("/tmp/tests")) async def async_multiply(a: int, b: int) -> int: """Async function for performance testing.""" await asyncio.sleep(0.002) @@ -166,7 +166,7 @@ async def async_multiply(a: int, b: int) -> int: @pytest.mark.asyncio async def test_multiple_calls_indexing(self, test_env_setup, temp_db_path): - @codeflash_behavior_async + @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) async def async_increment(value: int) -> int: await asyncio.sleep(0.001) return value + 1 @@ -203,7 +203,7 @@ async def async_increment(value: int) -> int: @pytest.mark.asyncio async def test_complex_async_function_with_kwargs(self, test_env_setup, temp_db_path): - @codeflash_behavior_async + @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) async def complex_async_func( pos_arg: str, *args: int, @@ -251,7 +251,7 @@ async def complex_async_func( @pytest.mark.asyncio async def test_database_schema_validation(self, test_env_setup, temp_db_path): - @codeflash_behavior_async + @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) async def schema_test_func() -> str: return "schema_test" @@ -281,7 +281,7 @@ async def schema_test_func() -> str: def test_sync_test_context_extraction(self): from codeflash.code_utils.codeflash_wrap_decorator import extract_test_context_from_frame - test_module, test_class, test_func = extract_test_context_from_frame() + test_module, test_class, test_func = extract_test_context_from_frame(Path("/tmp/tests")) assert test_module == __name__ assert test_class == "TestAsyncWrapperSQLiteValidation" assert test_func == "test_sync_test_context_extraction" diff --git a/tests/test_extract_test_context_from_frame.py b/tests/test_extract_test_context_from_frame.py index f33a65fa6..31f5e5142 100644 --- a/tests/test_extract_test_context_from_frame.py +++ b/tests/test_extract_test_context_from_frame.py @@ -1,5 +1,6 @@ from __future__ import annotations +from pathlib import Path from unittest.mock import Mock, patch import pytest @@ -127,7 +128,7 @@ class TestExtractTestContextFromFrame: def test_direct_test_function_call(self): def test_example_function(): - return extract_test_context_from_frame() + return extract_test_context_from_frame(Path("/tmp/tests")) result = test_example_function() module_name, class_name, function_name = result @@ -139,7 +140,7 @@ def test_example_function(): def test_with_test_class_method(self): class TestExampleClass: def test_method(self): - return extract_test_context_from_frame() + return extract_test_context_from_frame(Path("/tmp/tests")) instance = TestExampleClass() result = instance.test_method() @@ -150,7 +151,7 @@ def test_method(self): assert function_name == "test_method" def test_function_without_test_prefix(self): - result = extract_test_context_from_frame() + result = 
extract_test_context_from_frame(Path("/tmp/tests")) module_name, class_name, function_name = result assert module_name == __name__ @@ -169,12 +170,12 @@ def test_no_test_context_raises_runtime_error(self, mock_current_frame): mock_current_frame.return_value = mock_frame with pytest.raises(RuntimeError, match="No test function found in call stack"): - extract_test_context_from_frame() + extract_test_context_from_frame(Path("/tmp/tests")) def test_real_call_stack_context(self): def nested_function(): def deeper_function(): - return extract_test_context_from_frame() + return extract_test_context_from_frame(Path("/tmp/tests")) return deeper_function() result = nested_function() @@ -191,7 +192,7 @@ class TestIntegrationScenarios: def test_pytest_class_method_scenario(self): class TestExampleIntegration: def test_integration_method(self): - return extract_test_context_from_frame() + return extract_test_context_from_frame(Path("/tmp/tests")) instance = TestExampleIntegration() result = instance.test_integration_method() @@ -205,7 +206,7 @@ def test_nested_helper_functions(self): def outer_helper(): def inner_helper(): def deepest_helper(): - return extract_test_context_from_frame() + return extract_test_context_from_frame(Path("/tmp/tests")) return deepest_helper() return inner_helper() diff --git a/tests/test_instrument_async_tests.py b/tests/test_instrument_async_tests.py index 97c4dd659..2025f781a 100644 --- a/tests/test_instrument_async_tests.py +++ b/tests/test_instrument_async_tests.py @@ -30,12 +30,13 @@ async def async_function(x: int, y: int) -> int: expected_decorated_code = ''' import asyncio +from pathlib import Path from codeflash.code_utils.codeflash_wrap_decorator import \\ codeflash_behavior_async -@codeflash_behavior_async +@codeflash_behavior_async(tests_project_root = Path(r"/tmp/tests")) async def async_function(x: int, y: int) -> int: """Simple async function for testing.""" await asyncio.sleep(0.01) @@ -49,8 +50,9 @@ async def async_function(x: int, y: int) -> int: is_async=True ) + tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - async_function_code, func, TestingMode.BEHAVIOR + async_function_code, func, tests_root, TestingMode.BEHAVIOR ) assert decorator_added @@ -69,12 +71,13 @@ async def async_function(x: int, y: int) -> int: expected_decorated_code = ''' import asyncio +from pathlib import Path from codeflash.code_utils.codeflash_wrap_decorator import \\ codeflash_performance_async -@codeflash_performance_async +@codeflash_performance_async(tests_project_root = Path(r"/tmp/tests")) async def async_function(x: int, y: int) -> int: """Simple async function for testing.""" await asyncio.sleep(0.01) @@ -88,8 +91,9 @@ async def async_function(x: int, y: int) -> int: is_async=True ) + tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - async_function_code, func, TestingMode.PERFORMANCE + async_function_code, func, tests_root, TestingMode.PERFORMANCE ) assert decorator_added @@ -115,6 +119,7 @@ def sync_method(self, a: int, b: int) -> int: expected_decorated_code = ''' import asyncio +from pathlib import Path from codeflash.code_utils.codeflash_wrap_decorator import \\ codeflash_behavior_async @@ -123,7 +128,7 @@ def sync_method(self, a: int, b: int) -> int: class Calculator: """Test class with async methods.""" - @codeflash_behavior_async + @codeflash_behavior_async(tests_project_root = Path(r"/tmp/tests")) async def async_method(self, a: int, b: int) -> int: """Async method in 
class.""" await asyncio.sleep(0.005) @@ -141,8 +146,9 @@ def sync_method(self, a: int, b: int) -> int: is_async=True ) + tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - async_class_code, func, TestingMode.BEHAVIOR + async_class_code, func, tests_root, TestingMode.BEHAVIOR ) assert decorator_added @@ -182,8 +188,9 @@ async def async_function(x: int, y: int) -> int: is_async=True ) + tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - already_decorated_code, func, TestingMode.BEHAVIOR + already_decorated_code, func, tests_root, TestingMode.BEHAVIOR ) assert not decorator_added @@ -231,7 +238,7 @@ async def test_async_function(): # First instrument the source module from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - source_file, func, TestingMode.BEHAVIOR + source_file, func, temp_dir, TestingMode.BEHAVIOR ) assert source_success @@ -294,7 +301,7 @@ async def test_async_function(): # First instrument the source module from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - source_file, func, TestingMode.PERFORMANCE + source_file, func, temp_dir, TestingMode.PERFORMANCE ) assert source_success @@ -365,7 +372,7 @@ async def test_mixed_functions(): from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - source_file, async_func, TestingMode.BEHAVIOR + source_file, async_func, temp_dir, TestingMode.BEHAVIOR ) assert source_success From f872d5d9260ef80d775ac2287b87975eed5bb9da Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 16 Sep 2025 14:33:28 -0700 Subject: [PATCH 04/40] Update codeflash.code-workspace --- codeflash.code-workspace | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash.code-workspace b/codeflash.code-workspace index e84ca9084..a07674eb7 100644 --- a/codeflash.code-workspace +++ b/codeflash.code-workspace @@ -73,7 +73,7 @@ "--file", "src/async_examples/concurrency.py", "--function", - "retry_with_backoff", + "task", "--verbose" ], "cwd": "${input:chooseCwd}", From ee1d386bbb409adc05c2842f6fb4f37a8762ea3d Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 16 Sep 2025 15:11:50 -0700 Subject: [PATCH 05/40] Revert "add tests_project_root to the wrappers" This reverts commit 96211b8985e76bf44cb0882070cd21354182411a. 
--- .../code_utils/codeflash_wrap_decorator.py | 282 +++++++++--------- .../code_utils/instrument_existing_tests.py | 77 ++--- codeflash/optimization/function_optimizer.py | 8 +- tests/test_async_run_and_parse_tests.py | 96 ++++-- tests/test_async_wrapper_sqlite_validation.py | 14 +- tests/test_extract_test_context_from_frame.py | 15 +- tests/test_instrument_async_tests.py | 27 +- 7 files changed, 247 insertions(+), 272 deletions(-) diff --git a/codeflash/code_utils/codeflash_wrap_decorator.py b/codeflash/code_utils/codeflash_wrap_decorator.py index f49c5e5a4..cb4da64a0 100644 --- a/codeflash/code_utils/codeflash_wrap_decorator.py +++ b/codeflash/code_utils/codeflash_wrap_decorator.py @@ -6,8 +6,9 @@ import inspect import os import sqlite3 +import time from enum import Enum -from functools import lru_cache, wraps +from functools import wraps from pathlib import Path from tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Any, Callable, TypeVar @@ -35,25 +36,6 @@ def get_run_tmp_file(file_path: Path) -> Path: # moved from codeflash/code_util return Path(get_run_tmp_file.tmpdir.name) / file_path -def module_name_from_file_path( - file_path: Path, project_root_path: Path, *, traverse_up: bool = False -) -> str: # moved from codeflash/code_utils/code_utils.py - try: - relative_path = file_path.relative_to(project_root_path) - return relative_path.with_suffix("").as_posix().replace("/", ".") - except ValueError: - if traverse_up: - parent = file_path.parent - while parent not in (project_root_path, parent.parent): - try: - relative_path = file_path.relative_to(parent) - return relative_path.with_suffix("").as_posix().replace("/", ".") - except ValueError: - parent = parent.parent - msg = f"File {file_path} is not within the project root {project_root_path}." - raise ValueError(msg) # noqa: B904 - - def _extract_class_name_tracer(frame_locals: dict[str, Any]) -> str | None: try: self_arg = frame_locals.get("self") @@ -91,9 +73,9 @@ def _get_module_name_cf_tracer(frame: FrameType | None) -> str: return "unknown_module" -@lru_cache(maxsize=32) -def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, str]: +def extract_test_context_from_frame() -> tuple[str, str | None, str]: frame = inspect.currentframe() + # optimize? 
try: frames_info = [] potential_tests = [] @@ -108,7 +90,7 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, filename = frame.f_code.co_filename filename_path = Path(filename) frame_locals = frame.f_locals - test_module_name = module_name_from_file_path(filename_path, tests_project_root) + test_module_name = _get_module_name_cf_tracer(frame) class_name = _extract_class_name_tracer(frame_locals) frames_info.append( @@ -126,6 +108,7 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, continue frame = frame.f_back + # Second pass: analyze frames with full context test_class_candidates = [] for frame_info in frames_info: @@ -139,7 +122,7 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, # Keep track of test classes if class_name and ( class_name.startswith("Test") or class_name.endswith("Test") or "test" in class_name.lower() - ) and not class_name.startswith(("Pytest", "_Pytest")): + ): test_class_candidates.append((class_name, test_module_name)) # Now process frames again looking for test functions with full candidates list @@ -155,15 +138,22 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, # Collect test functions if function_name.startswith("test_"): - test_class_name = class_name or None - if not test_class_name and test_class_candidates: + test_class_name = class_name + + # If no class found in current frame, check if we have any test class candidates + # Prefer the innermost (first) test class candidate which is more specific + if test_class_name is None and test_class_candidates: test_class_name = test_class_candidates[0][0] + test_functions.append((test_module_name, test_class_name, function_name)) + # Prioritize test functions with class context, then innermost if test_functions: + # First prefer test functions with class context for test_func in test_functions: - if test_func[1]: # has non-empty class_name + if test_func[1] is not None: # has class_name return test_func + # If no test function has class context, return the outermost (most likely the actual test method) return test_functions[-1] # If no direct test functions found, look for other test patterns @@ -183,12 +173,14 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, ): if class_name and ( class_name.startswith("Test") or class_name.endswith("Test") or "test" in class_name.lower() - ) and not class_name.startswith(("Pytest", "_Pytest")): + ): potential_tests.append((test_module_name, class_name, function_name)) elif "test" in test_module_name or filename_path.stem.startswith("test_"): + # For functions without class context, try to find the most recent test class best_class = test_class_candidates[0][0] if test_class_candidates else None potential_tests.append((test_module_name, best_class, function_name)) + # Framework integration detection if ( ( function_name in ["runTest", "_runTest", "run", "_testMethodName"] @@ -197,7 +189,6 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, ) and class_name and (class_name.startswith("Test") or "test" in class_name.lower()) - and not class_name.startswith(("Pytest", "_Pytest")) ): test_method = function_name if "self" in frame_locals: @@ -216,132 +207,125 @@ def extract_test_context_from_frame(tests_project_root: Path) -> tuple[str, str, del frame -def codeflash_behavior_async(*, tests_project_root: Path) -> Callable[[F], F]: - def decorator(func: F) -> F: - @wraps(func) - async def 
async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 - loop = asyncio.get_running_loop() - function_name = func.__name__ - line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" - loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - - test_module_name, test_class_name, test_name = extract_test_context_from_frame(tests_project_root) - - test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" +def codeflash_behavior_async(func: F) -> F: + @wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 + loop = asyncio.get_running_loop() + function_name = func.__name__ + line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" + loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) + test_module_name, test_class_name, test_name = extract_test_context_from_frame() - if not hasattr(async_wrapper, "index"): - async_wrapper.index = {} - if test_id in async_wrapper.index: - async_wrapper.index[test_id] += 1 - else: - async_wrapper.index[test_id] = 0 + test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" - codeflash_test_index = async_wrapper.index[test_id] - invocation_id = f"{line_id}_{codeflash_test_index}" - test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" - - print(f"!$######{test_stdout_tag}######$!") + if not hasattr(async_wrapper, "index"): + async_wrapper.index = {} + if test_id in async_wrapper.index: + async_wrapper.index[test_id] += 1 + else: + async_wrapper.index[test_id] = 0 + + codeflash_test_index = async_wrapper.index[test_id] + invocation_id = f"{line_id}_{codeflash_test_index}" + test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' 
if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" + + print(f"!$######{test_stdout_tag}######$!") + + iteration = os.environ.get("CODEFLASH_TEST_ITERATION", "0") + db_path = get_run_tmp_file(Path(f"test_return_values_{iteration}.sqlite")) + codeflash_con = sqlite3.connect(db_path) + codeflash_cur = codeflash_con.cursor() + + codeflash_cur.execute( + "CREATE TABLE IF NOT EXISTS test_results (test_module_path TEXT, test_class_name TEXT, " + "test_function_name TEXT, function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " + "runtime INTEGER, return_value BLOB, verification_type TEXT)" + ) + + exception = None + counter = loop.time() + gc.disable() + try: + ret = func(*args, **kwargs) # coroutine creation has some overhead, though it is very small + counter = loop.time() + return_value = await ret # let's measure the actual execution time of the code + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + except Exception as e: + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + exception = e + finally: + gc.enable() + + print(f"!######{test_stdout_tag}######!") + + pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps((args, kwargs, return_value)) + codeflash_cur.execute( + "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + test_module_name, + test_class_name, + test_name, + function_name, + loop_index, + invocation_id, + codeflash_duration, + pickled_return_value, + VerificationType.FUNCTION_CALL.value, + ), + ) + codeflash_con.commit() + codeflash_con.close() + + if exception: + raise exception + return return_value + + return async_wrapper + + +def codeflash_performance_async(func: F) -> F: + @wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 + loop = asyncio.get_running_loop() + function_name = func.__name__ + line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" + loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) + + test_module_name, test_class_name, test_name = extract_test_context_from_frame() + + test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" + + if not hasattr(async_wrapper, "index"): + async_wrapper.index = {} + if test_id in async_wrapper.index: + async_wrapper.index[test_id] += 1 + else: + async_wrapper.index[test_id] = 0 - iteration = os.environ.get("CODEFLASH_TEST_ITERATION", "0") - db_path = get_run_tmp_file(Path(f"test_return_values_{iteration}.sqlite")) - codeflash_con = sqlite3.connect(db_path) - codeflash_cur = codeflash_con.cursor() + codeflash_test_index = async_wrapper.index[test_id] + invocation_id = f"{line_id}_{codeflash_test_index}" + test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' 
if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" - codeflash_cur.execute( - "CREATE TABLE IF NOT EXISTS test_results (test_module_path TEXT, test_class_name TEXT, " - "test_function_name TEXT, function_getting_tested TEXT, loop_index INTEGER, iteration_id TEXT, " - "runtime INTEGER, return_value BLOB, verification_type TEXT)" - ) + print(f"!$######{test_stdout_tag}######$!") - exception = None + exception = None + counter = loop.time() + gc.disable() + try: + ret = func(*args, **kwargs) counter = loop.time() - gc.disable() - try: - ret = func(*args, **kwargs) # coroutine creation has some overhead, though it is very small - counter = loop.time() - return_value = await ret # let's measure the actual execution time of the code - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - except Exception as e: - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - exception = e - finally: - gc.enable() - - print(f"!######{test_stdout_tag}######!") - - pickled_return_value = pickle.dumps(exception) if exception else pickle.dumps((args, kwargs, return_value)) - codeflash_cur.execute( - "INSERT INTO test_results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", - ( - test_module_name, - test_class_name, - test_name, - function_name, - loop_index, - invocation_id, - codeflash_duration, - pickled_return_value, - VerificationType.FUNCTION_CALL.value, - ), - ) - codeflash_con.commit() - codeflash_con.close() - - if exception: - raise exception - return return_value - - return async_wrapper - - return decorator - - -def codeflash_performance_async(*, tests_project_root: Path) -> Callable[[F], F]: - def decorator(func: F) -> F: - @wraps(func) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 - loop = asyncio.get_running_loop() - function_name = func.__name__ - line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" - loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - - test_module_name, test_class_name, test_name = extract_test_context_from_frame(tests_project_root) - - test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" - - if not hasattr(async_wrapper, "index"): - async_wrapper.index = {} - if test_id in async_wrapper.index: - async_wrapper.index[test_id] += 1 - else: - async_wrapper.index[test_id] = 0 - - codeflash_test_index = async_wrapper.index[test_id] - invocation_id = f"{line_id}_{codeflash_test_index}" - test_stdout_tag = f"{test_module_name}:{(test_class_name + '.' 
if test_class_name else '')}{test_name}:{function_name}:{loop_index}:{invocation_id}" - - print(f"!$######{test_stdout_tag}######$!") - - exception = None - counter = loop.time() - gc.disable() - try: - ret = func(*args, **kwargs) - counter = loop.time() - return_value = await ret - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - except Exception as e: - codeflash_duration = int((loop.time() - counter) * 1_000_000_000) - exception = e - finally: - gc.enable() - - print(f"!######{test_stdout_tag}:{codeflash_duration}######!") + return_value = await ret + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + except Exception as e: + codeflash_duration = int((loop.time() - counter) * 1_000_000_000) + exception = e + finally: + gc.enable() - if exception: - raise exception - return return_value + print(f"!######{test_stdout_tag}:{codeflash_duration}######!") - return async_wrapper + if exception: + raise exception + return return_value - return decorator + return async_wrapper diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index a65a8f27b..be75eac85 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -332,7 +332,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: def instrument_source_module_with_async_decorators( - source_path: Path, function_to_optimize: FunctionToOptimize, tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR + source_path: Path, function_to_optimize: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR ) -> tuple[bool, str | None]: if not function_to_optimize.is_async: return False, None @@ -341,7 +341,7 @@ def instrument_source_module_with_async_decorators( with source_path.open(encoding="utf8") as f: source_code = f.read() - modified_code, decorator_added = add_async_decorator_to_function(source_code, function_to_optimize, tests_project_root, mode) + modified_code, decorator_added = add_async_decorator_to_function(source_code, function_to_optimize, mode) if decorator_added: return True, modified_code @@ -770,20 +770,18 @@ def create_wrapper_function(mode: TestingMode = TestingMode.BEHAVIOR) -> ast.Fun class AsyncDecoratorAdder(cst.CSTTransformer): """Transformer that adds async decorator to async function definitions.""" - def __init__(self, function: FunctionToOptimize, tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR) -> None: + def __init__(self, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR) -> None: """Initialize the transformer. Args: ---- function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. - tests_project_root: The root path for tests, used to compute relative module names. 
""" super().__init__() self.function = function self.mode = mode - self.tests_project_root = tests_project_root self.qualified_name_parts = function.qualified_name.split(".") self.context_stack = [] self.added_decorator = False @@ -816,25 +814,7 @@ def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.Fu # Only add the decorator if it's not already there if not has_decorator: - # Always create parameterized decorator with tests_project_root (required) - if self.tests_project_root is None: - raise ValueError("tests_project_root is required for async decorators") - - decorator_call = cst.Call( - func=cst.Name(value=self.decorator_name), - args=[ - cst.Arg( - keyword=cst.Name("tests_project_root"), - value=cst.Call( - func=cst.Name("Path"), - args=[ - cst.Arg(value=cst.SimpleString(f'r"{self.tests_project_root}"')) - ] - ) - ) - ] - ) - new_decorator = cst.Decorator(decorator=decorator_call) + new_decorator = cst.Decorator(decorator=cst.Name(value=self.decorator_name)) # Add our new decorator to the existing decorators updated_decorators = [new_decorator, *list(updated_node.decorators)] @@ -868,7 +848,6 @@ class AsyncDecoratorImportAdder(cst.CSTTransformer): def __init__(self, mode: TestingMode = TestingMode.BEHAVIOR) -> None: self.mode = mode self.has_import = False - self.has_path_import = False def visit_ImportFrom(self, node: cst.ImportFrom) -> None: # Check if the async decorator import is already present @@ -887,41 +866,26 @@ def visit_ImportFrom(self, node: cst.ImportFrom) -> None: for import_alias in node.names: if import_alias.name.value == decorator_name: self.has_import = True - - # Check if Path is already imported from pathlib - if ( - isinstance(node.module, cst.Name) - and node.module.value == "pathlib" - and not isinstance(node.names, cst.ImportStar) - ): - for import_alias in node.names: - if import_alias.name.value == "Path": - self.has_path_import = True def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> cst.Module: # noqa: ARG002 - new_imports = [] - - # Add decorator import if not present - if not self.has_import: - decorator_name = ( - "codeflash_behavior_async" if self.mode == TestingMode.BEHAVIOR else "codeflash_performance_async" - ) - decorator_import = cst.parse_statement(f"from codeflash.code_utils.codeflash_wrap_decorator import {decorator_name}") - new_imports.append(decorator_import) - - # Always add Path import if not present (required for parameterized decorators) - if not self.has_path_import: - path_import = cst.parse_statement("from pathlib import Path") - new_imports.append(path_import) - - if new_imports: - return updated_node.with_changes(body=[*new_imports, *list(updated_node.body)]) - - return updated_node + # If the import is already there, don't add it again + if self.has_import: + return updated_node + + # Choose import based on mode + decorator_name = ( + "codeflash_behavior_async" if self.mode == TestingMode.BEHAVIOR else "codeflash_performance_async" + ) + + # Parse the import statement into a CST node + import_node = cst.parse_statement(f"from codeflash.code_utils.codeflash_wrap_decorator import {decorator_name}") + + # Add the import to the module's body + return updated_node.with_changes(body=[import_node, *list(updated_node.body)]) def add_async_decorator_to_function( - source_code: str, function: FunctionToOptimize, tests_project_root: Path, mode: TestingMode = TestingMode.BEHAVIOR + source_code: str, function: FunctionToOptimize, mode: TestingMode = TestingMode.BEHAVIOR ) -> tuple[str, 
bool]: """Add async decorator to an async function definition. @@ -930,7 +894,6 @@ def add_async_decorator_to_function( source_code: The source code to modify. function: The FunctionToOptimize object representing the target async function. mode: The testing mode to determine which decorator to apply. - tests_project_root: The root path for tests, used to compute relative module names. Returns: ------- @@ -944,7 +907,7 @@ def add_async_decorator_to_function( module = cst.parse_module(source_code) # Add the decorator to the function - decorator_transformer = AsyncDecoratorAdder(function, tests_project_root, mode) + decorator_transformer = AsyncDecoratorAdder(function, mode) module = module.visit(decorator_transformer) # Add the import if decorator was added diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 46190c544..99f6d42f0 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1376,7 +1376,7 @@ def establish_original_code_baseline( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.BEHAVIOR + self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: @@ -1421,7 +1421,7 @@ def establish_original_code_baseline( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.PERFORMANCE + self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: @@ -1551,7 +1551,7 @@ def run_optimized_candidate( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.BEHAVIOR + self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: @@ -1600,7 +1600,7 @@ def run_optimized_candidate( ) success, instrumented_source = instrument_source_module_with_async_decorators( - self.function_to_optimize.file_path, self.function_to_optimize, self.test_cfg.tests_root, TestingMode.PERFORMANCE + self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.PERFORMANCE ) if success and instrumented_source: with self.function_to_optimize.file_path.open("w", encoding="utf8") as f: diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index 44b4beaed..1c5ddae63 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -48,16 +48,12 @@ async def test_async_sort(): # For async functions, instrument the source module directly with decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, tests_root, TestingMode.BEHAVIOR + fto_path, func, TestingMode.BEHAVIOR ) assert source_success assert instrumented_source is not None - # Check that the decorator was applied with tests_project_root parameter - assert '@codeflash_behavior_async(tests_project_root = Path(' in 
instrumented_source - assert 'from pathlib import Path' in instrumented_source - assert 'from codeflash.code_utils.codeflash_wrap_decorator import' in instrumented_source - assert 'async def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:' in instrumented_source + assert '''import asyncio\nfrom typing import List, Union\n\nfrom codeflash.code_utils.codeflash_wrap_decorator import \\\n codeflash_behavior_async\n\n\n@codeflash_behavior_async\nasync def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation for testing.\n """\n print("codeflash stdout: Async sorting list")\n \n await asyncio.sleep(0.01)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n print(f"result: {result}")\n return result\n\n\nclass AsyncBubbleSorter:\n """Class with async sorting method for testing."""\n \n async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation within a class.\n """\n print("codeflash stdout: AsyncBubbleSorter.sorter() called")\n \n # Add some async delay\n await asyncio.sleep(0.005)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n return result\n''' in instrumented_source # Write the instrumented source back fto_path.write_text(instrumented_source, "utf-8") @@ -111,8 +107,7 @@ async def test_async_sort(): results_list = test_results.test_results assert results_list[0].id.function_getting_tested == "async_sorter" - assert results_list[0].id.test_module_path == "test_async_bubble_sort_temp" - assert results_list[0].id.test_class_name is None + assert results_list[0].id.test_class_name == "PytestPluginManager" assert results_list[0].id.test_function_name == "test_async_sort" assert results_list[0].did_pass assert results_list[0].runtime is None or results_list[0].runtime >= 0 @@ -122,8 +117,6 @@ async def test_async_sort(): assert results_list[1].id.function_getting_tested == "async_sorter" - assert results_list[1].id.test_module_path == "test_async_bubble_sort_temp" - assert results_list[1].id.test_class_name is None assert results_list[1].id.test_function_name == "test_async_sort" assert results_list[1].did_pass @@ -178,7 +171,7 @@ async def test_async_class_sort(): ) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, tests_root, TestingMode.BEHAVIOR + fto_path, func, TestingMode.BEHAVIOR ) assert source_success @@ -240,8 +233,7 @@ async def test_async_class_sort(): assert sorter_result.id.function_getting_tested == "sorter" - assert sorter_result.id.test_module_path == "test_async_class_bubble_sort_temp" - assert sorter_result.id.test_class_name is None + assert sorter_result.id.test_class_name == "PytestPluginManager" assert sorter_result.id.test_function_name == "test_async_class_sort" assert sorter_result.did_pass assert sorter_result.runtime is None or sorter_result.runtime >= 0 @@ -288,16 +280,12 @@ async def test_async_perf(): # Instrument the source module with async performance decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, tests_root, TestingMode.PERFORMANCE + fto_path, func, TestingMode.PERFORMANCE ) assert source_success assert instrumented_source is not None - # Check that the performance decorator 
was applied with tests_project_root parameter - assert '@codeflash_performance_async(tests_project_root = Path(' in instrumented_source - assert 'from pathlib import Path' in instrumented_source - assert 'from codeflash.code_utils.codeflash_wrap_decorator import' in instrumented_source - assert 'async def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:' in instrumented_source + assert '''import asyncio\nfrom typing import List, Union\n\nfrom codeflash.code_utils.codeflash_wrap_decorator import \\\n codeflash_performance_async\n\n\n@codeflash_performance_async\nasync def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation for testing.\n """\n print("codeflash stdout: Async sorting list")\n \n await asyncio.sleep(0.01)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n print(f"result: {result}")\n return result\n\n\nclass AsyncBubbleSorter:\n """Class with async sorting method for testing."""\n \n async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]:\n """\n Async bubble sort implementation within a class.\n """\n print("codeflash stdout: AsyncBubbleSorter.sorter() called")\n \n # Add some async delay\n await asyncio.sleep(0.005)\n \n n = len(lst)\n for i in range(n):\n for j in range(0, n - i - 1):\n if lst[j] > lst[j + 1]:\n lst[j], lst[j + 1] = lst[j + 1], lst[j]\n \n result = lst.copy()\n return result\n''' == instrumented_source fto_path.write_text(instrumented_source, "utf-8") @@ -391,17 +379,67 @@ async def async_error_function(lst): func = FunctionToOptimize(function_name="async_error_function", parents=[], file_path=Path(fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, tests_root, TestingMode.BEHAVIOR + fto_path, func, TestingMode.BEHAVIOR ) assert source_success assert instrumented_source is not None - # Check that the behavior decorator was applied with tests_project_root parameter - assert '@codeflash_behavior_async(tests_project_root = Path(' in instrumented_source - assert 'from pathlib import Path' in instrumented_source - assert 'from codeflash.code_utils.codeflash_wrap_decorator import' in instrumented_source - assert 'async def async_error_function(lst):' in instrumented_source + expected_instrumented_source = """import asyncio +from typing import List, Union + +from codeflash.code_utils.codeflash_wrap_decorator import \\ + codeflash_behavior_async + + +async def async_sorter(lst: List[Union[int, float]]) -> List[Union[int, float]]: + \"\"\" + Async bubble sort implementation for testing. + \"\"\" + print("codeflash stdout: Async sorting list") + + await asyncio.sleep(0.01) + + n = len(lst) + for i in range(n): + for j in range(0, n - i - 1): + if lst[j] > lst[j + 1]: + lst[j], lst[j + 1] = lst[j + 1], lst[j] + + result = lst.copy() + print(f"result: {result}") + return result + + +class AsyncBubbleSorter: + \"\"\"Class with async sorting method for testing.\"\"\" + + async def sorter(self, lst: List[Union[int, float]]) -> List[Union[int, float]]: + \"\"\" + Async bubble sort implementation within a class. 
+ \"\"\" + print("codeflash stdout: AsyncBubbleSorter.sorter() called") + + # Add some async delay + await asyncio.sleep(0.005) + + n = len(lst) + for i in range(n): + for j in range(0, n - i - 1): + if lst[j] > lst[j + 1]: + lst[j], lst[j + 1] = lst[j + 1], lst[j] + + result = lst.copy() + return result + + +@codeflash_behavior_async +async def async_error_function(lst): + \"\"\"Async function that raises an error for testing.\"\"\" + await asyncio.sleep(0.001) # Small delay + raise ValueError("Test error") +""" + assert expected_instrumented_source == instrumented_source fto_path.write_text(instrumented_source, "utf-8") instrument_codeflash_capture(func, {}, tests_root) @@ -493,7 +531,7 @@ async def test_async_multi(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, tests_root, TestingMode.BEHAVIOR + fto_path, func, TestingMode.BEHAVIOR ) assert source_success @@ -604,7 +642,7 @@ async def test_async_edge_cases(): func = FunctionToOptimize(function_name="async_sorter", parents=[], file_path=Path(fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - fto_path, func, tests_root, TestingMode.BEHAVIOR + fto_path, func, TestingMode.BEHAVIOR ) assert source_success @@ -788,7 +826,6 @@ def test_sync_sort(): results_list = test_results.test_results assert results_list[0].id.function_getting_tested == "sync_sorter" assert results_list[0].id.iteration_id == "1_0" - assert results_list[0].id.test_module_path == "code_to_optimize.tests.pytest.test_sync_in_async_temp" assert results_list[0].id.test_class_name is None assert results_list[0].id.test_function_name == "test_sync_sort" assert results_list[0].did_pass @@ -800,7 +837,6 @@ def test_sync_sort(): if len(results_list) > 1: assert results_list[1].id.function_getting_tested == "sync_sorter" assert results_list[1].id.iteration_id == "4_0" - assert results_list[1].id.test_module_path == "code_to_optimize.tests.pytest.test_sync_in_async_temp" assert results_list[1].id.test_function_name == "test_sync_sort" assert results_list[1].did_pass @@ -899,7 +935,7 @@ async def test_mixed_sorting(): async_func = FunctionToOptimize(function_name="async_merge_sort", parents=[], file_path=Path(mixed_fto_path), is_async=True) source_success, instrumented_source = instrument_source_module_with_async_decorators( - mixed_fto_path, async_func, tests_root, TestingMode.BEHAVIOR + mixed_fto_path, async_func, TestingMode.BEHAVIOR ) assert source_success diff --git a/tests/test_async_wrapper_sqlite_validation.py b/tests/test_async_wrapper_sqlite_validation.py index 0ffed7805..4386ba5ab 100644 --- a/tests/test_async_wrapper_sqlite_validation.py +++ b/tests/test_async_wrapper_sqlite_validation.py @@ -52,7 +52,7 @@ def temp_db_path(self, test_env_setup): @pytest.mark.asyncio async def test_behavior_async_basic_function(self, test_env_setup, temp_db_path): - @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) + @codeflash_behavior_async async def simple_async_add(a: int, b: int) -> int: await asyncio.sleep(0.001) return a + b @@ -100,7 +100,7 @@ async def simple_async_add(a: int, b: int) -> int: @pytest.mark.asyncio async def test_behavior_async_exception_handling(self, test_env_setup, temp_db_path): - @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) + @codeflash_behavior_async async def async_divide(a: int, b: int) -> float: 
await asyncio.sleep(0.001) if b == 0: @@ -138,7 +138,7 @@ async def async_divide(a: int, b: int) -> float: async def test_performance_async_no_database_storage(self, test_env_setup, temp_db_path, capsys): """Test performance async decorator doesn't store to database.""" - @codeflash_performance_async(tests_project_root=Path("/tmp/tests")) + @codeflash_performance_async async def async_multiply(a: int, b: int) -> int: """Async function for performance testing.""" await asyncio.sleep(0.002) @@ -166,7 +166,7 @@ async def async_multiply(a: int, b: int) -> int: @pytest.mark.asyncio async def test_multiple_calls_indexing(self, test_env_setup, temp_db_path): - @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) + @codeflash_behavior_async async def async_increment(value: int) -> int: await asyncio.sleep(0.001) return value + 1 @@ -203,7 +203,7 @@ async def async_increment(value: int) -> int: @pytest.mark.asyncio async def test_complex_async_function_with_kwargs(self, test_env_setup, temp_db_path): - @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) + @codeflash_behavior_async async def complex_async_func( pos_arg: str, *args: int, @@ -251,7 +251,7 @@ async def complex_async_func( @pytest.mark.asyncio async def test_database_schema_validation(self, test_env_setup, temp_db_path): - @codeflash_behavior_async(tests_project_root=Path("/tmp/tests")) + @codeflash_behavior_async async def schema_test_func() -> str: return "schema_test" @@ -281,7 +281,7 @@ async def schema_test_func() -> str: def test_sync_test_context_extraction(self): from codeflash.code_utils.codeflash_wrap_decorator import extract_test_context_from_frame - test_module, test_class, test_func = extract_test_context_from_frame(Path("/tmp/tests")) + test_module, test_class, test_func = extract_test_context_from_frame() assert test_module == __name__ assert test_class == "TestAsyncWrapperSQLiteValidation" assert test_func == "test_sync_test_context_extraction" diff --git a/tests/test_extract_test_context_from_frame.py b/tests/test_extract_test_context_from_frame.py index 31f5e5142..f33a65fa6 100644 --- a/tests/test_extract_test_context_from_frame.py +++ b/tests/test_extract_test_context_from_frame.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path from unittest.mock import Mock, patch import pytest @@ -128,7 +127,7 @@ class TestExtractTestContextFromFrame: def test_direct_test_function_call(self): def test_example_function(): - return extract_test_context_from_frame(Path("/tmp/tests")) + return extract_test_context_from_frame() result = test_example_function() module_name, class_name, function_name = result @@ -140,7 +139,7 @@ def test_example_function(): def test_with_test_class_method(self): class TestExampleClass: def test_method(self): - return extract_test_context_from_frame(Path("/tmp/tests")) + return extract_test_context_from_frame() instance = TestExampleClass() result = instance.test_method() @@ -151,7 +150,7 @@ def test_method(self): assert function_name == "test_method" def test_function_without_test_prefix(self): - result = extract_test_context_from_frame(Path("/tmp/tests")) + result = extract_test_context_from_frame() module_name, class_name, function_name = result assert module_name == __name__ @@ -170,12 +169,12 @@ def test_no_test_context_raises_runtime_error(self, mock_current_frame): mock_current_frame.return_value = mock_frame with pytest.raises(RuntimeError, match="No test function found in call stack"): - extract_test_context_from_frame(Path("/tmp/tests")) + 
extract_test_context_from_frame() def test_real_call_stack_context(self): def nested_function(): def deeper_function(): - return extract_test_context_from_frame(Path("/tmp/tests")) + return extract_test_context_from_frame() return deeper_function() result = nested_function() @@ -192,7 +191,7 @@ class TestIntegrationScenarios: def test_pytest_class_method_scenario(self): class TestExampleIntegration: def test_integration_method(self): - return extract_test_context_from_frame(Path("/tmp/tests")) + return extract_test_context_from_frame() instance = TestExampleIntegration() result = instance.test_integration_method() @@ -206,7 +205,7 @@ def test_nested_helper_functions(self): def outer_helper(): def inner_helper(): def deepest_helper(): - return extract_test_context_from_frame(Path("/tmp/tests")) + return extract_test_context_from_frame() return deepest_helper() return inner_helper() diff --git a/tests/test_instrument_async_tests.py b/tests/test_instrument_async_tests.py index 2025f781a..97c4dd659 100644 --- a/tests/test_instrument_async_tests.py +++ b/tests/test_instrument_async_tests.py @@ -30,13 +30,12 @@ async def async_function(x: int, y: int) -> int: expected_decorated_code = ''' import asyncio -from pathlib import Path from codeflash.code_utils.codeflash_wrap_decorator import \\ codeflash_behavior_async -@codeflash_behavior_async(tests_project_root = Path(r"/tmp/tests")) +@codeflash_behavior_async async def async_function(x: int, y: int) -> int: """Simple async function for testing.""" await asyncio.sleep(0.01) @@ -50,9 +49,8 @@ async def async_function(x: int, y: int) -> int: is_async=True ) - tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - async_function_code, func, tests_root, TestingMode.BEHAVIOR + async_function_code, func, TestingMode.BEHAVIOR ) assert decorator_added @@ -71,13 +69,12 @@ async def async_function(x: int, y: int) -> int: expected_decorated_code = ''' import asyncio -from pathlib import Path from codeflash.code_utils.codeflash_wrap_decorator import \\ codeflash_performance_async -@codeflash_performance_async(tests_project_root = Path(r"/tmp/tests")) +@codeflash_performance_async async def async_function(x: int, y: int) -> int: """Simple async function for testing.""" await asyncio.sleep(0.01) @@ -91,9 +88,8 @@ async def async_function(x: int, y: int) -> int: is_async=True ) - tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - async_function_code, func, tests_root, TestingMode.PERFORMANCE + async_function_code, func, TestingMode.PERFORMANCE ) assert decorator_added @@ -119,7 +115,6 @@ def sync_method(self, a: int, b: int) -> int: expected_decorated_code = ''' import asyncio -from pathlib import Path from codeflash.code_utils.codeflash_wrap_decorator import \\ codeflash_behavior_async @@ -128,7 +123,7 @@ def sync_method(self, a: int, b: int) -> int: class Calculator: """Test class with async methods.""" - @codeflash_behavior_async(tests_project_root = Path(r"/tmp/tests")) + @codeflash_behavior_async async def async_method(self, a: int, b: int) -> int: """Async method in class.""" await asyncio.sleep(0.005) @@ -146,9 +141,8 @@ def sync_method(self, a: int, b: int) -> int: is_async=True ) - tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - async_class_code, func, tests_root, TestingMode.BEHAVIOR + async_class_code, func, TestingMode.BEHAVIOR ) assert decorator_added @@ -188,9 +182,8 @@ async def async_function(x: int, y: 
int) -> int: is_async=True ) - tests_root = Path("/tmp/tests") modified_code, decorator_added = add_async_decorator_to_function( - already_decorated_code, func, tests_root, TestingMode.BEHAVIOR + already_decorated_code, func, TestingMode.BEHAVIOR ) assert not decorator_added @@ -238,7 +231,7 @@ async def test_async_function(): # First instrument the source module from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - source_file, func, temp_dir, TestingMode.BEHAVIOR + source_file, func, TestingMode.BEHAVIOR ) assert source_success @@ -301,7 +294,7 @@ async def test_async_function(): # First instrument the source module from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - source_file, func, temp_dir, TestingMode.PERFORMANCE + source_file, func, TestingMode.PERFORMANCE ) assert source_success @@ -372,7 +365,7 @@ async def test_mixed_functions(): from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators source_success, instrumented_source = instrument_source_module_with_async_decorators( - source_file, async_func, temp_dir, TestingMode.BEHAVIOR + source_file, async_func, TestingMode.BEHAVIOR ) assert source_success From b858881eb6c42f6c5d4b0a93d39f472ff3ca6f87 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 16 Sep 2025 15:16:48 -0700 Subject: [PATCH 06/40] Update test_runner.py --- codeflash/verification/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py index 85e347641..7cb2d2c3f 100644 --- a/codeflash/verification/test_runner.py +++ b/codeflash/verification/test_runner.py @@ -98,7 +98,7 @@ def run_behavioral_tests( coverage_cmd.extend(shlex.split(pytest_cmd, posix=IS_POSIX)[1:]) blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"] - + logger.info(f"{' '.join(coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files)}") results = execute_test_subprocess( coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files, cwd=cwd, From a42c63abb0c581a015de49f4fe292c1f36dc53c5 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 16 Sep 2025 18:50:42 -0700 Subject: [PATCH 07/40] Delete test_extract_test_context_from_frame.py --- tests/test_extract_test_context_from_frame.py | 217 ------------------ 1 file changed, 217 deletions(-) delete mode 100644 tests/test_extract_test_context_from_frame.py diff --git a/tests/test_extract_test_context_from_frame.py b/tests/test_extract_test_context_from_frame.py deleted file mode 100644 index f33a65fa6..000000000 --- a/tests/test_extract_test_context_from_frame.py +++ /dev/null @@ -1,217 +0,0 @@ -from __future__ import annotations - -from unittest.mock import Mock, patch - -import pytest - -from codeflash.code_utils.codeflash_wrap_decorator import ( - _extract_class_name_tracer, - _get_module_name_cf_tracer, - extract_test_context_from_frame, -) - - -@pytest.fixture -def mock_instance(): - mock_obj = Mock() - mock_obj.__class__.__name__ = "TestClassName" - return mock_obj - - -@pytest.fixture -def mock_class(): - mock_cls = Mock() - mock_cls.__name__ = "TestClassMethod" - return mock_cls - - -class TestExtractClassNameTracer: - - def 
test_extract_class_name_with_self(self, mock_instance): - frame_locals = {"self": mock_instance} - result = _extract_class_name_tracer(frame_locals) - - assert result == "TestClassName" - - def test_extract_class_name_with_cls(self, mock_class): - frame_locals = {"cls": mock_class} - result = _extract_class_name_tracer(frame_locals) - - assert result == "TestClassMethod" - - def test_extract_class_name_self_no_class(self, mock_class): - class NoClassMock: - @property - def __class__(self): - raise AttributeError("no __class__ attribute") - - mock_instance = NoClassMock() - frame_locals = {"self": mock_instance, "cls": mock_class} - result = _extract_class_name_tracer(frame_locals) - - assert result == "TestClassMethod" - - def test_extract_class_name_no_self_or_cls(self): - frame_locals = {"some_var": "value"} - result = _extract_class_name_tracer(frame_locals) - - assert result is None - - def test_extract_class_name_exception_handling(self): - class ExceptionMock: - @property - def __class__(self): - raise Exception("Test exception") - - mock_instance = ExceptionMock() - frame_locals = {"self": mock_instance} - result = _extract_class_name_tracer(frame_locals) - - assert result is None - - def test_extract_class_name_with_attribute_error(self): - class AttributeErrorMock: - @property - def __class__(self): - raise AttributeError("Wrapt-like error") - - mock_instance = AttributeErrorMock() - frame_locals = {"self": mock_instance} - result = _extract_class_name_tracer(frame_locals) - - assert result is None - - -class TestGetModuleNameCfTracer: - - def test_get_module_name_with_valid_frame(self): - mock_frame = Mock() - mock_module = Mock() - mock_module.__name__ = "test_module_name" - - with patch("inspect.getmodule", return_value=mock_module): - result = _get_module_name_cf_tracer(mock_frame) - assert result == "test_module_name" - - def test_get_module_name_from_frame_globals(self): - mock_frame = Mock() - mock_frame.f_globals = {"__name__": "module_from_globals"} - - with patch("inspect.getmodule", side_effect=Exception("Module not found")): - result = _get_module_name_cf_tracer(mock_frame) - assert result == "module_from_globals" - - def test_get_module_name_no_name_in_globals(self): - mock_frame = Mock() - mock_frame.f_globals = {} - - with patch("inspect.getmodule", side_effect=Exception("Module not found")): - result = _get_module_name_cf_tracer(mock_frame) - assert result == "unknown_module" - - def test_get_module_name_none_frame(self): - result = _get_module_name_cf_tracer(None) - assert result == "unknown_module" - - def test_get_module_name_module_no_name_attribute(self): - mock_frame = Mock() - mock_module = Mock(spec=[]) - mock_frame.f_globals = {"__name__": "fallback_name"} - - with patch("inspect.getmodule", return_value=mock_module): - result = _get_module_name_cf_tracer(mock_frame) - assert result == "fallback_name" - - -class TestExtractTestContextFromFrame: - - def test_direct_test_function_call(self): - def test_example_function(): - return extract_test_context_from_frame() - - result = test_example_function() - module_name, class_name, function_name = result - - assert module_name == __name__ - assert class_name == "TestExtractTestContextFromFrame" - assert function_name == "test_example_function" - - def test_with_test_class_method(self): - class TestExampleClass: - def test_method(self): - return extract_test_context_from_frame() - - instance = TestExampleClass() - result = instance.test_method() - module_name, class_name, function_name = result - - assert 
module_name == __name__ - assert class_name == "TestExampleClass" - assert function_name == "test_method" - - def test_function_without_test_prefix(self): - result = extract_test_context_from_frame() - module_name, class_name, function_name = result - - assert module_name == __name__ - assert class_name == "TestExtractTestContextFromFrame" - assert function_name == "test_function_without_test_prefix" - - @patch('inspect.currentframe') - def test_no_test_context_raises_runtime_error(self, mock_current_frame): - mock_frame = Mock() - mock_frame.f_back = None - mock_frame.f_code.co_name = "regular_function" - mock_frame.f_code.co_filename = "/path/to/regular_file.py" - mock_frame.f_locals = {} - mock_frame.f_globals = {"__name__": "regular_module"} - - mock_current_frame.return_value = mock_frame - - with pytest.raises(RuntimeError, match="No test function found in call stack"): - extract_test_context_from_frame() - - def test_real_call_stack_context(self): - def nested_function(): - def deeper_function(): - return extract_test_context_from_frame() - return deeper_function() - - result = nested_function() - module_name, class_name, function_name = result - - assert module_name == __name__ - assert class_name == "TestExtractTestContextFromFrame" - assert function_name == "test_real_call_stack_context" - - - -class TestIntegrationScenarios: - - def test_pytest_class_method_scenario(self): - class TestExampleIntegration: - def test_integration_method(self): - return extract_test_context_from_frame() - - instance = TestExampleIntegration() - result = instance.test_integration_method() - module_name, class_name, function_name = result - - assert module_name == __name__ - assert class_name == "TestExampleIntegration" - assert function_name == "test_integration_method" - - def test_nested_helper_functions(self): - def outer_helper(): - def inner_helper(): - def deepest_helper(): - return extract_test_context_from_frame() - return deepest_helper() - return inner_helper() - - result = outer_helper() - module_name, class_name, function_name = result - - assert module_name == __name__ - assert class_name == "TestIntegrationScenarios" - assert function_name == "test_nested_helper_functions" From fadbdf75ba9247a7e312a79bb4f43ba6a00d6ec1 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 16 Sep 2025 19:09:12 -0700 Subject: [PATCH 08/40] better impl --- .../code_utils/codeflash_wrap_decorator.py | 186 ++---------------- codeflash/verification/pytest_plugin.py | 22 +++ 2 files changed, 34 insertions(+), 174 deletions(-) diff --git a/codeflash/code_utils/codeflash_wrap_decorator.py b/codeflash/code_utils/codeflash_wrap_decorator.py index cb4da64a0..fc72ccb16 100644 --- a/codeflash/code_utils/codeflash_wrap_decorator.py +++ b/codeflash/code_utils/codeflash_wrap_decorator.py @@ -1,23 +1,17 @@ from __future__ import annotations import asyncio -import contextlib import gc -import inspect import os import sqlite3 -import time from enum import Enum from functools import wraps from pathlib import Path from tempfile import TemporaryDirectory -from typing import TYPE_CHECKING, Any, Callable, TypeVar +from typing import Any, Callable, TypeVar import dill as pickle -if TYPE_CHECKING: - from types import FrameType - class VerificationType(str, Enum): # moved from codeflash/verification/codeflash_capture.py FUNCTION_CALL = ( @@ -36,175 +30,19 @@ def get_run_tmp_file(file_path: Path) -> Path: # moved from codeflash/code_util return Path(get_run_tmp_file.tmpdir.name) / file_path -def 
_extract_class_name_tracer(frame_locals: dict[str, Any]) -> str | None: - try: - self_arg = frame_locals.get("self") - if self_arg is not None: - try: - return self_arg.__class__.__name__ - except (AttributeError, Exception): - cls_arg = frame_locals.get("cls") - if cls_arg is not None: - with contextlib.suppress(AttributeError, Exception): - return cls_arg.__name__ - else: - cls_arg = frame_locals.get("cls") - if cls_arg is not None: - with contextlib.suppress(AttributeError, Exception): - return cls_arg.__name__ - except Exception: - return None - return None - - -def _get_module_name_cf_tracer(frame: FrameType | None) -> str: - try: - test_module = inspect.getmodule(frame) - except Exception: - test_module = None - - if test_module is not None: - module_name = getattr(test_module, "__name__", None) - if module_name is not None: - return module_name - - if frame is not None: - return frame.f_globals.get("__name__", "unknown_module") - return "unknown_module" +def extract_test_context_from_frame() -> tuple[str, str | None, str]: + # test_module = os.environ.get("CODEFLASH_TEST_MODULE") + test_module = os.environ["CODEFLASH_TEST_MODULE"] + test_class = os.environ.get("CODEFLASH_TEST_CLASS", None) + # test_function = os.environ.get("CODEFLASH_TEST_FUNCTION") + test_function = os.environ["CODEFLASH_TEST_FUNCTION"] + if test_module and test_function: + return (test_module, test_class if test_class else None, test_function) -def extract_test_context_from_frame() -> tuple[str, str | None, str]: - frame = inspect.currentframe() - # optimize? - try: - frames_info = [] - potential_tests = [] - - # First pass: collect all frame information - if frame is not None: - frame = frame.f_back - - while frame is not None: - try: - function_name = frame.f_code.co_name - filename = frame.f_code.co_filename - filename_path = Path(filename) - frame_locals = frame.f_locals - test_module_name = _get_module_name_cf_tracer(frame) - class_name = _extract_class_name_tracer(frame_locals) - - frames_info.append( - { - "function_name": function_name, - "filename_path": filename_path, - "frame_locals": frame_locals, - "test_module_name": test_module_name, - "class_name": class_name, - "frame": frame, - } - ) - - except Exception: # noqa: S112 - continue - - frame = frame.f_back - - # Second pass: analyze frames with full context - test_class_candidates = [] - for frame_info in frames_info: - function_name = frame_info["function_name"] - filename_path = frame_info["filename_path"] - frame_locals = frame_info["frame_locals"] - test_module_name = frame_info["test_module_name"] - class_name = frame_info["class_name"] - frame_obj = frame_info["frame"] - - # Keep track of test classes - if class_name and ( - class_name.startswith("Test") or class_name.endswith("Test") or "test" in class_name.lower() - ): - test_class_candidates.append((class_name, test_module_name)) - - # Now process frames again looking for test functions with full candidates list - # Collect all test functions to prioritize outer ones over nested ones - test_functions = [] - for frame_info in frames_info: - function_name = frame_info["function_name"] - filename_path = frame_info["filename_path"] - frame_locals = frame_info["frame_locals"] - test_module_name = frame_info["test_module_name"] - class_name = frame_info["class_name"] - frame_obj = frame_info["frame"] - - # Collect test functions - if function_name.startswith("test_"): - test_class_name = class_name - - # If no class found in current frame, check if we have any test class candidates - # Prefer 
the innermost (first) test class candidate which is more specific - if test_class_name is None and test_class_candidates: - test_class_name = test_class_candidates[0][0] - - test_functions.append((test_module_name, test_class_name, function_name)) - - # Prioritize test functions with class context, then innermost - if test_functions: - # First prefer test functions with class context - for test_func in test_functions: - if test_func[1] is not None: # has class_name - return test_func - # If no test function has class context, return the outermost (most likely the actual test method) - return test_functions[-1] - - # If no direct test functions found, look for other test patterns - for frame_info in frames_info: - function_name = frame_info["function_name"] - filename_path = frame_info["filename_path"] - frame_locals = frame_info["frame_locals"] - test_module_name = frame_info["test_module_name"] - class_name = frame_info["class_name"] - frame_obj = frame_info["frame"] - - # Test file/module detection - if ( - frame_obj.f_globals.get("__name__", "").startswith("test_") - or filename_path.stem.startswith("test_") - or "test" in filename_path.parts - ): - if class_name and ( - class_name.startswith("Test") or class_name.endswith("Test") or "test" in class_name.lower() - ): - potential_tests.append((test_module_name, class_name, function_name)) - elif "test" in test_module_name or filename_path.stem.startswith("test_"): - # For functions without class context, try to find the most recent test class - best_class = test_class_candidates[0][0] if test_class_candidates else None - potential_tests.append((test_module_name, best_class, function_name)) - - # Framework integration detection - if ( - ( - function_name in ["runTest", "_runTest", "run", "_testMethodName"] - or "pytest" in str(frame_obj.f_globals.get("__file__", "")) - or "unittest" in str(frame_obj.f_globals.get("__file__", "")) - ) - and class_name - and (class_name.startswith("Test") or "test" in class_name.lower()) - ): - test_method = function_name - if "self" in frame_locals: - with contextlib.suppress(AttributeError, TypeError): - test_method = getattr(frame_locals["self"], "_testMethodName", function_name) - potential_tests.append((test_module_name, class_name, test_method)) - - if potential_tests: - for test_module, test_class, test_func in potential_tests: - if test_func.startswith("test_"): - return test_module, test_class, test_func - return potential_tests[0] - - raise RuntimeError("No test function found in call stack") - finally: - del frame + raise RuntimeError( + "Test context environment variables not set - ensure tests are run through codeflash test runner" + ) def codeflash_behavior_async(func: F) -> F: diff --git a/codeflash/verification/pytest_plugin.py b/codeflash/verification/pytest_plugin.py index 85cd4d13c..00f99370c 100644 --- a/codeflash/verification/pytest_plugin.py +++ b/codeflash/verification/pytest_plugin.py @@ -450,3 +450,25 @@ def make_progress_id(i: int, n: int = count) -> str: metafunc.parametrize( "__pytest_loop_step_number", range(count), indirect=True, ids=make_progress_id, scope=scope ) + + @pytest.hookimpl(tryfirst=True) + def pytest_runtest_setup(self, item: pytest.Item) -> None: + test_module_name = item.module.__name__ if item.module else "unknown_module" + + test_class_name = None + if item.cls: + test_class_name = item.cls.__name__ + + test_function_name = item.name + if "[" in test_function_name: + test_function_name = test_function_name.split("[", 1)[0] + + 
os.environ["CODEFLASH_TEST_MODULE"] = test_module_name + os.environ["CODEFLASH_TEST_CLASS"] = test_class_name or "" + os.environ["CODEFLASH_TEST_FUNCTION"] = test_function_name + + @pytest.hookimpl(trylast=True) + def pytest_runtest_teardown(self, _: pytest.Item) -> None: + """Clean up test context environment variables after each test.""" + for var in ["CODEFLASH_TEST_MODULE", "CODEFLASH_TEST_CLASS", "CODEFLASH_TEST_FUNCTION"]: + os.environ.pop(var, None) From 1034ee8edcff7973707f75fa5dda7583c1cc2fc2 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 13:55:09 -0700 Subject: [PATCH 09/40] Update parse_test_output.py --- codeflash/verification/parse_test_output.py | 44 ++++++++++++--------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 654139971..ca701c1d7 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -26,7 +26,6 @@ if TYPE_CHECKING: import subprocess - from codeflash.discovery.functions_to_optimize import FunctionToOptimize from codeflash.models.models import CodeOptimizationContext, CoverageData, TestFiles from codeflash.verification.verification_utils import TestConfig @@ -79,6 +78,31 @@ def calculate_async_throughput_from_stdout(stdout: str, async_function_names: se return throughput_counts +start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!") +end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######!") + + +def calculate_function_throughput_from_stdout(stdout: str, function_name: str) -> int: + """A completed execution is defined as having both a start tag and matching end tag: + Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$! + End: !######test_module:test_function:function_name:loop_index:iteration_id######! 
+ """ + start_matches = start_pattern.findall(stdout) + end_matches = end_pattern.findall(stdout) + end_matches_set = set(end_matches) + + # Count completed executions for the specific function only + function_throughput = 0 + + for start_match in start_matches: + # Check if this execution is for the function we're interested in and has a matching end tag + # function_name is at index 2 in the match tuple + if start_match in end_matches_set and len(start_match) > 2 and start_match[2] == function_name: + function_throughput += 1 + + return function_throughput + + def parse_test_return_values_bin(file_location: Path, test_files: TestFiles, test_config: TestConfig) -> TestResults: test_results = TestResults() if not file_location.exists(): @@ -534,10 +558,7 @@ def parse_test_results( code_context: CodeOptimizationContext | None = None, run_result: subprocess.CompletedProcess | None = None, unittest_loop_index: int | None = None, - function_to_optimize: FunctionToOptimize | None = None, - *, - calculate_throughput: bool = False, -) -> tuple[TestResults, CoverageData | None, dict[str, int]]: +) -> tuple[TestResults, CoverageData | None]: test_results_xml = parse_test_xml( test_xml_path, test_files=test_files, @@ -574,18 +595,6 @@ def parse_test_results( get_run_tmp_file(Path(f"test_return_values_{optimization_iteration}.sqlite")).unlink(missing_ok=True) results = merge_test_results(test_results_xml, test_results_bin_file, test_config.test_framework) - # Calculate throughput for async functions only when requested (during performance testing) - throughput_counts = {} - if calculate_throughput and function_to_optimize and function_to_optimize.is_async: - logger.info(f"Calculating throughput for async function: {function_to_optimize.function_name}") - all_stdout = "" - for result in results.test_results: - if result.stdout: - all_stdout += result.stdout - - async_function_names = {function_to_optimize.function_name} - throughput_counts = calculate_async_throughput_from_stdout(all_stdout, async_function_names) - all_args = False if coverage_database_file and source_file and code_context and function_name: all_args = True @@ -597,5 +606,4 @@ def parse_test_results( function_name=function_name, ) coverage.log_coverage() - # return results, coverage if all_args else None, throughput_counts return results, coverage if all_args else None From 4ae0f93f6c9462984a44fa0921d6adfadf824330 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 13:58:17 -0700 Subject: [PATCH 10/40] calculate throughput --- codeflash/models/models.py | 1 + codeflash/optimization/function_optimizer.py | 54 ++++++++++++++++---- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/codeflash/models/models.py b/codeflash/models/models.py index 8417148ef..a8fbc3524 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -380,6 +380,7 @@ class OriginalCodeBaseline(BaseModel): line_profile_results: dict runtime: int coverage_results: Optional[CoverageData] + async_throughput: Optional[dict[str, int]] = None class CoverageStatus(Enum): diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 99f6d42f0..341a580a2 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1388,7 +1388,7 @@ def establish_original_code_baseline( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) - behavioral_results, coverage_results = 
self.run_and_parse_tests( + behavioral_results, coverage_results, behavioral_test_results_for_throughput = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, @@ -1409,6 +1409,8 @@ def establish_original_code_baseline( return Failure("Failed to establish a baseline for the original code - bevhavioral tests failed.") if not coverage_critic(coverage_results, self.args.test_framework): return Failure("The threshold for test coverage was not met.") + benchmarking_test_results_for_throughput = None + if test_framework == "pytest": line_profile_results = self.line_profiler_step( code_context=code_context, original_helper_code=original_helper_code, candidate_index=0 @@ -1431,7 +1433,7 @@ def establish_original_code_baseline( ) try: - benchmarking_results, _ = self.run_and_parse_tests( + benchmarking_results, _, benchmarking_test_results_for_throughput = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1455,7 +1457,7 @@ def establish_original_code_baseline( # * 1.5 to give unittest a bit more time to run break test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1) - unittest_loop_results, _ = self.run_and_parse_tests( + unittest_loop_results, _, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1502,6 +1504,10 @@ def establish_original_code_baseline( console.rule() logger.debug(f"Total original code runtime (ns): {total_timing}") + async_throughput = self.calculate_async_throughput( + behavioral_test_results_for_throughput, benchmarking_test_results_for_throughput + ) + if self.args.benchmark: replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks( self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root @@ -1517,6 +1523,7 @@ def establish_original_code_baseline( runtime=total_timing, coverage_results=coverage_results, line_profile_results=line_profile_results, + async_throughput=async_throughput, ), functions_to_remove, ) @@ -1564,7 +1571,7 @@ def run_optimized_candidate( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) - candidate_behavior_results, _ = self.run_and_parse_tests( + candidate_behavior_results, _, _ = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, @@ -1610,7 +1617,7 @@ def run_optimized_candidate( ) try: - candidate_benchmarking_results, _ = self.run_and_parse_tests( + candidate_benchmarking_results, _, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1643,7 +1650,7 @@ def run_optimized_candidate( # * 1.5 to give unittest a bit more time to run break test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1) - unittest_loop_results, cov = self.run_and_parse_tests( + unittest_loop_results, cov, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1695,7 +1702,7 @@ def run_and_parse_tests( code_context: CodeOptimizationContext | None = None, unittest_loop_index: int | None = None, line_profiler_output_file: Path | None = None, - ) -> tuple[TestResults | dict, CoverageData | None]: + ) -> tuple[TestResults | dict, CoverageData | None, TestResults | None]: coverage_database_file = None coverage_config_file = None try: @@ -1741,7 +1748,7 @@ def run_and_parse_tests( logger.exception( f"Error running tests in {', '.join(str(f) 
for f in test_files.test_files)}.\nTimeout Error" ) - return TestResults(), None + return TestResults(), None, None if run_result.returncode != 0 and testing_type == TestingMode.BEHAVIOR: logger.debug( f"Nonzero return code {run_result.returncode} when running tests in " @@ -1769,9 +1776,11 @@ def run_and_parse_tests( coverage_database_file=coverage_database_file, coverage_config_file=coverage_config_file, ) + # Return the test results for async throughput calculation + return results, coverage_results, results if isinstance(results, TestResults) else None else: results, coverage_results = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file) - return results, coverage_results + return results, coverage_results, None def submit_test_generation_tasks( self, @@ -1822,6 +1831,31 @@ def get_test_env( test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root) return test_env + def calculate_async_throughput( + self, behavioral_test_results: TestResults | None, benchmarking_test_results: TestResults | None + ) -> dict[str, int] | None: + if not self.function_to_optimize.is_async: + return None + + from codeflash.verification.parse_test_output import calculate_function_throughput_from_stdout + + all_stdout = "" + + for test_results in [behavioral_test_results, benchmarking_test_results]: + if test_results: + for result in test_results.test_results: + if result.stdout: + all_stdout += result.stdout + + if not all_stdout: + return None + + function_throughput = calculate_function_throughput_from_stdout( + all_stdout, self.function_to_optimize.function_name + ) + + return {self.function_to_optimize.function_name: function_throughput} if function_throughput > 0 else None + def line_profiler_step( self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], candidate_index: int ) -> dict: @@ -1830,7 +1864,7 @@ def line_profiler_step( codeflash_loop_index=0, codeflash_test_iteration=candidate_index, codeflash_tracer_disable=1 ) line_profiler_output_file = add_decorator_imports(self.function_to_optimize, code_context) - line_profile_results, _ = self.run_and_parse_tests( + line_profile_results, _, _ = self.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=self.test_files, From fb66ff51926d1f75ff4ba279abf36bc76f093d3e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 14:09:12 -0700 Subject: [PATCH 11/40] add unit tests --- codeflash/verification/pytest_plugin.py | 2 +- tests/test_async_run_and_parse_tests.py | 20 ++++----- tests/test_codeflash_capture.py | 24 +++++----- tests/test_instrument_all_and_run.py | 10 ++--- tests/test_instrument_tests.py | 44 +++++++++---------- ...t_instrumentation_run_results_aiservice.py | 10 ++--- tests/test_pickle_patcher.py | 8 ++-- 7 files changed, 59 insertions(+), 59 deletions(-) diff --git a/codeflash/verification/pytest_plugin.py b/codeflash/verification/pytest_plugin.py index 00f99370c..cf558fb5a 100644 --- a/codeflash/verification/pytest_plugin.py +++ b/codeflash/verification/pytest_plugin.py @@ -468,7 +468,7 @@ def pytest_runtest_setup(self, item: pytest.Item) -> None: os.environ["CODEFLASH_TEST_FUNCTION"] = test_function_name @pytest.hookimpl(trylast=True) - def pytest_runtest_teardown(self, _: pytest.Item) -> None: + def pytest_runtest_teardown(self, item: pytest.Item) -> None: """Clean up test context environment variables after each test.""" for var in ["CODEFLASH_TEST_MODULE", "CODEFLASH_TEST_CLASS", "CODEFLASH_TEST_FUNCTION"]: 
os.environ.pop(var, None) diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index 1c5ddae63..010fd5b43 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -92,7 +92,7 @@ async def test_async_sort(): ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -107,7 +107,7 @@ async def test_async_sort(): results_list = test_results.test_results assert results_list[0].id.function_getting_tested == "async_sorter" - assert results_list[0].id.test_class_name == "PytestPluginManager" + assert results_list[0].id.test_class_name is None assert results_list[0].id.test_function_name == "test_async_sort" assert results_list[0].did_pass assert results_list[0].runtime is None or results_list[0].runtime >= 0 @@ -211,7 +211,7 @@ async def test_async_class_sort(): ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -233,7 +233,7 @@ async def test_async_class_sort(): assert sorter_result.id.function_getting_tested == "sorter" - assert sorter_result.id.test_class_name == "PytestPluginManager" + assert sorter_result.id.test_class_name is None assert sorter_result.id.test_function_name == "test_async_class_sort" assert sorter_result.did_pass assert sorter_result.runtime is None or sorter_result.runtime >= 0 @@ -320,7 +320,7 @@ async def test_async_perf(): ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=func_optimizer.test_files, @@ -473,7 +473,7 @@ async def async_error_function(lst): ] ) - test_results, _ = func_optimizer.run_and_parse_tests( + test_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -567,7 +567,7 @@ async def test_async_multi(): ] ) - test_results, _ = func_optimizer.run_and_parse_tests( + test_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -678,7 +678,7 @@ async def test_async_edge_cases(): ] ) - test_results, _ = func_optimizer.run_and_parse_tests( + test_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -810,7 +810,7 @@ def test_sync_sort(): ] ) - test_results, _ = func_optimizer.run_and_parse_tests( + test_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -976,7 +976,7 @@ async def test_mixed_sorting(): ] ) - test_results, _ = func_optimizer.run_and_parse_tests( + test_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, diff --git a/tests/test_codeflash_capture.py b/tests/test_codeflash_capture.py index 469d1be6a..bc5bec790 100644 --- a/tests/test_codeflash_capture.py +++ b/tests/test_codeflash_capture.py @@ -459,7 +459,7 @@ def __init__(self, x=2): ) ] ) - test_results, coverage_data = 
func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -492,7 +492,7 @@ def __init__(self, x=2): assert test_results[2].id.function_getting_tested == "some_function" assert test_results[2].id.iteration_id == "16_0" - test_results2, _ = func_optimizer.run_and_parse_tests( + test_results2, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -580,7 +580,7 @@ def __init__(self, *args, **kwargs): ) ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -614,7 +614,7 @@ def __init__(self, *args, **kwargs): assert test_results[2].id.function_getting_tested == "some_function" assert test_results[2].id.iteration_id == "16_0" - results2, _ = func_optimizer.run_and_parse_tests( + results2, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -705,7 +705,7 @@ def __init__(self, x=2): ) ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -741,7 +741,7 @@ def __init__(self, x=2): assert test_results[2].id.function_getting_tested == "some_function" assert test_results[2].id.iteration_id == "12_2" # Third call - test_results2, _ = func_optimizer.run_and_parse_tests( + test_results2, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -867,7 +867,7 @@ def another_helper(self): ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -888,7 +888,7 @@ def another_helper(self): assert test_results[3].id.function_getting_tested == "AnotherHelperClass.__init__" assert test_results[3].verification_type == VerificationType.INIT_STATE_HELPER - results2, _ = func_optimizer.run_and_parse_tests( + results2, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1026,7 +1026,7 @@ def another_helper(self): } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1078,7 +1078,7 @@ def target_function(self): Path(helper_path_2): {"HelperClass2", "AnotherHelperClass"}, } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - modified_test_results, coverage_data = func_optimizer.run_and_parse_tests( + modified_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1117,7 +1117,7 @@ def target_function(self): Path(helper_path_2): {"HelperClass2", "AnotherHelperClass"}, } instrument_codeflash_capture(fto, 
file_path_to_helper_classes, tests_root) - mutated_test_results, coverage_data = func_optimizer.run_and_parse_tests( + mutated_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1155,7 +1155,7 @@ def target_function(self): Path(helper_path_2): {"HelperClass2", "AnotherHelperClass"}, } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - no_helper1_test_results, coverage_data = func_optimizer.run_and_parse_tests( + no_helper1_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py index 7e1a20f49..44044ed55 100644 --- a/tests/test_instrument_all_and_run.py +++ b/tests/test_instrument_all_and_run.py @@ -160,7 +160,7 @@ def test_sort(): ) ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -205,7 +205,7 @@ def test_sort(): result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] """ assert test_results[1].stdout == out_str - results2, _ = func_optimizer.run_and_parse_tests( + results2, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -331,7 +331,7 @@ def test_sort(): ) ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -376,7 +376,7 @@ def test_sort(): assert test_results[3].did_pass assert test_results[3].stdout == """codeflash stdout : BubbleSorter.sorter() called\n""" - results2, _ = func_optimizer.run_and_parse_tests( + results2, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -439,7 +439,7 @@ def sorter(self, arr): ) ] ) - new_test_results, coverage_data = func_optimizer.run_and_parse_tests( + new_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index ccec5ffe3..e42b41f6f 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -434,7 +434,7 @@ def test_sort(): ) ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -469,7 +469,7 @@ def test_sort(): with test_path_perf.open("w") as f: f.write(new_perf_test) - test_results_perf, _ = func_optimizer.run_and_parse_tests( + test_results_perf, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -521,7 +521,7 @@ def test_sort(): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _, _ = func_optimizer.run_and_parse_tests( 
testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -677,7 +677,7 @@ def test_sort_parametrized(input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -731,7 +731,7 @@ def test_sort_parametrized(input, expected_output): assert test_results[2].runtime > 0 assert test_results[2].did_pass - test_results_perf, coverage_data = func_optimizer.run_and_parse_tests( + test_results_perf, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -788,7 +788,7 @@ def test_sort_parametrized(input, expected_output): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -962,7 +962,7 @@ def test_sort_parametrized_loop(input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1053,7 +1053,7 @@ def test_sort_parametrized_loop(input, expected_output): assert test_results[5].did_pass assert test_results[5].stdout == out_str - test_results, _ = func_optimizer.run_and_parse_tests( + test_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -1143,7 +1143,7 @@ def test_sort_parametrized_loop(input, expected_output): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -1315,7 +1315,7 @@ def test_sort(): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1357,7 +1357,7 @@ def test_sort(): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -1421,7 +1421,7 @@ def test_sort(): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _, _ = 
func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -1623,7 +1623,7 @@ def test_sort(self): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1673,7 +1673,7 @@ def test_sort(self): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -1882,7 +1882,7 @@ def test_sort(self, input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1937,7 +1937,7 @@ def test_sort(self, input, expected_output): """ assert test_results[2].stdout == out_str - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -2145,7 +2145,7 @@ def test_sort(self): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( test_env=test_env, testing_type=TestingMode.BEHAVIOR, test_files=test_files, @@ -2200,7 +2200,7 @@ def test_sort(self): """ assert test_results[2].stdout == out_str - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( test_env=test_env, testing_type=TestingMode.PERFORMANCE, test_files=test_files, @@ -2406,7 +2406,7 @@ def test_sort(self, input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=f, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -2493,7 +2493,7 @@ def test_sort(self, input, expected_output): ) assert test_results[5].runtime > 0 assert test_results[5].did_pass - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -3058,7 +3058,7 @@ def test_sleepfunc_sequence_short(n, expected_total_sleep_time): ) ] ) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -3185,7 +3185,7 @@ def test_sleepfunc_sequence_short(self, n, expected_total_sleep_time): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data = func_optimizer.run_and_parse_tests( + test_results, 
coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py index 78d9973f1..9514214bc 100644 --- a/tests/test_instrumentation_run_results_aiservice.py +++ b/tests/test_instrumentation_run_results_aiservice.py @@ -170,7 +170,7 @@ def test_single_element_list(): a = BubbleSorter() function_to_optimize = FunctionToOptimize("sorter", fto_path, [FunctionParent("BubbleSorter", "ClassDef")]) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results, coverage_data = func_opt.run_and_parse_tests( + test_results, coverage_data, _ = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -210,7 +210,7 @@ def sorter(self, arr): """ fto_path.write_text(optimized_code_mutated_attr, "utf-8") func_opt = opt.create_function_optimizer(function_to_optimize) - test_results_mutated_attr, coverage_data = func_opt.run_and_parse_tests( + test_results_mutated_attr, coverage_data, _ = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -311,7 +311,7 @@ def test_single_element_list(): ] ) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results, coverage_data = func_opt.run_and_parse_tests( + test_results, coverage_data, _ = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -388,7 +388,7 @@ def sorter(self, arr): ) ) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results_mutated_attr, coverage_data = func_opt.run_and_parse_tests( + test_results_mutated_attr, coverage_data, _ = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -441,7 +441,7 @@ def sorter(self, arr): ) ) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results_new_attr, coverage_data = func_opt.run_and_parse_tests( + test_results_new_attr, coverage_data, _ = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, diff --git a/tests/test_pickle_patcher.py b/tests/test_pickle_patcher.py index 346153674..38cec3b1d 100644 --- a/tests/test_pickle_patcher.py +++ b/tests/test_pickle_patcher.py @@ -377,7 +377,7 @@ def test_run_and_parse_picklepatch() -> None: ) ] ) - test_results_unused_socket, coverage_data = func_optimizer.run_and_parse_tests( + test_results_unused_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -402,7 +402,7 @@ def bubble_sort_with_unused_socket(data_container): return sorted(numbers) """) # Run optimized code for unused socket - optimized_test_results_unused_socket, coverage_data = func_optimizer.run_and_parse_tests( + optimized_test_results_unused_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -454,7 +454,7 @@ def bubble_sort_with_unused_socket(data_container): ) ] ) - test_results_used_socket, coverage_data = func_optimizer.run_and_parse_tests( + test_results_used_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -485,7 +485,7 @@ def 
bubble_sort_with_used_socket(data_container): """) # Run test for optimized function code that uses the socket. This should fail, as the PicklePlaceholder is accessed. - optimized_test_results_used_socket, coverage_data = func_optimizer.run_and_parse_tests( + optimized_test_results_used_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, From 2190963ee565c7c54bc9d03c09e077726fe72720 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 14:44:36 -0700 Subject: [PATCH 12/40] fix wrapper validation test --- tests/test_async_wrapper_sqlite_validation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_async_wrapper_sqlite_validation.py b/tests/test_async_wrapper_sqlite_validation.py index 4386ba5ab..c1c71fd4c 100644 --- a/tests/test_async_wrapper_sqlite_validation.py +++ b/tests/test_async_wrapper_sqlite_validation.py @@ -19,11 +19,14 @@ class TestAsyncWrapperSQLiteValidation: @pytest.fixture - def test_env_setup(self): + def test_env_setup(self, request): original_env = {} test_env = { "CODEFLASH_LOOP_INDEX": "1", "CODEFLASH_TEST_ITERATION": "0", + "CODEFLASH_TEST_MODULE": __name__, + "CODEFLASH_TEST_CLASS": "TestAsyncWrapperSQLiteValidation", + "CODEFLASH_TEST_FUNCTION": request.node.name, } for key, value in test_env.items(): @@ -278,7 +281,7 @@ async def schema_test_func() -> str: assert columns == expected_columns con.close() - def test_sync_test_context_extraction(self): + def test_sync_test_context_extraction(self, test_env_setup): from codeflash.code_utils.codeflash_wrap_decorator import extract_test_context_from_frame test_module, test_class, test_func = extract_test_context_from_frame() From 2a3c9758608c0ff47b28a0330eaa9c3a24ee6f72 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 15:32:57 -0700 Subject: [PATCH 13/40] fix linting --- codeflash/optimization/function_optimizer.py | 5 ++--- codeflash/verification/parse_test_output.py | 3 ++- codeflash/verification/pytest_plugin.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 341a580a2..492cb6d6f 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1778,9 +1778,8 @@ def run_and_parse_tests( ) # Return the test results for async throughput calculation return results, coverage_results, results if isinstance(results, TestResults) else None - else: - results, coverage_results = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file) - return results, coverage_results, None + results, coverage_results = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file) + return results, coverage_results, None def submit_test_generation_tasks( self, diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index ca701c1d7..c4380675d 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -83,7 +83,8 @@ def calculate_async_throughput_from_stdout(stdout: str, async_function_names: se def calculate_function_throughput_from_stdout(stdout: str, function_name: str) -> int: - """A completed execution is defined as having both a start tag and matching end tag: + """Calculate function throughput from stdout. 
A completed execution is defined as having both a start tag and matching end tag. + Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$! End: !######test_module:test_function:function_name:loop_index:iteration_id######! """ diff --git a/codeflash/verification/pytest_plugin.py b/codeflash/verification/pytest_plugin.py index cf558fb5a..4dbdcf762 100644 --- a/codeflash/verification/pytest_plugin.py +++ b/codeflash/verification/pytest_plugin.py @@ -468,7 +468,7 @@ def pytest_runtest_setup(self, item: pytest.Item) -> None: os.environ["CODEFLASH_TEST_FUNCTION"] = test_function_name @pytest.hookimpl(trylast=True) - def pytest_runtest_teardown(self, item: pytest.Item) -> None: + def pytest_runtest_teardown(self, item: pytest.Item) -> None: # noqa: ARG002 """Clean up test context environment variables after each test.""" for var in ["CODEFLASH_TEST_MODULE", "CODEFLASH_TEST_CLASS", "CODEFLASH_TEST_FUNCTION"]: os.environ.pop(var, None) From 9c53948b8d715b0edbdfe6253798e67f85865da3 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 16:18:09 -0700 Subject: [PATCH 14/40] cleanup --- codeflash/models/models.py | 2 +- codeflash/optimization/function_optimizer.py | 49 ++++++-------------- 2 files changed, 16 insertions(+), 35 deletions(-) diff --git a/codeflash/models/models.py b/codeflash/models/models.py index a8fbc3524..f6f991363 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -380,7 +380,7 @@ class OriginalCodeBaseline(BaseModel): line_profile_results: dict runtime: int coverage_results: Optional[CoverageData] - async_throughput: Optional[dict[str, int]] = None + async_throughput: Optional[int] = None class CoverageStatus(Enum): diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 492cb6d6f..dfb4a2643 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -84,7 +84,7 @@ from codeflash.verification.equivalence import compare_test_results from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture from codeflash.verification.parse_line_profile_test_output import parse_line_profile_results -from codeflash.verification.parse_test_output import parse_test_results +from codeflash.verification.parse_test_output import calculate_function_throughput_from_stdout, parse_test_results from codeflash.verification.test_runner import run_behavioral_tests, run_benchmarking_tests, run_line_profile_tests from codeflash.verification.verification_utils import get_test_file_path from codeflash.verification.verifier import generate_tests @@ -1388,7 +1388,7 @@ def establish_original_code_baseline( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) - behavioral_results, coverage_results, behavioral_test_results_for_throughput = self.run_and_parse_tests( + behavioral_results, coverage_results, _ = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, @@ -1409,7 +1409,6 @@ def establish_original_code_baseline( return Failure("Failed to establish a baseline for the original code - bevhavioral tests failed.") if not coverage_critic(coverage_results, self.args.test_framework): return Failure("The threshold for test coverage was not met.") - benchmarking_test_results_for_throughput = None if test_framework == "pytest": line_profile_results = self.line_profiler_step( @@ -1433,7 +1432,7 @@ def 
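The tag layout documented in this docstring can be exercised on its own; below is a minimal counting sketch that assumes the simplified start/end format shown above (it is not the project's exact parser, which may normalize the tags further):

import re

# Sketch only: count completed executions of one function from captured stdout,
# pairing each start tag with an end tag that carries the same five fields.
_START = re.compile(r"!\$######(.*?):(.*?):(.*?):(.*?):(.*?)######\$!")
_END = re.compile(r"!######(.*?):(.*?):(.*?):(.*?):(.*?)######!")

def count_completed_executions(stdout: str, function_name: str) -> int:
    ends = {m.groups() for m in _END.finditer(stdout)}
    return sum(
        1
        for m in _START.finditer(stdout)
        if m.group(3) == function_name and m.groups() in ends
    )

For example, stdout containing two start tags for a hypothetical my_func but only one matching end tag would count as a single completed execution.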
establish_original_code_baseline( ) try: - benchmarking_results, _, benchmarking_test_results_for_throughput = self.run_and_parse_tests( + benchmarking_results, _, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1504,9 +1503,17 @@ def establish_original_code_baseline( console.rule() logger.debug(f"Total original code runtime (ns): {total_timing}") - async_throughput = self.calculate_async_throughput( - behavioral_test_results_for_throughput, benchmarking_test_results_for_throughput - ) + async_throughput = None + if self.function_to_optimize.is_async and benchmarking_results: + all_stdout = "" + for result in benchmarking_results.test_results: + if result.stdout: + all_stdout += result.stdout + + if all_stdout: + async_throughput = calculate_function_throughput_from_stdout( + all_stdout, self.function_to_optimize.function_name + ) if self.args.benchmark: replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks( @@ -1776,8 +1783,7 @@ def run_and_parse_tests( coverage_database_file=coverage_database_file, coverage_config_file=coverage_config_file, ) - # Return the test results for async throughput calculation - return results, coverage_results, results if isinstance(results, TestResults) else None + return results, coverage_results, None results, coverage_results = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file) return results, coverage_results, None @@ -1830,31 +1836,6 @@ def get_test_env( test_env["PYTHONPATH"] += os.pathsep + str(self.args.project_root) return test_env - def calculate_async_throughput( - self, behavioral_test_results: TestResults | None, benchmarking_test_results: TestResults | None - ) -> dict[str, int] | None: - if not self.function_to_optimize.is_async: - return None - - from codeflash.verification.parse_test_output import calculate_function_throughput_from_stdout - - all_stdout = "" - - for test_results in [behavioral_test_results, benchmarking_test_results]: - if test_results: - for result in test_results.test_results: - if result.stdout: - all_stdout += result.stdout - - if not all_stdout: - return None - - function_throughput = calculate_function_throughput_from_stdout( - all_stdout, self.function_to_optimize.function_name - ) - - return {self.function_to_optimize.function_name: function_throughput} if function_throughput > 0 else None - def line_profiler_step( self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], candidate_index: int ) -> dict: From e5c4562504b97ae2299a9001347d5dff16d42b2d Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 16:27:10 -0700 Subject: [PATCH 15/40] Update function_optimizer.py --- codeflash/optimization/function_optimizer.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index dfb4a2643..46cfe9ac9 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1504,16 +1504,15 @@ def establish_original_code_baseline( logger.debug(f"Total original code runtime (ns): {total_timing}") async_throughput = None - if self.function_to_optimize.is_async and benchmarking_results: + if self.function_to_optimize.is_async: all_stdout = "" for result in benchmarking_results.test_results: if result.stdout: all_stdout += result.stdout - if all_stdout: - async_throughput = calculate_function_throughput_from_stdout( - 
all_stdout, self.function_to_optimize.function_name - ) + async_throughput = calculate_function_throughput_from_stdout( + all_stdout, self.function_to_optimize.function_name + ) if self.args.benchmark: replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks( From 04d31b5e67b003a4e7023228a35bbf30b81c209d Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 16:31:14 -0700 Subject: [PATCH 16/40] optimized candidate result oo --- codeflash/models/models.py | 1 + codeflash/optimization/function_optimizer.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/codeflash/models/models.py b/codeflash/models/models.py index f6f991363..b4e29effa 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -274,6 +274,7 @@ class OptimizedCandidateResult(BaseModel): replay_benchmarking_test_results: Optional[dict[BenchmarkKey, TestResults]] = None optimization_candidate_index: int total_candidate_timing: int + async_throughput: Optional[int] = None class GeneratedTests(BaseModel): diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 46cfe9ac9..5e3f5581e 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1672,6 +1672,19 @@ def run_optimized_candidate( console.rule() logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}") + + candidate_async_throughput = None + if self.function_to_optimize.is_async and candidate_benchmarking_results: + all_stdout = "" + for result in candidate_benchmarking_results.test_results: + if result.stdout: + all_stdout += result.stdout + + + candidate_async_throughput = calculate_function_throughput_from_stdout( + all_stdout, self.function_to_optimize.function_name + ) + if self.args.benchmark: candidate_replay_benchmarking_results = candidate_benchmarking_results.group_by_benchmarks( self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root @@ -1691,6 +1704,7 @@ def run_optimized_candidate( else None, optimization_candidate_index=optimization_candidate_index, total_candidate_timing=total_candidate_timing, + async_throughput=candidate_async_throughput, ) ) From 8e516fc8682a6d20c45d79fdcc70196f163dd9f3 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 18:25:56 -0700 Subject: [PATCH 17/40] critic --- codeflash/code_utils/config_consts.py | 1 + codeflash/optimization/function_optimizer.py | 32 +++- codeflash/result/critic.py | 59 ++++++- codeflash/verification/parse_test_output.py | 5 +- tests/test_critic.py | 164 ++++++++++++++++++- 5 files changed, 247 insertions(+), 14 deletions(-) diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py index 50b4bce16..a8fc74733 100644 --- a/codeflash/code_utils/config_consts.py +++ b/codeflash/code_utils/config_consts.py @@ -11,3 +11,4 @@ MIN_TESTCASE_PASSED_THRESHOLD = 6 REPEAT_OPTIMIZATION_PROBABILITY = 0.1 DEFAULT_IMPORTANCE_THRESHOLD = 0.001 +MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10 # 10% minimum improvement for async throughput diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 5e3f5581e..cb25df992 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -77,7 +77,13 @@ TestType, ) from codeflash.result.create_pr import check_create_pr, existing_tests_source_for -from codeflash.result.critic import coverage_critic, 
performance_gain, quantity_of_tests_critic, speedup_critic +from codeflash.result.critic import ( + coverage_critic, + performance_gain, + quantity_of_tests_critic, + speedup_critic, + throughput_gain, +) from codeflash.result.explanation import Explanation from codeflash.telemetry.posthog_cf import ph from codeflash.verification.concolic_testing import generate_concolic_tests @@ -566,7 +572,11 @@ def determine_best_candidate( tree = Tree(f"Candidate #{candidate_index} - Runtime Information") benchmark_tree = None if speedup_critic( - candidate_result, original_code_baseline.runtime, best_runtime_until_now=None + candidate_result, + original_code_baseline.runtime, + best_runtime_until_now=None, + original_async_throughput=original_code_baseline.async_throughput, + best_throughput_until_now=None, ) and quantity_of_tests_critic(candidate_result): tree.add("This candidate is faster than the original code. 🚀") # TODO: Change this description tree.add(f"Original summed runtime: {humanize_runtime(original_code_baseline.runtime)}") @@ -577,6 +587,19 @@ def determine_best_candidate( ) tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") + logger.info(f"orig_async_throughput: {original_code_baseline.async_throughput}") + logger.info(f"candidate_result.async_throughput: {candidate_result.async_throughput}") + if ( + original_code_baseline.async_throughput is not None + and candidate_result.async_throughput is not None + ): + throughput_gain_value = throughput_gain( + original_throughput=original_code_baseline.async_throughput, + optimized_throughput=candidate_result.async_throughput, + ) + tree.add(f"Original async throughput: {original_code_baseline.async_throughput} executions") + tree.add(f"Optimized async throughput: {candidate_result.async_throughput} executions") + tree.add(f"Throughput improvement: {throughput_gain_value * 100:.1f}%") line_profile_test_results = self.line_profiler_step( code_context=code_context, original_helper_code=original_helper_code, @@ -1509,10 +1532,12 @@ def establish_original_code_baseline( for result in benchmarking_results.test_results: if result.stdout: all_stdout += result.stdout - + logger.info("Calculating async function throughput from test output...") + logger.info(f"All stdout for async throughput calculation:\n{all_stdout}") async_throughput = calculate_function_throughput_from_stdout( all_stdout, self.function_to_optimize.function_name ) + logger.info(f"Original async function throughput: {async_throughput} calls/second") if self.args.benchmark: replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks( @@ -1680,7 +1705,6 @@ def run_optimized_candidate( if result.stdout: all_stdout += result.stdout - candidate_async_throughput = calculate_function_throughput_from_stdout( all_stdout, self.function_to_optimize.function_name ) diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 8aea5ebae..1869dfc06 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -8,6 +8,7 @@ COVERAGE_THRESHOLD, MIN_IMPROVEMENT_THRESHOLD, MIN_TESTCASE_PASSED_THRESHOLD, + MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD, ) from codeflash.models.models import TestType @@ -25,20 +26,41 @@ def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> return (original_runtime_ns - optimized_runtime_ns) / optimized_runtime_ns +def throughput_gain(*, original_throughput: int, optimized_throughput: int) -> float: + """Calculate the throughput gain of an optimized code over the 
original code. + + This value multiplied by 100 gives the percentage improvement in throughput. + For throughput, higher values are better (more executions per time period). + """ + if original_throughput == 0: + return 0.0 + return (optimized_throughput - original_throughput) / original_throughput + + def speedup_critic( candidate_result: OptimizedCandidateResult, original_code_runtime: int, best_runtime_until_now: int | None, *, disable_gh_action_noise: bool = False, + original_async_throughput: int | None = None, + best_throughput_until_now: int | None = None, ) -> bool: """Take in a correct optimized Test Result and decide if the optimization should actually be surfaced to the user. - Ensure that the optimization is actually faster than the original code, above the noise floor. - The noise floor is a function of the original code runtime. Currently, the noise floor is 2xMIN_IMPROVEMENT_THRESHOLD - when the original runtime is less than 10 microseconds, and becomes MIN_IMPROVEMENT_THRESHOLD for any higher runtime. - The noise floor is doubled when benchmarking on a (noisy) GitHub Action virtual instance, also we want to be more confident there. + Evaluates both runtime performance and async throughput improvements. + + For runtime performance: + - Ensures the optimization is actually faster than the original code, above the noise floor. + - The noise floor is a function of the original code runtime. Currently, the noise floor is 2xMIN_IMPROVEMENT_THRESHOLD + when the original runtime is less than 10 microseconds, and becomes MIN_IMPROVEMENT_THRESHOLD for any higher runtime. + - The noise floor is doubled when benchmarking on a (noisy) GitHub Action virtual instance. + + For async throughput (when available): + - Evaluates throughput improvements using MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD + - Throughput improvements complement runtime improvements for async functions """ + # Runtime performance evaluation noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD if not disable_gh_action_noise and env_utils.is_ci(): noise_floor = noise_floor * 2 # Increase the noise floor in GitHub Actions mode @@ -46,10 +68,31 @@ def speedup_critic( perf_gain = performance_gain( original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime ) - if best_runtime_until_now is None: - # collect all optimizations with this - return bool(perf_gain > noise_floor) - return bool(perf_gain > noise_floor and candidate_result.best_test_runtime < best_runtime_until_now) + runtime_improved = perf_gain > noise_floor + + # Check runtime comparison with best so far + runtime_is_best = best_runtime_until_now is None or candidate_result.best_test_runtime < best_runtime_until_now + + # Async throughput evaluation (if throughput data is available) + throughput_improved = True # Default to True if no throughput data + throughput_is_best = True # Default to True if no throughput data + + if original_async_throughput is not None and candidate_result.async_throughput is not None: + if original_async_throughput > 0: # Avoid division by zero + throughput_gain_value = throughput_gain( + original_throughput=original_async_throughput, + optimized_throughput=candidate_result.async_throughput + ) + throughput_improved = throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD + logger.debug(f"Async throughput gain: {throughput_gain_value * 100:.1f}% (original: {original_async_throughput}, optimized: {candidate_result.async_throughput})") + + 
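The gain formula and the 10% threshold introduced above can be sanity-checked with a tiny standalone example (illustrative numbers only, not taken from any benchmark run):

# Assumes MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10, as set in config_consts.py by this patch.
original_throughput = 100   # completed executions measured for the baseline
optimized_throughput = 130  # completed executions measured for the candidate

gain = (optimized_throughput - original_throughput) / original_throughput
assert gain == 0.30          # 30% more completed executions
assert gain > 0.10           # clears the async throughput noise threshold
# A 100 -> 105 result would give 0.05 and be treated as within noise.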
throughput_is_best = best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now + + # For async functions with throughput data, both runtime and throughput should improve + # For sync functions or when throughput data is unavailable, only runtime matters + if original_async_throughput is not None and candidate_result.async_throughput is not None: + return runtime_improved and runtime_is_best and throughput_improved and throughput_is_best + return runtime_improved and runtime_is_best def quantity_of_tests_critic(candidate_result: OptimizedCandidateResult | OriginalCodeBaseline) -> bool: diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index c4380675d..13208b9a6 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -94,10 +94,13 @@ def calculate_function_throughput_from_stdout(stdout: str, function_name: str) - # Count completed executions for the specific function only function_throughput = 0 - + logger.info(f"Total start matches: {len(start_matches)}, Total end matches: {len(end_matches)}") for start_match in start_matches: # Check if this execution is for the function we're interested in and has a matching end tag # function_name is at index 2 in the match tuple + logger.info(f"Start match: {start_match}") + logger.info(f"End matches: {end_matches_set}") + logger.info(f"Function name: {function_name}") if start_match in end_matches_set and len(start_match) > 2 and start_match[2] == function_name: function_throughput += 1 diff --git a/tests/test_critic.py b/tests/test_critic.py index 27df4dde9..17bc3daa2 100644 --- a/tests/test_critic.py +++ b/tests/test_critic.py @@ -14,7 +14,13 @@ TestResults, TestType, ) -from codeflash.result.critic import coverage_critic, performance_gain, quantity_of_tests_critic, speedup_critic +from codeflash.result.critic import ( + coverage_critic, + performance_gain, + quantity_of_tests_critic, + speedup_critic, + throughput_gain, +) def test_performance_gain() -> None: @@ -429,3 +435,159 @@ def test_coverage_critic() -> None: ) assert coverage_critic(unittest_coverage, "unittest") is True + + +def test_throughput_gain() -> None: + """Test throughput_gain calculation.""" + # Test basic throughput improvement + assert throughput_gain(original_throughput=100, optimized_throughput=150) == 0.5 # 50% improvement + + # Test no improvement + assert throughput_gain(original_throughput=100, optimized_throughput=100) == 0.0 + + # Test regression + assert throughput_gain(original_throughput=100, optimized_throughput=80) == -0.2 # 20% regression + + # Test zero original throughput (edge case) + assert throughput_gain(original_throughput=0, optimized_throughput=50) == 0.0 + + # Test large improvement + assert throughput_gain(original_throughput=50, optimized_throughput=200) == 3.0 # 300% improvement + + +def test_speedup_critic_with_async_throughput() -> None: + """Test speedup_critic with async throughput evaluation.""" + original_code_runtime = 10000 # 10 microseconds + original_async_throughput = 100 + + # Test case 1: Both runtime and throughput improve significantly + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=8000, # 20% runtime improvement + behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=8000, + async_throughput=120, # 20% throughput improvement + ) + + assert speedup_critic( + 
candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=original_async_throughput, + best_throughput_until_now=None, + disable_gh_action_noise=True + ) + + # Test case 2: Runtime improves but throughput doesn't meet threshold + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=8000, # 20% runtime improvement + behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=8000, + async_throughput=105, # Only 5% throughput improvement (below 10% threshold) + ) + + assert not speedup_critic( + candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=original_async_throughput, + best_throughput_until_now=None, + disable_gh_action_noise=True + ) + + # Test case 3: Throughput improves but runtime doesn't meet threshold + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=9800, # Only 2% runtime improvement (below 5% threshold) + behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=9800, + async_throughput=120, # 20% throughput improvement + ) + + assert not speedup_critic( + candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=original_async_throughput, + best_throughput_until_now=None, + disable_gh_action_noise=True + ) + + # Test case 4: No throughput data - should fall back to runtime-only evaluation + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=8000, # 20% runtime improvement + behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=8000, + async_throughput=None, # No throughput data + ) + + assert speedup_critic( + candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=None, # No original throughput data + best_throughput_until_now=None, + disable_gh_action_noise=True + ) + + # Test case 5: Test best_throughput_until_now comparison + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=8000, # 20% runtime improvement + behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=8000, + async_throughput=115, # 15% throughput improvement + ) + + # Should pass when no best throughput yet + assert speedup_critic( + candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=original_async_throughput, + best_throughput_until_now=None, + disable_gh_action_noise=True + ) + + # Should fail when there's a better throughput already + assert not speedup_critic( + candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=original_async_throughput, + best_throughput_until_now=120, # Better throughput already exists + disable_gh_action_noise=True + ) + + # Test case 6: Zero original throughput (edge case) + candidate_result = OptimizedCandidateResult( + max_loop_count=5, + best_test_runtime=8000, # 20% runtime improvement + 
behavior_test_results=TestResults(), + benchmarking_test_results=TestResults(), + optimization_candidate_index=0, + total_candidate_timing=8000, + async_throughput=50, + ) + + # Should pass when original throughput is 0 (throughput evaluation skipped) + assert speedup_critic( + candidate_result=candidate_result, + original_code_runtime=original_code_runtime, + best_runtime_until_now=None, + original_async_throughput=0, # Zero original throughput + best_throughput_until_now=None, + disable_gh_action_noise=True + ) From d7f9bbde593d6693aa4ff53ab741e9e6856f4c3d Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Wed, 17 Sep 2025 20:32:04 -0700 Subject: [PATCH 18/40] update tests again --- codeflash/optimization/function_optimizer.py | 22 +++++----- codeflash/result/critic.py | 13 +++--- tests/test_async_run_and_parse_tests.py | 16 +++---- tests/test_codeflash_capture.py | 24 +++++----- tests/test_instrument_all_and_run.py | 10 ++--- tests/test_instrument_tests.py | 44 +++++++++---------- ...t_instrumentation_run_results_aiservice.py | 10 ++--- tests/test_pickle_patcher.py | 8 ++-- 8 files changed, 75 insertions(+), 72 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index cb25df992..ee4050a67 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1411,7 +1411,7 @@ def establish_original_code_baseline( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) - behavioral_results, coverage_results, _ = self.run_and_parse_tests( + behavioral_results, coverage_results = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, @@ -1455,7 +1455,7 @@ def establish_original_code_baseline( ) try: - benchmarking_results, _, _ = self.run_and_parse_tests( + benchmarking_results, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1479,7 +1479,7 @@ def establish_original_code_baseline( # * 1.5 to give unittest a bit more time to run break test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1) - unittest_loop_results, _, _ = self.run_and_parse_tests( + unittest_loop_results, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1602,7 +1602,7 @@ def run_optimized_candidate( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) - candidate_behavior_results, _, _ = self.run_and_parse_tests( + candidate_behavior_results, _ = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=self.test_files, @@ -1648,7 +1648,7 @@ def run_optimized_candidate( ) try: - candidate_benchmarking_results, _, _ = self.run_and_parse_tests( + candidate_benchmarking_results, _ = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1681,7 +1681,7 @@ def run_optimized_candidate( # * 1.5 to give unittest a bit more time to run break test_env["CODEFLASH_LOOP_INDEX"] = str(i + 1) - unittest_loop_results, cov, _ = self.run_and_parse_tests( + unittest_loop_results, cov = self.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=self.test_files, @@ -1746,7 +1746,7 @@ def run_and_parse_tests( code_context: CodeOptimizationContext | None = None, unittest_loop_index: int | None = None, 
line_profiler_output_file: Path | None = None, - ) -> tuple[TestResults | dict, CoverageData | None, TestResults | None]: + ) -> tuple[TestResults | dict, CoverageData | None]: coverage_database_file = None coverage_config_file = None try: @@ -1792,7 +1792,7 @@ def run_and_parse_tests( logger.exception( f"Error running tests in {', '.join(str(f) for f in test_files.test_files)}.\nTimeout Error" ) - return TestResults(), None, None + return TestResults(), None if run_result.returncode != 0 and testing_type == TestingMode.BEHAVIOR: logger.debug( f"Nonzero return code {run_result.returncode} when running tests in " @@ -1820,9 +1820,9 @@ def run_and_parse_tests( coverage_database_file=coverage_database_file, coverage_config_file=coverage_config_file, ) - return results, coverage_results, None + return results, coverage_results results, coverage_results = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file) - return results, coverage_results, None + return results, coverage_results def submit_test_generation_tasks( self, @@ -1881,7 +1881,7 @@ def line_profiler_step( codeflash_loop_index=0, codeflash_test_iteration=candidate_index, codeflash_tracer_disable=1 ) line_profiler_output_file = add_decorator_imports(self.function_to_optimize, code_context) - line_profile_results, _, _ = self.run_and_parse_tests( + line_profile_results, _ = self.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=self.test_files, diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 1869dfc06..fa433a2f5 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -75,18 +75,21 @@ def speedup_critic( # Async throughput evaluation (if throughput data is available) throughput_improved = True # Default to True if no throughput data - throughput_is_best = True # Default to True if no throughput data + throughput_is_best = True # Default to True if no throughput data if original_async_throughput is not None and candidate_result.async_throughput is not None: if original_async_throughput > 0: # Avoid division by zero throughput_gain_value = throughput_gain( - original_throughput=original_async_throughput, - optimized_throughput=candidate_result.async_throughput + original_throughput=original_async_throughput, optimized_throughput=candidate_result.async_throughput ) throughput_improved = throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD - logger.debug(f"Async throughput gain: {throughput_gain_value * 100:.1f}% (original: {original_async_throughput}, optimized: {candidate_result.async_throughput})") + logger.debug( + f"Async throughput gain: {throughput_gain_value * 100:.1f}% (original: {original_async_throughput}, optimized: {candidate_result.async_throughput})" + ) - throughput_is_best = best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now + throughput_is_best = ( + best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now + ) # For async functions with throughput data, both runtime and throughput should improve # For sync functions or when throughput data is unavailable, only runtime matters diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index 010fd5b43..4cb75aa85 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -92,7 +92,7 @@ async def test_async_sort(): ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + 
test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -211,7 +211,7 @@ async def test_async_class_sort(): ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -320,7 +320,7 @@ async def test_async_perf(): ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=func_optimizer.test_files, @@ -473,7 +473,7 @@ async def async_error_function(lst): ] ) - test_results, _, _ = func_optimizer.run_and_parse_tests( + test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -567,7 +567,7 @@ async def test_async_multi(): ] ) - test_results, _, _ = func_optimizer.run_and_parse_tests( + test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -678,7 +678,7 @@ async def test_async_edge_cases(): ] ) - test_results, _, _ = func_optimizer.run_and_parse_tests( + test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -810,7 +810,7 @@ def test_sync_sort(): ] ) - test_results, _, _ = func_optimizer.run_and_parse_tests( + test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -976,7 +976,7 @@ async def test_mixed_sorting(): ] ) - test_results, _, _ = func_optimizer.run_and_parse_tests( + test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, diff --git a/tests/test_codeflash_capture.py b/tests/test_codeflash_capture.py index bc5bec790..469d1be6a 100644 --- a/tests/test_codeflash_capture.py +++ b/tests/test_codeflash_capture.py @@ -459,7 +459,7 @@ def __init__(self, x=2): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -492,7 +492,7 @@ def __init__(self, x=2): assert test_results[2].id.function_getting_tested == "some_function" assert test_results[2].id.iteration_id == "16_0" - test_results2, _, _ = func_optimizer.run_and_parse_tests( + test_results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -580,7 +580,7 @@ def __init__(self, *args, **kwargs): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -614,7 +614,7 @@ def __init__(self, *args, **kwargs): assert test_results[2].id.function_getting_tested == "some_function" assert test_results[2].id.iteration_id == "16_0" - results2, _, _ = func_optimizer.run_and_parse_tests( + results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, 
test_files=func_optimizer.test_files, @@ -705,7 +705,7 @@ def __init__(self, x=2): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -741,7 +741,7 @@ def __init__(self, x=2): assert test_results[2].id.function_getting_tested == "some_function" assert test_results[2].id.iteration_id == "12_2" # Third call - test_results2, _, _ = func_optimizer.run_and_parse_tests( + test_results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -867,7 +867,7 @@ def another_helper(self): ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -888,7 +888,7 @@ def another_helper(self): assert test_results[3].id.function_getting_tested == "AnotherHelperClass.__init__" assert test_results[3].verification_type == VerificationType.INIT_STATE_HELPER - results2, _, _ = func_optimizer.run_and_parse_tests( + results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1026,7 +1026,7 @@ def another_helper(self): } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1078,7 +1078,7 @@ def target_function(self): Path(helper_path_2): {"HelperClass2", "AnotherHelperClass"}, } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - modified_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + modified_test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1117,7 +1117,7 @@ def target_function(self): Path(helper_path_2): {"HelperClass2", "AnotherHelperClass"}, } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - mutated_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + mutated_test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -1155,7 +1155,7 @@ def target_function(self): Path(helper_path_2): {"HelperClass2", "AnotherHelperClass"}, } instrument_codeflash_capture(fto, file_path_to_helper_classes, tests_root) - no_helper1_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + no_helper1_test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py index 44044ed55..7e1a20f49 100644 --- a/tests/test_instrument_all_and_run.py +++ b/tests/test_instrument_all_and_run.py @@ -160,7 +160,7 @@ def test_sort(): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -205,7 
+205,7 @@ def test_sort(): result: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] """ assert test_results[1].stdout == out_str - results2, _, _ = func_optimizer.run_and_parse_tests( + results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -331,7 +331,7 @@ def test_sort(): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -376,7 +376,7 @@ def test_sort(): assert test_results[3].did_pass assert test_results[3].stdout == """codeflash stdout : BubbleSorter.sorter() called\n""" - results2, _, _ = func_optimizer.run_and_parse_tests( + results2, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -439,7 +439,7 @@ def sorter(self, arr): ) ] ) - new_test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + new_test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, diff --git a/tests/test_instrument_tests.py b/tests/test_instrument_tests.py index e42b41f6f..ccec5ffe3 100644 --- a/tests/test_instrument_tests.py +++ b/tests/test_instrument_tests.py @@ -434,7 +434,7 @@ def test_sort(): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -469,7 +469,7 @@ def test_sort(): with test_path_perf.open("w") as f: f.write(new_perf_test) - test_results_perf, _, _ = func_optimizer.run_and_parse_tests( + test_results_perf, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -521,7 +521,7 @@ def test_sort(): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -677,7 +677,7 @@ def test_sort_parametrized(input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -731,7 +731,7 @@ def test_sort_parametrized(input, expected_output): assert test_results[2].runtime > 0 assert test_results[2].did_pass - test_results_perf, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results_perf, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -788,7 +788,7 @@ def test_sort_parametrized(input, expected_output): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _ = func_optimizer.run_and_parse_tests( 
testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -962,7 +962,7 @@ def test_sort_parametrized_loop(input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1053,7 +1053,7 @@ def test_sort_parametrized_loop(input, expected_output): assert test_results[5].did_pass assert test_results[5].stdout == out_str - test_results, _, _ = func_optimizer.run_and_parse_tests( + test_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -1143,7 +1143,7 @@ def test_sort_parametrized_loop(input, expected_output): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -1315,7 +1315,7 @@ def test_sort(): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1357,7 +1357,7 @@ def test_sort(): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -1421,7 +1421,7 @@ def test_sort(): original_helper_code[helper_function_path] = helper_code computed_fn_opt = True line_profiler_output_file = add_decorator_imports(func_optimizer.function_to_optimize, code_context) - line_profile_results, _, _ = func_optimizer.run_and_parse_tests( + line_profile_results, _ = func_optimizer.run_and_parse_tests( testing_type=TestingMode.LINE_PROFILE, test_env=test_env, test_files=test_files, @@ -1623,7 +1623,7 @@ def test_sort(self): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1673,7 +1673,7 @@ def test_sort(self): ) assert test_results[2].runtime > 0 assert test_results[2].did_pass - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -1882,7 +1882,7 @@ def test_sort(self, input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -1937,7 +1937,7 @@ def test_sort(self, 
input, expected_output): """ assert test_results[2].stdout == out_str - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -2145,7 +2145,7 @@ def test_sort(self): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( test_env=test_env, testing_type=TestingMode.BEHAVIOR, test_files=test_files, @@ -2200,7 +2200,7 @@ def test_sort(self): """ assert test_results[2].stdout == out_str - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( test_env=test_env, testing_type=TestingMode.PERFORMANCE, test_files=test_files, @@ -2406,7 +2406,7 @@ def test_sort(self, input, expected_output): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=f, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -2493,7 +2493,7 @@ def test_sort(self, input, expected_output): ) assert test_results[5].runtime > 0 assert test_results[5].did_pass - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -3058,7 +3058,7 @@ def test_sleepfunc_sequence_short(n, expected_total_sleep_time): ) ] ) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, @@ -3185,7 +3185,7 @@ def test_sleepfunc_sequence_short(self, n, expected_total_sleep_time): pytest_cmd="pytest", ) func_optimizer = FunctionOptimizer(function_to_optimize=func, test_cfg=test_config) - test_results, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.PERFORMANCE, test_env=test_env, test_files=test_files, diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py index 9514214bc..78d9973f1 100644 --- a/tests/test_instrumentation_run_results_aiservice.py +++ b/tests/test_instrumentation_run_results_aiservice.py @@ -170,7 +170,7 @@ def test_single_element_list(): a = BubbleSorter() function_to_optimize = FunctionToOptimize("sorter", fto_path, [FunctionParent("BubbleSorter", "ClassDef")]) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results, coverage_data, _ = func_opt.run_and_parse_tests( + test_results, coverage_data = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -210,7 +210,7 @@ def sorter(self, arr): """ fto_path.write_text(optimized_code_mutated_attr, "utf-8") func_opt = opt.create_function_optimizer(function_to_optimize) - test_results_mutated_attr, coverage_data, _ = func_opt.run_and_parse_tests( + test_results_mutated_attr, coverage_data = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ 
-311,7 +311,7 @@ def test_single_element_list(): ] ) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results, coverage_data, _ = func_opt.run_and_parse_tests( + test_results, coverage_data = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -388,7 +388,7 @@ def sorter(self, arr): ) ) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results_mutated_attr, coverage_data, _ = func_opt.run_and_parse_tests( + test_results_mutated_attr, coverage_data = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, @@ -441,7 +441,7 @@ def sorter(self, arr): ) ) func_opt = opt.create_function_optimizer(function_to_optimize) - test_results_new_attr, coverage_data, _ = func_opt.run_and_parse_tests( + test_results_new_attr, coverage_data = func_opt.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=test_files, diff --git a/tests/test_pickle_patcher.py b/tests/test_pickle_patcher.py index 38cec3b1d..346153674 100644 --- a/tests/test_pickle_patcher.py +++ b/tests/test_pickle_patcher.py @@ -377,7 +377,7 @@ def test_run_and_parse_picklepatch() -> None: ) ] ) - test_results_unused_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results_unused_socket, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -402,7 +402,7 @@ def bubble_sort_with_unused_socket(data_container): return sorted(numbers) """) # Run optimized code for unused socket - optimized_test_results_unused_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( + optimized_test_results_unused_socket, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -454,7 +454,7 @@ def bubble_sort_with_unused_socket(data_container): ) ] ) - test_results_used_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( + test_results_used_socket, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, @@ -485,7 +485,7 @@ def bubble_sort_with_used_socket(data_container): """) # Run test for optimized function code that uses the socket. This should fail, as the PicklePlaceholder is accessed. 
- optimized_test_results_used_socket, coverage_data, _ = func_optimizer.run_and_parse_tests( + optimized_test_results_used_socket, coverage_data = func_optimizer.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, test_env=test_env, test_files=func_optimizer.test_files, From 58e559252a05da163e1db8f8ded9b0d75a265fb4 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Thu, 18 Sep 2025 15:20:23 -0700 Subject: [PATCH 19/40] Update codeflash_wrap_decorator.py --- codeflash/code_utils/codeflash_wrap_decorator.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/codeflash/code_utils/codeflash_wrap_decorator.py b/codeflash/code_utils/codeflash_wrap_decorator.py index fc72ccb16..85c578a28 100644 --- a/codeflash/code_utils/codeflash_wrap_decorator.py +++ b/codeflash/code_utils/codeflash_wrap_decorator.py @@ -30,11 +30,9 @@ def get_run_tmp_file(file_path: Path) -> Path: # moved from codeflash/code_util return Path(get_run_tmp_file.tmpdir.name) / file_path -def extract_test_context_from_frame() -> tuple[str, str | None, str]: - # test_module = os.environ.get("CODEFLASH_TEST_MODULE") +def extract_test_context_from_env() -> tuple[str, str | None, str]: test_module = os.environ["CODEFLASH_TEST_MODULE"] test_class = os.environ.get("CODEFLASH_TEST_CLASS", None) - # test_function = os.environ.get("CODEFLASH_TEST_FUNCTION") test_function = os.environ["CODEFLASH_TEST_FUNCTION"] if test_module and test_function: @@ -52,7 +50,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 function_name = func.__name__ line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - test_module_name, test_class_name, test_name = extract_test_context_from_frame() + test_module_name, test_class_name, test_name = extract_test_context_from_env() test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" @@ -129,7 +127,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) - test_module_name, test_class_name, test_name = extract_test_context_from_frame() + test_module_name, test_class_name, test_name = extract_test_context_from_env() test_id = f"{test_module_name}:{test_class_name}:{test_name}:{line_id}:{loop_index}" From a7a06ea2795a657039beae373f4bc0c62149d90b Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sun, 21 Sep 2025 22:05:31 -0700 Subject: [PATCH 20/40] Update test_async_wrapper_sqlite_validation.py --- tests/test_async_wrapper_sqlite_validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_async_wrapper_sqlite_validation.py b/tests/test_async_wrapper_sqlite_validation.py index c1c71fd4c..6950a324c 100644 --- a/tests/test_async_wrapper_sqlite_validation.py +++ b/tests/test_async_wrapper_sqlite_validation.py @@ -27,6 +27,7 @@ def test_env_setup(self, request): "CODEFLASH_TEST_MODULE": __name__, "CODEFLASH_TEST_CLASS": "TestAsyncWrapperSQLiteValidation", "CODEFLASH_TEST_FUNCTION": request.node.name, + "CODEFLASH_CURRENT_LINE_ID": "test_unit", } for key, value in test_env.items(): @@ -60,6 +61,7 @@ async def simple_async_add(a: int, b: int) -> int: await asyncio.sleep(0.001) return a + b + os.environ['CODEFLASH_CURRENT_LINE_ID'] = 'simple_async_add_59' result = await simple_async_add(5, 3) assert result == 8 From d1b40d07f584c599924dea8fb0db692acd76f976 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Sun, 21 Sep 2025 22:06:55 -0700 
Subject: [PATCH 21/40] check. --- .../code_utils/codeflash_wrap_decorator.py | 4 +- .../code_utils/instrument_existing_tests.py | 175 +++++++++++++++- tests/test_instrument_async_tests.py | 191 ++++++++++++++++++ 3 files changed, 367 insertions(+), 3 deletions(-) diff --git a/codeflash/code_utils/codeflash_wrap_decorator.py b/codeflash/code_utils/codeflash_wrap_decorator.py index 85c578a28..5dda746de 100644 --- a/codeflash/code_utils/codeflash_wrap_decorator.py +++ b/codeflash/code_utils/codeflash_wrap_decorator.py @@ -48,7 +48,7 @@ def codeflash_behavior_async(func: F) -> F: async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 loop = asyncio.get_running_loop() function_name = func.__name__ - line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" + line_id = os.environ["CODEFLASH_CURRENT_LINE_ID"] loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) test_module_name, test_class_name, test_name = extract_test_context_from_env() @@ -124,7 +124,7 @@ def codeflash_performance_async(func: F) -> F: async def async_wrapper(*args: Any, **kwargs: Any) -> Any: # noqa: ANN401 loop = asyncio.get_running_loop() function_name = func.__name__ - line_id = f"{func.__name__}_{func.__code__.co_firstlineno}" + line_id = os.environ["CODEFLASH_CURRENT_LINE_ID"] loop_index = int(os.environ["CODEFLASH_LOOP_INDEX"]) test_module_name, test_class_name, test_name = extract_test_context_from_env() diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index be75eac85..569772a1a 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -291,6 +291,139 @@ def visit_FunctionDef(self, node: ast.FunctionDef, test_class_name: str | None = return node +class AsyncCallInstrumenter(ast.NodeTransformer): + def __init__( + self, + function: FunctionToOptimize, + module_path: str, + test_framework: str, + call_positions: list[CodePosition], + mode: TestingMode = TestingMode.BEHAVIOR, + ) -> None: + self.mode = mode + self.function_object = function + self.class_name = None + self.only_function_name = function.function_name + self.module_path = module_path + self.test_framework = test_framework + self.call_positions = call_positions + self.did_instrument = False + # Track function call count per test function + self.async_call_counter: dict[str, int] = {} + if len(function.parents) == 1 and function.parents[0].type == "ClassDef": + self.class_name = function.top_level_parent_name + + def visit_ClassDef(self, node: ast.ClassDef) -> ast.ClassDef: + # Add timeout decorator for unittest test classes if needed + if self.test_framework == "unittest": + for item in node.body: + if ( + isinstance(item, ast.FunctionDef) + and item.name.startswith("test_") + and not any( + isinstance(d, ast.Call) + and isinstance(d.func, ast.Name) + and d.func.id == "timeout_decorator.timeout" + for d in item.decorator_list + ) + ): + timeout_decorator = ast.Call( + func=ast.Name(id="timeout_decorator.timeout", ctx=ast.Load()), + args=[ast.Constant(value=15)], + keywords=[], + ) + item.decorator_list.append(timeout_decorator) + return self.generic_visit(node) + + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> ast.AsyncFunctionDef: + if not node.name.startswith("test_"): + return node + + return self._process_test_function(node) + + def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef: + # Only process test functions + if not node.name.startswith("test_"): + return node + + 
return self._process_test_function(node) + + def _process_test_function( + self, node: ast.AsyncFunctionDef | ast.FunctionDef + ) -> ast.AsyncFunctionDef | ast.FunctionDef: + if self.test_framework == "unittest" and not any( + isinstance(d, ast.Call) and isinstance(d.func, ast.Name) and d.func.id == "timeout_decorator.timeout" + for d in node.decorator_list + ): + timeout_decorator = ast.Call( + func=ast.Name(id="timeout_decorator.timeout", ctx=ast.Load()), + args=[ast.Constant(value=15)], + keywords=[], + ) + node.decorator_list.append(timeout_decorator) + + # Initialize counter for this test function + if node.name not in self.async_call_counter: + self.async_call_counter[node.name] = 0 + + new_body = [] + + for i, stmt in enumerate(node.body): + transformed_stmt, added_env_assignment = self._instrument_statement(stmt, node.name) + + if added_env_assignment: + current_call_index = self.async_call_counter[node.name] + self.async_call_counter[node.name] += 1 + + env_assignment = ast.Assign( + targets=[ + ast.Subscript( + value=ast.Attribute( + value=ast.Name(id="os", ctx=ast.Load()), attr="environ", ctx=ast.Load() + ), + slice=ast.Constant(value="CODEFLASH_CURRENT_LINE_ID"), + ctx=ast.Store(), + ) + ], + value=ast.Constant(value=f"{current_call_index}"), + lineno=stmt.lineno if hasattr(stmt, "lineno") else 1, + ) + new_body.append(env_assignment) + self.did_instrument = True + + new_body.append(transformed_stmt) + + node.body = new_body + return node + + def _instrument_statement(self, stmt: ast.stmt, node_name: str) -> tuple[ast.stmt, bool]: + for node in ast.walk(stmt): + if ( + isinstance(node, ast.Await) + and isinstance(node.value, ast.Call) + and self._is_target_call(node.value) + and self._call_in_positions(node.value) + ): + # Check if this call is in one of our target positions + return stmt, True # Return original statement but signal we added env var + + return stmt, False + + def _is_target_call(self, call_node: ast.Call) -> bool: + """Check if this call node is calling our target async function.""" + if isinstance(call_node.func, ast.Name): + return call_node.func.id == self.function_object.function_name + if isinstance(call_node.func, ast.Attribute): + return call_node.func.attr == self.function_object.function_name + return False + + def _call_in_positions(self, call_node: ast.Call) -> bool: + if not hasattr(call_node, "lineno") or not hasattr(call_node, "col_offset"): + return False + + return node_in_call_position(call_node, self.call_positions) + + class FunctionImportedAsVisitor(ast.NodeVisitor): """Checks if a function has been imported as an alias. We only care about the alias then. 
@@ -352,6 +485,44 @@ def instrument_source_module_with_async_decorators( return False, None +def inject_async_profiling_into_existing_test( + test_path: Path, + call_positions: list[CodePosition], + function_to_optimize: FunctionToOptimize, + tests_project_root: Path, + test_framework: str, + mode: TestingMode = TestingMode.BEHAVIOR, +) -> tuple[bool, str | None]: + """Inject profiling for async function calls by setting environment variables before each call.""" + with test_path.open(encoding="utf8") as f: + test_code = f.read() + + try: + tree = ast.parse(test_code) + except SyntaxError: + logger.exception(f"Syntax error in code in file - {test_path}") + return False, None + + test_module_path = module_name_from_file_path(test_path, tests_project_root) + import_visitor = FunctionImportedAsVisitor(function_to_optimize) + import_visitor.visit(tree) + func = import_visitor.imported_as + + async_instrumenter = AsyncCallInstrumenter(func, test_module_path, test_framework, call_positions, mode=mode) + tree = async_instrumenter.visit(tree) + + if not async_instrumenter.did_instrument: + return False, None + + # Add necessary imports + new_imports = [ast.Import(names=[ast.alias(name="os")])] + if test_framework == "unittest": + new_imports.append(ast.Import(names=[ast.alias(name="timeout_decorator")])) + + tree.body = [*new_imports, *tree.body] + return True, isort.code(ast.unparse(tree), float_to_top=True) + + def inject_profiling_into_existing_test( test_path: Path, call_positions: list[CodePosition], @@ -361,7 +532,9 @@ def inject_profiling_into_existing_test( mode: TestingMode = TestingMode.BEHAVIOR, ) -> tuple[bool, str | None]: if function_to_optimize.is_async: - return False, None + return inject_async_profiling_into_existing_test( + test_path, call_positions, function_to_optimize, tests_project_root, test_framework, mode + ) with test_path.open(encoding="utf8") as f: test_code = f.read() diff --git a/tests/test_instrument_async_tests.py b/tests/test_instrument_async_tests.py index 97c4dd659..cdce5bf82 100644 --- a/tests/test_instrument_async_tests.py +++ b/tests/test_instrument_async_tests.py @@ -535,3 +535,194 @@ def test_qualified_name_with_nested_parents(): is_async=False ) assert func_mixed_parents.qualified_name == 'MyClass.outer_function.inner_function' + + +def test_inject_profiling_async_multiple_calls_same_test(temp_dir): + """Test that multiple async function calls within the same test function get correctly numbered 0, 1, 2, etc.""" + source_module_code = ''' +import asyncio + +async def async_sorter(items): + """Simple async sorter for testing.""" + await asyncio.sleep(0.001) + return sorted(items) +''' + + source_file = temp_dir / "async_sorter.py" + source_file.write_text(source_module_code) + + test_code_multiple_calls = ''' +import asyncio +import pytest +from async_sorter import async_sorter + +@pytest.mark.asyncio +async def test_single_call(): + result = await async_sorter([42]) + assert result == [42] + +@pytest.mark.asyncio +async def test_multiple_calls(): + result1 = await async_sorter([3, 1, 2]) + result2 = await async_sorter([5, 4]) + result3 = await async_sorter([9, 8, 7, 6]) + assert result1 == [1, 2, 3] + assert result2 == [4, 5] + assert result3 == [6, 7, 8, 9] +''' + + test_file = temp_dir / "test_async_sorter.py" + test_file.write_text(test_code_multiple_calls) + + func = FunctionToOptimize( + function_name="async_sorter", + parents=[], + file_path=Path("async_sorter.py"), + is_async=True + ) + + # First instrument the source module with async decorators + 
from codeflash.code_utils.instrument_existing_tests import instrument_source_module_with_async_decorators + source_success, instrumented_source = instrument_source_module_with_async_decorators( + source_file, func, TestingMode.BEHAVIOR + ) + + assert source_success + assert instrumented_source is not None + assert '@codeflash_behavior_async' in instrumented_source + + # Write the instrumented source back + source_file.write_text(instrumented_source) + + # Now test injection with multiple call positions + # Parse the test file to get exact positions for async calls + import ast + tree = ast.parse(test_code_multiple_calls) + call_positions = [] + for node in ast.walk(tree): + if isinstance(node, ast.Await) and isinstance(node.value, ast.Call): + if hasattr(node.value.func, 'id') and node.value.func.id == 'async_sorter': + call_positions.append(CodePosition(node.lineno, node.col_offset)) + elif hasattr(node.value.func, 'attr') and node.value.func.attr == 'async_sorter': + call_positions.append(CodePosition(node.lineno, node.col_offset)) + + # Should find 4 calls total: 1 in test_single_call + 3 in test_multiple_calls + assert len(call_positions) == 4 + + success, instrumented_test_code = inject_profiling_into_existing_test( + test_file, + call_positions, + func, + temp_dir, + "pytest", + mode=TestingMode.BEHAVIOR + ) + + assert success + assert instrumented_test_code is not None + + # Verify the instrumentation adds correct line_id assignments + # Each test function should start from 0 + assert "os.environ['CODEFLASH_CURRENT_LINE_ID'] = '0'" in instrumented_test_code + + # Count occurrences of each line_id to verify numbering + line_id_0_count = instrumented_test_code.count("os.environ['CODEFLASH_CURRENT_LINE_ID'] = '0'") + line_id_1_count = instrumented_test_code.count("os.environ['CODEFLASH_CURRENT_LINE_ID'] = '1'") + line_id_2_count = instrumented_test_code.count("os.environ['CODEFLASH_CURRENT_LINE_ID'] = '2'") + + # Should have: + # - 2 occurrences of '0' (first call in each test function) + # - 1 occurrence of '1' (second call in test_multiple_calls) + # - 1 occurrence of '2' (third call in test_multiple_calls) + assert line_id_0_count == 2, f"Expected 2 occurrences of line_id '0', got {line_id_0_count}" + assert line_id_1_count == 1, f"Expected 1 occurrence of line_id '1', got {line_id_1_count}" + assert line_id_2_count == 1, f"Expected 1 occurrence of line_id '2', got {line_id_2_count}" + + # Verify no higher numbers + line_id_3_count = instrumented_test_code.count("os.environ['CODEFLASH_CURRENT_LINE_ID'] = '3'") + assert line_id_3_count == 0, f"Unexpected occurrence of line_id '3'" + + # Check that imports are added + assert 'import os' in instrumented_test_code + + +def test_sync_functions_do_not_get_async_instrumentation(temp_dir): + """Test that sync functions do NOT get async instrumentation (os.environ assignments).""" + # Create a sync function module + sync_module_code = ''' +def sync_sorter(items): + """Simple sync sorter for testing.""" + return sorted(items) +''' + + source_file = temp_dir / "sync_sorter.py" + source_file.write_text(sync_module_code) + + # Create test code with sync function calls + sync_test_code = ''' +import pytest +from sync_sorter import sync_sorter + +def test_single_call(): + result = sync_sorter([42]) + assert result == [42] + +def test_multiple_calls(): + result1 = sync_sorter([3, 1, 2]) + result2 = sync_sorter([5, 4]) + result3 = sync_sorter([9, 8, 7, 6]) + assert result1 == [1, 2, 3] + assert result2 == [4, 5] + assert result3 == [6, 7, 8, 9] +''' 
+ + test_file = temp_dir / "test_sync_sorter.py" + test_file.write_text(sync_test_code) + + sync_func = FunctionToOptimize( + function_name="sync_sorter", + parents=[], + file_path=Path("sync_sorter.py"), + is_async=False # SYNC function + ) + + # Parse the test file to get exact positions for sync calls + import ast + tree = ast.parse(sync_test_code) + call_positions = [] + for node in ast.walk(tree): + if isinstance(node, ast.Call): + if hasattr(node.func, 'id') and node.func.id == 'sync_sorter': + call_positions.append(CodePosition(node.lineno, node.col_offset)) + elif hasattr(node.func, 'attr') and node.func.attr == 'sync_sorter': + call_positions.append(CodePosition(node.lineno, node.col_offset)) + + # Should find 4 calls total: 1 in test_single_call + 3 in test_multiple_calls + assert len(call_positions) == 4 + + success, instrumented_test_code = inject_profiling_into_existing_test( + test_file, + call_positions, + sync_func, + temp_dir, + "pytest", + mode=TestingMode.BEHAVIOR + ) + + assert success + assert instrumented_test_code is not None + + # Verify the sync function does NOT get async instrumentation + assert "os.environ['CODEFLASH_CURRENT_LINE_ID']" not in instrumented_test_code + + # But should get proper sync instrumentation + assert 'codeflash_wrap' in instrumented_test_code + assert 'codeflash_loop_index' in instrumented_test_code + assert 'sqlite3' in instrumented_test_code # sync behavior mode includes sqlite + + # Verify the line_id values are correct for sync functions (statement-based) + # Sync functions use statement index, not per-test-function counter + assert "'0'" in instrumented_test_code # first call in test_single_call + assert "'0'" in instrumented_test_code # first call in test_multiple_calls (second occurrence) + assert "'1'" in instrumented_test_code # second call in test_multiple_calls + assert "'2'" in instrumented_test_code # third call in test_multiple_calls From a947ea07ed928be6ea18958821b80812288f6095 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 00:13:44 -0700 Subject: [PATCH 22/40] go --- codeflash/verification/parse_test_output.py | 38 --------------------- 1 file changed, 38 deletions(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 13208b9a6..7f356d943 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -40,44 +40,6 @@ def parse_func(file_path: Path) -> XMLParser: matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!") -def calculate_async_throughput_from_stdout(stdout: str, async_function_names: set[str]) -> dict[str, int]: - if not stdout or not async_function_names: - return {} - - throughput_counts = {} - - # Find all complete performance tag pairs (start + end) - begin_matches = list(matches_re_start.finditer(stdout)) - end_matches = set() - - for match in matches_re_end.finditer(stdout): - groups = match.groups() - # Remove timing info from the last group to match start tags - # End format: 'iteration_id:timing_info', Start format: 'iteration_id' - # We need to remove only the last ':timing_info' part - last_group = groups[5] - split_parts = last_group.split(":") - if len(split_parts) > 2: # Has timing info (format: prefix:suffix:timing) - # Reconstruct without the timing info (last part) - iteration_id = ":".join(split_parts[:-1]) - normalized_groups = (*groups[:5], iteration_id) - else: - normalized_groups = groups - end_matches.add(normalized_groups) - - # Count 
complete tags for async functions only - for begin_match in begin_matches: - groups = begin_match.groups() - function_getting_tested = groups[4] - - if function_getting_tested in async_function_names and groups in end_matches: - if function_getting_tested not in throughput_counts: - throughput_counts[function_getting_tested] = 0 - throughput_counts[function_getting_tested] += 1 - - return throughput_counts - - start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!") end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######!") From 2e0f38f77eee8ff0db5bb6668546312907579fa3 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 00:41:38 -0700 Subject: [PATCH 23/40] throughput --- codeflash/optimization/function_optimizer.py | 20 +++++-------------- codeflash/verification/parse_test_output.py | 21 ++++++++++---------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index ee4050a67..10eaa55ab 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -90,7 +90,7 @@ from codeflash.verification.equivalence import compare_test_results from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture from codeflash.verification.parse_line_profile_test_output import parse_line_profile_results -from codeflash.verification.parse_test_output import calculate_function_throughput_from_stdout, parse_test_results +from codeflash.verification.parse_test_output import calculate_function_throughput_from_test_results, parse_test_results from codeflash.verification.test_runner import run_behavioral_tests, run_benchmarking_tests, run_line_profile_tests from codeflash.verification.verification_utils import get_test_file_path from codeflash.verification.verifier import generate_tests @@ -1528,14 +1528,9 @@ def establish_original_code_baseline( async_throughput = None if self.function_to_optimize.is_async: - all_stdout = "" - for result in benchmarking_results.test_results: - if result.stdout: - all_stdout += result.stdout logger.info("Calculating async function throughput from test output...") - logger.info(f"All stdout for async throughput calculation:\n{all_stdout}") - async_throughput = calculate_function_throughput_from_stdout( - all_stdout, self.function_to_optimize.function_name + async_throughput = calculate_function_throughput_from_test_results( + benchmarking_results, self.function_to_optimize.function_name ) logger.info(f"Original async function throughput: {async_throughput} calls/second") @@ -1700,13 +1695,8 @@ def run_optimized_candidate( candidate_async_throughput = None if self.function_to_optimize.is_async and candidate_benchmarking_results: - all_stdout = "" - for result in candidate_benchmarking_results.test_results: - if result.stdout: - all_stdout += result.stdout - - candidate_async_throughput = calculate_function_throughput_from_stdout( - all_stdout, self.function_to_optimize.function_name + candidate_async_throughput = calculate_function_throughput_from_test_results( + candidate_benchmarking_results, self.function_to_optimize.function_name ) if self.args.benchmark: diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 7f356d943..08eb41297 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -44,26 +44,27 @@ def 
parse_func(file_path: Path) -> XMLParser: end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######!") -def calculate_function_throughput_from_stdout(stdout: str, function_name: str) -> int: - """Calculate function throughput from stdout. A completed execution is defined as having both a start tag and matching end tag. +def calculate_function_throughput_from_test_results(test_results: TestResults, function_name: str) -> int: + """Calculate function throughput from TestResults by extracting stdout. + A completed execution is defined as having both a start tag and matching end tag. Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$! End: !######test_module:test_function:function_name:loop_index:iteration_id######! """ - start_matches = start_pattern.findall(stdout) - end_matches = end_pattern.findall(stdout) + all_stdout = "" + for result in test_results.test_results: + if result.stdout: + all_stdout += result.stdout + + start_matches = start_pattern.findall(all_stdout) + end_matches = end_pattern.findall(all_stdout) end_matches_set = set(end_matches) - # Count completed executions for the specific function only function_throughput = 0 logger.info(f"Total start matches: {len(start_matches)}, Total end matches: {len(end_matches)}") for start_match in start_matches: - # Check if this execution is for the function we're interested in and has a matching end tag - # function_name is at index 2 in the match tuple - logger.info(f"Start match: {start_match}") - logger.info(f"End matches: {end_matches_set}") - logger.info(f"Function name: {function_name}") if start_match in end_matches_set and len(start_match) > 2 and start_match[2] == function_name: + logger.info(f"Matched start-end pair for function '{function_name}': {start_match}") function_throughput += 1 return function_throughput From d3eefca2476cbab1757839dd7d6562a8e94feb73 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 02:13:38 -0700 Subject: [PATCH 24/40] perfstdout --- codeflash/models/models.py | 1 + codeflash/optimization/function_optimizer.py | 2 ++ codeflash/verification/parse_test_output.py | 13 +++++-------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/codeflash/models/models.py b/codeflash/models/models.py index b4e29effa..836c6ad20 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -565,6 +565,7 @@ class TestResults(BaseModel): # noqa: PLW1641 # also we don't support deletion of test results elements - caution is advised test_results: list[FunctionTestInvocation] = [] test_result_idx: dict[str, int] = {} + perf_stdout: Optional[str] = None def add(self, function_test_invocation: FunctionTestInvocation) -> None: unique_id = function_test_invocation.unique_invocation_loop_id diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 10eaa55ab..ac5162980 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1810,6 +1810,8 @@ def run_and_parse_tests( coverage_database_file=coverage_database_file, coverage_config_file=coverage_config_file, ) + if testing_type == TestingMode.PERFORMANCE: + results.perf_stdout = run_result.stdout return results, coverage_results results, coverage_results = parse_line_profile_results(line_profiler_output_file=line_profiler_output_file) return results, coverage_results diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index 
08eb41297..abdbb36ef 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -51,13 +51,9 @@ def calculate_function_throughput_from_test_results(test_results: TestResults, f Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$! End: !######test_module:test_function:function_name:loop_index:iteration_id######! """ - all_stdout = "" - for result in test_results.test_results: - if result.stdout: - all_stdout += result.stdout - - start_matches = start_pattern.findall(all_stdout) - end_matches = end_pattern.findall(all_stdout) + logger.info(test_results.perf_stdout) + start_matches = start_pattern.findall(test_results.perf_stdout or "") + end_matches = end_pattern.findall(test_results.perf_stdout or "") end_matches_set = set(end_matches) function_throughput = 0 @@ -66,7 +62,8 @@ def calculate_function_throughput_from_test_results(test_results: TestResults, f if start_match in end_matches_set and len(start_match) > 2 and start_match[2] == function_name: logger.info(f"Matched start-end pair for function '{function_name}': {start_match}") function_throughput += 1 - + logger.info(f"Function '{function_name}' throughput: {function_throughput}") + raise SystemExit return function_throughput From 039c5bacae080cf8e4b5edd57e4e68c62d34965f Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 02:34:16 -0700 Subject: [PATCH 25/40] calculate throughtput for baseline --- codeflash/verification/parse_test_output.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/codeflash/verification/parse_test_output.py b/codeflash/verification/parse_test_output.py index abdbb36ef..3b19d94c8 100644 --- a/codeflash/verification/parse_test_output.py +++ b/codeflash/verification/parse_test_output.py @@ -41,29 +41,26 @@ def parse_func(file_path: Path) -> XMLParser: start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!") -end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######!") +end_pattern = re.compile(r"!######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+):([^:]+)######!") def calculate_function_throughput_from_test_results(test_results: TestResults, function_name: str) -> int: - """Calculate function throughput from TestResults by extracting stdout. + """Calculate function throughput from TestResults by extracting performance stdout. - A completed execution is defined as having both a start tag and matching end tag. + A completed execution is defined as having both a start tag and matching end tag from performance wrappers. Start: !$######test_module:test_function:function_name:loop_index:iteration_id######$! - End: !######test_module:test_function:function_name:loop_index:iteration_id######! + End: !######test_module:test_function:function_name:loop_index:iteration_id:duration######! 
""" - logger.info(test_results.perf_stdout) start_matches = start_pattern.findall(test_results.perf_stdout or "") end_matches = end_pattern.findall(test_results.perf_stdout or "") - end_matches_set = set(end_matches) + + end_matches_truncated = [end_match[:5] for end_match in end_matches] + end_matches_set = set(end_matches_truncated) function_throughput = 0 - logger.info(f"Total start matches: {len(start_matches)}, Total end matches: {len(end_matches)}") for start_match in start_matches: if start_match in end_matches_set and len(start_match) > 2 and start_match[2] == function_name: - logger.info(f"Matched start-end pair for function '{function_name}': {start_match}") function_throughput += 1 - logger.info(f"Function '{function_name}' throughput: {function_throughput}") - raise SystemExit return function_throughput From aba3dcb6a5386f302ca9663ed036d8d0b6f085ca Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 02:38:11 -0700 Subject: [PATCH 26/40] Update function_optimizer.py --- codeflash/optimization/function_optimizer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index ac5162980..bd2192664 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1528,7 +1528,6 @@ def establish_original_code_baseline( async_throughput = None if self.function_to_optimize.is_async: - logger.info("Calculating async function throughput from test output...") async_throughput = calculate_function_throughput_from_test_results( benchmarking_results, self.function_to_optimize.function_name ) @@ -1693,11 +1692,11 @@ def run_optimized_candidate( logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}") - candidate_async_throughput = None - if self.function_to_optimize.is_async and candidate_benchmarking_results: + if self.function_to_optimize.is_async: candidate_async_throughput = calculate_function_throughput_from_test_results( candidate_benchmarking_results, self.function_to_optimize.function_name ) + logger.info(f"Candidate async function throughput: {candidate_async_throughput} calls/second") if self.args.benchmark: candidate_replay_benchmarking_results = candidate_benchmarking_results.group_by_benchmarks( From d3afd8ad6b73347e3082decd0103b221679996f2 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 02:42:47 -0700 Subject: [PATCH 27/40] Update critic.py --- codeflash/result/critic.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index fa433a2f5..3be5c7987 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -73,17 +73,16 @@ def speedup_critic( # Check runtime comparison with best so far runtime_is_best = best_runtime_until_now is None or candidate_result.best_test_runtime < best_runtime_until_now - # Async throughput evaluation (if throughput data is available) throughput_improved = True # Default to True if no throughput data throughput_is_best = True # Default to True if no throughput data if original_async_throughput is not None and candidate_result.async_throughput is not None: - if original_async_throughput > 0: # Avoid division by zero + if original_async_throughput > 0: throughput_gain_value = throughput_gain( original_throughput=original_async_throughput, optimized_throughput=candidate_result.async_throughput ) throughput_improved = 
throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD - logger.debug( + logger.info( f"Async throughput gain: {throughput_gain_value * 100:.1f}% (original: {original_async_throughput}, optimized: {candidate_result.async_throughput})" ) @@ -91,10 +90,9 @@ def speedup_critic( best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now ) - # For async functions with throughput data, both runtime and throughput should improve - # For sync functions or when throughput data is unavailable, only runtime matters if original_async_throughput is not None and candidate_result.async_throughput is not None: - return runtime_improved and runtime_is_best and throughput_improved and throughput_is_best + # prioritize throughput improvement + return (throughput_improved and throughput_is_best) and (runtime_improved or runtime_is_best) return runtime_improved and runtime_is_best From 971470efb01b6cffe398ec76bd397116041d51f9 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 12:21:12 -0700 Subject: [PATCH 28/40] adjust critic --- codeflash/result/critic.py | 6 ++++-- tests/test_critic.py | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 3be5c7987..938b02c27 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -91,8 +91,10 @@ def speedup_critic( ) if original_async_throughput is not None and candidate_result.async_throughput is not None: - # prioritize throughput improvement - return (throughput_improved and throughput_is_best) and (runtime_improved or runtime_is_best) + # When throughput data is available, accept if EITHER throughput OR runtime improves significantly + throughput_acceptance = throughput_improved and throughput_is_best + runtime_acceptance = runtime_improved and runtime_is_best + return throughput_acceptance or runtime_acceptance return runtime_improved and runtime_is_best diff --git a/tests/test_critic.py b/tests/test_critic.py index 17bc3daa2..3004c53d0 100644 --- a/tests/test_critic.py +++ b/tests/test_critic.py @@ -480,7 +480,7 @@ def test_speedup_critic_with_async_throughput() -> None: disable_gh_action_noise=True ) - # Test case 2: Runtime improves but throughput doesn't meet threshold + # Test case 2: Runtime improves significantly, throughput doesn't meet threshold (should pass) candidate_result = OptimizedCandidateResult( max_loop_count=5, best_test_runtime=8000, # 20% runtime improvement @@ -491,7 +491,7 @@ def test_speedup_critic_with_async_throughput() -> None: async_throughput=105, # Only 5% throughput improvement (below 10% threshold) ) - assert not speedup_critic( + assert speedup_critic( candidate_result=candidate_result, original_code_runtime=original_code_runtime, best_runtime_until_now=None, @@ -500,7 +500,7 @@ def test_speedup_critic_with_async_throughput() -> None: disable_gh_action_noise=True ) - # Test case 3: Throughput improves but runtime doesn't meet threshold + # Test case 3: Throughput improves significantly, runtime doesn't meet threshold (should pass) candidate_result = OptimizedCandidateResult( max_loop_count=5, best_test_runtime=9800, # Only 2% runtime improvement (below 5% threshold) @@ -511,7 +511,7 @@ def test_speedup_critic_with_async_throughput() -> None: async_throughput=120, # 20% throughput improvement ) - assert not speedup_critic( + assert speedup_critic( candidate_result=candidate_result, original_code_runtime=original_code_runtime, best_runtime_until_now=None, @@ -565,7 +565,7 
@@ def test_speedup_critic_with_async_throughput() -> None: assert not speedup_critic( candidate_result=candidate_result, original_code_runtime=original_code_runtime, - best_runtime_until_now=None, + best_runtime_until_now=7000, # Better runtime already exists original_async_throughput=original_async_throughput, best_throughput_until_now=120, # Better throughput already exists disable_gh_action_noise=True From 00a8981851497e1c8d25bbe8246ea7a457b827ee Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 12:21:20 -0700 Subject: [PATCH 29/40] update tests --- tests/test_async_run_and_parse_tests.py | 32 +++++++++++++++++++ tests/test_async_wrapper_sqlite_validation.py | 7 ---- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/tests/test_async_run_and_parse_tests.py b/tests/test_async_run_and_parse_tests.py index 4cb75aa85..b83be5c5a 100644 --- a/tests/test_async_run_and_parse_tests.py +++ b/tests/test_async_run_and_parse_tests.py @@ -77,6 +77,10 @@ async def test_async_sort(): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_async_bubble_sort_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_async_sort" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST # Create function optimizer and set up test files @@ -197,6 +201,10 @@ async def test_async_class_sort(): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_async_class_bubble_sort_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_async_class_sort" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(func) @@ -306,6 +314,10 @@ async def test_async_perf(): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_async_perf_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_async_perf" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(func) @@ -459,6 +471,10 @@ async def async_error_function(lst): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_async_error_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_async_error" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(func) @@ -553,6 +569,10 @@ async def test_async_multi(): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "3" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_async_multi_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_async_multi" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(func) @@ -664,6 +684,10 @@ async def test_async_edge_cases(): test_env = os.environ.copy() 
test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_async_edge_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_async_edge_cases" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(func) @@ -796,6 +820,10 @@ def test_sync_sort(): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_sync_in_async_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_sync_sort" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(func) @@ -962,6 +990,10 @@ async def test_mixed_sorting(): test_env = os.environ.copy() test_env["CODEFLASH_TEST_ITERATION"] = "0" test_env["CODEFLASH_LOOP_INDEX"] = "1" + test_env["CODEFLASH_TEST_MODULE"] = "code_to_optimize.tests.pytest.test_mixed_sort_temp" + test_env["CODEFLASH_TEST_CLASS"] = "" + test_env["CODEFLASH_TEST_FUNCTION"] = "test_mixed_sorting" + test_env["CODEFLASH_CURRENT_LINE_ID"] = "0" test_type = TestType.EXISTING_UNIT_TEST func_optimizer = opt.create_function_optimizer(async_func) diff --git a/tests/test_async_wrapper_sqlite_validation.py b/tests/test_async_wrapper_sqlite_validation.py index 6950a324c..5cf7252f6 100644 --- a/tests/test_async_wrapper_sqlite_validation.py +++ b/tests/test_async_wrapper_sqlite_validation.py @@ -283,10 +283,3 @@ async def schema_test_func() -> str: assert columns == expected_columns con.close() - def test_sync_test_context_extraction(self, test_env_setup): - from codeflash.code_utils.codeflash_wrap_decorator import extract_test_context_from_frame - - test_module, test_class, test_func = extract_test_context_from_frame() - assert test_module == __name__ - assert test_class == "TestAsyncWrapperSQLiteValidation" - assert test_func == "test_sync_test_context_extraction" From 1e4b3baaa38bb424a0ad09ce98f1002b5e089636 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 12:22:31 -0700 Subject: [PATCH 30/40] pre-commit fixes --- codeflash/code_utils/instrument_existing_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index 569772a1a..f93c304b1 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -368,7 +368,7 @@ def _process_test_function( new_body = [] - for i, stmt in enumerate(node.body): + for _i, stmt in enumerate(node.body): transformed_stmt, added_env_assignment = self._instrument_statement(stmt, node.name) if added_env_assignment: @@ -396,7 +396,7 @@ def _process_test_function( node.body = new_body return node - def _instrument_statement(self, stmt: ast.stmt, node_name: str) -> tuple[ast.stmt, bool]: + def _instrument_statement(self, stmt: ast.stmt, _node_name: str) -> tuple[ast.stmt, bool]: for node in ast.walk(stmt): if ( isinstance(node, ast.Await) From 654055d3c99a40c2909f5420cd5554fb9fe549c2 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 12:29:22 -0700 Subject: [PATCH 31/40] unbound local --- codeflash/optimization/function_optimizer.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index bd2192664..628391238 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1692,6 +1692,7 @@ def run_optimized_candidate( logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}") + candidate_async_throughput = None if self.function_to_optimize.is_async: candidate_async_throughput = calculate_function_throughput_from_test_results( candidate_benchmarking_results, self.function_to_optimize.function_name From 540b8aa8aa055a201c939615a93b2fdfc9aa2e83 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:41:35 +0000 Subject: [PATCH 32/40] Optimize AsyncCallInstrumenter.visit_ClassDef The optimization achieves a 25% speedup by **eliminating redundant AST node creation** inside the loop. **Key change:** The `timeout_decorator` AST node is now created once before the loop instead of being recreated for every test method that needs it. In the original code, this AST structure was built 3,411 times during profiling, consuming significant time in object allocation and initialization. **Why this works:** AST nodes are immutable once created, so the same `timeout_decorator` instance can be safely appended to multiple method decorator lists. This eliminates: - Repeated `ast.Call()` constructor calls - Redundant `ast.Name()` and `ast.Constant()` object creation - Multiple attribute assignments for the same decorator structure **Performance characteristics:** The optimization is most effective for large test classes with many test methods (showing 24-33% improvements in tests with 500+ methods), while having minimal impact on classes with few or no test methods. This makes it particularly valuable for comprehensive test suites where classes commonly contain dozens of test methods. The line profiler shows the AST node creation operations dropped from ~3,400 hits to just ~25 hits, directly correlating with the observed speedup. 
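A minimal, self-contained sketch of why sharing one node instance is safe (hypothetical test class and names, independent of the actual instrumenter code): the shared `ast.Call` node is only read when the tree is unparsed or compiled, never mutated, so appending the same instance to several decorator lists produces the expected output.

```python
import ast
import textwrap

source = textwrap.dedent("""
    class TestExample:
        def test_a(self):
            pass

        def test_b(self):
            pass
""")

tree = ast.parse(source)

# Built once, reused for every matching method (the hoisted pattern).
shared_decorator = ast.Call(
    func=ast.Name(id="timeout_decorator.timeout", ctx=ast.Load()),
    args=[ast.Constant(value=15)],
    keywords=[],
)

class_def = tree.body[0]
for item in class_def.body:
    if isinstance(item, ast.FunctionDef) and item.name.startswith("test_"):
        # The same node object is appended to both methods' decorator lists;
        # unparse() only reads it, so sharing is safe here.
        item.decorator_list.append(shared_decorator)

ast.fix_missing_locations(tree)
print(ast.unparse(tree))  # both methods end up with @timeout_decorator.timeout(15)
```

Under the stated assumption that the instrumented tree is only unparsed or compiled afterwards, the single shared instance behaves identically to per-method copies while avoiding the repeated allocations.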
--- codeflash/code_utils/instrument_existing_tests.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/codeflash/code_utils/instrument_existing_tests.py b/codeflash/code_utils/instrument_existing_tests.py index f93c304b1..8eb671540 100644 --- a/codeflash/code_utils/instrument_existing_tests.py +++ b/codeflash/code_utils/instrument_existing_tests.py @@ -316,6 +316,11 @@ def __init__( def visit_ClassDef(self, node: ast.ClassDef) -> ast.ClassDef: # Add timeout decorator for unittest test classes if needed if self.test_framework == "unittest": + timeout_decorator = ast.Call( + func=ast.Name(id="timeout_decorator.timeout", ctx=ast.Load()), + args=[ast.Constant(value=15)], + keywords=[], + ) for item in node.body: if ( isinstance(item, ast.FunctionDef) @@ -327,11 +332,6 @@ def visit_ClassDef(self, node: ast.ClassDef) -> ast.ClassDef: for d in item.decorator_list ) ): - timeout_decorator = ast.Call( - func=ast.Name(id="timeout_decorator.timeout", ctx=ast.Load()), - args=[ast.Constant(value=15)], - keywords=[], - ) item.decorator_list.append(timeout_decorator) return self.generic_visit(node) From fecdc940648d5aebb35471a9a315a084d359b173 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 14:18:11 -0700 Subject: [PATCH 33/40] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9a9af7cd8..1ff2c1c9d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.7 + rev: v0.13.1 hooks: # Run the linter. - id: ruff-check From eccdeff1a31ee27f5533065ac8fa0d2bd8ffdbf2 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 14:18:59 -0700 Subject: [PATCH 34/40] Update pre-commit.yaml --- .github/workflows/pre-commit.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index bc0a20ae8..2d1241252 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -10,10 +10,8 @@ concurrency: cancel-in-progress: true jobs: - lint: - name: Run pre-commit hooks + prek: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - uses: pre-commit/action@v3.0.1 \ No newline at end of file + - uses: actions/checkout@v5 + - uses: j178/prek-action@v1 From 8b1b2befb13ea42d042c1092807d4ae5b3c24663 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 14:21:41 -0700 Subject: [PATCH 35/40] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1ff2c1c9d..8d08a77f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,5 +4,6 @@ repos: hooks: # Run the linter. - id: ruff-check + args: [ --config=pyproject.toml ] # Run the formatter. 
- id: ruff-format \ No newline at end of file From 515b976ba5a2a73d7528a0e52b1ed65fa500aa33 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 14:22:52 -0700 Subject: [PATCH 36/40] Delete pre-commit.yaml --- .github/workflows/pre-commit.yaml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/workflows/pre-commit.yaml diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml deleted file mode 100644 index 2d1241252..000000000 --- a/.github/workflows/pre-commit.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: Lint -on: - pull_request: - push: - branches: - - main - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - prek: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - - uses: j178/prek-action@v1 From 30c8988945daa709fcc42765e09cb69cf7ff0e29 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 15:17:29 -0700 Subject: [PATCH 37/40] give throughput context to explanations --- codeflash/api/aiservice.py | 9 ++++++++ codeflash/models/models.py | 1 + codeflash/optimization/function_optimizer.py | 22 ++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 3e24d5bac..79f4d5300 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -298,6 +298,9 @@ def get_new_explanation( # noqa: D417 annotated_tests: str, optimization_id: str, original_explanation: str, + original_throughput: str | None = None, + optimized_throughput: str | None = None, + throughput_improvement: str | None = None, ) -> str: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -314,6 +317,9 @@ def get_new_explanation( # noqa: D417 - annotated_tests: str - test functions annotated with runtime - optimization_id: str - unique id of opt candidate - original_explanation: str - original_explanation generated for the opt candidate + - original_throughput: str | None - throughput for the baseline code (operations per second) + - optimized_throughput: str | None - throughput for the optimized code (operations per second) + - throughput_improvement: str | None - throughput improvement percentage Returns ------- @@ -333,6 +339,9 @@ def get_new_explanation( # noqa: D417 "optimization_id": optimization_id, "original_explanation": original_explanation, "dependency_code": dependency_code, + "original_throughput": original_throughput, + "optimized_throughput": optimized_throughput, + "throughput_improvement": throughput_improvement, } logger.info("Generating explanation") console.rule() diff --git a/codeflash/models/models.py b/codeflash/models/models.py index 836c6ad20..e1e094661 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -100,6 +100,7 @@ class BestOptimization(BaseModel): winning_benchmarking_test_results: TestResults winning_replay_benchmarking_test_results: Optional[TestResults] = None line_profiler_test_results: dict + async_throughput: Optional[int] = None @dataclass(frozen=True) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 628391238..6f931b014 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -635,6 +635,7 @@ def determine_best_candidate( replay_performance_gain=replay_perf_gain if self.args.benchmark else None, winning_benchmarking_test_results=candidate_result.benchmarking_test_results, 
winning_replay_benchmarking_test_results=candidate_result.benchmarking_test_results, + async_throughput=candidate_result.async_throughput, ) valid_optimizations.append(best_optimization) # queue corresponding refined optimization for best optimization @@ -697,6 +698,7 @@ def determine_best_candidate( replay_performance_gain=valid_opt.replay_performance_gain, winning_benchmarking_test_results=valid_opt.winning_benchmarking_test_results, winning_replay_benchmarking_test_results=valid_opt.winning_replay_benchmarking_test_results, + async_throughput=valid_opt.async_throughput, ) valid_candidates_with_shorter_code.append(new_best_opt) diff_lens_list.append( @@ -1281,6 +1283,23 @@ def process_review( original_runtimes_all=original_runtime_by_test, optimized_runtimes_all=optimized_runtime_by_test, ) + original_throughput_str = None + optimized_throughput_str = None + throughput_improvement_str = None + + if ( + self.function_to_optimize.is_async + and original_code_baseline.async_throughput is not None + and best_optimization.async_throughput is not None + ): + original_throughput_str = f"{original_code_baseline.async_throughput} operations/second" + optimized_throughput_str = f"{best_optimization.async_throughput} operations/second" + throughput_improvement_value = throughput_gain( + original_throughput=original_code_baseline.async_throughput, + optimized_throughput=best_optimization.async_throughput, + ) + throughput_improvement_str = f"{throughput_improvement_value * 100:.1f}%" + new_explanation_raw_str = self.aiservice_client.get_new_explanation( source_code=code_context.read_writable_code.flat, dependency_code=code_context.read_only_context_code, @@ -1294,6 +1313,9 @@ def process_review( annotated_tests=generated_tests_str, optimization_id=best_optimization.candidate.optimization_id, original_explanation=best_optimization.candidate.explanation, + original_throughput=original_throughput_str, + optimized_throughput=optimized_throughput_str, + throughput_improvement=throughput_improvement_str, ) new_explanation = Explanation( raw_explanation_message=new_explanation_raw_str or explanation.raw_explanation_message, From d8849a5a805c57cba33732b87730b452b14fb7e1 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 22 Sep 2025 21:16:15 -0700 Subject: [PATCH 38/40] logger.debug --- codeflash/optimization/function_optimizer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 6f931b014..28c485a80 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -587,8 +587,6 @@ def determine_best_candidate( ) tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") - logger.info(f"orig_async_throughput: {original_code_baseline.async_throughput}") - logger.info(f"candidate_result.async_throughput: {candidate_result.async_throughput}") if ( original_code_baseline.async_throughput is not None and candidate_result.async_throughput is not None @@ -1719,7 +1717,7 @@ def run_optimized_candidate( candidate_async_throughput = calculate_function_throughput_from_test_results( candidate_benchmarking_results, self.function_to_optimize.function_name ) - logger.info(f"Candidate async function throughput: {candidate_async_throughput} calls/second") + logger.debug(f"Candidate async function throughput: {candidate_async_throughput} calls/second") if self.args.benchmark: candidate_replay_benchmarking_results = 
candidate_benchmarking_results.group_by_benchmarks( From 47384e23191c8878a73eb45f6a2b48ba79022faf Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 23 Sep 2025 00:38:25 -0700 Subject: [PATCH 39/40] add end to end test --- .../code_directories/async_e2e/main.py | 18 +++++-- codeflash/optimization/function_optimizer.py | 20 ++++++-- codeflash/result/critic.py | 3 -- codeflash/result/explanation.py | 49 ++++++++++++++++++- tests/scripts/end_to_end_test_async.py | 8 +-- 5 files changed, 84 insertions(+), 14 deletions(-) diff --git a/code_to_optimize/code_directories/async_e2e/main.py b/code_to_optimize/code_directories/async_e2e/main.py index 4470cc969..317068a1c 100644 --- a/code_to_optimize/code_directories/async_e2e/main.py +++ b/code_to_optimize/code_directories/async_e2e/main.py @@ -1,4 +1,16 @@ import time -async def fake_api_call(delay, data): - time.sleep(0.0001) - return f"Processed: {data}" \ No newline at end of file +import asyncio + + +async def retry_with_backoff(func, max_retries=3): + if max_retries < 1: + raise ValueError("max_retries must be at least 1") + last_exception = None + for attempt in range(max_retries): + try: + return await func() + except Exception as e: + last_exception = e + if attempt < max_retries - 1: + time.sleep(0.0001 * attempt) + raise last_exception diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 28c485a80..bec15fe69 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -658,6 +658,15 @@ def determine_best_candidate( ) tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%") tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X") + if ( + original_code_baseline.async_throughput is not None + and candidate_result.async_throughput is not None + ): + throughput_gain_value = throughput_gain( + original_throughput=original_code_baseline.async_throughput, + optimized_throughput=candidate_result.async_throughput, + ) + tree.add(f"Throughput gain: {throughput_gain_value * 100:.1f}%") console.print(tree) if self.args.benchmark and benchmark_tree: console.print(benchmark_tree) @@ -1199,6 +1208,8 @@ def find_and_process_best_optimization( function_name=function_to_optimize_qualified_name, file_path=self.function_to_optimize.file_path, benchmark_details=processed_benchmark_info.benchmark_details if processed_benchmark_info else None, + original_async_throughput=original_code_baseline.async_throughput, + best_async_throughput=best_optimization.async_throughput, ) self.replace_function_and_helpers_with_optimized_code( @@ -1284,7 +1295,7 @@ def process_review( original_throughput_str = None optimized_throughput_str = None throughput_improvement_str = None - + if ( self.function_to_optimize.is_async and original_code_baseline.async_throughput is not None @@ -1297,7 +1308,7 @@ def process_review( optimized_throughput=best_optimization.async_throughput, ) throughput_improvement_str = f"{throughput_improvement_value * 100:.1f}%" - + new_explanation_raw_str = self.aiservice_client.get_new_explanation( source_code=code_context.read_writable_code.flat, dependency_code=code_context.read_only_context_code, @@ -1324,6 +1335,8 @@ def process_review( function_name=explanation.function_name, file_path=explanation.file_path, benchmark_details=explanation.benchmark_details, + original_async_throughput=explanation.original_async_throughput, + best_async_throughput=explanation.best_async_throughput, ) self.log_successful_optimization(new_explanation, generated_tests, 
exp_type) @@ -1551,7 +1564,8 @@ def establish_original_code_baseline( async_throughput = calculate_function_throughput_from_test_results( benchmarking_results, self.function_to_optimize.function_name ) - logger.info(f"Original async function throughput: {async_throughput} calls/second") + logger.debug(f"Original async function throughput: {async_throughput} calls/second") + console.rule() if self.args.benchmark: replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks( diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py index 938b02c27..d0ff62176 100644 --- a/codeflash/result/critic.py +++ b/codeflash/result/critic.py @@ -82,9 +82,6 @@ def speedup_critic( original_throughput=original_async_throughput, optimized_throughput=candidate_result.async_throughput ) throughput_improved = throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD - logger.info( - f"Async throughput gain: {throughput_gain_value * 100:.1f}% (original: {original_async_throughput}, optimized: {candidate_result.async_throughput})" - ) throughput_is_best = ( best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now diff --git a/codeflash/result/explanation.py b/codeflash/result/explanation.py index eb12beeb6..9fa5d02a5 100644 --- a/codeflash/result/explanation.py +++ b/codeflash/result/explanation.py @@ -11,6 +11,7 @@ from codeflash.code_utils.time_utils import humanize_runtime from codeflash.models.models import BenchmarkDetail, TestResults +from codeflash.result.critic import performance_gain, throughput_gain @dataclass(frozen=True, config={"arbitrary_types_allowed": True}) @@ -23,9 +24,29 @@ class Explanation: function_name: str file_path: Path benchmark_details: Optional[list[BenchmarkDetail]] = None + original_async_throughput: Optional[int] = None + best_async_throughput: Optional[int] = None @property def perf_improvement_line(self) -> str: + runtime_improvement = self.speedup + + if ( + self.original_async_throughput is not None + and self.best_async_throughput is not None + and self.original_async_throughput > 0 + ): + throughput_improvement = throughput_gain( + original_throughput=self.original_async_throughput, + optimized_throughput=self.best_async_throughput, + ) + + # Use throughput metrics if throughput improvement is better or runtime got worse + if throughput_improvement > runtime_improvement or runtime_improvement <= 0: + throughput_pct = f"{throughput_improvement * 100:,.0f}%" + throughput_x = f"{throughput_improvement + 1:,.2f}x" + return f"{throughput_pct} improvement ({throughput_x} faster)." + return f"{self.speedup_pct} improvement ({self.speedup_x} faster)." 
@property @@ -45,6 +66,24 @@ def to_console_string(self) -> str: # TODO: Sometimes the explanation says something similar to "This is the code that was optimized", remove such parts original_runtime_human = humanize_runtime(self.original_runtime_ns) best_runtime_human = humanize_runtime(self.best_runtime_ns) + + # Determine if we're showing throughput or runtime improvements + runtime_improvement = self.speedup + is_using_throughput_metric = False + + if ( + self.original_async_throughput is not None + and self.best_async_throughput is not None + and self.original_async_throughput > 0 + ): + throughput_improvement = throughput_gain( + original_throughput=self.original_async_throughput, + optimized_throughput=self.best_async_throughput, + ) + + if throughput_improvement > runtime_improvement or runtime_improvement <= 0: + is_using_throughput_metric = True + benchmark_info = "" if self.benchmark_details: @@ -85,10 +124,18 @@ def to_console_string(self) -> str: console.print(table) benchmark_info = cast("StringIO", console.file).getvalue() + "\n" # Cast for mypy + if is_using_throughput_metric: + performance_description = ( + f"Throughput improved from {self.original_async_throughput} to {self.best_async_throughput} operations/second " + f"(runtime: {original_runtime_human} → {best_runtime_human})\n\n" + ) + else: + performance_description = f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n" + return ( f"Optimized {self.function_name} in {self.file_path}\n" f"{self.perf_improvement_line}\n" - f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n" + + performance_description + (benchmark_info if benchmark_info else "") + self.raw_explanation_message + " \n\n" diff --git a/tests/scripts/end_to_end_test_async.py b/tests/scripts/end_to_end_test_async.py index f9ef1d806..5aed8f8ca 100644 --- a/tests/scripts/end_to_end_test_async.py +++ b/tests/scripts/end_to_end_test_async.py @@ -6,14 +6,14 @@ def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="workload.py", - expected_unit_tests=1, + file_path="main.py", + expected_unit_tests=0, min_improvement_x=0.1, coverage_expectations=[ CoverageExpectation( - function_name="process_data_list", + function_name="retry_with_backoff", expected_coverage=100.0, - expected_lines=[5, 7, 8, 9, 10, 12], + expected_lines=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], ) ], ) From 43615bddf6760da445791a84e10b8b44b5e7e045 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 23 Sep 2025 00:42:34 -0700 Subject: [PATCH 40/40] Create e2e-async.yaml --- .github/workflows/e2e-async.yaml | 69 ++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 .github/workflows/e2e-async.yaml diff --git a/.github/workflows/e2e-async.yaml b/.github/workflows/e2e-async.yaml new file mode 100644 index 000000000..e7d08091c --- /dev/null +++ b/.github/workflows/e2e-async.yaml @@ -0,0 +1,69 @@ +name: E2E - Async + +on: + pull_request: + paths: + - '**' # Trigger for all paths + + workflow_dispatch: + +jobs: + async-optimization: + # Dynamically determine if environment is needed only when workflow files change and contributor is external + environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }} + + runs-on: ubuntu-latest + env: + 
CODEFLASH_AIS_SERVER: prod + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }} + COLUMNS: 110 + MAX_RETRIES: 3 + RETRY_DELAY: 5 + EXPECTED_IMPROVEMENT_PCT: 10 + CODEFLASH_END_TO_END: 1 + steps: + - name: 🛎️ Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Validate PR + run: | + # Check for any workflow changes + if git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}" | grep -q "^.github/workflows/"; then + echo "⚠️ Workflow changes detected." + + # Get the PR author + AUTHOR="${{ github.event.pull_request.user.login }}" + echo "PR Author: $AUTHOR" + + # Allowlist check + if [[ "$AUTHOR" == "misrasaurabh1" || "$AUTHOR" == "KRRT7" ]]; then + echo "✅ Authorized user ($AUTHOR). Proceeding." + elif [[ "${{ github.event.pull_request.state }}" == "open" ]]; then + echo "✅ PR triggered by 'pull_request_target' and is open. Assuming protection rules are in place. Proceeding." + else + echo "⛔ Unauthorized user ($AUTHOR) attempting to modify workflows. Exiting." + exit 1 + fi + else + echo "✅ No workflow file changes detected. Proceeding." + fi + + - name: Set up Python 3.11 for CLI + uses: astral-sh/setup-uv@v5 + with: + python-version: 3.11.6 + + - name: Install dependencies (CLI) + run: | + uv sync + + - name: Run Codeflash to optimize async code + id: optimize_async_code + run: | + uv run python tests/scripts/end_to_end_test_async.py \ No newline at end of file