codeflash-ai
diff --git a/codeflash/benchmarking/codeflash_trace.py b/codeflash/benchmarking/codeflash_trace.py
Lines changed: 4 additions & 0 deletions
diff --git a/codeflash/benchmarking/instrument_codeflash_trace.py b/codeflash/benchmarking/instrument_codeflash_trace.py
Lines changed: 2 additions & 0 deletions
diff --git a/codeflash/benchmarking/plugin/plugin.py b/codeflash/benchmarking/plugin/plugin.py
Lines changed: 4 additions & 0 deletions
diff --git a/codeflash/benchmarking/replay_test.py b/codeflash/benchmarking/replay_test.py
Lines changed: 4 additions & 0 deletions
diff --git a/codeflash/benchmarking/utils.py b/codeflash/benchmarking/utils.py
Lines changed: 2 additions & 0 deletions
diff --git a/codeflash/cli_cmds/cmd_init.py b/codeflash/cli_cmds/cmd_init.py
Lines changed: 10 additions & 1 deletion
diff --git a/codeflash/cli_cmds/logging_config.py b/codeflash/cli_cmds/logging_config.py
Lines changed: 2 additions & 2 deletions
diff --git a/codeflash/code_utils/checkpoint.py b/codeflash/code_utils/checkpoint.py
Lines changed: 3 additions & 1 deletion
diff --git a/codeflash/code_utils/compat.py b/codeflash/code_utils/compat.py
Lines changed: 35 additions & 9 deletions
diff --git a/codeflash/code_utils/edit_generated_tests.py b/codeflash/code_utils/edit_generated_tests.py
Lines changed: 141 additions & 0 deletions
@@ -25,6 +25,7 @@ def setup(self, trace_path: str) -> None:
         """Set up the database connection for direct writing.
 
         Args:
+        ----
             trace_path: Path to the trace database file
 
         """
@@ -52,6 +53,7 @@ def write_function_timings(self) -> None:
         """Write function call data directly to the database.
 
         Args:
+        ----
             data: List of function call data tuples to write
 
         """
@@ -94,9 +96,11 @@ def __call__(self, func: Callable) -> Callable:
         """Use as a decorator to trace function execution.
 
         Args:
+        ----
             func: The function to be decorated
 
         Returns:
+        -------
             The wrapped function
 
         """
 
@@ -76,10 +76,12 @@ def add_codeflash_decorator_to_code(code: str, functions_to_optimize: list[Funct
     """Add codeflash_trace to a function.
 
     Args:
+    ----
         code: The source code as a string
         functions_to_optimize: List of FunctionToOptimize instances containing function details
 
     Returns:
+    -------
         The modified source code as a string
 
     """
 
@@ -74,9 +74,11 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
         """Process the trace file and extract timing data for all functions.
 
         Args:
+        ----
             trace_path: Path to the trace file
 
         Returns:
+        -------
             A nested dictionary where:
             - Outer keys are module_name.qualified_name (module.class.function)
             - Inner keys are of type BenchmarkKey
@@ -132,9 +134,11 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
         """Extract total benchmark timings from trace files.
 
         Args:
+        ----
             trace_path: Path to the trace file
 
         Returns:
+        -------
             A dictionary mapping where:
             - Keys are of type BenchmarkKey
             - Values are total benchmark timing in milliseconds (with overhead subtracted)
 
@@ -55,12 +55,14 @@ def create_trace_replay_test_code(
     """Create a replay test for functions based on trace data.
 
     Args:
+    ----
         trace_file: Path to the SQLite database file
         functions_data: List of dictionaries with function info extracted from DB
         test_framework: 'pytest' or 'unittest'
         max_run_count: Maximum number of runs to include in the test
 
     Returns:
+    -------
         A string containing the test code
 
     """
@@ -218,12 +220,14 @@ def generate_replay_test(
     """Generate multiple replay tests from the traced function calls, grouped by benchmark.
 
     Args:
+    ----
         trace_file_path: Path to the SQLite database file
         output_dir: Directory to write the generated tests (if None, only returns the code)
         test_framework: 'pytest' or 'unittest'
         max_run_count: Maximum number of runs to include per function
 
     Returns:
+    -------
         Dictionary mapping benchmark names to generated test code
 
     """
 
@@ -83,11 +83,13 @@ def process_benchmark_data(
     """Process benchmark data and generate detailed benchmark information.
 
     Args:
+    ----
         replay_performance_gain: The performance gain from replay
         fto_benchmark_timings: Function to optimize benchmark timings
         total_benchmark_timings: Total benchmark timings
 
     Returns:
+    -------
         ProcessedBenchmarkInfo containing processed benchmark details
 
     """
 
@@ -211,7 +211,7 @@ def collect_setup_info() -> SetupInfo:
     # Discover test directory
     default_tests_subdir = "tests"
     create_for_me_option = f"okay, create a tests{os.pathsep} directory for me!"
-    test_subdir_options = valid_subdirs
+    test_subdir_options = [sub_dir for sub_dir in valid_subdirs if sub_dir != module_root]
     if "tests" not in valid_subdirs:
         test_subdir_options.append(create_for_me_option)
     custom_dir_option = "enter a custom directory…"
@@ -240,7 +240,16 @@ def collect_setup_info() -> SetupInfo:
             apologize_and_exit()
     else:
         tests_root = Path(curdir) / Path(cast("str", tests_root_answer))
+
     tests_root = tests_root.relative_to(curdir)
+
+    resolved_module_root = (Path(curdir) / Path(module_root)).resolve()
+    resolved_tests_root = (Path(curdir) / Path(tests_root)).resolve()
+    if resolved_module_root == resolved_tests_root:
+        logger.warning(
+            "It looks like your tests root is the same as your module root. This is not recommended and can lead to unexpected behavior."
+        )
+
     ph("cli-tests-root-provided")
 
     # Autodiscover test framework
 
@@ -27,7 +27,7 @@ def set_level(level: int, *, echo_setting: bool = True) -> None:
                 ],
                 force=True,
             )
-            logging.info("Verbose DEBUG logging enabled")  # noqa: LOG015
+            logging.info("Verbose DEBUG logging enabled")
         else:
-            logging.info("Logging level set to INFO")  # noqa: LOG015
+            logging.info("Logging level set to INFO")
     console.rule()
@@ -47,6 +47,7 @@ def add_function_to_checkpoint(
         """Add a function to the checkpoint after it has been processed.
 
         Args:
+        ----
             function_fully_qualified_name: The fully qualified name of the function
             status: Status of optimization (e.g., "optimized", "failed", "skipped")
             additional_info: Any additional information to store about the function
@@ -104,7 +105,8 @@ def cleanup(self) -> None:
 def get_all_historical_functions(module_root: Path, checkpoint_dir: Path) -> dict[str, dict[str, str]]:
     """Get information about all processed functions, regardless of status.
 
-    Returns:
+    Returns
+    -------
         Dictionary mapping function names to their processing information
 
     """
 
@@ -1,21 +1,47 @@
 import os
 import sys
+import tempfile
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from platformdirs import user_config_dir
 
-# os-independent newline
-# important for any user-facing output or files we write
-# make sure to use this in f-strings e.g. f"some string{LF}"
-# you can use "[^f]\".*\{LF\}\" to find any lines in your code that use this without the f-string
-LF: str = os.linesep
+# Annotation-only declarations, seen by static type checkers only. At runtime
+# these three names are bound by the assignments at the bottom of this module.
+if TYPE_CHECKING:
+    codeflash_temp_dir: Path
+    codeflash_cache_dir: Path
+    codeflash_cache_db: Path
 
 
-SAFE_SYS_EXECUTABLE: str = Path(sys.executable).as_posix()
+class Compat:
+    """Platform-dependent constants and codeflash directory locations."""
+
+    # os-independent newline
+    LF: str = os.linesep
 
-IS_POSIX = os.name != "nt"
+    # Path of the running interpreter, rendered with forward slashes.
+    SAFE_SYS_EXECUTABLE: str = Path(sys.executable).as_posix()
 
+    # True on every platform whose os.name is not "nt".
+    IS_POSIX: bool = os.name != "nt"
 
-codeflash_cache_dir = Path(user_config_dir(appname="codeflash", appauthor="codeflash-ai", ensure_exists=True))
+    @property
+    def codeflash_cache_dir(self) -> Path:
+        """Return the platformdirs user config directory for codeflash.
+
+        ``ensure_exists=True`` is passed to ``user_config_dir`` on every access.
+        """
+        return Path(user_config_dir(appname="codeflash", appauthor="codeflash-ai", ensure_exists=True))
 
-codeflash_cache_db = codeflash_cache_dir / "codeflash_cache.db"
+    @property
+    def codeflash_temp_dir(self) -> Path:
+        """Return ``<system temp dir>/codeflash``, creating it when it does not exist."""
+        temp_dir = Path(tempfile.gettempdir()) / "codeflash"
+        if not temp_dir.exists():
+            temp_dir.mkdir(parents=True, exist_ok=True)
+        return temp_dir
+
+    @property
+    def codeflash_cache_db(self) -> Path:
+        """Return the path ``codeflash_cache.db`` inside ``codeflash_cache_dir``."""
+        return self.codeflash_cache_dir / "codeflash_cache.db"
+
+
+# Single module-private instance; its values are re-exported just below.
+_compat = Compat()
+
+
+# Module-level names: the ones this module defined before the change, plus the new
+# codeflash_temp_dir. Each property is read exactly once, here, when the module is
+# imported, so the directory lookups/creation above happen at import time.
+codeflash_temp_dir = _compat.codeflash_temp_dir
+codeflash_cache_dir = _compat.codeflash_cache_dir
+codeflash_cache_db = _compat.codeflash_cache_db
+LF = _compat.LF
+SAFE_SYS_EXECUTABLE = _compat.SAFE_SYS_EXECUTABLE
+IS_POSIX = _compat.IS_POSIX
@@ -0,0 +1,141 @@
+import re
+
+import libcst as cst
+
+from codeflash.cli_cmds.console import logger
+from codeflash.code_utils.time_utils import format_time
+from codeflash.models.models import GeneratedTests, GeneratedTestsList, TestResults
+
+
+def remove_functions_from_generated_tests(
+    generated_tests: GeneratedTestsList, test_functions_to_remove: list[str]
+) -> GeneratedTestsList:
+    """Delete the named test functions from the source of every generated test.
+
+    Removal is regex-based: the text from ``def <name>(`` up to the next top-level
+    decorator, ``def``, ``async def`` or ``class`` (or the end of the source) is cut out.
+    A function that sits directly under an ``@pytest.mark.parametrize`` decorator is
+    kept. Each item of ``generated_tests.generated_tests`` is edited in place (its
+    ``generated_original_test_source`` is reassigned) and the same objects are
+    returned wrapped in a new ``GeneratedTestsList``.
+
+    Args:
+        generated_tests: Container whose ``generated_tests`` items are scanned.
+        test_functions_to_remove: Exact names of the functions to delete.
+
+    Returns:
+        A ``GeneratedTestsList`` holding the (mutated) input tests, in input order.
+
+    """
+    new_generated_tests = []
+    for generated_test in generated_tests.generated_tests:
+        for test_function in test_functions_to_remove:
+            function_pattern = re.compile(
+                # Optional parametrize decorator immediately above the target. The
+                # tempered dot refuses to cross a top-level def/class, so a decorator
+                # that belongs to an EARLIER function can no longer be stretched over
+                # that whole function and mistaken for the target's own decorator.
+                r"(@pytest\.mark\.parametrize\((?:(?!\n(?:def|class|async\s+def)\s).)*?\)\s*)?"
+                rf"def\s+{re.escape(test_function)}\(.*?\):.*?"
+                # Stop before the next top-level decorator/def/class, not only before
+                # the next def: otherwise the following function's decorators (or a
+                # following class) are swallowed into this match and deleted with it.
+                r"(?=\n(?:@|(?:def|class|async\s+def)\s)|$)",
+                re.DOTALL,
+            )
+
+            match = function_pattern.search(generated_test.generated_original_test_source)
+
+            # Nothing to remove, or the target itself is parametrized: leave the source alone.
+            if match is None or "@pytest.mark.parametrize" in match.group(0):
+                continue
+
+            generated_test.generated_original_test_source = function_pattern.sub(
+                "", generated_test.generated_original_test_source
+            )
+
+        new_generated_tests.append(generated_test)
+
+    return GeneratedTestsList(generated_tests=new_generated_tests)
+
+
+def add_runtime_comments_to_generated_tests(
+    generated_tests: GeneratedTestsList, original_test_results: TestResults, optimized_test_results: TestResults
+) -> GeneratedTestsList:
+    """Add runtime performance comments to function calls in generated tests.
+
+    Inside every ``test_*`` function, each statement line that assigns to the single
+    name ``codeflash_output`` receives a trailing ``# <original> -> <optimized>``
+    comment. Both values are the minimum runtime recorded for that test function name
+    in the respective result set, rendered with ``format_time``. A line is left
+    untouched when either result set has no runtimes for the test; an already present
+    trailing comment on an annotated line is replaced.
+
+    Args:
+        generated_tests: Tests whose ``generated_original_test_source`` is annotated.
+        original_test_results: Source of the left-hand ("before") runtimes, read via
+            ``usable_runtime_data_by_test_case()``.
+        optimized_test_results: Source of the right-hand ("after") runtimes, read the
+            same way.
+
+    Returns:
+        A new ``GeneratedTestsList``. Tests that were processed are new
+        ``GeneratedTests`` objects; a test whose processing raised is passed through
+        unchanged.
+
+    """
+    # Create dictionaries for fast lookup of runtime data
+    original_runtime_by_test = original_test_results.usable_runtime_data_by_test_case()
+    optimized_runtime_by_test = optimized_test_results.usable_runtime_data_by_test_case()
+
+    class RuntimeCommentTransformer(cst.CSTTransformer):
+        def __init__(self) -> None:
+            super().__init__()
+            # One entry per function definition currently being visited, innermost last:
+            # the function's name if it is a test function, otherwise None. A stack (not
+            # a single flag) is needed so that leaving a helper nested inside a test
+            # restores the enclosing test's name instead of losing it.
+            self.enclosing_functions: list[str | None] = []
+
+        @property
+        def current_test_name(self) -> str | None:
+            """Name of the innermost enclosing function if it is a test, else None."""
+            return self.enclosing_functions[-1] if self.enclosing_functions else None
+
+        @staticmethod
+        def _runtimes_for(runtime_by_test, test_name: str) -> list:  # noqa: ANN001
+            """Collect every runtime whose invocation belongs to ``test_name``."""
+            return [
+                runtime
+                for invocation_id, runtimes in runtime_by_test.items()
+                if invocation_id.test_function_name == test_name
+                for runtime in runtimes
+            ]
+
+        def visit_FunctionDef(self, node: cst.FunctionDef) -> None:
+            name = node.name.value
+            self.enclosing_functions.append(name if name.startswith("test_") else None)
+
+        def leave_FunctionDef(
+            self,
+            original_node: cst.FunctionDef,  # noqa: ARG002
+            updated_node: cst.FunctionDef,
+        ) -> cst.FunctionDef:
+            self.enclosing_functions.pop()
+            return updated_node
+
+        def leave_SimpleStatementLine(
+            self,
+            original_node: cst.SimpleStatementLine,  # noqa: ARG002
+            updated_node: cst.SimpleStatementLine,
+        ) -> cst.SimpleStatementLine:
+            test_name = self.current_test_name
+            if test_name is None:
+                return updated_node
+
+            # Look for assignment statements that assign to codeflash_output
+            # Handle both single statements and multiple statements on one line
+            assigns_codeflash_output = any(
+                isinstance(stmt, cst.Assign)
+                and len(stmt.targets) == 1
+                and isinstance(stmt.targets[0].target, cst.Name)
+                and stmt.targets[0].target.value == "codeflash_output"
+                for stmt in updated_node.body
+            )
+            if not assigns_codeflash_output:
+                return updated_node
+
+            # Find matching test cases by looking for this test function name in the test results
+            matching_original_times = self._runtimes_for(original_runtime_by_test, test_name)
+            matching_optimized_times = self._runtimes_for(optimized_runtime_by_test, test_name)
+            if not matching_original_times or not matching_optimized_times:
+                return updated_node
+
+            original_time = min(matching_original_times)
+            optimized_time = min(matching_optimized_times)
+
+            # Create the runtime comment
+            comment_text = f"# {format_time(original_time)} -> {format_time(optimized_time)}"
+
+            # Add comment to the trailing whitespace, keeping the line's own newline node
+            new_trailing_whitespace = cst.TrailingWhitespace(
+                whitespace=cst.SimpleWhitespace(" "),
+                comment=cst.Comment(comment_text),
+                newline=updated_node.trailing_whitespace.newline,
+            )
+
+            return updated_node.with_changes(trailing_whitespace=new_trailing_whitespace)
+
+    # Process each generated test
+    modified_tests = []
+    for test in generated_tests.generated_tests:
+        try:
+            # Parse the test source code
+            tree = cst.parse_module(test.generated_original_test_source)
+
+            # Transform the tree to add runtime comments (fresh transformer per test,
+            # so no function-stack state leaks between sources)
+            transformer = RuntimeCommentTransformer()
+            modified_tree = tree.visit(transformer)
+
+            # Convert back to source code
+            modified_source = modified_tree.code
+
+            # Create a new GeneratedTests object with the modified source
+            modified_test = GeneratedTests(
+                generated_original_test_source=modified_source,
+                instrumented_behavior_test_source=test.instrumented_behavior_test_source,
+                instrumented_perf_test_source=test.instrumented_perf_test_source,
+                behavior_file_path=test.behavior_file_path,
+                perf_file_path=test.perf_file_path,
+            )
+            modified_tests.append(modified_test)
+        except Exception as e:
+            # Best effort by design: if parsing or transforming fails, keep the original test
+            logger.debug(f"Failed to add runtime comments to test: {e}")
+            modified_tests.append(test)
+
+    return GeneratedTestsList(generated_tests=modified_tests)