
Commit 56e3447

reworked matching benchmark key to test results.
1 parent 8d95b18 commit 56e3447

File tree

11 files changed, +144 -136 lines changed


codeflash/benchmarking/codeflash_trace.py

Lines changed: 6 additions & 6 deletions
@@ -33,8 +33,8 @@ def setup(self, trace_path: str) -> None:
         cur.execute("PRAGMA synchronous = OFF")
         cur.execute(
             "CREATE TABLE IF NOT EXISTS benchmark_function_timings("
-            "function_name TEXT, class_name TEXT, module_name TEXT, file_name TEXT,"
-            "benchmark_function_name TEXT, benchmark_file_name TEXT, benchmark_line_number INTEGER,"
+            "function_name TEXT, class_name TEXT, module_name TEXT, file_path TEXT,"
+            "benchmark_function_name TEXT, benchmark_file_path TEXT, benchmark_line_number INTEGER,"
             "function_time_ns INTEGER, overhead_time_ns INTEGER, args BLOB, kwargs BLOB)"
         )
         self._connection.commit()
@@ -62,8 +62,8 @@ def write_function_timings(self) -> None:
         # Insert data into the benchmark_function_timings table
         cur.executemany(
             "INSERT INTO benchmark_function_timings"
-            "(function_name, class_name, module_name, file_name, benchmark_function_name, "
-            "benchmark_file_name, benchmark_line_number, function_time_ns, overhead_time_ns, args, kwargs) "
+            "(function_name, class_name, module_name, file_path, benchmark_function_name, "
+            "benchmark_file_path, benchmark_line_number, function_time_ns, overhead_time_ns, args, kwargs) "
             "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
             self.function_calls_data
         )
@@ -115,7 +115,7 @@ def wrapper(*args, **kwargs):
 
             # Get benchmark info from environment
             benchmark_function_name = os.environ.get("CODEFLASH_BENCHMARK_FUNCTION_NAME", "")
-            benchmark_file_name = os.environ.get("CODEFLASH_BENCHMARK_FILE_NAME", "")
+            benchmark_file_path = os.environ.get("CODEFLASH_BENCHMARK_FILE_PATH", "")
             benchmark_line_number = os.environ.get("CODEFLASH_BENCHMARK_LINE_NUMBER", "")
             # Get class name
             class_name = ""
@@ -151,7 +151,7 @@ def wrapper(*args, **kwargs):
 
             self.function_calls_data.append(
                 (func.__name__, class_name, func.__module__, func.__code__.co_filename,
-                 benchmark_function_name, benchmark_file_name, benchmark_line_number, execution_time,
+                 benchmark_function_name, benchmark_file_path, benchmark_line_number, execution_time,
                  overhead_time, pickled_args, pickled_kwargs)
             )
             return result
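
The renamed environment variables are the contract between the pytest plugin, which sets them just before a benchmark runs, and the codeflash_trace decorator, which reads them on every traced call; renaming one side without the other would silently break the match. A minimal sketch of that handoff, using only the variable names visible in this diff (the two helper functions are illustrative, not part of the codebase):

import os

def set_benchmark_env(function_name: str, file_path: str, line_number: int) -> None:
    # Plugin side: tell the decorator which benchmark is currently running.
    os.environ["CODEFLASH_BENCHMARK_FUNCTION_NAME"] = function_name
    os.environ["CODEFLASH_BENCHMARK_FILE_PATH"] = file_path
    os.environ["CODEFLASH_BENCHMARK_LINE_NUMBER"] = str(line_number)

def read_benchmark_env() -> tuple[str, str, str]:
    # Decorator side: fall back to "" when no benchmark is running.
    return (
        os.environ.get("CODEFLASH_BENCHMARK_FUNCTION_NAME", ""),
        os.environ.get("CODEFLASH_BENCHMARK_FILE_PATH", ""),
        os.environ.get("CODEFLASH_BENCHMARK_LINE_NUMBER", ""),
    )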

codeflash/benchmarking/plugin/plugin.py

Lines changed: 12 additions & 12 deletions
@@ -24,7 +24,7 @@ def setup(self, trace_path:str) -> None:
         cur.execute("PRAGMA synchronous = OFF")
         cur.execute(
             "CREATE TABLE IF NOT EXISTS benchmark_timings("
-            "benchmark_file_name TEXT, benchmark_function_name TEXT, benchmark_line_number INTEGER,"
+            "benchmark_file_path TEXT, benchmark_function_name TEXT, benchmark_line_number INTEGER,"
             "benchmark_time_ns INTEGER)"
         )
         self._connection.commit()
@@ -47,7 +47,7 @@ def write_benchmark_timings(self) -> None:
         cur = self._connection.cursor()
         # Insert data into the benchmark_timings table
         cur.executemany(
-            "INSERT INTO benchmark_timings (benchmark_file_name, benchmark_function_name, benchmark_line_number, benchmark_time_ns) VALUES (?, ?, ?, ?)",
+            "INSERT INTO benchmark_timings (benchmark_file_path, benchmark_function_name, benchmark_line_number, benchmark_time_ns) VALUES (?, ?, ?, ?)",
             self.benchmark_timings
         )
         self._connection.commit()
@@ -86,7 +86,7 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
     # Query the function_calls table for all function calls
     cursor.execute(
         "SELECT module_name, class_name, function_name, "
-        "benchmark_file_name, benchmark_function_name, benchmark_line_number, function_time_ns "
+        "benchmark_file_path, benchmark_function_name, benchmark_line_number, function_time_ns "
         "FROM benchmark_function_timings"
     )
 
@@ -101,7 +101,7 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
         qualified_name = f"{module_name}.{function_name}"
 
         # Create the benchmark key (file::function::line)
-        benchmark_key = BenchmarkKey(file_name=benchmark_file, function_name=benchmark_func)
+        benchmark_key = BenchmarkKey(file_path=benchmark_file, function_name=benchmark_func)
         # Initialize the inner dictionary if needed
         if qualified_name not in result:
             result[qualified_name] = {}
@@ -143,20 +143,20 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
     try:
         # Query the benchmark_function_timings table to get total overhead for each benchmark
         cursor.execute(
-            "SELECT benchmark_file_name, benchmark_function_name, benchmark_line_number, SUM(overhead_time_ns) "
+            "SELECT benchmark_file_path, benchmark_function_name, benchmark_line_number, SUM(overhead_time_ns) "
             "FROM benchmark_function_timings "
-            "GROUP BY benchmark_file_name, benchmark_function_name, benchmark_line_number"
+            "GROUP BY benchmark_file_path, benchmark_function_name, benchmark_line_number"
         )
 
         # Process overhead information
         for row in cursor.fetchall():
             benchmark_file, benchmark_func, benchmark_line, total_overhead_ns = row
-            benchmark_key = BenchmarkKey(file_name=benchmark_file, function_name=benchmark_func)
+            benchmark_key = BenchmarkKey(file_path=benchmark_file, function_name=benchmark_func)
             overhead_by_benchmark[benchmark_key] = total_overhead_ns or 0  # Handle NULL sum case
 
         # Query the benchmark_timings table for total times
         cursor.execute(
-            "SELECT benchmark_file_name, benchmark_function_name, benchmark_line_number, benchmark_time_ns "
+            "SELECT benchmark_file_path, benchmark_function_name, benchmark_line_number, benchmark_time_ns "
             "FROM benchmark_timings"
         )
 
@@ -165,7 +165,7 @@ def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
         benchmark_file, benchmark_func, benchmark_line, time_ns = row
 
         # Create the benchmark key (file::function::line)
-        benchmark_key = BenchmarkKey(file_name=benchmark_file, function_name=benchmark_func)
+        benchmark_key = BenchmarkKey(file_path=benchmark_file, function_name=benchmark_func)
         # Subtract overhead from total time
         overhead = overhead_by_benchmark.get(benchmark_key, 0)
         result[benchmark_key] = time_ns - overhead
@@ -236,13 +236,13 @@ def test_something(benchmark):
         The return value of the function
 
         """
-        benchmark_file_name = self.request.node.fspath
+        benchmark_file_path = str(self.request.node.fspath)
         benchmark_function_name = self.request.node.name
         line_number = int(str(sys._getframe(1).f_lineno))  # 1 frame up in the call stack
 
         # Set env vars so codeflash decorator can identify what benchmark its being run in
         os.environ["CODEFLASH_BENCHMARK_FUNCTION_NAME"] = benchmark_function_name
-        os.environ["CODEFLASH_BENCHMARK_FILE_NAME"] = benchmark_file_name
+        os.environ["CODEFLASH_BENCHMARK_FILE_PATH"] = benchmark_file_path
         os.environ["CODEFLASH_BENCHMARK_LINE_NUMBER"] = str(line_number)
         os.environ["CODEFLASH_BENCHMARKING"] = "True"
 
@@ -260,7 +260,7 @@ def test_something(benchmark):
         codeflash_trace.function_call_count = 0
         # Add to the benchmark timings buffer
         codeflash_benchmark_plugin.benchmark_timings.append(
-            (benchmark_file_name, benchmark_function_name, line_number, end - start))
+            (benchmark_file_path, benchmark_function_name, line_number, end - start))
 
         return result
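
All of these queries funnel into the same BenchmarkKey, which after this commit is built from a file_path rather than a bare file name. A minimal sketch of the net-time computation in get_benchmark_timings, assuming BenchmarkKey is a frozen (hashable) dataclass with exactly the two fields constructed above; the example path and timings are invented:

from dataclasses import dataclass

@dataclass(frozen=True)  # frozen makes instances hashable, so they can key the timing dicts
class BenchmarkKey:
    file_path: str
    function_name: str

def net_benchmark_times(
    total_by_benchmark: dict[BenchmarkKey, int],
    overhead_by_benchmark: dict[BenchmarkKey, int],
) -> dict[BenchmarkKey, int]:
    # Mirrors get_benchmark_timings: wall-clock benchmark time minus the
    # summed decorator overhead, with missing overhead defaulting to 0.
    return {key: total - overhead_by_benchmark.get(key, 0) for key, total in total_by_benchmark.items()}

key = BenchmarkKey(file_path="tests/benchmarks/test_example.py", function_name="test_example_speed")
print(net_benchmark_times({key: 1_500_000}, {key: 200_000}))  # nets out to 1_300_000 ns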

codeflash/benchmarking/replay_test.py

Lines changed: 34 additions & 30 deletions
@@ -2,32 +2,35 @@
 
 import sqlite3
 import textwrap
-from collections.abc import Generator
-from typing import Any, Dict
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
 
 import isort
 
 from codeflash.cli_cmds.console import logger
 from codeflash.discovery.functions_to_optimize import inspect_top_level_functions_or_methods
 from codeflash.verification.verification_utils import get_test_file_path
-from pathlib import Path
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
 
 def get_next_arg_and_return(
-    trace_file: str, function_name: str, file_name: str, class_name: str | None = None, num_to_get: int = 256
+    trace_file: str, function_name: str, file_path: str, class_name: str | None = None, num_to_get: int = 256
 ) -> Generator[Any]:
     db = sqlite3.connect(trace_file)
     cur = db.cursor()
     limit = num_to_get
 
     if class_name is not None:
         cursor = cur.execute(
-            "SELECT * FROM benchmark_function_timings WHERE function_name = ? AND file_name = ? AND class_name = ? LIMIT ?",
-            (function_name, file_name, class_name, limit),
+            "SELECT * FROM benchmark_function_timings WHERE function_name = ? AND file_path = ? AND class_name = ? LIMIT ?",
+            (function_name, file_path, class_name, limit),
         )
     else:
         cursor = cur.execute(
-            "SELECT * FROM benchmark_function_timings WHERE function_name = ? AND file_name = ? AND class_name = '' LIMIT ?",
-            (function_name, file_name, limit),
+            "SELECT * FROM benchmark_function_timings WHERE function_name = ? AND file_path = ? AND class_name = '' LIMIT ?",
+            (function_name, file_path, limit),
         )
 
     while (val := cursor.fetchone()) is not None:
@@ -88,7 +91,7 @@ def create_trace_replay_test_code(
     # Templates for different types of tests
    test_function_body = textwrap.dedent(
        """\
-        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_name=r"{file_name}", num_to_get={max_run_count}):
+        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_path=r"{file_path}", num_to_get={max_run_count}):
             args = pickle.loads(args_pkl)
             kwargs = pickle.loads(kwargs_pkl)
             ret = {function_name}(*args, **kwargs)
@@ -97,7 +100,7 @@ def create_trace_replay_test_code(
 
     test_method_body = textwrap.dedent(
         """\
-        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_name=r"{file_name}", class_name="{class_name}", num_to_get={max_run_count}):
+        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_path=r"{file_path}", class_name="{class_name}", num_to_get={max_run_count}):
             args = pickle.loads(args_pkl)
             kwargs = pickle.loads(kwargs_pkl){filter_variables}
             function_name = "{orig_function_name}"
@@ -112,7 +115,7 @@ def create_trace_replay_test_code(
 
     test_class_method_body = textwrap.dedent(
         """\
-        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_name=r"{file_name}", class_name="{class_name}", num_to_get={max_run_count}):
+        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_path=r"{file_path}", class_name="{class_name}", num_to_get={max_run_count}):
             args = pickle.loads(args_pkl)
             kwargs = pickle.loads(kwargs_pkl){filter_variables}
             if not args:
@@ -122,7 +125,7 @@ def create_trace_replay_test_code(
     )
     test_static_method_body = textwrap.dedent(
         """\
-        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_name=r"{file_name}", class_name="{class_name}", num_to_get={max_run_count}):
+        for args_pkl, kwargs_pkl in get_next_arg_and_return(trace_file=trace_file_path, function_name="{orig_function_name}", file_path=r"{file_path}", class_name="{class_name}", num_to_get={max_run_count}):
             args = pickle.loads(args_pkl)
             kwargs = pickle.loads(kwargs_pkl){filter_variables}
             ret = {class_name_alias}{method_name}(*args, **kwargs)
@@ -140,13 +143,13 @@ def create_trace_replay_test_code(
         module_name = func.get("module_name")
         function_name = func.get("function_name")
         class_name = func.get("class_name")
-        file_name = func.get("file_name")
+        file_path = func.get("file_path")
         function_properties = func.get("function_properties")
         if not class_name:
             alias = get_function_alias(module_name, function_name)
             test_body = test_function_body.format(
                 function_name=alias,
-                file_name=file_name,
+                file_path=file_path,
                 orig_function_name=function_name,
                 max_run_count=max_run_count,
             )
@@ -160,7 +163,7 @@ def create_trace_replay_test_code(
             if function_properties.is_classmethod:
                 test_body = test_class_method_body.format(
                     orig_function_name=function_name,
-                    file_name=file_name,
+                    file_path=file_path,
                     class_name_alias=class_name_alias,
                     class_name=class_name,
                     method_name=method_name,
@@ -170,7 +173,7 @@ def create_trace_replay_test_code(
             elif function_properties.is_staticmethod:
                 test_body = test_static_method_body.format(
                     orig_function_name=function_name,
-                    file_name=file_name,
+                    file_path=file_path,
                     class_name_alias=class_name_alias,
                     class_name=class_name,
                     method_name=method_name,
@@ -180,7 +183,7 @@ def create_trace_replay_test_code(
             else:
                 test_body = test_method_body.format(
                     orig_function_name=function_name,
-                    file_name=file_name,
+                    file_path=file_path,
                     class_name_alias=class_name_alias,
                     class_name=class_name,
                     method_name=method_name,
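
Filled in, the function-level template above expands into replay-test code along these lines; the traced module, function, and paths below are invented for illustration:

import pickle

from codeflash.benchmarking.replay_test import get_next_arg_and_return
from my_module import compute as my_module_compute  # hypothetical traced function

trace_file_path = r"/tmp/codeflash_benchmark.trace"  # hypothetical trace database

def test_my_module_compute():
    # Replay the exact arguments captured while the benchmark ran.
    for args_pkl, kwargs_pkl in get_next_arg_and_return(
        trace_file=trace_file_path, function_name="compute", file_path=r"/repo/src/my_module.py", num_to_get=100
    ):
        args = pickle.loads(args_pkl)
        kwargs = pickle.loads(kwargs_pkl)
        ret = my_module_compute(*args, **kwargs)
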
@@ -216,42 +219,41 @@ def generate_replay_test(trace_file_path: Path, output_dir: Path, test_framework
 
         # Get distinct benchmark names
         cursor.execute(
-            "SELECT DISTINCT benchmark_function_name, benchmark_file_name FROM benchmark_function_timings"
+            "SELECT DISTINCT benchmark_function_name, benchmark_file_path FROM benchmark_function_timings"
         )
         benchmarks = cursor.fetchall()
 
         # Generate a test for each benchmark
         for benchmark in benchmarks:
-            benchmark_function_name, benchmark_file_name = benchmark
+            benchmark_function_name, benchmark_file_path = benchmark
             # Get functions associated with this benchmark
             cursor.execute(
-                "SELECT DISTINCT function_name, class_name, module_name, file_name, benchmark_line_number FROM benchmark_function_timings "
-                "WHERE benchmark_function_name = ? AND benchmark_file_name = ?",
-                (benchmark_function_name, benchmark_file_name)
+                "SELECT DISTINCT function_name, class_name, module_name, file_path, benchmark_line_number FROM benchmark_function_timings "
+                "WHERE benchmark_function_name = ? AND benchmark_file_path = ?",
+                (benchmark_function_name, benchmark_file_path)
             )
 
             functions_data = []
             for func_row in cursor.fetchall():
-                function_name, class_name, module_name, file_name, benchmark_line_number = func_row
-
+                function_name, class_name, module_name, file_path, benchmark_line_number = func_row
                 # Add this function to our list
                 functions_data.append({
                     "function_name": function_name,
                     "class_name": class_name,
-                    "file_name": file_name,
+                    "file_path": file_path,
                     "module_name": module_name,
                     "benchmark_function_name": benchmark_function_name,
-                    "benchmark_file_name": benchmark_file_name,
+                    "benchmark_file_path": benchmark_file_path,
                     "benchmark_line_number": benchmark_line_number,
                     "function_properties": inspect_top_level_functions_or_methods(
-                        file_name=file_name,
+                        file_name=Path(file_path),
                         function_or_method_name=function_name,
                         class_name=class_name,
                     )
                 })
 
             if not functions_data:
-                logger.info(f"No functions found for benchmark {benchmark_function_name} in {benchmark_file_name}")
+                logger.info(f"No functions found for benchmark {benchmark_function_name} in {benchmark_file_path}")
                 continue
 
             # Generate the test code for this benchmark
@@ -265,17 +267,19 @@ def generate_replay_test(trace_file_path: Path, output_dir: Path, test_framework
 
             # Write to file if requested
             if output_dir:
+                name = Path(benchmark_file_path).name.split(".")[0][5:]  # remove "test_" from the name since we add it in later
                 output_file = get_test_file_path(
-                    test_dir=Path(output_dir), function_name=f"{benchmark_file_name}_{benchmark_function_name}", test_type="replay"
+                    test_dir=Path(output_dir), function_name=f"{name}_{benchmark_function_name}", test_type="replay"
                 )
                 # Write test code to file, parents = true
                 output_dir.mkdir(parents=True, exist_ok=True)
                 output_file.write_text(test_code, "utf-8")
                 count += 1
-                logger.info(f"Replay test for benchmark `{benchmark_function_name}` in {benchmark_file_name} written to {output_file}")
+                logger.info(f"Replay test for benchmark `{benchmark_function_name}` in {name} written to {output_file}")
 
         conn.close()
 
     except Exception as e:
         logger.info(f"Error generating replay tests: {e}")
+
     return count
0 commit comments

Comments
 (0)