
Commit 1637810

works with multithreading, added test

1 parent d664040 · commit 1637810

12 files changed: +185 -63 lines changed
Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+# from code_to_optimize.bubble_sort_codeflash_trace import sorter
+from code_to_optimize.bubble_sort_codeflash_trace import sorter
+import concurrent.futures
+
+
+def multithreaded_sorter(unsorted_lists: list[list[int]]) -> list[list[int]]:
+    # Create a list to store results in the correct order
+    sorted_lists = [None] * len(unsorted_lists)
+
+    # Use ThreadPoolExecutor to manage threads
+    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
+        # Submit all sorting tasks and map them to their original indices
+        future_to_index = {
+            executor.submit(sorter, unsorted_list): i
+            for i, unsorted_list in enumerate(unsorted_lists)
+        }
+
+        # Collect results as they complete
+        for future in concurrent.futures.as_completed(future_to_index):
+            index = future_to_index[future]
+            sorted_lists[index] = future.result()
+
+    return sorted_lists
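
The new helper fans each list out to a worker thread and stitches the results back into input order via the future_to_index map. A quick usage sketch, not part of the commit; it relies only on the fact (shown by the tests in this repo) that sorter returns the sorted list:

# Hypothetical usage sketch, not part of this commit.
inputs = [list(reversed(range(100))) for _ in range(8)]
results = multithreaded_sorter(inputs)
# Results come back in input order because each future is mapped to its index.
assert results == [sorted(lst) for lst in inputs]
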
Lines changed: 1 addition & 8 deletions

@@ -1,6 +1,6 @@
 import pytest

-from code_to_optimize.bubble_sort_codeflash_trace import sorter, Sorter
+from code_to_optimize.bubble_sort import sorter


 def test_sort(benchmark):
@@ -11,10 +11,3 @@ def test_sort(benchmark):
 def test_sort2():
     result = sorter(list(reversed(range(500))))
     assert result == list(range(500))
-
-def test_class_sort(benchmark):
-    obj = Sorter(list(reversed(range(100))))
-    result1 = benchmark(obj.sorter, 2)
-    result2 = benchmark(Sorter.sort_class, list(reversed(range(100))))
-    result3 = benchmark(Sorter.sort_static, list(reversed(range(100))))
-    result4 = benchmark(Sorter, [1,2,3])

code_to_optimize/tests/pytest/benchmarks/test_process_and_sort.py

Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
-from code_to_optimize.process_and_bubble_sort_codeflash_trace import compute_and_sort
-from code_to_optimize.bubble_sort_codeflash_trace import sorter
+from code_to_optimize.process_and_bubble_sort import compute_and_sort
+from code_to_optimize.bubble_sort import sorter
 def test_compute_and_sort(benchmark):
     result = benchmark(compute_and_sort, list(reversed(range(500))))
     assert result == 62208.5
Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+from code_to_optimize.bubble_sort_multithread import multithreaded_sorter
+
+def test_benchmark_sort(benchmark):
+    benchmark(multithreaded_sorter, [list(range(1000)) for i in range(10)])
Lines changed: 20 additions & 0 deletions

@@ -0,0 +1,20 @@
+import pytest
+
+from code_to_optimize.bubble_sort_codeflash_trace import sorter, Sorter
+
+
+def test_sort(benchmark):
+    result = benchmark(sorter, list(reversed(range(500))))
+    assert result == list(range(500))
+
+# This should not be picked up as a benchmark test
+def test_sort2():
+    result = sorter(list(reversed(range(500))))
+    assert result == list(range(500))
+
+def test_class_sort(benchmark):
+    obj = Sorter(list(reversed(range(100))))
+    result1 = benchmark(obj.sorter, 2)
+    result2 = benchmark(Sorter.sort_class, list(reversed(range(100))))
+    result3 = benchmark(Sorter.sort_static, list(reversed(range(100))))
+    result4 = benchmark(Sorter, [1,2,3])
Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+from code_to_optimize.process_and_bubble_sort_codeflash_trace import compute_and_sort
+from code_to_optimize.bubble_sort_codeflash_trace import sorter
+def test_compute_and_sort(benchmark):
+    result = benchmark(compute_and_sort, list(reversed(range(500))))
+    assert result == 62208.5
+
+def test_no_func(benchmark):
+    benchmark(sorter, list(reversed(range(500))))

codeflash/benchmarking/codeflash_trace.py

Lines changed: 5 additions & 11 deletions

@@ -15,11 +15,6 @@ class CodeflashTrace:
     def __init__(self) -> None:
         self.function_calls_data = []

-    # def __enter__(self) -> None:
-    #     # Initialize for context manager use
-    #     self.function_calls_data = []
-    #     return self
-
     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         # Cleanup is optional here
         pass
@@ -37,15 +32,14 @@ def __call__(self, func: Callable) -> Callable:
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
             # Measure execution time
-            start_time = time.perf_counter_ns()
+            start_time = time.thread_time_ns()
             result = func(*args, **kwargs)
-            end_time = time.perf_counter_ns()
-
+            end_time = time.thread_time_ns()
             # Calculate execution time
             execution_time = end_time - start_time

             # Measure overhead
-            overhead_start_time = time.perf_counter_ns()
+            overhead_start_time = time.thread_time_ns()

             try:
                 # Check if currently in pytest benchmark fixture
@@ -66,7 +60,7 @@ def wrapper(*args, **kwargs):
                 if "." in qualname:
                     class_name = qualname.split(".")[0]
                 # Calculate overhead time
-                overhead_end_time = time.perf_counter_ns()
+                overhead_end_time = time.thread_time_ns()
                 overhead_time = overhead_end_time - overhead_start_time


@@ -75,7 +69,7 @@ def wrapper(*args, **kwargs):
                     benchmark_function_name, benchmark_file_name, benchmark_line_number, execution_time,
                     overhead_time, pickled_args, pickled_kwargs)
                 )
-
+                print("appended")
             except Exception as e:
                 print(f"Error in codeflash_trace: {e}")
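
The substantive change in this file is swapping time.perf_counter_ns() for time.thread_time_ns(): the latter counts CPU time of the calling thread only, so a function driven from a ThreadPoolExecutor is not billed for time spent sleeping, blocked, or waiting on other threads. A minimal standalone sketch of the difference, not part of the commit; the sleep is only there to exaggerate the gap:

import time

def compare_clocks():
    wall_start = time.perf_counter_ns()
    cpu_start = time.thread_time_ns()
    time.sleep(0.1)            # blocks the thread without using CPU
    sum(range(1_000_000))      # genuine CPU-bound work
    wall_ns = time.perf_counter_ns() - wall_start
    cpu_ns = time.thread_time_ns() - cpu_start
    # wall_ns includes the 100 ms sleep; cpu_ns counts only this thread's CPU time
    print(f"wall: {wall_ns / 1e6:.1f} ms, thread CPU: {cpu_ns / 1e6:.1f} ms")

compare_clocks()
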

codeflash/benchmarking/utils.py

Lines changed: 39 additions & 22 deletions

@@ -1,47 +1,64 @@
 from rich.console import Console
 from rich.table import Table

+from codeflash.cli_cmds.console import logger

-def print_benchmark_table(function_benchmark_timings: dict[str, dict[str, int]],
-                          total_benchmark_timings: dict[str, int]):
-    console = Console()

+def validate_and_format_benchmark_table(function_benchmark_timings: dict[str, dict[str, int]],
+                                        total_benchmark_timings: dict[str, int]) -> dict[str, list[tuple[str, float, float, float]]]:
+    function_to_result = {}
     # Process each function's benchmark data
     for func_path, test_times in function_benchmark_timings.items():
-        function_name = func_path.split(":")[-1]
-
-        # Create a table for this function
-        table = Table(title=f"Function: {function_name}", border_style="blue")
-
-        # Add columns
-        table.add_column("Benchmark Test", style="cyan", no_wrap=True)
-        table.add_column("Total Time (ms)", justify="right", style="green")
-        table.add_column("Function Time (ms)", justify="right", style="yellow")
-        table.add_column("Percentage (%)", justify="right", style="red")
-
         # Sort by percentage (highest first)
         sorted_tests = []
         for test_name, func_time in test_times.items():
             total_time = total_benchmark_timings.get(test_name, 0)
+            if func_time > total_time:
+                logger.debug(f"Skipping test {test_name} due to func_time {func_time} > total_time {total_time}")
+                # If the function time is greater than total time, likely to have multithreading / multiprocessing issues.
+                # Do not try to project the optimization impact for this function.
+                sorted_tests.append((test_name, 0.0, 0.0, 0.0))
             if total_time > 0:
                 percentage = (func_time / total_time) * 100
                 # Convert nanoseconds to milliseconds
                 func_time_ms = func_time / 1_000_000
                 total_time_ms = total_time / 1_000_000
                 sorted_tests.append((test_name, total_time_ms, func_time_ms, percentage))
-
         sorted_tests.sort(key=lambda x: x[3], reverse=True)
+        function_to_result[func_path] = sorted_tests
+    return function_to_result
+
+
+def print_benchmark_table(function_to_results: dict[str, list[tuple[str, float, float, float]]]) -> None:
+    console = Console()
+    for func_path, sorted_tests in function_to_results.items():
+        function_name = func_path.split(":")[-1]
+
+        # Create a table for this function
+        table = Table(title=f"Function: {function_name}", border_style="blue")
+
+        # Add columns
+        table.add_column("Benchmark Test", style="cyan", no_wrap=True)
+        table.add_column("Total Time (ms)", justify="right", style="green")
+        table.add_column("Function Time (ms)", justify="right", style="yellow")
+        table.add_column("Percentage (%)", justify="right", style="red")

-        # Add rows to the table
         for test_name, total_time, func_time, percentage in sorted_tests:
             benchmark_file, benchmark_func, benchmark_line = test_name.split("::")
             benchmark_name = f"{benchmark_file}::{benchmark_func}"
-            table.add_row(
-                benchmark_name,
-                f"{total_time:.3f}",
-                f"{func_time:.3f}",
-                f"{percentage:.2f}"
-            )
+            if total_time == 0.0:
+                table.add_row(
+                    benchmark_name,
+                    "N/A",
+                    "N/A",
+                    "N/A"
+                )
+            else:
+                table.add_row(
+                    benchmark_name,
+                    f"{total_time:.3f}",
+                    f"{func_time:.3f}",
+                    f"{percentage:.2f}"
+                )

         # Print the table
         console.print(table)
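
The new func_time > total_time guard handles benchmarks where the traced function's accumulated time exceeds the benchmark's own total. The in-code comment attributes this to multithreading or multiprocessing; one plausible way it happens is per-thread times summed across workers exceeding the benchmark's overall time, in which case a projected percentage would be misleading and the row is rendered as N/A instead. A rough illustration with made-up numbers:

# Made-up numbers illustrating why func_time can exceed total_time.
func_time_ns = 4 * 50_000_000    # e.g. ~50 ms accumulated in each of 4 worker threads
total_time_ns = 120_000_000      # the whole benchmark finished in ~120 ms

if func_time_ns > total_time_ns:
    # Mirrors validate_and_format_benchmark_table: don't project an impact, show N/A.
    row = ("test_multithread_sort", 0.0, 0.0, 0.0)
else:
    row = ("test_multithread_sort",
           total_time_ns / 1e6,
           func_time_ns / 1e6,
           func_time_ns / total_time_ns * 100)
print(row)
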

codeflash/optimization/function_optimizer.py

Lines changed: 2 additions & 2 deletions

@@ -87,7 +87,7 @@ def __init__(
         function_to_tests: dict[str, list[FunctionCalledInTest]] | None = None,
         function_to_optimize_ast: ast.FunctionDef | None = None,
         aiservice_client: AiServiceClient | None = None,
-        function_benchmark_timings: dict[str, dict[str, int]] | None = None,
+        function_benchmark_timings: dict[str, int] | None = None,
         total_benchmark_timings: dict[str, int] | None = None,
         args: Namespace | None = None,
     ) -> None:
@@ -272,7 +272,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
             function_name=function_to_optimize_qualified_name,
             file_path=self.function_to_optimize.file_path,
             replay_performance_gain=best_optimization.replay_performance_gain if self.args.benchmark else None,
-            fto_benchmark_timings = self.function_benchmark_timings[self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root)] if self.args.benchmark else None,
+            fto_benchmark_timings = self.function_benchmark_timings if self.args.benchmark else None,
             total_benchmark_timings = self.total_benchmark_timings if self.args.benchmark else None,
         )

codeflash/optimization/optimizer.py

Lines changed: 9 additions & 7 deletions

@@ -10,10 +10,9 @@
 from codeflash.api.aiservice import AiServiceClient, LocalAiServiceClient
 from codeflash.benchmarking.replay_test import generate_replay_test
 from codeflash.benchmarking.trace_benchmarks import trace_benchmarks_pytest
-from codeflash.benchmarking.utils import print_benchmark_table
+from codeflash.benchmarking.utils import print_benchmark_table, validate_and_format_benchmark_table
 from codeflash.cli_cmds.console import console, logger, progress_bar
 from codeflash.code_utils import env_utils
-from codeflash.code_utils.code_extractor import add_needed_imports_from_module
 from codeflash.code_utils.code_replacer import normalize_code, normalize_node
 from codeflash.code_utils.code_utils import get_run_tmp_file
 from codeflash.code_utils.static_analysis import analyze_imported_modules, get_first_top_level_function_or_method_ast
@@ -115,15 +114,15 @@ def run(self) -> None:
                     instrument_codeflash_trace_decorator(fto)
                 trace_file = Path(self.args.benchmarks_root) / "benchmarks.trace"
                 replay_tests_dir = Path(self.args.tests_root) / "codeflash_replay_tests"
-                trace_benchmarks_pytest(self.args.benchmarks_root, self.args.tests_root, self.args.project_root, trace_file)  # Simply run all tests that use pytest-benchmark
+                trace_benchmarks_pytest(self.args.benchmarks_root, self.args.tests_root, self.args.project_root, trace_file)  # Run all tests that use pytest-benchmark
                 replay_count = generate_replay_test(trace_file, replay_tests_dir)
                 if replay_count == 0:
                     logger.info(f"No valid benchmarks found in {self.args.benchmarks_root} for functions to optimize, continuing optimization")
                 else:
                     function_benchmark_timings = get_function_benchmark_timings(trace_file)
                     total_benchmark_timings = get_benchmark_timings(trace_file)
-
-                    print_benchmark_table(function_benchmark_timings, total_benchmark_timings)
+                    function_to_results = validate_and_format_benchmark_table(function_benchmark_timings, total_benchmark_timings)
+                    print_benchmark_table(function_to_results)
                 logger.info("Finished tracing existing benchmarks")
             except Exception as e:
                 logger.info(f"Error while tracing existing benchmarks: {e}")
@@ -213,9 +212,12 @@ def run(self) -> None:
                         f"Skipping optimization."
                     )
                     continue
-                if self.args.benchmark and function_benchmark_timings and total_benchmark_timings:
+                qualified_name_w_module = function_to_optimize.qualified_name_with_modules_from_root(
+                    self.args.project_root
+                )
+                if self.args.benchmark and function_benchmark_timings and qualified_name_w_module in function_benchmark_timings and total_benchmark_timings:
                     function_optimizer = self.create_function_optimizer(
-                        function_to_optimize, function_to_optimize_ast, function_to_tests, validated_original_code[original_module_path].source_code, function_benchmark_timings, total_benchmark_timings
+                        function_to_optimize, function_to_optimize_ast, function_to_tests, validated_original_code[original_module_path].source_code, function_benchmark_timings[qualified_name_w_module], total_benchmark_timings
                     )
                 else:
                     function_optimizer = self.create_function_optimizer(