Commit 44d7ab0

Merge branch 'main' into memory-limit-tuning
2 parents 6c18a5e + ca9f2d8

File tree

3 files changed: +50 −32 lines changed


.github/workflows/unit-tests.yaml

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.9.18, 3.10.13, 3.11.6, 3.12.1, 3.13.0]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     continue-on-error: true
     runs-on: ubuntu-latest
     steps:
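
The matrix now tracks minor release lines instead of pinning exact patch versions, so each job automatically picks up the latest patch for its line. The quotes are load-bearing: a two-component version such as 3.10 is a valid YAML float and silently loads as 3.1, whereas the old three-component entries (3.9.18, 3.10.13, …) already parsed as strings. A quick check with PyYAML (illustrative only, not part of this repository):

# Demonstrates the YAML float gotcha that the quoted matrix entries avoid.
# Requires PyYAML (pip install pyyaml).
import yaml

print(yaml.safe_load("python-version: [3.10, 3.9]"))      # {'python-version': [3.1, 3.9]}
print(yaml.safe_load('python-version: ["3.10", "3.9"]'))  # {'python-version': ['3.10', '3.9']}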

codeflash/optimization/function_optimizer.py

Lines changed: 48 additions & 30 deletions
@@ -94,7 +94,7 @@ def __init__(
         function_benchmark_timings: dict[BenchmarkKey, int] | None = None,
         total_benchmark_timings: dict[BenchmarkKey, int] | None = None,
         args: Namespace | None = None,
-        replay_tests_dir: Path|None = None
+        replay_tests_dir: Path | None = None,
     ) -> None:
         self.project_root = test_cfg.project_root_path
         self.test_cfg = test_cfg
@@ -242,8 +242,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
         # request for new optimizations but don't block execution, check for completion later
         # adding to control and experiment set but with same traceid
         best_optimization = None
-
-        for _u, candidates in enumerate([optimizations_set.control, optimizations_set.experiment]):
+        for _u, (candidates, exp_type) in enumerate(zip([optimizations_set.control, optimizations_set.experiment],["EXP0","EXP1"])):
             if candidates is None:
                 continue
@@ -253,8 +252,9 @@ def optimize_function(self) -> Result[BestOptimization, str]:
                 original_code_baseline=original_code_baseline,
                 original_helper_code=original_helper_code,
                 file_path_to_helper_classes=file_path_to_helper_classes,
+                exp_type=exp_type,
             )
-            ph("cli-optimize-function-finished", {"function_trace_id": self.function_trace_id})
+            ph("cli-optimize-function-finished", {"function_trace_id": self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id})
 
             generated_tests = remove_functions_from_generated_tests(
                 generated_tests=generated_tests, test_functions_to_remove=test_functions_to_remove
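
The expression self.function_trace_id[:-4] + exp_type recurs throughout this diff. Read together, the changes suggest a convention (an inference from the diff, not a documented API): when an A/B experiment is active (self.experiment_id is set), the last four characters of the trace id are overwritten with the arm label, "EXP0" for control and "EXP1" for experiment, so both arms share a common prefix. A minimal sketch of that convention as a hypothetical helper:

from __future__ import annotations

# Hypothetical helper (not in the codebase) capturing the trace-id convention
# this diff repeats inline at every call site.
def tagged_trace_id(function_trace_id: str, exp_type: str, experiment_id: str | None) -> str:
    if experiment_id:
        # e.g. "a1b2c3d4-0000"[:-4] + "EXP0" -> "a1b2c3d4-EXP0"
        return function_trace_id[:-4] + exp_type
    return function_trace_id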
@@ -273,7 +273,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
                 processed_benchmark_info = process_benchmark_data(
                     replay_performance_gain=best_optimization.replay_performance_gain,
                     fto_benchmark_timings=self.function_benchmark_timings,
-                    total_benchmark_timings=self.total_benchmark_timings
+                    total_benchmark_timings=self.total_benchmark_timings,
                 )
             explanation = Explanation(
                 raw_explanation_message=best_optimization.candidate.explanation,
@@ -283,10 +283,10 @@ def optimize_function(self) -> Result[BestOptimization, str]:
                 best_runtime_ns=best_optimization.runtime,
                 function_name=function_to_optimize_qualified_name,
                 file_path=self.function_to_optimize.file_path,
-                benchmark_details=processed_benchmark_info.benchmark_details if processed_benchmark_info else None
+                benchmark_details=processed_benchmark_info.benchmark_details if processed_benchmark_info else None,
             )
 
-            self.log_successful_optimization(explanation, generated_tests)
+            self.log_successful_optimization(explanation, generated_tests, exp_type)
 
             self.replace_function_and_helpers_with_optimized_code(
                 code_context=code_context, optimized_code=best_optimization.candidate.source_code
@@ -324,11 +324,11 @@ def optimize_function(self) -> Result[BestOptimization, str]:
                 explanation=explanation,
                 existing_tests_source=existing_tests,
                 generated_original_test_source=generated_tests_str,
-                function_trace_id=self.function_trace_id,
+                function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
                 coverage_message=coverage_message,
                 git_remote=self.args.git_remote,
             )
-            if self.args.all or env_utils.get_pr_number():
+            if self.args.all or env_utils.get_pr_number() or (self.args.file and not self.args.function):
                 self.write_code_and_helpers(
                     self.function_to_optimize_source_code,
                     original_helper_code,
@@ -361,6 +361,7 @@ def determine_best_candidate(
         original_code_baseline: OriginalCodeBaseline,
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
+        exp_type: str,
     ) -> BestOptimization | None:
         best_optimization: BestOptimization | None = None
         best_runtime_until_now = original_code_baseline.runtime
@@ -377,28 +378,29 @@ def determine_best_candidate(
         candidates = deque(candidates)
         # Start a new thread for AI service request, start loop in main thread
         # check if aiservice request is complete, when it is complete, append result to the candidates list
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+            ai_service_client = self.aiservice_client if exp_type=="EXP0" else self.local_aiservice_client
             future_line_profile_results = executor.submit(
-                self.aiservice_client.optimize_python_code_line_profiler,
+                ai_service_client.optimize_python_code_line_profiler,
                 source_code=code_context.read_writable_code,
                 dependency_code=code_context.read_only_context_code,
-                trace_id=self.function_trace_id,
+                trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
                 line_profiler_results=original_code_baseline.line_profile_results["str_out"],
                 num_candidates=10,
-                experiment_metadata=None,
+                experiment_metadata=ExperimentMetadata(id=self.experiment_id, group= "control" if exp_type == "EXP0" else "experiment") if self.experiment_id else None,
             )
             try:
                 candidate_index = 0
-                done = False
                 original_len = len(candidates)
                 while candidates:
-                    # for candidate_index, candidate in enumerate(candidates, start=1):
                     done = True if future_line_profile_results is None else future_line_profile_results.done()
                     if done and (future_line_profile_results is not None):
                         line_profile_results = future_line_profile_results.result()
                         candidates.extend(line_profile_results)
-                        original_len+= len(candidates)
-                        logger.info(f"Added results from line profiler to candidates, total candidates now: {original_len}")
+                        original_len += len(line_profile_results)
+                        logger.info(
+                            f"Added results from line profiler to candidates, total candidates now: {original_len}"
+                        )
                         future_line_profile_results = None
                     candidate_index += 1
                     candidate = candidates.popleft()
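
Three things change in this hunk: the executor gets a second worker, EXP0 is routed to self.aiservice_client while EXP1 goes to self.local_aiservice_client, and a counting bug is fixed — the old original_len += len(candidates) re-added the whole queue, while the fix adds only the newly arrived line-profiler results. The surrounding loop is a poll-while-consuming pattern; here is a self-contained sketch under assumed names (fetch_extra_candidates stands in for the AI service call):

import concurrent.futures
import time
from collections import deque

def fetch_extra_candidates() -> list:
    time.sleep(0.1)  # stand-in for the slow line-profiler request
    return ["lp_candidate_1", "lp_candidate_2"]

candidates = deque(["candidate_1", "candidate_2"])
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    future = executor.submit(fetch_extra_candidates)
    total = len(candidates)
    while candidates:
        # Poll once per iteration; merge the extra results when they are ready.
        if future is not None and future.done():
            extra = future.result()
            candidates.extend(extra)
            total += len(extra)  # count only the new arrivals, as the fix does
            future = None
        candidate = candidates.popleft()
        time.sleep(0.15)  # stand-in for benchmarking the candidate
        print(f"processed {candidate} ({total} candidates known)")

Note that, as in the loop shown above, results that land after the queue drains are never consumed, because while candidates: exits first.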
@@ -425,7 +427,6 @@ def determine_best_candidate(
                     )
                     continue
 
-
                 run_results = self.run_optimized_candidate(
                     optimization_candidate_index=candidate_index,
                     baseline_results=original_code_baseline,
@@ -464,12 +465,19 @@ def determine_best_candidate(
                     tree.add(f"Speedup ratio: {perf_gain + 1:.1f}X")
                     replay_perf_gain = {}
                     if self.args.benchmark:
-                        test_results_by_benchmark = candidate_result.benchmarking_test_results.group_by_benchmarks(self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root)
+                        test_results_by_benchmark = (
+                            candidate_result.benchmarking_test_results.group_by_benchmarks(
+                                self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root
+                            )
+                        )
                         if len(test_results_by_benchmark) > 0:
                             benchmark_tree = Tree("Speedup percentage on benchmarks:")
                         for benchmark_key, candidate_test_results in test_results_by_benchmark.items():
-
-                            original_code_replay_runtime = original_code_baseline.replay_benchmarking_test_results[benchmark_key].total_passed_runtime()
+                            original_code_replay_runtime = (
+                                original_code_baseline.replay_benchmarking_test_results[
+                                    benchmark_key
+                                ].total_passed_runtime()
+                            )
                             candidate_replay_runtime = candidate_test_results.total_passed_runtime()
                             replay_perf_gain[benchmark_key] = performance_gain(
                                 original_runtime_ns=original_code_replay_runtime,
@@ -511,16 +519,16 @@ def determine_best_candidate(
             logger.exception(f"Optimization interrupted: {e}")
             raise
 
-        self.aiservice_client.log_results(
-            function_trace_id=self.function_trace_id,
+        ai_service_client.log_results(
+            function_trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
             speedup_ratio=speedup_ratios,
             original_runtime=original_code_baseline.runtime,
             optimized_runtime=optimized_runtimes,
             is_correct=is_correct,
         )
         return best_optimization
 
-    def log_successful_optimization(self, explanation: Explanation, generated_tests: GeneratedTestsList) -> None:
+    def log_successful_optimization(self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str) -> None:
         explanation_panel = Panel(
             f"⚡️ Optimization successful! 📄 {self.function_to_optimize.qualified_name} in {explanation.file_path}\n"
             f"📈 {explanation.perf_improvement_line}\n"
@@ -547,7 +555,7 @@ def log_successful_optimization(self, explanation: Explanation, generated_tests:
         ph(
             "cli-optimize-success",
             {
-                "function_trace_id": self.function_trace_id,
+                "function_trace_id": self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
                 "speedup_x": explanation.speedup_x,
                 "speedup_pct": explanation.speedup_pct,
                 "best_runtime": explanation.best_runtime_ns,
@@ -958,13 +966,17 @@ def establish_original_code_baseline(
         logger.debug(f"Total original code runtime (ns): {total_timing}")
 
         if self.args.benchmark:
-            replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root)
+            replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(
+                self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root
+            )
         return Success(
             (
                 OriginalCodeBaseline(
                     behavioral_test_results=behavioral_results,
                     benchmarking_test_results=benchmarking_results,
-                    replay_benchmarking_test_results = replay_benchmarking_test_results if self.args.benchmark else None,
+                    replay_benchmarking_test_results=replay_benchmarking_test_results
+                    if self.args.benchmark
+                    else None,
                     runtime=total_timing,
                     coverage_results=coverage_results,
                     line_profile_results=line_profile_results,
@@ -1077,16 +1089,22 @@ def run_optimized_candidate(
 
         logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}")
         if self.args.benchmark:
-            candidate_replay_benchmarking_results = candidate_benchmarking_results.group_by_benchmarks(self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root)
+            candidate_replay_benchmarking_results = candidate_benchmarking_results.group_by_benchmarks(
+                self.total_benchmark_timings.keys(), self.replay_tests_dir, self.project_root
+            )
             for benchmark_name, benchmark_results in candidate_replay_benchmarking_results.items():
-                logger.debug(f"Benchmark {benchmark_name} runtime (ns): {humanize_runtime(benchmark_results.total_passed_runtime())}")
+                logger.debug(
+                    f"Benchmark {benchmark_name} runtime (ns): {humanize_runtime(benchmark_results.total_passed_runtime())}"
+                )
         return Success(
             OptimizedCandidateResult(
                 max_loop_count=loop_count,
                 best_test_runtime=total_candidate_timing,
                 behavior_test_results=candidate_behavior_results,
                 benchmarking_test_results=candidate_benchmarking_results,
-                replay_benchmarking_test_results = candidate_replay_benchmarking_results if self.args.benchmark else None,
+                replay_benchmarking_test_results=candidate_replay_benchmarking_results
+                if self.args.benchmark
+                else None,
                 optimization_candidate_index=optimization_candidate_index,
                 total_candidate_timing=total_candidate_timing,
             )

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ exclude = [
 [tool.poetry.dependencies]
 python = ">=3.9"
 unidiff = ">=0.7.4"
-pytest = ">=7.0.0,<8.3.4"
+pytest = ">=7.0.0,!=8.3.4"
 gitpython = ">=3.1.31"
 libcst = ">=1.0.1"
 jedi = ">=0.19.1"
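
The old specifier <8.3.4 capped pytest below 8.3.4, which shut out every later release as well; !=8.3.4 excludes only that single version and lets 8.3.5 and newer through. The PEP 440 semantics are easy to check with the packaging library (illustrative, not project code):

from packaging.specifiers import SpecifierSet

old = SpecifierSet(">=7.0.0,<8.3.4")
new = SpecifierSet(">=7.0.0,!=8.3.4")

print("8.3.4" in old, "8.3.4" in new)  # False False -> both block the excluded release
print("8.3.5" in old, "8.3.5" in new)  # False True  -> only the new spec admits later fixes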
