working version

mohammedahmed18 · mohammedahmed18 · commit c1ae81eec9ca · 2025-12-03T19:59:58.000+02:00
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -294,50 +294,39 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
         console.rule()
         return []
 
-    def optimize_python_code_repair(self, request: list[AIServiceCodeRepairRequest]) -> list[OptimizedCandidate]:
+    def optimize_python_code_repair(self, request: AIServiceCodeRepairRequest) -> OptimizedCandidate | None:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
         Args:
-        request: A list of optimization candidate details for refinement
+        request: the failing candidate to repair (original and modified source code plus its test diffs)
 
         Returns:
         -------
-        - List[OptimizationCandidate]: A list of Optimization Candidates.
+        - OptimizedCandidate | None: the repaired candidate; a falsy value (None, or [] if the request raised) otherwise.
 
         """
-        payload = [
-            {
-                "optimization_id": opt.optimization_id,
-                "original_source_code": opt.original_source_code,
-                "modified_source_code": opt.modified_source_code,
-                "test_details": opt.test_details,
-                "trace_id": opt.trace_id,
-            }
-            for opt in request
-        ]
-        # logger.debug(f"Repair {len(request)} optimizations…")
         console.rule()
         try:
-            response = self.make_ai_service_request("/code_repair", payload=payload, timeout=120)
+            response = self.make_ai_service_request("/code_repair", payload=request, timeout=120)
         except requests.exceptions.RequestException as e:
             logger.exception(f"Error generating optimization repair: {e}")
             ph("cli-optimize-error-caught", {"error": str(e)})
             return []
 
         if response.status_code == 200:
-            refined_optimizations = response.json()["code_repairs"]
-            # logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
+            refined_optimization = response.json()
             console.rule()
 
-            refinements = self._get_valid_candidates(refined_optimizations)
-            return [
-                OptimizedCandidate(
-                    source_code=c.source_code,
-                    explanation=c.explanation,
-                    optimization_id=c.optimization_id[:-4] + "cdrp",
-                )
-                for c in refinements
-            ]
+            refinements = self._get_valid_candidates([refined_optimization])
+            if not refinements:
+                logger.error("Code repair failed to generate a valid candidate.")
+                return None
+
+            return OptimizedCandidate(
+                source_code=refinements[0].source_code,
+                explanation=refinements[0].explanation,
+                optimization_id=refinements[0].optimization_id[:-4] + "cdrp",
+            )
 
         try:
             error = response.json()["error"]
@@ -346,7 +335,7 @@ def optimize_python_code_repair(self, request: list[AIServiceCodeRepairRequest])
         logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
         ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
         console.rule()
-        return []
+        return None
 
     def get_new_explanation(  # noqa: D417
         self,
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -48,13 +48,33 @@ class AIServiceRefinerRequest:
     function_references: str | None = None
 
 
+class TestDiffScope(str, Enum):  # which part of a test result differed (original vs candidate)
+    RETURN_VALUE = "return_value"  # return values did not compare equal
+    STDOUT = "stdout"  # captured stdout did not match
+    DID_PASS = "did_pass"  # noqa: S105
+    TIMED_OUT = "timed_out"  # NOTE(review): not assigned in the visible comparison code; confirm usage
+
+
+@dataclass
+class TestDiff:  # one mismatch between the original and the candidate run of a test
+    scope: TestDiffScope  # which aspect of the result differed
+    original_pass: bool  # did_pass of the original run
+    candidate_pass: bool  # NOTE(review): presumably did_pass of the candidate run; confirm at call site
+
+    original_value: str | None = None  # string form of the original's value for `scope`
+    candidate_value: str | None = None  # string form of the candidate's value for `scope`
+    test_src_code: str | None = None  # source of the test, as returned by get_src_code
+    candidate_pytest_error: str | None = None  # pytest error text captured for the candidate run
+    original_pytest_error: str | None = None  # NOTE(review): presumably the original run's pytest error
+
+
 @dataclass(frozen=True)
 class AIServiceCodeRepairRequest:
     optimization_id: str
     original_source_code: str
     modified_source_code: str
-    test_details: str
     trace_id: str
+    test_diffs: list[TestDiff]  # mismatches between original and candidate test results
 
 
 # If the method spam is in the class Ham, which is at the top level of the module eggs in the package foo, the fully
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -116,6 +116,7 @@
         CoverageData,
         FunctionCalledInTest,
         FunctionSource,
+        TestDiff,
     )
     from codeflash.verification.verification_utils import TestConfig
 
@@ -685,32 +686,15 @@ def determine_best_candidate(
                     baseline_results=original_code_baseline,
                     original_helper_code=original_helper_code,
                     file_path_to_helper_classes=file_path_to_helper_classes,
+                    code_context=code_context,
+                    candidate=candidate,
+                    exp_type=exp_type,
                 )
                 console.rule()
                 if not is_successful(run_results):
                     optimized_runtimes[candidate.optimization_id] = None
                     is_correct[candidate.optimization_id] = False
                     speedup_ratios[candidate.optimization_id] = None
-                    fail_value = run_results.value
-                    if (
-                        fail_value.strip() != "Test results did not match the test results of the original code."
-                        and len(future_all_refinements) <= 3
-                        and not candidate.optimization_id.endswith("cdrp")
-                    ):
-                        # # queue corresponding code repair optimization for best optimization
-                        future_all_refinements.append(
-                            self.code_repair_optimizations(
-                                original_source_code=code_context.read_writable_code.markdown,
-                                modified_source_code=candidate.source_code.markdown,
-                                test_details=fail_value,
-                                trace_id=self.function_trace_id[:-4] + exp_type
-                                if self.experiment_id
-                                else self.function_trace_id,
-                                ai_service_client=ai_service_client,
-                                executor=self.executor,
-                                optimization_id=candidate.optimization_id,
-                            )
-                        )
                 else:
                     candidate_result: OptimizedCandidateResult = run_results.unwrap()
                     best_test_runtime = candidate_result.best_test_runtime
@@ -978,22 +962,19 @@ def code_repair_optimizations(
         self,
         original_source_code: str,
         modified_source_code: str,
-        test_details: str,
+        test_diffs: list[TestDiff],
         trace_id: str,
         optimization_id: str,
         ai_service_client: AiServiceClient,
-        executor: concurrent.futures.ThreadPoolExecutor,
-    ) -> concurrent.futures.Future:
-        request = [
-            AIServiceCodeRepairRequest(
-                optimization_id=optimization_id,
-                original_source_code=original_source_code,
-                modified_source_code=modified_source_code,
-                test_details=test_details,
-                trace_id=trace_id,
-            )
-        ]
-        return executor.submit(ai_service_client.optimize_python_code_repair, request=request)
+    ) -> OptimizedCandidate | None:
+        request = AIServiceCodeRepairRequest(
+            optimization_id=optimization_id,
+            original_source_code=original_source_code,
+            modified_source_code=modified_source_code,
+            test_diffs=test_diffs,
+            trace_id=trace_id,
+        )
+        return ai_service_client.optimize_python_code_repair(request=request)
 
     def log_successful_optimization(
         self, explanation: Explanation, generated_tests: GeneratedTestsList, exp_type: str
@@ -1920,6 +1901,9 @@ def run_optimized_candidate(
         baseline_results: OriginalCodeBaseline,
         original_helper_code: dict[Path, str],
         file_path_to_helper_classes: dict[Path, set[str]],
+        code_context: CodeOptimizationContext,
+        candidate: OptimizedCandidate,
+        exp_type: str,
     ) -> Result[OptimizedCandidateResult, str]:
         assert (test_framework := self.args.test_framework) in {"pytest", "unittest"}  # noqa: RUF018
 
@@ -1980,29 +1964,50 @@ def run_optimized_candidate(
                     # if the test unmatched percentage is greater than 50%, we can't fix it
                     return self.get_results_not_matched_error()
 
-                logger.info("running code repair...")
-                # not sure if all return types will be convertible to string
-                diff_per_test_fn = {}
-                for diff in diffs:
-                    try:
-                        diff_per_test_fn[diff.test_src_code] = (
-                            diff_per_test_fn.setdefault(diff.test_src_code, "")
-                            + f"Expected Value: {diff.original_value!s}\nActual Value: {diff.candidate_value!s}\nError String:{diff.candidate_pytest_error}\n"
-                        )
+                if candidate.optimization_id.endswith("cdrp"):
+                    # prevent looping for now
+                    return self.get_results_not_matched_error()
+
+                ai_service_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client
+
+                with progress_bar("The test results are not matching, let me see if I can fix this"):
+                    new_candidate = self.code_repair_optimizations(
+                        original_source_code=code_context.read_writable_code.markdown,
+                        modified_source_code=candidate.source_code.markdown,
+                        test_diffs=diffs,
+                        trace_id=self.function_trace_id[:-4] + exp_type
+                        if self.experiment_id
+                        else self.function_trace_id,
+                        ai_service_client=ai_service_client,
+                        optimization_id=candidate.optimization_id,
+                    )
+                    if not new_candidate:
+                        return Failure("Code repair failed to generate a valid candidate.")
+
+                code_print(new_candidate.source_code.flat)
 
-                    except Exception as e:
-                        sentry_sdk.capture_exception(e)
-                        logger.exception(e)
-                        return self.get_results_not_matched_error()
                 try:
-                    test_issues = "\n".join(
-                        f"{test_fn_def}\n{value}" for test_fn_def, value in diff_per_test_fn.items()
+                    did_update = self.replace_function_and_helpers_with_optimized_code(
+                        code_context=code_context,
+                        optimized_code=new_candidate.source_code,
+                        original_helper_code=original_helper_code,
                     )
-                except Exception as e:
-                    sentry_sdk.capture_exception(e)
-                    logger.exception(e)
-                    return self.get_results_not_matched_error()
-                return Failure(test_issues)
+                    if did_update:
+                        return self.run_optimized_candidate(
+                            optimization_candidate_index=optimization_candidate_index,
+                            baseline_results=baseline_results,
+                            original_helper_code=original_helper_code,
+                            file_path_to_helper_classes=file_path_to_helper_classes,
+                            code_context=code_context,
+                            candidate=new_candidate,
+                            exp_type=exp_type,
+                        )
+                except (ValueError, SyntaxError, cst.ParserSyntaxError, AttributeError) as e:
+                    logger.error(e)
+                    self.write_code_and_helpers(
+                        self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
+                    )
+                    return Failure("Code repair failed to generate a valid candidate.")
 
             logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...")
 
diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py
@@ -1,12 +1,10 @@
 from __future__ import annotations
 
 import sys
-from dataclasses import dataclass
-from enum import Enum
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 from codeflash.cli_cmds.console import logger
-from codeflash.models.models import TestResults, TestType, VerificationType
+from codeflash.models.models import TestDiff, TestDiffScope, TestResults, TestType, VerificationType
 from codeflash.verification.comparator import comparator
 
 if TYPE_CHECKING:
@@ -15,26 +13,6 @@
 INCREASED_RECURSION_LIMIT = 5000
 
 
-class TestDiffScope(Enum):
-    RETURN_VALUE = "return_value"
-    STDOUT = "stdout"
-    DID_PASS = "did_pass"  # noqa: S105
-    TIMED_OUT = "timed_out"
-
-
-@dataclass
-class TestDiff:
-    scope: TestDiffScope
-    original_value: any
-    candidate_value: any
-    original_pass: bool
-    candidate_pass: bool
-
-    test_src_code: Optional[str] = None
-    candidate_pytest_error: Optional[str] = None
-    original_pytest_error: Optional[str] = None
-
-
 def compare_test_results(original_results: TestResults, candidate_results: TestResults) -> tuple[bool, list[TestDiff]]:
     # This is meant to be only called with test results for the first loop index
     if len(original_results) == 0 or len(candidate_results) == 0:
@@ -87,8 +65,8 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
         test_src_code = original_test_result.id.get_src_code(original_test_result.file_name)
         test_diff = TestDiff(
             scope=TestDiffScope.RETURN_VALUE,
-            original_value=original_test_result.return_value,
-            candidate_value=cdd_test_result.return_value,
+            original_value=f"{original_test_result.return_value!r}",
+            candidate_value=f"{cdd_test_result.return_value!r}",
             test_src_code=test_src_code,
             candidate_pytest_error=cdd_pytest_error,
             original_pass=original_test_result.did_pass,
@@ -97,8 +75,6 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
         )
         if not comparator(original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj):
             test_diff.scope = TestDiffScope.RETURN_VALUE
-            test_diff.original_value = original_test_result.return_value
-            test_diff.candidate_value = cdd_test_result.return_value
             test_diffs.append(test_diff)
 
             try:
@@ -117,8 +93,8 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
             original_test_result.stdout, cdd_test_result.stdout
         ):
             test_diff.scope = TestDiffScope.STDOUT
-            test_diff.original_value = original_test_result.stdout
-            test_diff.candidate_value = cdd_test_result.stdout
+            test_diff.original_value = str(original_test_result.stdout)
+            test_diff.candidate_value = str(cdd_test_result.stdout)
             test_diffs.append(test_diff)
 
         if original_test_result.test_type in {
@@ -128,8 +104,8 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
             TestType.REPLAY_TEST,
         } and (cdd_test_result.did_pass != original_test_result.did_pass):
             test_diff.scope = TestDiffScope.DID_PASS
-            test_diff.original_value = original_test_result.did_pass
-            test_diff.candidate_value = cdd_test_result.did_pass
+            test_diff.original_value = str(original_test_result.did_pass)
+            test_diff.candidate_value = str(cdd_test_result.did_pass)
             test_diffs.append(test_diff)
 
     sys.setrecursionlimit(original_recursion_limit)