Merge branch 'feat/feedback-loop-for-unmatched-test-results' of github.com:codeflash-ai/codeflash into feat/feedback-loop-for-unmatched-test-results

mohammedahmed18 · mohammedahmed18 · commit 1ddc87c70c99 · 2025-12-01T16:49:09.000+02:00
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -310,6 +310,7 @@ def optimize_python_code_repair(self, request: list[AIServiceCodeRepairRequest])
                 "optimization_id": opt.optimization_id,
                 "original_source_code": opt.original_source_code,
                 "modified_source_code": opt.modified_source_code,
+                "test_details": opt.test_details,
                 "trace_id": opt.trace_id,
             }
             for opt in request
@@ -325,7 +326,7 @@ def optimize_python_code_repair(self, request: list[AIServiceCodeRepairRequest])
 
         if response.status_code == 200:
             refined_optimizations = response.json()["code_repairs"]
-            logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
+            # logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
             console.rule()
 
             refinements = self._get_valid_candidates(refined_optimizations)
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -5,6 +5,7 @@
 import os
 import queue
 import random
+import sqlite3
 import subprocess
 import time
 import uuid
@@ -119,6 +120,61 @@
     from codeflash.verification.verification_utils import TestConfig
 
 
+def log_code_repair_to_db(
+    code_repair_log_db: Path,
+    optimization_id: str,
+    trace_id: str | None = None,
+    passed: str | None = None,
+    faster: str | None = None,
+) -> None:
+    """Log code repair data to SQLite database.
+
+    Uses upsert pattern to allow incremental logging with different columns at different places.
+    Only non-None values will be updated; existing values are preserved.
+
+    Any exception is reported to Sentry and logged instead of being raised, and the
+    connection is closed whether or not the write succeeded.
+    """
+    # Bound before the try so the finally clause can tell whether connect() succeeded.
+    conn = None
+    try:
+        conn = sqlite3.connect(code_repair_log_db)
+        cursor = conn.cursor()
+
+        # Build dynamic upsert query based on provided columns
+        columns = ["optimization_id"]
+        values = [optimization_id]
+        update_parts = ["updated_at = CURRENT_TIMESTAMP"]
+
+        # Column names come from this fixed tuple, never from caller input, so
+        # interpolating them into the SQL text below is safe; values stay bound via "?".
+        for column, value in (("trace_id", trace_id), ("passed", passed), ("faster", faster)):
+            if value is not None:
+                columns.append(column)
+                values.append(value)
+                update_parts.append(f"{column} = excluded.{column}")
+
+        placeholders = ", ".join(["?"] * len(values))
+        columns_str = ", ".join(columns)
+        update_str = ", ".join(update_parts)
+
+        cursor.execute(
+            f"""
+            INSERT INTO code_repair_logs_cf ({columns_str})
+            VALUES ({placeholders})
+            ON CONFLICT(optimization_id) DO UPDATE SET {update_str}
+            """,  # noqa: S608
+            values,
+        )
+        conn.commit()
+    except Exception as e:
+        sentry_sdk.capture_exception(e)
+        logger.exception(e)
+    finally:
+        # Runs on every path so a failed execute/commit cannot leak the connection.
+        if conn is not None:
+            conn.close()
+
 class CandidateProcessor:
     """Handles candidate processing using a queue-based approach."""
 
@@ -249,6 +305,8 @@ def __init__(
             max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4
         )
         self.optimization_review = ""
+        # SQLite database setup for logging
+        self.code_repair_log_db = Path(__file__).parent / "code_repair_logs_cf.db"
 
     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
         should_run_experiment = self.experiment_id is not None
@@ -389,7 +447,20 @@ def optimize_function(self) -> Result[BestOptimization, str]:
         initialization_result = self.can_be_optimized()
         if not is_successful(initialization_result):
             return Failure(initialization_result.failure())
-
+        conn = sqlite3.connect(self.code_repair_log_db)
+        cursor = conn.cursor()
+        cursor.execute("""
+                               CREATE TABLE IF NOT EXISTS code_repair_logs_cf (
+            optimization_id TEXT PRIMARY KEY,
+            trace_id TEXT,
+            passed TEXT,
+            faster TEXT,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+                       """)
+        conn.commit()
+        conn.close()
         should_run_experiment, code_context, original_helper_code = initialization_result.unwrap()
 
         code_print(
@@ -540,13 +611,29 @@ def determine_best_candidate(
                         logger.warning(
                             "force_lsp|No functions were replaced in the optimized code. Skipping optimization candidate."
                         )
+                        if candidate.optimization_id.endswith("cdrp"):
+                            log_code_repair_to_db(
+                                code_repair_log_db=self.code_repair_log_db,
+                                trace_id=self.function_trace_id[:-4] + exp_type,
+                                optimization_id=candidate.optimization_id,
+                                passed="no",
+                                faster="no",
+                            )
                         console.rule()
                         continue
                 except (ValueError, SyntaxError, cst.ParserSyntaxError, AttributeError) as e:
                     logger.error(e)
                     self.write_code_and_helpers(
                         self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
                     )
+                    if candidate.optimization_id.endswith("cdrp"):
+                        log_code_repair_to_db(
+                            code_repair_log_db=self.code_repair_log_db,
+                            trace_id=self.function_trace_id[:-4] + exp_type,
+                            optimization_id=candidate.optimization_id,
+                            passed="no",
+                            faster="no",
+                        )
                     continue
                 # check if this code has been evaluated before by checking the ast normalized code string
                 normalized_code = normalize_code(candidate.source_code.flat.strip())
@@ -574,6 +661,19 @@ def determine_best_candidate(
                     ):  # new candidate has a shorter diff than the previously encountered one
                         ast_code_to_id[normalized_code]["shorter_source_code"] = candidate.source_code
                         ast_code_to_id[normalized_code]["diff_len"] = new_diff_len
+                    if candidate.optimization_id.endswith("cdrp"):
+                        log_code_repair_to_db(
+                            code_repair_log_db=self.code_repair_log_db,
+                            trace_id=self.function_trace_id[:-4] + exp_type,
+                            optimization_id=candidate.optimization_id,
+                            passed="yes" if is_correct[candidate.optimization_id] else "no",
+                            faster="yes"
+                            if (
+                                speedup_ratios[candidate.optimization_id] is not None
+                                and speedup_ratios[candidate.optimization_id] > 0
+                            )
+                            else "no",
+                        )
                     continue
                 ast_code_to_id[normalized_code] = {
                     "optimization_id": candidate.optimization_id,
@@ -593,24 +693,22 @@ def determine_best_candidate(
                     speedup_ratios[candidate.optimization_id] = None
                     fail_value = run_results.value
                     if (
-                        fail_value != "Test results did not match the test results of the original code."
+                        fail_value.strip() != "Test results did not match the test results of the original code."
                         and len(future_all_refinements) <= 3
                         and not candidate.optimization_id.endswith("cdrp")
                     ):
                         # # queue corresponding code repair optimization for best optimization
                         future_all_refinements.append(
                             self.code_repair_optimizations(
-                                original_source_code=candidate,
-                                modified_source_code=code_context,
-                                original_code_baseline=original_code_baseline,
-                                test_details="test_details",
-                                code_context=code_context,
+                                original_source_code=code_context.read_writable_code.markdown,
+                                modified_source_code=candidate.source_code.markdown,
+                                test_details=fail_value,
                                 trace_id=self.function_trace_id[:-4] + exp_type
                                 if self.experiment_id
                                 else self.function_trace_id,
                                 ai_service_client=ai_service_client,
                                 executor=self.executor,
-                                function_references=function_references,
+                                optimization_id=candidate.optimization_id,
                             )
                         )
                 else:
@@ -745,6 +843,19 @@ def determine_best_candidate(
                     if self.args.benchmark and benchmark_tree:
                         console.print(benchmark_tree)
                     console.rule()
+                if candidate.optimization_id.endswith("cdrp"):
+                    log_code_repair_to_db(
+                        code_repair_log_db=self.code_repair_log_db,
+                        trace_id=self.function_trace_id[:-4] + exp_type,
+                        optimization_id=candidate.optimization_id,
+                        passed="yes" if is_correct[candidate.optimization_id] else "no",
+                        faster="yes"
+                        if (
+                            speedup_ratios[candidate.optimization_id] is not None
+                            and speedup_ratios[candidate.optimization_id] > 0
+                        )
+                        else "no",
+                    )
             except KeyboardInterrupt as e:
                 logger.exception(f"Optimization interrupted: {e}")
                 raise
@@ -869,12 +980,13 @@ def code_repair_optimizations(
         modified_source_code: str,
         test_details: str,
         trace_id: str,
+        optimization_id: str,
         ai_service_client: AiServiceClient,
         executor: concurrent.futures.ThreadPoolExecutor,
     ) -> concurrent.futures.Future:
         request = [
             AIServiceCodeRepairRequest(
-                optimization_id="",
+                optimization_id=optimization_id,
                 original_source_code=original_source_code,
                 modified_source_code=modified_source_code,
                 test_details=test_details,
@@ -1875,7 +1987,7 @@ def run_optimized_candidate(
                     try:
                         diff_per_test_fn[diff.test_src_code] = (
                             diff_per_test_fn.setdefault(diff.test_src_code, "")
-                            + f"Expected Value: {diff.original_value!s}\nActual Value: {diff.candidate_value!s}\nError String:{diff.pytest_error}\n"
+                            + f"Expected Value: {diff.original_value!s}\nActual Value: {diff.candidate_value!s}\nError String:{diff.candidate_pytest_error}\n"
                         )
 
                     except Exception as e: