Merge branch 'main' of github.com:codeflash-ai/codeflash into fix/duplicate-global-assignments-when-reverting-helpers

mohammedahmed18 · mohammedahmed18 · commit 28f50cc1e0c5 · 2025-08-25T21:36:35.000+03:00
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -81,6 +81,19 @@ def make_ai_service_request(
         # response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
         return response
 
+    def _get_valid_candidates(self, optimizations_json: list[dict[str, Any]]) -> list[OptimizedCandidate]:
+        candidates: list[OptimizedCandidate] = []
+        for opt in optimizations_json:
+            code = CodeStringsMarkdown.parse_markdown_code(opt["source_code"])
+            if not code.code_strings:
+                continue
+            candidates.append(
+                OptimizedCandidate(
+                    source_code=code, explanation=opt["explanation"], optimization_id=opt["optimization_id"]
+                )
+            )
+        return candidates
+
     def optimize_python_code(  # noqa: D417
         self,
         source_code: str,
@@ -135,14 +148,7 @@ def optimize_python_code(  # noqa: D417
             console.rule()
             end_time = time.perf_counter()
             logger.debug(f"Generating optimizations took {end_time - start_time:.2f} seconds.")
-            return [
-                OptimizedCandidate(
-                    source_code=CodeStringsMarkdown.parse_markdown_code(opt["source_code"]),
-                    explanation=opt["explanation"],
-                    optimization_id=opt["optimization_id"],
-                )
-                for opt in optimizations_json
-            ]
+            return self._get_valid_candidates(optimizations_json)
         try:
             error = response.json()["error"]
         except Exception:
@@ -205,14 +211,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
             optimizations_json = response.json()["optimizations"]
             logger.info(f"Generated {len(optimizations_json)} candidate optimizations using line profiler information.")
             console.rule()
-            return [
-                OptimizedCandidate(
-                    source_code=CodeStringsMarkdown.parse_markdown_code(opt["source_code"]),
-                    explanation=opt["explanation"],
-                    optimization_id=opt["optimization_id"],
-                )
-                for opt in optimizations_json
-            ]
+            return self._get_valid_candidates(optimizations_json)
         try:
             error = response.json()["error"]
         except Exception:
@@ -262,14 +261,17 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
             refined_optimizations = response.json()["refinements"]
             logger.debug(f"Generated {len(refined_optimizations)} candidate refinements.")
             console.rule()
+
+            refinements = self._get_valid_candidates(refined_optimizations)
             return [
                 OptimizedCandidate(
-                    source_code=CodeStringsMarkdown.parse_markdown_code(opt["source_code"]),
-                    explanation=opt["explanation"],
-                    optimization_id=opt["optimization_id"][:-4] + "refi",
+                    source_code=c.source_code,
+                    explanation=c.explanation,
+                    optimization_id=c.optimization_id[:-4] + "refi",
                 )
-                for opt in refined_optimizations
+                for c in refinements
             ]
+
         try:
             error = response.json()["error"]
         except Exception:
diff --git a/codeflash/code_utils/code_extractor.py b/codeflash/code_utils/code_extractor.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import ast
+from itertools import chain
 from typing import TYPE_CHECKING, Optional
 
 import libcst as cst
@@ -119,6 +120,32 @@ def leave_Assign(self, original_node: cst.Assign, updated_node: cst.Assign) -> c
 
         return updated_node
 
+    def _find_insertion_index(self, updated_node: cst.Module) -> int:
+        """Find the position of the last import statement in the top-level of the module."""
+        insert_index = 0
+        for i, stmt in enumerate(updated_node.body):
+            is_top_level_import = isinstance(stmt, cst.SimpleStatementLine) and any(
+                isinstance(child, (cst.Import, cst.ImportFrom)) for child in stmt.body
+            )
+
+            is_conditional_import = isinstance(stmt, cst.If) and all(
+                isinstance(inner, cst.SimpleStatementLine)
+                and all(isinstance(child, (cst.Import, cst.ImportFrom)) for child in inner.body)
+                for inner in stmt.body.body
+            )
+
+            if is_top_level_import or is_conditional_import:
+                insert_index = i + 1
+
+            # Stop scanning once we reach a class or function definition.
+            # Imports are supposed to be at the top of the file, but they can technically appear anywhere, even at the bottom of the file.
+            # Without this check, a stray import later in the file
+            # would incorrectly shift our insertion index below actual code definitions.
+            if isinstance(stmt, (cst.ClassDef, cst.FunctionDef)):
+                break
+
+        return insert_index
+
     def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> cst.Module:
         # Add any new assignments that weren't in the original file
         new_statements = list(updated_node.body)
@@ -131,18 +158,26 @@ def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> c
         ]
 
         if assignments_to_append:
-            # Add a blank line before appending new assignments if needed
-            if new_statements and not isinstance(new_statements[-1], cst.EmptyLine):
-                new_statements.append(cst.SimpleStatementLine([cst.Pass()], leading_lines=[cst.EmptyLine()]))
-                new_statements.pop()  # Remove the Pass statement but keep the empty line
-
-            # Add the new assignments
-            new_statements.extend(
-                [
-                    cst.SimpleStatementLine([assignment], leading_lines=[cst.EmptyLine()])
-                    for assignment in assignments_to_append
-                ]
-            )
+            # after last top-level imports
+            insert_index = self._find_insertion_index(updated_node)
+
+            assignment_lines = [
+                cst.SimpleStatementLine([assignment], leading_lines=[cst.EmptyLine()])
+                for assignment in assignments_to_append
+            ]
+
+            new_statements = list(chain(new_statements[:insert_index], assignment_lines, new_statements[insert_index:]))
+
+            # Add a blank line after the last assignment if needed
+            after_index = insert_index + len(assignment_lines)
+            if after_index < len(new_statements):
+                next_stmt = new_statements[after_index]
+                # If there's no empty line, add one
+                has_empty = any(isinstance(line, cst.EmptyLine) for line in next_stmt.leading_lines)
+                if not has_empty:
+                    new_statements[after_index] = next_stmt.with_changes(
+                        leading_lines=[cst.EmptyLine(), *next_stmt.leading_lines]
+                    )
 
         return updated_node.with_changes(body=new_statements)
 
@@ -341,6 +376,7 @@ def add_global_assignments(src_module_code: str, dst_module_code: str) -> str:
     new_added_global_statements = extract_global_statements(src_module_code)
     existing_global_statements = extract_global_statements(dst_module_code)
 
+    # Make sure no statement is applied more than once at the global level.
     unique_global_statements = [
         stmt
         for stmt in new_added_global_statements
diff --git a/codeflash/code_utils/code_replacer.py b/codeflash/code_utils/code_replacer.py
@@ -412,6 +412,7 @@ def replace_function_definitions_in_module(
     module_abspath: Path,
     preexisting_objects: set[tuple[str, tuple[FunctionParent, ...]]],
     project_root_path: Path,
+    global_assignments_added_before: bool = False,  # noqa: FBT001, FBT002
 ) -> bool:
     source_code: str = module_abspath.read_text(encoding="utf8")
     code_to_apply = get_optimized_code_for_module(module_abspath.relative_to(project_root_path), optimized_code)
@@ -421,7 +422,7 @@ def replace_function_definitions_in_module(
         # becuase of an "edge case" where the optimized code intoduced a new import and a global assignment using that import
         # and that import wasn't used before, so it was ignored when calling AddImportsVisitor.add_needed_import inside replace_functions_and_add_imports (because the global assignment wasn't added yet)
         # this was added at https://github.com/codeflash-ai/codeflash/pull/448
-        add_global_assignments(code_to_apply, source_code),
+        add_global_assignments(code_to_apply, source_code) if not global_assignments_added_before else source_code,
         function_names,
         code_to_apply,
         module_abspath,
diff --git a/codeflash/context/unused_definition_remover.py b/codeflash/context/unused_definition_remover.py
@@ -537,6 +537,7 @@ def revert_unused_helper_functions(
                     module_abspath=file_path,
                     preexisting_objects=set(),  # Empty set since we're reverting
                     project_root_path=project_root,
+                    global_assignments_added_before=True,  # since we revert helpers functions after applying the optimization, we know that the file already has global assignments added, otherwise they would be added twice.
                 )
 
                 if reverted_code:
diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py
@@ -110,7 +110,7 @@ def initialize_function_optimization(
 
     if count == 0:
         server.show_message_log(f"No optimizable functions found for {params.functionName}", "Warning")
-        cleanup_the_optimizer(server)
+        server.cleanup_the_optimizer()
         return {"functionName": params.functionName, "status": "error", "message": "not found", "args": None}
 
     fto = optimizable_funcs.popitem()[1][0]
@@ -217,6 +217,7 @@ def provide_api_key(server: CodeflashLanguageServer, params: ProvideApiKeyParams
 
 
 @server.feature("performFunctionOptimization")
+@server.thread()
 def perform_function_optimization(  # noqa: PLR0911
     server: CodeflashLanguageServer, params: FunctionOptimizationParams
 ) -> dict[str, str]:
@@ -337,14 +338,4 @@ def perform_function_optimization(  # noqa: PLR0911
             "explanation": best_optimization.explanation_v2,
         }
     finally:
-        cleanup_the_optimizer(server)
-
-
-def cleanup_the_optimizer(server: CodeflashLanguageServer) -> None:
-    server.optimizer.cleanup_temporary_paths()
-    # restore args and test cfg
-    if server.optimizer.original_args_and_test_cfg:
-        server.optimizer.args, server.optimizer.test_cfg = server.optimizer.original_args_and_test_cfg
-    server.optimizer.args.function = None
-    server.optimizer.current_worktree = None
-    server.optimizer.current_function_optimizer = None
+        server.cleanup_the_optimizer()
diff --git a/codeflash/lsp/server.py b/codeflash/lsp/server.py
@@ -1,12 +1,14 @@
 from __future__ import annotations
 
+import sys
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from threading import Event
+from typing import TYPE_CHECKING, Any, Optional, TextIO
 
 from lsprotocol.types import INITIALIZE, LogMessageParams, MessageType
 from pygls import uris
 from pygls.protocol import LanguageServerProtocol, lsp_method
-from pygls.server import LanguageServer
+from pygls.server import LanguageServer, StdOutTransportAdapter, aio_readline
 
 if TYPE_CHECKING:
     from lsprotocol.types import InitializeParams, InitializeResult
@@ -81,3 +83,39 @@ def show_message_log(self, message: str, message_type: str) -> None:
         # Send log message to client (appears in output channel)
         log_params = LogMessageParams(type=lsp_message_type, message=message)
         self.lsp.notify("window/logMessage", log_params)
+
+    def cleanup_the_optimizer(self) -> None:
+        try:
+            self.optimizer.cleanup_temporary_paths()
+            # restore args and test cfg
+            if self.optimizer.original_args_and_test_cfg:
+                self.optimizer.args, self.optimizer.test_cfg = self.optimizer.original_args_and_test_cfg
+            self.optimizer.args.function = None
+            self.optimizer.current_worktree = None
+            self.optimizer.current_function_optimizer = None
+        except Exception:
+            self.show_message_log("Failed to cleanup optimizer", "Error")
+
+    def start_io(self, stdin: Optional[TextIO] = None, stdout: Optional[TextIO] = None) -> None:
+        self.show_message_log("Starting IO server", "Info")
+
+        self._stop_event = Event()
+        transport = StdOutTransportAdapter(stdin or sys.stdin.buffer, stdout or sys.stdout.buffer)
+        self.lsp.connection_made(transport)
+        try:
+            self.loop.run_until_complete(
+                aio_readline(
+                    self.loop,
+                    self.thread_pool_executor,
+                    self._stop_event,
+                    stdin or sys.stdin.buffer,
+                    self.lsp.data_received,
+                )
+            )
+        except BrokenPipeError:
+            self.show_message_log("Connection to the client is lost! Shutting down the server.", "Error")
+        except (KeyboardInterrupt, SystemExit):
+            pass
+        finally:
+            self.cleanup_the_optimizer()
+            self.shutdown()
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -19,7 +19,7 @@
 from typing import Annotated, Optional, cast
 
 from jedi.api.classes import Name
-from pydantic import AfterValidator, BaseModel, ConfigDict, PrivateAttr
+from pydantic import AfterValidator, BaseModel, ConfigDict, PrivateAttr, ValidationError
 from pydantic.dataclasses import dataclass
 
 from codeflash.cli_cmds.console import console, logger
@@ -239,10 +239,14 @@ def parse_markdown_code(markdown_code: str) -> CodeStringsMarkdown:
         """
         matches = markdown_pattern.findall(markdown_code)
         results = CodeStringsMarkdown()
-        for file_path, code in matches:
-            path = file_path.strip()
-            results.code_strings.append(CodeString(code=code, file_path=Path(path)))
-        return results
+        try:
+            for file_path, code in matches:
+                path = file_path.strip()
+                results.code_strings.append(CodeString(code=code, file_path=Path(path)))
+            return results  # noqa: TRY300
+        except ValidationError:
+            # if any file is invalid, return an empty CodeStringsMarkdown for the entire context
+            return CodeStringsMarkdown()
 
 
 class CodeOptimizationContext(BaseModel):
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -1354,6 +1354,7 @@ def process_review(
             return
 
     def revert_code_and_helpers(self, original_helper_code: dict[Path, str]) -> None:
+        logger.info("Reverting code and helpers...")
         self.write_code_and_helpers(
             self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
         )
diff --git a/tests/test_code_replacement.py b/tests/test_code_replacement.py
diff --git a/tests/test_multi_file_code_replacement.py b/tests/test_multi_file_code_replacement.py
diff --git a/tests/test_validate_python_code.py b/tests/test_validate_python_code.py

Original file line number	Diff line number	Diff line change
`@@ -537,6 +537,7 @@ def revert_unused_helper_functions(`
`537`	`537`	`module_abspath=file_path,`
`538`	`538`	`preexisting_objects=set(), # Empty set since we're reverting`
`539`	`539`	`project_root_path=project_root,`
	`540`	`+ global_assignments_added_before=True, # since we revert helpers functions after applying the optimization, we know that the file already has global assignments added, otherwise they would be added twice.`
`540`	`541`	`)`
`541`	`542`
`542`	`543`	`if reverted_code:`
Original file line number	Diff line number	Diff line change
`@@ -1354,6 +1354,7 @@ def process_review(`
`1354`	`1354`	`return`
`1355`	`1355`
`1356`	`1356`	`def revert_code_and_helpers(self, original_helper_code: dict[Path, str]) -> None:`
	`1357`	`+ logger.info("Reverting code and helpers...")`
`1357`	`1358`	`self.write_code_and_helpers(`
`1358`	`1359`	`self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path`
`1359`	`1360`	`)`