codeflash-ai · mohammedahmed18 · Aug 7, 2025 · Jul 16, 2025 · Jul 24, 2025 · Jul 24, 2025
diff --git a/code_to_optimize/bubble_sort.py b/code_to_optimize/bubble_sort.py
@@ -7,4 +7,4 @@ def sorter(arr):
                 arr[j] = arr[j + 1]
                 arr[j + 1] = temp
     print(f"result: {arr}")
-    return arr
+    return arr
diff --git a/code_to_optimize/code_directories/circular_deps/constants.py b/code_to_optimize/code_directories/circular_deps/constants.py
@@ -1,8 +1,2 @@
 DEFAULT_API_URL = "https://api.galileo.ai/"
 DEFAULT_APP_URL = "https://app.galileo.ai/"
-
-
-# function_names: GalileoApiClient.get_console_url
-# module_abs_path : /home/mohammed/Work/galileo-python/src/galileo/api_client.py
-# preexisting_objects: {('GalileoApiClient', ()), ('_set_destination', ()), ('get_console_url', (FunctionParent(name='GalileoApiClient', type='ClassDef'),))}
-# project_root_path: /home/mohammed/Work/galileo-python/src
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -13,7 +13,7 @@
 from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
-from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate
+from codeflash.models.models import AIServiceRefinerRequest, CodeStringsMarkdown, OptimizedCandidate
 from codeflash.telemetry.posthog_cf import ph
 from codeflash.version import __version__ as codeflash_version
 
@@ -136,7 +136,7 @@ def optimize_python_code(  # noqa: D417
             logger.debug(f"Generating optimizations took {end_time - start_time:.2f} seconds.")
             return [
                 OptimizedCandidate(
-                    source_code=opt["source_code"],
+                    source_code=CodeStringsMarkdown.parse_flattened_code(opt["source_code"]),
                     explanation=opt["explanation"],
                     optimization_id=opt["optimization_id"],
                 )
@@ -206,7 +206,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
             console.rule()
             return [
                 OptimizedCandidate(
-                    source_code=opt["source_code"],
+                    source_code=CodeStringsMarkdown.parse_flattened_code(opt["source_code"]),
                     explanation=opt["explanation"],
                     optimization_id=opt["optimization_id"],
                 )
@@ -263,7 +263,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
             console.rule()
             return [
                 OptimizedCandidate(
-                    source_code=opt["source_code"],
+                    source_code=CodeStringsMarkdown.parse_flattened_code(opt["source_code"]),
                     explanation=opt["explanation"],
                     optimization_id=opt["optimization_id"][:-4] + "refi",
                 )

diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
@@ -104,7 +104,7 @@ def is_diff_line(line: str) -> bool:
 def format_code(
     formatter_cmds: list[str],
     path: Union[str, Path],
-    optimized_function: str = "",
+    optimized_code: str = "",
     check_diff: bool = False,  # noqa
     print_status: bool = True,  # noqa
     exit_on_failure: bool = True,  # noqa
@@ -121,7 +121,7 @@ def format_code(
 
         if check_diff and original_code_lines > 50:
             # we dont' count the formatting diff for the optimized function as it should be well-formatted
-            original_code_without_opfunc = original_code.replace(optimized_function, "")
+            original_code_without_opfunc = original_code.replace(optimized_code, "")
 
             original_temp = Path(test_dir_str) / "original_temp.py"
             original_temp.write_text(original_code_without_opfunc, encoding="utf8")

diff --git a/codeflash/context/code_context_extractor.py b/codeflash/context/code_context_extractor.py
@@ -61,13 +61,14 @@ def get_code_optimization_context(
     )
 
     # Extract code context for optimization
-    final_read_writable_code = extract_code_string_context_from_files(
+    final_read_writable_code = extract_code_markdown_context_from_files(
         helpers_of_fto_dict,
-        {},
+        helpers_of_helpers_dict,
         project_root_path,
         remove_docstrings=False,
         code_context_type=CodeContextType.READ_WRITABLE,
-    ).code
+    )
+
     read_only_code_markdown = extract_code_markdown_context_from_files(
         helpers_of_fto_dict,
         helpers_of_helpers_dict,
@@ -84,14 +85,14 @@ def get_code_optimization_context(
     )
 
     # Handle token limits
-    final_read_writable_tokens = encoded_tokens_len(final_read_writable_code)
+    final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.flat)
     if final_read_writable_tokens > optim_token_limit:
         raise ValueError("Read-writable code has exceeded token limit, cannot proceed")
 
     # Setup preexisting objects for code replacer
     preexisting_objects = set(
         chain(
-            find_preexisting_objects(final_read_writable_code),
+            find_preexisting_objects(final_read_writable_code.flat),
             *(find_preexisting_objects(codestring.code) for codestring in read_only_code_markdown.code_strings),
         )
     )

diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py
@@ -168,7 +168,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization
         generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests
     ]
     optimizations_dict = {
-        candidate.optimization_id: {"source_code": candidate.source_code, "explanation": candidate.explanation}
+        candidate.optimization_id: {"source_code": candidate.source_code.flat, "explanation": candidate.explanation}
         for candidate in optimizations_set.control + optimizations_set.experiment
     }
 
@@ -276,7 +276,7 @@ def perform_function_optimization(  # noqa: PLR0911
             "message": f"No best optimizations found for function {function_to_optimize_qualified_name}",
         }
 
-    optimized_source = best_optimization.candidate.source_code
+    optimized_source = best_optimization.candidate.source_code.flat
     speedup = original_code_baseline.runtime / best_optimization.runtime
 
     server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")

diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -19,7 +19,7 @@
 from typing import Annotated, Optional, cast
 
 from jedi.api.classes import Name
-from pydantic import AfterValidator, BaseModel, ConfigDict, Field
+from pydantic import AfterValidator, BaseModel, ConfigDict, PrivateAttr
 from pydantic.dataclasses import dataclass
 
 from codeflash.cli_cmds.console import console, logger
@@ -157,8 +157,29 @@ class CodeString(BaseModel):
     file_path: Optional[Path] = None
 
 
+# Each file's section of flattened code opens with a marker line: this prefix followed by the file path.
+LINE_SPLITTER_MARKER_PREFIX = "# --codeflash:file--"
+
+
+def get_code_block_splitter(file_path: Path) -> str:
+    return LINE_SPLITTER_MARKER_PREFIX + str(file_path)
+
+
+splitter_pattern = re.compile(rf"^{re.escape(LINE_SPLITTER_MARKER_PREFIX)}([^\n]+)\n", re.MULTILINE)  # group 1: path
+
+
 class CodeStringsMarkdown(BaseModel):
     code_strings: list[CodeString] = []
+    _cache: dict = PrivateAttr(default_factory=dict)
+
+    @property
+    def flat(self) -> str:
+        """Return all blocks joined by newlines, each preceded by its file marker line (memoized)."""
+        cached = self._cache.get("flat")
+        if cached is None:
+            blocks = (get_code_block_splitter(cs.file_path) + "\n" + cs.code for cs in self.code_strings)
+            cached = self._cache["flat"] = "\n".join(blocks)
+        return cached
 
     @property
     def markdown(self) -> str:
@@ -170,10 +191,30 @@ def markdown(self) -> str:
             ]
         )
 
+    def file_to_path(self) -> dict[str, str]:
+        """Return a memoized mapping of ``str(file_path)`` to that block's code."""
+        mapping = self._cache.get("file_to_path")
+        if mapping is None:
+            mapping = {str(cs.file_path): cs.code for cs in self.code_strings}
+            self._cache["file_to_path"] = mapping
+        return mapping
+
+    @staticmethod
+    def parse_flattened_code(flat_code: str) -> CodeStringsMarkdown:
+        """Split marker-delimited text into one CodeString per file marker; no markers yields no blocks."""
+        markers = list(splitter_pattern.finditer(flat_code))
+        # Each block ends where the next marker begins; the last one runs to the end of the text.
+        ends = [m.start() for m in markers[1:]] + [len(flat_code)]
+        parsed = CodeStringsMarkdown()
+        for marker, end in zip(markers, ends):
+            body = flat_code[marker.end() : end].lstrip("\n")
+            parsed.code_strings.append(CodeString(code=body, file_path=Path(marker.group(1).strip())))
+        return parsed
+
 
 class CodeOptimizationContext(BaseModel):
     testgen_context_code: str = ""
-    read_writable_code: str = Field(min_length=1)
+    read_writable_code: CodeStringsMarkdown
     read_only_context_code: str = ""
     hashing_code_context: str = ""
     hashing_code_context_hash: str = ""
@@ -272,7 +313,7 @@ class TestsInFile:
 
 @dataclass(frozen=True)
 class OptimizedCandidate:
-    source_code: str
+    source_code: CodeStringsMarkdown
     explanation: str
     optimization_id: str