markdown multi context

mohammedahmed18 · mohammedahmed18 · commit 84324f849101 · 2025-08-01T20:55:04.000+03:00
Signed-off-by: mohammed <mohammed18200118@gmail.com>
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -13,7 +13,7 @@
 from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
-from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate
+from codeflash.models.models import AIServiceRefinerRequest, CodeStringsMarkdown, OptimizedCandidate
 from codeflash.telemetry.posthog_cf import ph
 from codeflash.version import __version__ as codeflash_version
 
@@ -73,6 +73,9 @@ def make_ai_service_request(
         url = f"{self.base_url}/ai{endpoint}"
         if method.upper() == "POST":
             json_payload = json.dumps(payload, indent=None, default=pydantic_encoder)
+            print(f"------------------------JSON PAYLOAD for {url}--------------------")
+            print(json_payload)
+            print("-------------------END OF JSON PAYLOAD--------------------")
             headers = {**self.headers, "Content-Type": "application/json"}
             response = requests.post(url, data=json_payload, headers=headers, timeout=timeout)
         else:
@@ -136,7 +139,7 @@ def optimize_python_code(  # noqa: D417
             logger.debug(f"Generating optimizations took {end_time - start_time:.2f} seconds.")
             return [
                 OptimizedCandidate(
-                    source_code=opt["source_code"],
+                    source_code=CodeStringsMarkdown.parse_splitter_markers(opt["source_code"]),
                     explanation=opt["explanation"],
                     optimization_id=opt["optimization_id"],
                 )
@@ -206,7 +209,7 @@ def optimize_python_code_line_profiler(  # noqa: D417
             console.rule()
             return [
                 OptimizedCandidate(
-                    source_code=opt["source_code"],
+                    source_code=CodeStringsMarkdown.parse_splitter_markers(opt["source_code"]),
                     explanation=opt["explanation"],
                     optimization_id=opt["optimization_id"],
                 )
@@ -263,7 +266,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
             console.rule()
             return [
                 OptimizedCandidate(
-                    source_code=opt["source_code"],
+                    source_code=CodeStringsMarkdown.parse_splitter_markers(opt["source_code"]),
                     explanation=opt["explanation"],
                     optimization_id=opt["optimization_id"][:-4] + "refi",
                 )
diff --git a/codeflash/code_utils/code_replacer.py b/codeflash/code_utils/code_replacer.py
@@ -4,6 +4,7 @@
 from collections import defaultdict
 from functools import lru_cache
 from typing import TYPE_CHECKING, Optional, TypeVar
+from warnings import deprecated
 
 import isort
 import libcst as cst
@@ -432,6 +433,7 @@ def is_zero_diff(original_code: str, new_code: str) -> bool:
     return normalize_code(original_code) == normalize_code(new_code)
 
 
+@deprecated("")
 def replace_optimized_code(
     callee_module_paths: set[Path],
     candidates: list[OptimizedCandidate],
diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py
@@ -104,7 +104,7 @@ def is_diff_line(line: str) -> bool:
 def format_code(
     formatter_cmds: list[str],
     path: Union[str, Path],
-    optimized_function: str = "",
+    optimized_code: str = "",
     check_diff: bool = False,  # noqa
     print_status: bool = True,  # noqa
     exit_on_failure: bool = True,  # noqa
@@ -121,7 +121,8 @@ def format_code(
 
         if check_diff and original_code_lines > 50:
             # we dont' count the formatting diff for the optimized function as it should be well-formatted
-            original_code_without_opfunc = original_code.replace(optimized_function, "")
+            # TODO: This is not correct: optimized_code is not contiguous. Think of a better way of doing this.
+            original_code_without_opfunc = original_code.replace(optimized_code, "")
 
             original_temp = Path(test_dir_str) / "original_temp.py"
             original_temp.write_text(original_code_without_opfunc, encoding="utf8")
diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py
@@ -168,7 +168,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization
         generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests
     ]
     optimizations_dict = {
-        candidate.optimization_id: {"source_code": candidate.source_code, "explanation": candidate.explanation}
+        candidate.optimization_id: {"source_code": candidate.source_code.flat, "explanation": candidate.explanation}
         for candidate in optimizations_set.control + optimizations_set.experiment
     }
 
@@ -276,7 +276,7 @@ def perform_function_optimization(  # noqa: PLR0911
             "message": f"No best optimizations found for function {function_to_optimize_qualified_name}",
         }
 
-    optimized_source = best_optimization.candidate.source_code
+    optimized_source = best_optimization.candidate.source_code.flat
     speedup = original_code_baseline.runtime / best_optimization.runtime
 
     server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -157,6 +157,7 @@ class CodeString(BaseModel):
     file_path: Optional[Path] = None
 
 
+# Used to split files by adding a marker at the start of each file followed by the file path.
 LINE_SPLITTER_MARKER_PREFIX = "# codeflash-splitter__"
 
 
@@ -188,17 +189,17 @@ def markdown(self) -> str:
         )
 
     @staticmethod
-    def parse_splitter_markers(code_with_markers: str) -> dict[str, str]:
+    def parse_splitter_markers(code_with_markers: str) -> CodeStringsMarkdown:
         pattern = rf"{LINE_SPLITTER_MARKER_PREFIX}([^\n]+)\n"
         matches = list(re.finditer(pattern, code_with_markers))
 
-        results = {}
+        results = CodeStringsMarkdown()
         for i, match in enumerate(matches):
             start = match.end()
             end = matches[i + 1].start() if i + 1 < len(matches) else len(code_with_markers)
             file_path = match.group(1).strip()
             code = code_with_markers[start:end].lstrip("\n")
-            results[file_path] = code
+            results.code_strings.append(CodeString(code=code, file_path=Path(file_path)))
         return results
 
 
@@ -303,7 +304,7 @@ class TestsInFile:
 
 @dataclass(frozen=True)
 class OptimizedCandidate:
-    source_code: str
+    source_code: CodeStringsMarkdown
     explanation: str
     optimization_id: str
 
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -62,6 +62,7 @@
 from codeflash.either import Failure, Success, is_successful
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
 from codeflash.models.models import (
+    LINE_SPLITTER_MARKER_PREFIX,
     BestOptimization,
     CodeOptimizationContext,
     CodeStringsMarkdown,
@@ -216,7 +217,7 @@ def generate_and_instrument_tests(
             revert_to_print=bool(get_pr_number()),
         ):
             generated_results = self.generate_tests_and_optimizations(
-                testgen_context_code=code_context.testgen_context_code,
+                testgen_context_code=code_context.testgen_context_code,  # TODO: should we send the markdown context for the testgen instead?
                 read_writable_code=code_context.read_writable_code,
                 read_only_context_code=code_context.read_only_context_code,
                 helper_functions=code_context.helper_functions,
@@ -289,7 +290,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
 
         should_run_experiment, code_context, original_helper_code = initialization_result.unwrap()
 
-        code_print(code_context.read_writable_code.flat)
+        code_print(code_context.read_writable_code.flat)  # Should we print the markdown or the flattened code?
 
         test_setup_result = self.generate_and_instrument_tests(  # also generates optimizations
             code_context, should_run_experiment=should_run_experiment
@@ -414,11 +415,11 @@ def determine_best_candidate(
                     get_run_tmp_file(Path(f"test_return_values_{candidate_index}.bin")).unlink(missing_ok=True)
                     get_run_tmp_file(Path(f"test_return_values_{candidate_index}.sqlite")).unlink(missing_ok=True)
                     logger.info(f"Optimization candidate {candidate_index}/{original_len}:")
-                    code_print(candidate.source_code)
+                    code_print(candidate.source_code.flat)
                     try:
                         did_update = self.replace_function_and_helpers_with_optimized_code(
                             code_context=code_context,
-                            optimized_code=candidate.source_code,
+                            optimized_code=candidate.source_code.flat,
                             original_helper_code=original_helper_code,
                         )
                         if not did_update:
@@ -578,7 +579,7 @@ def determine_best_candidate(
         runtimes_list = []
         for valid_opt in self.valid_optimizations:
             diff_lens_list.append(
-                diff_length(valid_opt.candidate.source_code, code_context.read_writable_code.flat)
+                diff_length(valid_opt.candidate.source_code.flat, code_context.read_writable_code.flat)
             )  # char level diff
             runtimes_list.append(valid_opt.runtime)
         diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list)
@@ -613,7 +614,7 @@ def refine_optimizations(
                 original_source_code=code_context.read_writable_code.flat,
                 read_only_dependency_code=code_context.read_only_context_code,
                 original_code_runtime=humanize_runtime(original_code_baseline.runtime),
-                optimized_source_code=opt.candidate.source_code,
+                optimized_source_code=opt.candidate.source_code.flat,
                 optimized_explanation=opt.candidate.explanation,
                 optimized_code_runtime=humanize_runtime(opt.runtime),
                 speedup=f"{int(performance_gain(original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=opt.runtime) * 100)}%",
@@ -679,13 +680,13 @@ def write_code_and_helpers(original_code: str, original_helper_code: dict[Path,
                 f.write(helper_code)
 
     def reformat_code_and_helpers(
-        self, helper_functions: list[FunctionSource], path: Path, original_code: str, optimized_function: str
+        self, helper_functions: list[FunctionSource], path: Path, original_code: str, optimized_code: str
     ) -> tuple[str, dict[Path, str]]:
         should_sort_imports = not self.args.disable_imports_sorting
         if should_sort_imports and isort.code(original_code) != original_code:
             should_sort_imports = False
 
-        new_code = format_code(self.args.formatter_cmds, path, optimized_function=optimized_function, check_diff=True)
+        new_code = format_code(self.args.formatter_cmds, path, optimized_code=optimized_code, check_diff=True)
         if should_sort_imports:
             new_code = sort_imports(new_code)
 
@@ -694,7 +695,7 @@ def reformat_code_and_helpers(
             module_abspath = hp.file_path
             hp_source_code = hp.source_code
             formatted_helper_code = format_code(
-                self.args.formatter_cmds, module_abspath, optimized_function=hp_source_code, check_diff=True
+                self.args.formatter_cmds, module_abspath, optimized_code=hp_source_code, check_diff=True
             )
             if should_sort_imports:
                 formatted_helper_code = sort_imports(formatted_helper_code)
@@ -711,7 +712,8 @@ def replace_function_and_helpers_with_optimized_code(
             self.function_to_optimize.qualified_name
         )
 
-        file_to_code_context = CodeStringsMarkdown.parse_splitter_markers(optimized_code)
+        code_strings = CodeStringsMarkdown.parse_splitter_markers(optimized_code).code_strings
+        file_to_code_context = {str(code_string.file_path): code_string.code for code_string in code_strings}
 
         for helper_function in code_context.helper_functions:
             if helper_function.jedi_definition.type != "class":
@@ -721,11 +723,12 @@ def replace_function_and_helpers_with_optimized_code(
             relative_module_path = str(module_abspath.relative_to(self.project_root))
             logger.debug(f"applying optimized code to: {relative_module_path}")
 
-            scoped_optimized_code = file_to_code_context.get(relative_module_path, None)
+            scoped_optimized_code = file_to_code_context.get(relative_module_path)
             if scoped_optimized_code is None:
                 logger.warning(
                     f"Optimized code not found for {relative_module_path} In the context\n-------\n{optimized_code}\n-------\n"
                     "Existing files in the context are: {list(file_to_code_context.keys())}, re-check your 'split markers'"
+                    f"existing files are {file_to_code_context.keys()}"
                 )
                 scoped_optimized_code = ""
 
@@ -1063,7 +1066,7 @@ def find_and_process_best_optimization(
 
             if best_optimization:
                 logger.info("Best candidate:")
-                code_print(best_optimization.candidate.source_code)
+                code_print(best_optimization.candidate.source_code.flat)
                 console.print(
                     Panel(
                         best_optimization.candidate.explanation, title="Best Candidate Explanation", border_style="blue"
@@ -1089,15 +1092,15 @@ def find_and_process_best_optimization(
 
                 self.replace_function_and_helpers_with_optimized_code(
                     code_context=code_context,
-                    optimized_code=best_optimization.candidate.source_code,
+                    optimized_code=best_optimization.candidate.source_code.flat,
                     original_helper_code=original_helper_code,
                 )
 
                 new_code, new_helper_code = self.reformat_code_and_helpers(
                     code_context.helper_functions,
                     explanation.file_path,
                     self.function_to_optimize_source_code,
-                    optimized_function=best_optimization.candidate.source_code,
+                    optimized_code=best_optimization.candidate.source_code.flat,
                 )
 
                 original_code_combined = original_helper_code.copy()
@@ -1169,10 +1172,14 @@ def process_review(
             optimized_runtimes_all=optimized_runtime_by_test,
         )
         new_explanation_raw_str = self.aiservice_client.get_new_explanation(
-            source_code=code_context.read_writable_code,
+            source_code=code_context.read_writable_code.flat.replace(
+                LINE_SPLITTER_MARKER_PREFIX, "# file: "
+            ),  # for better readability to the LLM
             dependency_code=code_context.read_only_context_code,
             trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
-            optimized_code=best_optimization.candidate.source_code,
+            optimized_code=best_optimization.candidate.source_code.flat.replace(
+                LINE_SPLITTER_MARKER_PREFIX, "# file: "
+            ),
             original_line_profiler_results=original_code_baseline.line_profile_results["str_out"],
             optimized_line_profiler_results=best_optimization.line_profiler_test_results["str_out"],
             original_code_runtime=humanize_runtime(original_code_baseline.runtime),
diff --git a/tests/test_formatter.py b/tests/test_formatter.py
@@ -263,7 +263,7 @@ def _run_formatting_test(source_code: str, should_content_change: bool, expected
             helper_functions=[],
             path=target_path,
             original_code=optimizer.function_to_optimize_source_code,
-            optimized_function=optimized_function,
+            optimized_code=optimized_function,
         )
 
         content = target_path.read_text(encoding="utf8")

Original file line number	Diff line number	Diff line change
`@@ -168,7 +168,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization`
`168`	`168`	`generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests`
`169`	`169`	`]`
`170`	`170`	`optimizations_dict = {`
`171`		`- candidate.optimization_id: {"source_code": candidate.source_code, "explanation": candidate.explanation}`
	`171`	`+ candidate.optimization_id: {"source_code": candidate.source_code.flat, "explanation": candidate.explanation}`
`172`	`172`	`for candidate in optimizations_set.control + optimizations_set.experiment`
`173`	`173`	`}`
`174`	`174`
`@@ -276,7 +276,7 @@ def perform_function_optimization( # noqa: PLR0911`
`276`	`276`	`"message": f"No best optimizations found for function {function_to_optimize_qualified_name}",`
`277`	`277`	`}`
`278`	`278`
`279`		`- optimized_source = best_optimization.candidate.source_code`
	`279`	`+ optimized_source = best_optimization.candidate.source_code.flat`
`280`	`280`	`speedup = original_code_baseline.runtime / best_optimization.runtime`
`281`	`281`
`282`	`282`	`server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")`
Original file line number	Diff line number	Diff line change
`@@ -263,7 +263,7 @@ def _run_formatting_test(source_code: str, should_content_change: bool, expected`
`263`	`263`	`helper_functions=[],`
`264`	`264`	`path=target_path,`
`265`	`265`	`original_code=optimizer.function_to_optimize_source_code,`
`266`		`- optimized_function=optimized_function,`
	`266`	`+ optimized_code=optimized_function,`
`267`	`267`	`)`
`268`	`268`
`269`	`269`	`content = target_path.read_text(encoding="utf8")`