Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
216eb7e
start using markdown representation for read writable context
mohammedahmed18 Jul 16, 2025
cc2cc1b
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 24, 2025
083a983
render the code markdown to the console
mohammedahmed18 Jul 24, 2025
4385b8e
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 24, 2025
e504c87
split & apply
mohammedahmed18 Jul 24, 2025
99cd9dc
fix tests for context extractor
mohammedahmed18 Jul 25, 2025
330bf91
fix code replacement tests
mohammedahmed18 Jul 25, 2025
886616f
fix unused helper tests
mohammedahmed18 Jul 25, 2025
d3e5e6f
fix for python 3.9
mohammedahmed18 Jul 25, 2025
f48c77d
flat method rename
mohammedahmed18 Jul 25, 2025
57f8af0
test multifile replacement
mohammedahmed18 Jul 26, 2025
654a6ec
refactoring
mohammedahmed18 Jul 26, 2025
7ff9759
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 28, 2025
af27459
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 29, 2025
86913d4
flatten the context for refinement changes
mohammedahmed18 Jul 29, 2025
8a09c35
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 31, 2025
4edea33
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 1, 2025
84324f8
markdown multi context
mohammedahmed18 Aug 1, 2025
a81b1cc
fix import issue
mohammedahmed18 Aug 1, 2025
6b5c4a5
change the splitter marker
mohammedahmed18 Aug 4, 2025
a1c10a0
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 4, 2025
3eee162
fix markdown context for formatting and more refactoring
mohammedahmed18 Aug 4, 2025
b3bd888
fix splitter pattern
mohammedahmed18 Aug 4, 2025
b6e3c0d
fix unit tests
mohammedahmed18 Aug 4, 2025
307e6bb
revert unwanted changes
mohammedahmed18 Aug 4, 2025
f187456
cleanup
mohammedahmed18 Aug 5, 2025
d53be61
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 5, 2025
07a9365
send & receive markdown code
mohammedahmed18 Aug 5, 2025
989b1f3
unit tests fixing
mohammedahmed18 Aug 6, 2025
684661e
typo
mohammedahmed18 Aug 6, 2025
6fc8926
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 6, 2025
a7ff701
eliminate the use of flat code for parsing
mohammedahmed18 Aug 6, 2025
c8d4e05
chore: trigger CI
mohammedahmed18 Aug 6, 2025
ef083ec
remove comments
mohammedahmed18 Aug 7, 2025
5573c46
Merge branch 'main' into feat/markdown-read-writable-context
mohammedahmed18 Aug 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions code_to_optimize/code_directories/circular_deps/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,2 @@
DEFAULT_API_URL = "https://api.galileo.ai/"
DEFAULT_APP_URL = "https://app.galileo.ai/"


# function_names: GalileoApiClient.get_console_url
# module_abs_path : /home/mohammed/Work/galileo-python/src/galileo/api_client.py
# preexisting_objects: {('GalileoApiClient', ()), ('_set_destination', ()), ('get_console_url', (FunctionParent(name='GalileoApiClient', type='ClassDef'),))}
# project_root_path: /home/mohammed/Work/galileo-python/src
8 changes: 4 additions & 4 deletions codeflash/api/aiservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate
from codeflash.models.models import AIServiceRefinerRequest, CodeStringsMarkdown, OptimizedCandidate
from codeflash.telemetry.posthog_cf import ph
from codeflash.version import __version__ as codeflash_version

Expand Down Expand Up @@ -136,7 +136,7 @@ def optimize_python_code( # noqa: D417
logger.debug(f"Generating optimizations took {end_time - start_time:.2f} seconds.")
return [
OptimizedCandidate(
source_code=opt["source_code"],
source_code=CodeStringsMarkdown.parse_markdown_code(opt["source_code"]),
explanation=opt["explanation"],
optimization_id=opt["optimization_id"],
)
Expand Down Expand Up @@ -206,7 +206,7 @@ def optimize_python_code_line_profiler( # noqa: D417
console.rule()
return [
OptimizedCandidate(
source_code=opt["source_code"],
source_code=CodeStringsMarkdown.parse_markdown_code(opt["source_code"]),
explanation=opt["explanation"],
optimization_id=opt["optimization_id"],
)
Expand Down Expand Up @@ -263,7 +263,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
console.rule()
return [
OptimizedCandidate(
source_code=opt["source_code"],
source_code=CodeStringsMarkdown.parse_markdown_code(opt["source_code"]),
explanation=opt["explanation"],
optimization_id=opt["optimization_id"][:-4] + "refi",
)
Expand Down
22 changes: 18 additions & 4 deletions codeflash/code_utils/code_replacer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pathlib import Path

from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.models.models import CodeOptimizationContext, OptimizedCandidate, ValidCode
from codeflash.models.models import CodeOptimizationContext, CodeStringsMarkdown, OptimizedCandidate, ValidCode

ASTNodeT = TypeVar("ASTNodeT", bound=ast.AST)

Expand Down Expand Up @@ -408,16 +408,17 @@ def replace_functions_and_add_imports(

def replace_function_definitions_in_module(
function_names: list[str],
optimized_code: str,
optimized_code: CodeStringsMarkdown,
module_abspath: Path,
preexisting_objects: set[tuple[str, tuple[FunctionParent, ...]]],
project_root_path: Path,
) -> bool:
source_code: str = module_abspath.read_text(encoding="utf8")
code_to_apply = get_optimized_code_for_module(module_abspath.relative_to(project_root_path), optimized_code)
new_code: str = replace_functions_and_add_imports(
add_global_assignments(optimized_code, source_code),
add_global_assignments(code_to_apply, source_code),
function_names,
optimized_code,
code_to_apply,
module_abspath,
preexisting_objects,
project_root_path,
Expand All @@ -428,6 +429,19 @@ def replace_function_definitions_in_module(
return True


def get_optimized_code_for_module(relative_path: Path, optimized_code: CodeStringsMarkdown) -> str:
    """Return the optimized code that belongs to a single module.

    The lookup key is ``str(relative_path)`` in the mapping returned by
    ``optimized_code.file_to_path()``.

    Args:
        relative_path: Path of the module used as the lookup key.
        optimized_code: Optimized code for one or more files.

    Returns:
        The code stored for ``relative_path``. When no entry exists, a warning
        is logged and an empty string is returned.

    """
    file_to_code_context = optimized_code.file_to_path()
    module_optimized_code = file_to_code_context.get(str(relative_path))
    if module_optimized_code is None:
        # Adjacent string literals are concatenated verbatim, so each piece must
        # carry its own separator; otherwise the sentences run into each other.
        logger.warning(
            f"Optimized code not found for {relative_path} in the context\n-------\n{optimized_code}\n-------\n"
            "Re-check your 'markdown code structure'; "
            f"existing files are {list(file_to_code_context)}"
        )
        return ""
    return module_optimized_code


def is_zero_diff(original_code: str, new_code: str) -> bool:
    """Report whether both code strings are equal after each is passed through ``normalize_code``."""
    normalized_original = normalize_code(original_code)
    normalized_new = normalize_code(new_code)
    return normalized_original == normalized_new

Expand Down
4 changes: 2 additions & 2 deletions codeflash/code_utils/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def is_diff_line(line: str) -> bool:
def format_code(
formatter_cmds: list[str],
path: Union[str, Path],
optimized_function: str = "",
optimized_code: str = "",
check_diff: bool = False, # noqa
print_status: bool = True, # noqa
exit_on_failure: bool = True, # noqa
Expand All @@ -121,7 +121,7 @@ def format_code(

if check_diff and original_code_lines > 50:
# we don't count the formatting diff for the optimized function as it should be well-formatted
original_code_without_opfunc = original_code.replace(optimized_function, "")
original_code_without_opfunc = original_code.replace(optimized_code, "")

original_temp = Path(test_dir_str) / "original_temp.py"
original_temp.write_text(original_code_without_opfunc, encoding="utf8")
Expand Down
9 changes: 5 additions & 4 deletions codeflash/context/code_context_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,14 @@ def get_code_optimization_context(
)

# Extract code context for optimization
final_read_writable_code = extract_code_string_context_from_files(
final_read_writable_code = extract_code_markdown_context_from_files(
helpers_of_fto_dict,
{},
project_root_path,
remove_docstrings=False,
code_context_type=CodeContextType.READ_WRITABLE,
).code
)

read_only_code_markdown = extract_code_markdown_context_from_files(
helpers_of_fto_dict,
helpers_of_helpers_dict,
Expand All @@ -84,14 +85,14 @@ def get_code_optimization_context(
)

# Handle token limits
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code)
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.markdown)
if final_read_writable_tokens > optim_token_limit:
raise ValueError("Read-writable code has exceeded token limit, cannot proceed")

# Setup preexisting objects for code replacer
preexisting_objects = set(
chain(
find_preexisting_objects(final_read_writable_code),
*(find_preexisting_objects(codestring.code) for codestring in final_read_writable_code.code_strings),
*(find_preexisting_objects(codestring.code) for codestring in read_only_code_markdown.code_strings),
)
)
Expand Down
25 changes: 19 additions & 6 deletions codeflash/context/unused_definition_remover.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@
import ast
from collections import defaultdict
from dataclasses import dataclass, field
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pathlib import Path
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Optional

import libcst as cst

from codeflash.cli_cmds.console import logger
from codeflash.code_utils.code_replacer import replace_function_definitions_in_module
from codeflash.models.models import CodeString, CodeStringsMarkdown

if TYPE_CHECKING:
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
Expand Down Expand Up @@ -530,7 +529,11 @@ def revert_unused_helper_functions(
helper_names = [helper.qualified_name for helper in helpers_in_file]
reverted_code = replace_function_definitions_in_module(
function_names=helper_names,
optimized_code=original_code, # Use original code as the "optimized" code to revert
optimized_code=CodeStringsMarkdown(
code_strings=[
CodeString(code=original_code, file_path=Path(file_path).relative_to(project_root))
]
), # Use original code as the "optimized" code to revert
module_abspath=file_path,
preexisting_objects=set(), # Empty set since we're reverting
project_root_path=project_root,
Expand Down Expand Up @@ -609,7 +612,9 @@ def _analyze_imports_in_optimized_code(


def detect_unused_helper_functions(
function_to_optimize: FunctionToOptimize, code_context: CodeOptimizationContext, optimized_code: str
function_to_optimize: FunctionToOptimize,
code_context: CodeOptimizationContext,
optimized_code: str | CodeStringsMarkdown,
) -> list[FunctionSource]:
"""Detect helper functions that are no longer called by the optimized entrypoint function.

Expand All @@ -622,6 +627,14 @@ def detect_unused_helper_functions(
List of FunctionSource objects representing unused helper functions

"""
if isinstance(optimized_code, CodeStringsMarkdown) and len(optimized_code.code_strings) > 0:
return list(
chain.from_iterable(
detect_unused_helper_functions(function_to_optimize, code_context, code.code)
for code in optimized_code.code_strings
)
)

try:
# Parse the optimized code to analyze function calls and imports
optimized_ast = ast.parse(optimized_code)
Expand Down
4 changes: 2 additions & 2 deletions codeflash/lsp/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization
generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests
]
optimizations_dict = {
candidate.optimization_id: {"source_code": candidate.source_code, "explanation": candidate.explanation}
candidate.optimization_id: {"source_code": candidate.source_code.markdown, "explanation": candidate.explanation}
for candidate in optimizations_set.control + optimizations_set.experiment
}

Expand Down Expand Up @@ -330,7 +330,7 @@ def perform_function_optimization( # noqa: PLR0911
"message": f"No best optimizations found for function {function_to_optimize_qualified_name}",
}

optimized_source = best_optimization.candidate.source_code
optimized_source = best_optimization.candidate.source_code.markdown
speedup = original_code_baseline.runtime / best_optimization.runtime

server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")
Expand Down
81 changes: 77 additions & 4 deletions codeflash/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing import Annotated, Optional, cast

from jedi.api.classes import Name
from pydantic import AfterValidator, BaseModel, ConfigDict, Field
from pydantic import AfterValidator, BaseModel, ConfigDict, PrivateAttr
from pydantic.dataclasses import dataclass

from codeflash.cli_cmds.console import console, logger
Expand Down Expand Up @@ -157,23 +157,96 @@ class CodeString(BaseModel):
file_path: Optional[Path] = None


def get_code_block_splitter(file_path: Path) -> str:
    """Build the ``# file: <path>`` comment line that labels a code block with its file path."""
    marker_prefix = "# file: "
    return marker_prefix + format(file_path)


# Matches one fenced block whose opening fence line is "```python:<path>" and whose closing
# fence is "```". Group 1 is the text after "python:" on the fence line; group 2 is the block
# body (non-greedy, DOTALL so it may span several lines). Fences without ":<path>" do not match.
markdown_pattern = re.compile(r"```python:([^\n]+)\n(.*?)\n```", re.DOTALL)


class CodeStringsMarkdown(BaseModel):
code_strings: list[CodeString] = []
_cache: dict = PrivateAttr(default_factory=dict)

@property
def flat(self) -> str:
    """Return all code blocks joined into a single string ("flat" form).

    Each block is rendered as the comment line produced by
    ``get_code_block_splitter`` for its file path, followed by the block's
    code; the rendered blocks are then joined with newlines. The result is
    stored in ``self._cache`` under ``"flat"`` and reused on later accesses.

    Returns:
        str: The concatenated code of all blocks with file path annotations.

    !! Important !!:
        Avoid parsing the flat code when it contains multiple files;
        parsing may result in unexpected behavior.

    """
    cached = self._cache.get("flat")
    if cached is not None:
        return cached
    rendered_blocks = [
        "\n".join([get_code_block_splitter(code_string.file_path), code_string.code])
        for code_string in self.code_strings
    ]
    self._cache["flat"] = "\n".join(rendered_blocks)
    return self._cache["flat"]

@property
def markdown(self) -> str:
    """Render all code blocks as fenced Markdown.

    Every block becomes a triple-backtick ``python`` fence. When the block has
    a file path, the opening fence line is ``python:<path>``; otherwise it is
    plain ``python``. Block code is stripped of surrounding whitespace, and the
    fences are joined with newlines.

    Returns:
        str: Markdown representation of the code blocks.

    """
    fences = []
    for code_string in self.code_strings:
        path_suffix = f":{code_string.file_path}" if code_string.file_path else ""
        fences.append(f"```python{path_suffix}\n{code_string.code.strip()}\n```")
    return "\n".join(fences)

def file_to_path(self) -> dict[str, str]:
    """Map each block's file path (as a string) to that block's code.

    Despite the method name, keys are ``str(file_path)`` and values are code.
    The mapping is built on first use, stored in ``self._cache`` under
    ``"file_to_path"`` and returned unchanged on later calls. A block whose
    ``file_path`` is ``None`` ends up under the key ``"None"``; when several
    blocks share a path, the last one wins.

    Returns:
        dict[str, str]: Mapping from file path (as string) to code.

    """
    mapping = self._cache.get("file_to_path")
    if mapping is None:
        mapping = {}
        for code_string in self.code_strings:
            mapping[str(code_string.file_path)] = code_string.code
        self._cache["file_to_path"] = mapping
    return mapping

@staticmethod
def parse_markdown_code(markdown_code: str) -> CodeStringsMarkdown:
    """Build a CodeStringsMarkdown from a Markdown string of fenced code blocks.

    Every match of ``markdown_pattern`` contributes one ``CodeString``: the
    first captured group (stripped of surrounding whitespace) becomes the file
    path and the second becomes the code. Text that the pattern does not match
    is ignored.

    Args:
        markdown_code (str): The Markdown-formatted string to parse.

    Returns:
        CodeStringsMarkdown: Parsed object containing code blocks, in the order
        they appear in ``markdown_code``.

    """
    parsed = CodeStringsMarkdown()
    for match in markdown_pattern.finditer(markdown_code):
        raw_path, code = match.groups()
        parsed.code_strings.append(CodeString(code=code, file_path=Path(raw_path.strip())))
    return parsed


class CodeOptimizationContext(BaseModel):
testgen_context_code: str = ""
read_writable_code: str = Field(min_length=1)
read_writable_code: CodeStringsMarkdown
read_only_context_code: str = ""
hashing_code_context: str = ""
hashing_code_context_hash: str = ""
Expand Down Expand Up @@ -272,7 +345,7 @@ class TestsInFile:

@dataclass(frozen=True)
class OptimizedCandidate:
source_code: str
source_code: CodeStringsMarkdown
explanation: str
optimization_id: str

Expand Down
Loading
Loading