Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
216eb7e
start using markdown representation for read writable context
mohammedahmed18 Jul 16, 2025
cc2cc1b
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 24, 2025
083a983
render the code markdown to the console
mohammedahmed18 Jul 24, 2025
4385b8e
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 24, 2025
e504c87
split & apply
mohammedahmed18 Jul 24, 2025
99cd9dc
fix tests for context extractor
mohammedahmed18 Jul 25, 2025
330bf91
fix code replacement tests
mohammedahmed18 Jul 25, 2025
886616f
fix unused helper tests
mohammedahmed18 Jul 25, 2025
d3e5e6f
fix for python 3.9
mohammedahmed18 Jul 25, 2025
f48c77d
flat method rename
mohammedahmed18 Jul 25, 2025
57f8af0
test multifile replacement
mohammedahmed18 Jul 26, 2025
654a6ec
refactoring
mohammedahmed18 Jul 26, 2025
7ff9759
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 28, 2025
af27459
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 29, 2025
86913d4
flatten the context for refinement changes
mohammedahmed18 Jul 29, 2025
8a09c35
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Jul 31, 2025
4edea33
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 1, 2025
84324f8
markdown multi context
mohammedahmed18 Aug 1, 2025
a81b1cc
fix import issue
mohammedahmed18 Aug 1, 2025
6b5c4a5
change the splitter marker
mohammedahmed18 Aug 4, 2025
a1c10a0
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 4, 2025
3eee162
fix markdown context for formatting and more refactoring
mohammedahmed18 Aug 4, 2025
b3bd888
fix splitter pattern
mohammedahmed18 Aug 4, 2025
b6e3c0d
fix unit tests
mohammedahmed18 Aug 4, 2025
307e6bb
revert unwanted changes
mohammedahmed18 Aug 4, 2025
f187456
cleanup
mohammedahmed18 Aug 5, 2025
d53be61
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 5, 2025
07a9365
send&recieve markdown code
mohammedahmed18 Aug 5, 2025
989b1f3
unit tests fixing
mohammedahmed18 Aug 6, 2025
684661e
typo
mohammedahmed18 Aug 6, 2025
6fc8926
Merge branch 'main' of github.com:codeflash-ai/codeflash into feat/ma…
mohammedahmed18 Aug 6, 2025
a7ff701
eleminate the use of flat code for parsing
mohammedahmed18 Aug 6, 2025
c8d4e05
chore: trigger CI
mohammedahmed18 Aug 6, 2025
ef083ec
remove comments
mohammedahmed18 Aug 7, 2025
5573c46
Merge branch 'main' into feat/markdown-read-writable-context
mohammedahmed18 Aug 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion code_to_optimize/bubble_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ def sorter(arr):
arr[j] = arr[j + 1]
arr[j + 1] = temp
print(f"result: {arr}")
return arr
return arr
6 changes: 0 additions & 6 deletions code_to_optimize/code_directories/circular_deps/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,2 @@
DEFAULT_API_URL = "https://api.galileo.ai/"
DEFAULT_APP_URL = "https://app.galileo.ai/"


# function_names: GalileoApiClient.get_console_url
# module_abs_path : /home/mohammed/Work/galileo-python/src/galileo/api_client.py
# preexisting_objects: {('GalileoApiClient', ()), ('_set_destination', ()), ('get_console_url', (FunctionParent(name='GalileoApiClient', type='ClassDef'),))}
# project_root_path: /home/mohammed/Work/galileo-python/src
8 changes: 4 additions & 4 deletions codeflash/api/aiservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
from codeflash.models.ExperimentMetadata import ExperimentMetadata
from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate
from codeflash.models.models import AIServiceRefinerRequest, CodeStringsMarkdown, OptimizedCandidate
from codeflash.telemetry.posthog_cf import ph
from codeflash.version import __version__ as codeflash_version

Expand Down Expand Up @@ -136,7 +136,7 @@ def optimize_python_code( # noqa: D417
logger.debug(f"Generating optimizations took {end_time - start_time:.2f} seconds.")
return [
OptimizedCandidate(
source_code=opt["source_code"],
source_code=CodeStringsMarkdown.parse_flattened_code(opt["source_code"]),
explanation=opt["explanation"],
optimization_id=opt["optimization_id"],
)
Expand Down Expand Up @@ -206,7 +206,7 @@ def optimize_python_code_line_profiler( # noqa: D417
console.rule()
return [
OptimizedCandidate(
source_code=opt["source_code"],
source_code=CodeStringsMarkdown.parse_flattened_code(opt["source_code"]),
explanation=opt["explanation"],
optimization_id=opt["optimization_id"],
)
Expand Down Expand Up @@ -263,7 +263,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
console.rule()
return [
OptimizedCandidate(
source_code=opt["source_code"],
source_code=CodeStringsMarkdown.parse_flattened_code(opt["source_code"]),
explanation=opt["explanation"],
optimization_id=opt["optimization_id"][:-4] + "refi",
)
Expand Down
4 changes: 2 additions & 2 deletions codeflash/code_utils/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def is_diff_line(line: str) -> bool:
def format_code(
formatter_cmds: list[str],
path: Union[str, Path],
optimized_function: str = "",
optimized_code: str = "",
check_diff: bool = False, # noqa
print_status: bool = True, # noqa
exit_on_failure: bool = True, # noqa
Expand All @@ -121,7 +121,7 @@ def format_code(

if check_diff and original_code_lines > 50:
# we don't count the formatting diff for the optimized function as it should be well-formatted
original_code_without_opfunc = original_code.replace(optimized_function, "")
original_code_without_opfunc = original_code.replace(optimized_code, "")

original_temp = Path(test_dir_str) / "original_temp.py"
original_temp.write_text(original_code_without_opfunc, encoding="utf8")
Expand Down
11 changes: 6 additions & 5 deletions codeflash/context/code_context_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,14 @@ def get_code_optimization_context(
)

# Extract code context for optimization
final_read_writable_code = extract_code_string_context_from_files(
final_read_writable_code = extract_code_markdown_context_from_files(
helpers_of_fto_dict,
{},
helpers_of_helpers_dict,
project_root_path,
remove_docstrings=False,
code_context_type=CodeContextType.READ_WRITABLE,
).code
)

read_only_code_markdown = extract_code_markdown_context_from_files(
helpers_of_fto_dict,
helpers_of_helpers_dict,
Expand All @@ -84,14 +85,14 @@ def get_code_optimization_context(
)

# Handle token limits
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code)
final_read_writable_tokens = encoded_tokens_len(final_read_writable_code.flat)
if final_read_writable_tokens > optim_token_limit:
raise ValueError("Read-writable code has exceeded token limit, cannot proceed")

# Setup preexisting objects for code replacer
preexisting_objects = set(
chain(
find_preexisting_objects(final_read_writable_code),
find_preexisting_objects(final_read_writable_code.flat),
*(find_preexisting_objects(codestring.code) for codestring in read_only_code_markdown.code_strings),
)
)
Expand Down
4 changes: 2 additions & 2 deletions codeflash/lsp/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def generate_tests(server: CodeflashLanguageServer, params: FunctionOptimization
generated_test.generated_original_test_source for generated_test in generated_tests_list.generated_tests
]
optimizations_dict = {
candidate.optimization_id: {"source_code": candidate.source_code, "explanation": candidate.explanation}
candidate.optimization_id: {"source_code": candidate.source_code.flat, "explanation": candidate.explanation}
for candidate in optimizations_set.control + optimizations_set.experiment
}

Expand Down Expand Up @@ -276,7 +276,7 @@ def perform_function_optimization( # noqa: PLR0911
"message": f"No best optimizations found for function {function_to_optimize_qualified_name}",
}

optimized_source = best_optimization.candidate.source_code
optimized_source = best_optimization.candidate.source_code.flat
speedup = original_code_baseline.runtime / best_optimization.runtime

server.show_message_log(f"Optimization completed for {params.functionName} with {speedup:.2f}x speedup", "Info")
Expand Down
47 changes: 44 additions & 3 deletions codeflash/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing import Annotated, Optional, cast

from jedi.api.classes import Name
from pydantic import AfterValidator, BaseModel, ConfigDict, Field
from pydantic import AfterValidator, BaseModel, ConfigDict, PrivateAttr
from pydantic.dataclasses import dataclass

from codeflash.cli_cmds.console import console, logger
Expand Down Expand Up @@ -157,8 +157,29 @@ class CodeString(BaseModel):
file_path: Optional[Path] = None


# Marker used to separate files when several files are flattened into one string:
# each file's code is preceded by a line made of this prefix immediately followed
# by the file path (built by get_code_block_splitter, matched by splitter_pattern).
LINE_SPLITTER_MARKER_PREFIX = "# --codeflash:file--"


def get_code_block_splitter(file_path: Path) -> str:
    """Return the marker line that introduces the code belonging to ``file_path``.

    The result is ``LINE_SPLITTER_MARKER_PREFIX`` immediately followed by the
    string form of ``file_path``; no trailing newline is added.
    """
    return LINE_SPLITTER_MARKER_PREFIX + str(file_path)


# Matches one marker line: the prefix anchored at the start of a line (re.MULTILINE),
# the rest of that line captured as group 1 (the file path), and the newline that ends it.
# A marker that is not followed by a newline does not match.
# NOTE(review): the pattern contains no ".", so re.DOTALL appears to have no effect here.
splitter_pattern = re.compile(f"^{LINE_SPLITTER_MARKER_PREFIX}([^\n]+)\n", re.MULTILINE | re.DOTALL)


class CodeStringsMarkdown(BaseModel):
code_strings: list[CodeString] = []
_cache: dict = PrivateAttr(default_factory=dict)

@property
def flat(self) -> str:
    """Return all code strings combined into one marker-delimited string.

    "Flat" means a single plain string in which every entry of
    ``code_strings`` appears as a marker line (``get_code_block_splitter``
    applied to the entry's ``file_path``) followed by a newline and the
    entry's code. Entries are joined with a single newline, in list order.

    The result is memoized in ``self._cache["flat"]``; this property never
    refreshes it, so changes made to ``code_strings`` after the first access
    are not reflected.

    NOTE(review): ``CodeString.file_path`` is optional; for an entry without
    a path the marker line ends with the text ``None`` - confirm that callers
    never rely on such entries.
    """
    cached_flat = self._cache.get("flat")
    if cached_flat is not None:
        return cached_flat
    flat_code = "\n".join(
        get_code_block_splitter(block.file_path) + "\n" + block.code for block in self.code_strings
    )
    self._cache["flat"] = flat_code
    return flat_code

@property
def markdown(self) -> str:
Expand All @@ -170,10 +191,30 @@ def markdown(self) -> str:
]
)

def file_to_path(self) -> dict[str, str]:
    """Return a mapping from each file path (as a string) to that file's code.

    Despite the method name, keys are ``str(file_path)`` and values are the
    corresponding ``code``. When several entries share a path, the last one
    wins. The mapping is built once and memoized in
    ``self._cache["file_to_path"]``.
    """
    cached_mapping = self._cache.get("file_to_path")
    if cached_mapping is not None:
        return cached_mapping
    code_by_path = {}
    for entry in self.code_strings:
        code_by_path[str(entry.file_path)] = entry.code
    self._cache["file_to_path"] = code_by_path
    return code_by_path

@staticmethod
def parse_flattened_code(flat_code: str) -> CodeStringsMarkdown:
    """Split a marker-delimited string back into per-file code strings.

    Every match of ``splitter_pattern`` starts a new block: the captured
    path (stripped of surrounding whitespace) becomes ``file_path`` and the
    text up to the next marker (or the end of the input) becomes ``code``,
    with leading newlines removed. Text before the first marker is ignored,
    and input without any marker yields an empty ``CodeStringsMarkdown``.
    """
    markers = list(splitter_pattern.finditer(flat_code))
    # A block ends where the next marker starts; the last block runs to the end of the input.
    block_ends = [marker.start() for marker in markers[1:]]
    block_ends.append(len(flat_code))

    parsed = CodeStringsMarkdown()
    for marker, block_end in zip(markers, block_ends):
        path_text = marker.group(1).strip()
        block_code = flat_code[marker.end() : block_end].lstrip("\n")
        parsed.code_strings.append(CodeString(code=block_code, file_path=Path(path_text)))
    return parsed


class CodeOptimizationContext(BaseModel):
testgen_context_code: str = ""
read_writable_code: str = Field(min_length=1)
read_writable_code: CodeStringsMarkdown
read_only_context_code: str = ""
hashing_code_context: str = ""
hashing_code_context_hash: str = ""
Expand Down Expand Up @@ -272,7 +313,7 @@ class TestsInFile:

@dataclass(frozen=True)
class OptimizedCandidate:
source_code: str
source_code: CodeStringsMarkdown
explanation: str
optimization_id: str

Expand Down
Loading
Loading