Skip to content

Commit c35705e

Browse files
Merge branch 'worktree/persist-optimization-patches' of github.com:codeflash-ai/codeflash into codeflash/optimize-pr690-2025-08-27T15.58.44
2 parents 503fa94 + 0de7ebd commit c35705e

File tree

4 files changed

+180
-154
lines changed

4 files changed

+180
-154
lines changed

codeflash/code_utils/git_utils.py

Lines changed: 2 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,24 @@
11
from __future__ import annotations
22

3-
import json
43
import os
54
import shutil
65
import subprocess
76
import sys
87
import tempfile
98
import time
10-
from functools import cache, lru_cache
9+
from functools import cache
1110
from io import StringIO
1211
from pathlib import Path
13-
from typing import TYPE_CHECKING, Optional
12+
from typing import TYPE_CHECKING
1413

1514
import git
16-
from filelock import FileLock
1715
from rich.prompt import Confirm
1816
from unidiff import PatchSet
1917

2018
from codeflash.cli_cmds.console import logger
21-
from codeflash.code_utils.compat import codeflash_cache_dir
2219
from codeflash.code_utils.config_consts import N_CANDIDATES
2320

2421
if TYPE_CHECKING:
25-
from typing import Any
26-
2722
from git import Repo
2823

2924

@@ -197,145 +192,3 @@ def get_last_commit_author_if_pr_exists(repo: Repo | None = None) -> str | None:
197192
return None
198193
else:
199194
return last_commit.author.name
200-
201-
202-
worktree_dirs = codeflash_cache_dir / "worktrees"
203-
patches_dir = codeflash_cache_dir / "patches"
204-
205-
206-
@lru_cache(maxsize=1)
def get_git_project_id() -> str:
    """Return the hexsha of the first parentless commit reachable from HEAD.

    The repository is located by searching parent directories of the current
    working directory. The result is memoized for the life of the process.
    """
    current_repo: Repo = git.Repo(search_parent_directories=True)
    # max_parents=0 restricts the walk to commits that have no parent (root commits).
    parentless_commits = list(current_repo.iter_commits(rev="HEAD", max_parents=0))
    first_root = parentless_commits[0]
    return first_root.hexsha
212-
213-
214-
def create_worktree_snapshot_commit(worktree_dir: Path, commit_message: str) -> None:
    """Stage every change in the worktree and record it as a single commit.

    Args:
        worktree_dir: A directory inside the worktree to snapshot.
        commit_message: Message stored on the snapshot commit.
    """
    worktree_git = git.Repo(worktree_dir, search_parent_directories=True).git
    worktree_git.add(".")
    # --no-verify: skip the pre-commit and commit-msg hooks for this internal commit.
    worktree_git.commit("-m", commit_message, "--no-verify")
218-
219-
220-
def create_detached_worktree(module_root: Path) -> Optional[Path]:
    """Create a detached git worktree that mirrors the current working tree.

    A worktree is added under ``worktree_dirs`` at a detached HEAD. The
    repository's uncommitted changes (binary file types excluded) are then
    applied to it with ``git apply`` and committed as "Initial Snapshot".

    Args:
        module_root: Path used to check that we are running inside a git repository.

    Returns:
        The new worktree path, or ``None`` when ``module_root`` is not inside a
        git repository. The path is still returned when ``git apply`` fails;
        that failure is only logged.
    """
    if not check_running_in_git_repo(module_root):
        logger.warning("Module is not in a git repository. Skipping worktree creation.")
        return None
    git_root = git_root_dir()
    current_time_str = time.strftime("%Y%m%d-%H%M%S")
    worktree_dir = worktree_dirs / f"{git_root.name}-{current_time_str}"

    repository = git.Repo(git_root, search_parent_directories=True)

    repository.git.worktree("add", "-d", str(worktree_dir))

    # Get uncommitted diff from the original repo
    repository.git.add("-N", ".")  # add the index for untracked files to be included in the diff
    exclude_binary_files = [":!*.pyc", ":!*.pyo", ":!*.pyd", ":!*.so", ":!*.dll", ":!*.whl", ":!*.egg", ":!*.egg-info", ":!*.pyz", ":!*.pkl", ":!*.pickle", ":!*.joblib", ":!*.npy", ":!*.npz", ":!*.h5", ":!*.hdf5", ":!*.pth", ":!*.pt", ":!*.pb", ":!*.onnx", ":!*.db", ":!*.sqlite", ":!*.sqlite3", ":!*.feather", ":!*.parquet", ":!*.jpg", ":!*.jpeg", ":!*.png", ":!*.gif", ":!*.bmp", ":!*.tiff", ":!*.webp", ":!*.wav", ":!*.mp3", ":!*.ogg", ":!*.flac", ":!*.mp4", ":!*.avi", ":!*.mov", ":!*.mkv", ":!*.pdf", ":!*.doc", ":!*.docx", ":!*.xls", ":!*.xlsx", ":!*.ppt", ":!*.pptx", ":!*.zip", ":!*.rar", ":!*.tar", ":!*.tar.gz", ":!*.tgz", ":!*.bz2", ":!*.xz"]  # fmt: off
    uni_diff_text = repository.git.diff(
        None, "HEAD", "--", *exclude_binary_files, ignore_blank_lines=True, ignore_space_at_eol=True
    )

    if not uni_diff_text.strip():
        logger.info("No uncommitted changes to copy to worktree.")
        return worktree_dir

    # Write the diff to a temporary file. The encoding is explicit so that non-ASCII
    # diff content does not depend on the platform's default locale encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".codeflash.patch", delete=False, encoding="utf-8"
    ) as tmp_patch_file:
        tmp_patch_file.write(uni_diff_text + "\n")  # the new line here is a must otherwise the last hunk won't be valid
        tmp_patch_file.flush()

    patch_path = Path(tmp_patch_file.name).resolve()

    # Apply the patch inside the worktree
    try:
        subprocess.run(
            ["git", "apply", "--ignore-space-change", "--ignore-whitespace", "--whitespace=nowarn", patch_path],
            cwd=worktree_dir,
            check=True,
        )
        create_worktree_snapshot_commit(worktree_dir, "Initial Snapshot")
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to apply patch to worktree: {e}")
    finally:
        # The file was created with delete=False, so it must be removed explicitly.
        patch_path.unlink(missing_ok=True)

    return worktree_dir
262-
263-
264-
def remove_worktree(worktree_dir: Path) -> None:
    """Force-remove a git worktree; any failure is logged instead of raised.

    Args:
        worktree_dir: Path of the worktree to remove.
    """
    try:
        worktree_git = git.Repo(worktree_dir, search_parent_directories=True).git
        worktree_git.worktree("remove", "--force", worktree_dir)
    except Exception:
        # Best-effort cleanup: record the traceback and carry on.
        logger.exception(f"Failed to remove worktree: {worktree_dir}")
270-
271-
272-
@lru_cache(maxsize=1)
def get_patches_dir_for_project() -> Path:
    """Return the patches directory for the current project (memoized).

    The directory is ``patches_dir`` joined with the project id from
    ``get_git_project_id``; a falsy id is replaced by an empty string.
    """
    subdir_name = get_git_project_id()
    if not subdir_name:
        subdir_name = ""
    return Path(patches_dir / subdir_name)
276-
277-
278-
def get_patches_metadata() -> dict[str, Any]:
    """Load the project's ``metadata.json``, or build an empty record if it is absent.

    Returns:
        The parsed JSON content when the file exists; otherwise a fresh dict with
        the project id under ``"id"`` and an empty ``"patches"`` list.
    """
    metadata_path = get_patches_dir_for_project() / "metadata.json"
    if not metadata_path.exists():
        return {"id": get_git_project_id() or "", "patches": []}
    with metadata_path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
285-
286-
287-
def save_patches_metadata(patch_metadata: dict) -> dict:
    """Append one patch entry to the project's ``metadata.json``.

    Args:
        patch_metadata: Entry to store. It is modified in place: an ``"id"`` key
            holding the current timestamp is set before it is appended.

    Returns:
        The same ``patch_metadata`` object, now carrying its ``"id"``.
    """
    patches_root = get_patches_dir_for_project()
    metadata_path = patches_root / "metadata.json"
    lock_path = patches_root / "metadata.json.lock"

    # Concurrent optimizations within one process are not supported today; the
    # lock is kept in case that changes in the future.
    with FileLock(lock_path, timeout=10):
        stored = get_patches_metadata()

        patch_metadata["id"] = time.strftime("%Y%m%d-%H%M%S")
        stored["patches"].append(patch_metadata)

        serialized = json.dumps(stored, indent=2)
        metadata_path.write_text(serialized)

    return patch_metadata
302-
303-
304-
def overwrite_patch_metadata(patches: list[dict]) -> bool:
    """Replace the stored patch list in the project's ``metadata.json``.

    Args:
        patches: Entries that become the new ``"patches"`` value.

    Returns:
        Always ``True``; failures surface as exceptions.
    """
    patches_root = get_patches_dir_for_project()
    metadata_path = patches_root / "metadata.json"
    lock_path = patches_root / "metadata.json.lock"

    # The read-modify-write happens while the lock file is held.
    with FileLock(lock_path, timeout=10):
        stored = get_patches_metadata()
        stored["patches"] = patches
        serialized = json.dumps(stored, indent=2)
        metadata_path.write_text(serialized)
    return True
314-
315-
316-
def create_diff_patch_from_worktree(
    worktree_dir: Path, files: list[str], metadata_input: dict[str, Any]
) -> dict[str, Any]:
    """Write the worktree's diff against HEAD for ``files`` to a patch file.

    Args:
        worktree_dir: Worktree whose changes are diffed; its directory name
            prefixes the patch file name.
        files: Paths passed to ``git diff`` to restrict the diff.
        metadata_input: Extra metadata for the patch. ``"fto_name"`` (if present)
            is used in the patch file name, otherwise ``"unknown"``. When
            non-empty, the dict is updated in place with ``"patch_path"`` and
            persisted through ``save_patches_metadata``.

    Returns:
        ``{}`` when the diff is empty. Otherwise a dict that always contains
        ``"patch_path"``: the saved metadata when ``metadata_input`` is
        non-empty, or just the patch path when it is empty.
    """
    repository = git.Repo(worktree_dir, search_parent_directories=True)
    uni_diff_text = repository.git.diff(None, "HEAD", *files, ignore_blank_lines=True, ignore_space_at_eol=True)

    if not uni_diff_text:
        logger.warning("No changes found in worktree.")
        return {}

    if not uni_diff_text.endswith("\n"):
        uni_diff_text += "\n"

    project_patches_dir = get_patches_dir_for_project()
    project_patches_dir.mkdir(parents=True, exist_ok=True)

    # Callers may pass an empty dict, so a missing "fto_name" must not raise.
    function_name = (metadata_input or {}).get("fto_name") or "unknown"
    patch_path = project_patches_dir / f"{worktree_dir.name}.{function_name}.patch"
    with patch_path.open("w", encoding="utf8") as f:
        f.write(uni_diff_text)

    if not metadata_input:
        # Nothing to persist, but the caller still needs to know where the patch is.
        return {"patch_path": str(patch_path)}

    metadata_input["patch_path"] = str(patch_path)
    return save_patches_metadata(metadata_input)
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
from __future__ import annotations
2+
3+
import json
4+
import subprocess
5+
import tempfile
6+
import time
7+
from functools import lru_cache
8+
from pathlib import Path
9+
from typing import TYPE_CHECKING, Optional
10+
11+
import git
12+
from filelock import FileLock
13+
14+
from codeflash.cli_cmds.console import logger
15+
from codeflash.code_utils.compat import codeflash_cache_dir
16+
from codeflash.code_utils.git_utils import check_running_in_git_repo, git_root_dir
17+
18+
if TYPE_CHECKING:
19+
from typing import Any
20+
21+
from git import Repo
22+
23+
24+
worktree_dirs = codeflash_cache_dir / "worktrees"
25+
patches_dir = codeflash_cache_dir / "patches"
26+
27+
if TYPE_CHECKING:
28+
from git import Repo
29+
30+
31+
@lru_cache(maxsize=1)
def get_git_project_id() -> str:
    """Return the hexsha of the first parentless commit reachable from HEAD.

    The repository is located by searching parent directories of the current
    working directory. The result is memoized for the life of the process.
    """
    current_repo: Repo = git.Repo(search_parent_directories=True)
    # max_parents=0 restricts the walk to commits that have no parent (root commits).
    parentless_commits = list(current_repo.iter_commits(rev="HEAD", max_parents=0))
    first_root = parentless_commits[0]
    return first_root.hexsha
37+
38+
39+
def create_worktree_snapshot_commit(worktree_dir: Path, commit_message: str) -> None:
    """Stage every change in the worktree and record it as a single commit.

    Args:
        worktree_dir: A directory inside the worktree to snapshot.
        commit_message: Message stored on the snapshot commit.
    """
    worktree_git = git.Repo(worktree_dir, search_parent_directories=True).git
    worktree_git.add(".")
    # --no-verify: skip the pre-commit and commit-msg hooks for this internal commit.
    worktree_git.commit("-m", commit_message, "--no-verify")
43+
44+
45+
def create_detached_worktree(module_root: Path) -> Optional[Path]:
    """Create a detached git worktree that mirrors the current working tree.

    A worktree is added under ``worktree_dirs`` at a detached HEAD. The
    repository's uncommitted changes (binary file types excluded) are then
    applied to it with ``git apply`` and committed as "Initial Snapshot".

    Args:
        module_root: Path used to check that we are running inside a git repository.

    Returns:
        The new worktree path, or ``None`` when ``module_root`` is not inside a
        git repository. The path is still returned when ``git apply`` fails;
        that failure is only logged.
    """
    if not check_running_in_git_repo(module_root):
        logger.warning("Module is not in a git repository. Skipping worktree creation.")
        return None
    git_root = git_root_dir()
    current_time_str = time.strftime("%Y%m%d-%H%M%S")
    worktree_dir = worktree_dirs / f"{git_root.name}-{current_time_str}"

    repository = git.Repo(git_root, search_parent_directories=True)

    repository.git.worktree("add", "-d", str(worktree_dir))

    # Get uncommitted diff from the original repo
    repository.git.add("-N", ".")  # add the index for untracked files to be included in the diff
    exclude_binary_files = [":!*.pyc", ":!*.pyo", ":!*.pyd", ":!*.so", ":!*.dll", ":!*.whl", ":!*.egg", ":!*.egg-info", ":!*.pyz", ":!*.pkl", ":!*.pickle", ":!*.joblib", ":!*.npy", ":!*.npz", ":!*.h5", ":!*.hdf5", ":!*.pth", ":!*.pt", ":!*.pb", ":!*.onnx", ":!*.db", ":!*.sqlite", ":!*.sqlite3", ":!*.feather", ":!*.parquet", ":!*.jpg", ":!*.jpeg", ":!*.png", ":!*.gif", ":!*.bmp", ":!*.tiff", ":!*.webp", ":!*.wav", ":!*.mp3", ":!*.ogg", ":!*.flac", ":!*.mp4", ":!*.avi", ":!*.mov", ":!*.mkv", ":!*.pdf", ":!*.doc", ":!*.docx", ":!*.xls", ":!*.xlsx", ":!*.ppt", ":!*.pptx", ":!*.zip", ":!*.rar", ":!*.tar", ":!*.tar.gz", ":!*.tgz", ":!*.bz2", ":!*.xz"]  # fmt: off
    uni_diff_text = repository.git.diff(
        None, "HEAD", "--", *exclude_binary_files, ignore_blank_lines=True, ignore_space_at_eol=True
    )

    if not uni_diff_text.strip():
        logger.info("No uncommitted changes to copy to worktree.")
        return worktree_dir

    # Write the diff to a temporary file. The encoding is explicit so that non-ASCII
    # diff content does not depend on the platform's default locale encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".codeflash.patch", delete=False, encoding="utf-8"
    ) as tmp_patch_file:
        tmp_patch_file.write(uni_diff_text + "\n")  # the new line here is a must otherwise the last hunk won't be valid
        tmp_patch_file.flush()

    patch_path = Path(tmp_patch_file.name).resolve()

    # Apply the patch inside the worktree
    try:
        subprocess.run(
            ["git", "apply", "--ignore-space-change", "--ignore-whitespace", "--whitespace=nowarn", patch_path],
            cwd=worktree_dir,
            check=True,
        )
        create_worktree_snapshot_commit(worktree_dir, "Initial Snapshot")
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to apply patch to worktree: {e}")
    finally:
        # The file was created with delete=False, so it must be removed explicitly.
        patch_path.unlink(missing_ok=True)

    return worktree_dir
87+
88+
89+
def remove_worktree(worktree_dir: Path) -> None:
    """Force-remove a git worktree; any failure is logged instead of raised.

    Args:
        worktree_dir: Path of the worktree to remove.
    """
    try:
        worktree_git = git.Repo(worktree_dir, search_parent_directories=True).git
        worktree_git.worktree("remove", "--force", worktree_dir)
    except Exception:
        # Best-effort cleanup: record the traceback and carry on.
        logger.exception(f"Failed to remove worktree: {worktree_dir}")
95+
96+
97+
@lru_cache(maxsize=1)
def get_patches_dir_for_project() -> Path:
    """Return the patches directory for the current project (memoized).

    The directory is ``patches_dir`` joined with the project id from
    ``get_git_project_id``; a falsy id is replaced by an empty string.
    """
    subdir_name = get_git_project_id()
    if not subdir_name:
        subdir_name = ""
    return Path(patches_dir / subdir_name)
101+
102+
103+
def get_patches_metadata() -> dict[str, Any]:
    """Load the project's ``metadata.json``, or build an empty record if it is absent.

    Returns:
        The parsed JSON content when the file exists; otherwise a fresh dict with
        the project id under ``"id"`` and an empty ``"patches"`` list.
    """
    metadata_path = get_patches_dir_for_project() / "metadata.json"
    if not metadata_path.exists():
        return {"id": get_git_project_id() or "", "patches": []}
    with metadata_path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
110+
111+
112+
def save_patches_metadata(patch_metadata: dict) -> dict:
    """Append one patch entry to the project's ``metadata.json``.

    Args:
        patch_metadata: Entry to store. It is modified in place: an ``"id"`` key
            holding the current timestamp is set before it is appended.

    Returns:
        The same ``patch_metadata`` object, now carrying its ``"id"``.
    """
    patches_root = get_patches_dir_for_project()
    metadata_path = patches_root / "metadata.json"
    lock_path = patches_root / "metadata.json.lock"

    # Concurrent optimizations within one process are not supported today; the
    # lock is kept in case that changes in the future.
    with FileLock(lock_path, timeout=10):
        stored = get_patches_metadata()

        patch_metadata["id"] = time.strftime("%Y%m%d-%H%M%S")
        stored["patches"].append(patch_metadata)

        serialized = json.dumps(stored, indent=2)
        metadata_path.write_text(serialized)

    return patch_metadata
127+
128+
129+
def overwrite_patch_metadata(patches: list[dict]) -> bool:
    """Replace the stored patch list in the project's ``metadata.json``.

    Args:
        patches: Entries that become the new ``"patches"`` value.

    Returns:
        Always ``True``; failures surface as exceptions.
    """
    patches_root = get_patches_dir_for_project()
    metadata_path = patches_root / "metadata.json"
    lock_path = patches_root / "metadata.json.lock"

    # The read-modify-write happens while the lock file is held.
    with FileLock(lock_path, timeout=10):
        stored = get_patches_metadata()
        stored["patches"] = patches
        serialized = json.dumps(stored, indent=2)
        metadata_path.write_text(serialized)
    return True
139+
140+
141+
def create_diff_patch_from_worktree(
    worktree_dir: Path,
    files: list[str],
    fto_name: Optional[str] = None,
    metadata_input: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """Write the worktree's diff against HEAD for ``files`` to a patch file.

    Args:
        worktree_dir: Worktree whose changes are diffed; its directory name
            prefixes the patch file name.
        files: Paths passed to ``git diff`` to restrict the diff.
        fto_name: Name used in the patch file name. Falls back to
            ``metadata_input["fto_name"]`` and then to ``"unknown"``.
        metadata_input: Optional extra metadata. When non-empty it is merged
            with the patch path and persisted through ``save_patches_metadata``.
            The dict passed in is not modified.

    Returns:
        ``{}`` when the diff is empty. Otherwise a dict that always contains
        ``"patch_path"``, plus the saved metadata when ``metadata_input`` was
        non-empty.
    """
    repository = git.Repo(worktree_dir, search_parent_directories=True)
    uni_diff_text = repository.git.diff(None, "HEAD", *files, ignore_blank_lines=True, ignore_space_at_eol=True)

    if not uni_diff_text:
        logger.warning("No changes found in worktree.")
        return {}

    if not uni_diff_text.endswith("\n"):
        uni_diff_text += "\n"

    project_patches_dir = get_patches_dir_for_project()
    project_patches_dir.mkdir(parents=True, exist_ok=True)

    # metadata_input defaults to None, so normalise it before reading from it.
    extra_metadata = metadata_input or {}
    final_function_name = fto_name or extra_metadata.get("fto_name") or "unknown"
    patch_path = project_patches_dir / f"{worktree_dir.name}.{final_function_name}.patch"
    with patch_path.open("w", encoding="utf8") as f:
        f.write(uni_diff_text)

    if not extra_metadata:
        return {"patch_path": str(patch_path)}

    # The file just written is authoritative: a stale "patch_path" supplied by
    # the caller must not replace it.
    final_metadata = {**extra_metadata, "patch_path": str(patch_path)}
    return save_patches_metadata(final_metadata)

codeflash/lsp/beta.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from codeflash.api.cfapi import get_codeflash_api_key, get_user_id
1313
from codeflash.cli_cmds.cli import process_pyproject_config
14-
from codeflash.code_utils.git_utils import (
14+
from codeflash.code_utils.git_worktree_utils import (
1515
create_diff_patch_from_worktree,
1616
get_patches_metadata,
1717
overwrite_patch_metadata,
@@ -244,9 +244,9 @@ def on_patch_applied(_server: CodeflashLanguageServer, params: OnPatchAppliedPar
244244
continue
245245
new_patches.append(patch)
246246

247-
overwrite_patch_metadata(new_patches)
248247
# then remove the patch file
249248
if deleted_patch_file:
249+
overwrite_patch_metadata(new_patches)
250250
patch_path = Path(deleted_patch_file)
251251
patch_path.unlink(missing_ok=True)
252252
return {"status": "success"}

codeflash/optimization/optimizer.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
from codeflash.code_utils import env_utils
1616
from codeflash.code_utils.code_utils import cleanup_paths, get_run_tmp_file
1717
from codeflash.code_utils.env_utils import get_pr_number, is_pr_draft
18-
from codeflash.code_utils.git_utils import (
19-
check_running_in_git_repo,
18+
from codeflash.code_utils.git_utils import check_running_in_git_repo
19+
from codeflash.code_utils.git_worktree_utils import (
2020
create_detached_worktree,
2121
create_diff_patch_from_worktree,
2222
create_worktree_snapshot_commit,
@@ -349,7 +349,10 @@ def run(self) -> None:
349349
code_string.file_path for code_string in read_writable_code.code_strings
350350
]
351351
metadata = create_diff_patch_from_worktree(
352-
self.current_worktree, relative_file_paths, metadata_input={}
352+
self.current_worktree,
353+
relative_file_paths,
354+
fto_name=function_to_optimize.qualified_name,
355+
metadata_input={},
353356
)
354357
self.patch_files.append(metadata["patch_path"])
355358
if i < len(functions_to_optimize) - 1:

0 commit comments

Comments
 (0)