Skip to content

Commit 93a4b25

Browse files
committed
feat: migrate test infrastructure to pygit2 for in-process git ops
1 parent 0515242 commit 93a4b25

File tree

7 files changed

+524
-488
lines changed

7 files changed

+524
-488
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ optional-dependencies.dev = [
6767
"mutmut>=3.5,<4.0",
6868
"mypy>=1.0,<2.0",
6969
"pre-commit>=3.0,<5.0",
70+
"pygit2>=1.12,<2.0",
7071
"pyinstaller>=5.0,<7.0",
7172
"pytest>=7.0,<10.0",
7273
"pytest-cov>=3.0,<8.0",
@@ -139,7 +140,7 @@ keep_full_version = true
139140
ini_options.testpaths = [ "tests" ]
140141
ini_options.pythonpath = [ "src" ]
141142
ini_options.norecursedirs = [ "mutants", ".git", ".mypy_cache", ".pytest_cache", "__pycache__", ".hypothesis" ]
142-
ini_options.addopts = "-n auto --dist loadfile"
143+
ini_options.addopts = "-n auto --dist worksteal"
143144

144145
[tool.coverage]
145146
run.branch = true

tests/conftest.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import pytest
1010

11+
from tests.framework.pygit2_backend import Pygit2Repo
12+
1113
# Maximum budget for diff context tests - forces algorithm to actually select
1214
# Set to None to disable budget capping (original behavior)
1315
DIFF_CONTEXT_MAX_BUDGET = int(os.environ.get("DIFF_CONTEXT_MAX_BUDGET", "0"))
@@ -50,6 +52,30 @@ def enhanced_build(*args, **kwargs):
5052
yield
5153

5254

55+
@pytest.fixture(autouse=True)
def _use_pygit2_git(monkeypatch):
    """Route treemapper's git helpers through the in-process pygit2 backend.

    Applied automatically to every test. Each helper named below is replaced
    on ``treemapper.diffctx.git`` and on ``treemapper.diffctx`` wherever the
    attribute exists; ``run_git`` is replaced on the git module only. After
    the test finishes, the backend's repository cache is cleared.
    """
    from tests.framework import pygit2_backend as backend
    from treemapper import diffctx as diffctx_pkg
    from treemapper.diffctx import git as git_module

    patched_names = (
        "parse_diff",
        "get_diff_text",
        "get_changed_files",
        "show_file_at_revision",
        "get_deleted_files",
        "get_renamed_old_paths",
        "get_untracked_files",
        "is_git_repo",
    )

    for module in (git_module, diffctx_pkg):
        for attr in patched_names:
            if not hasattr(module, attr):
                # Only patch names the module actually exposes.
                continue
            monkeypatch.setattr(module, attr, getattr(backend, attr))

    monkeypatch.setattr(git_module, "run_git", backend.run_git)

    yield

    backend.clear_repo_cache()
77+
78+
5379
def _verify_no_garbage_in_context(context: dict) -> None:
5480
all_content = []
5581
for frag in context.get("fragments", []):
@@ -261,14 +287,8 @@ def create_nested(self, depth: int, files_per_level: int = 1) -> None:
261287

262288
@pytest.fixture
263289
def git_repo(tmp_path):
264-
"""Create a real git repository for testing diff-context mode."""
265290
repo_path = tmp_path / "git_test_repo"
266-
repo_path.mkdir()
267-
268-
subprocess.run(["git", "init"], cwd=repo_path, capture_output=True, check=True)
269-
subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=repo_path, capture_output=True, check=True)
270-
subprocess.run(["git", "config", "user.name", "Test User"], cwd=repo_path, capture_output=True, check=True)
271-
291+
Pygit2Repo(repo_path)
272292
return repo_path
273293

274294

tests/framework/pygit2_backend.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
from __future__ import annotations
2+
3+
import re
4+
from pathlib import Path
5+
6+
import pygit2
7+
8+
from treemapper.diffctx.git import GitError, _parse_hunk_header, _parse_path_line
9+
from treemapper.diffctx.types import DiffHunk
10+
11+
# Matches a unified-diff hunk header ("@@ -old[,count] +new[,count] @@") and
# captures the old start, optional old count, new start and optional new count.
_HUNK_RE = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
12+
13+
# Open repository handles keyed by str(repo_root); filled by _get_repo and
# Pygit2Repo.__init__, emptied by clear_repo_cache.
_repo_cache: dict[str, pygit2.Repository] = {}
14+
15+
# Fixed identity used as both author and committer by Pygit2Repo.commit.
_SIGNATURE = pygit2.Signature("Test", "test@test.com")
16+
17+
18+
def _get_repo(repo_root: Path) -> pygit2.Repository:
    """Return the repository handle for ``repo_root``, opening it on first use.

    Handles are memoised in the module-level cache under ``str(repo_root)``.
    """
    cache_key = str(repo_root)
    if cache_key in _repo_cache:
        return _repo_cache[cache_key]

    _repo_cache[cache_key] = pygit2.Repository(cache_key)
    return _repo_cache[cache_key]
23+
24+
25+
def clear_repo_cache() -> None:
    """Forget every repository handle held in the module-level cache."""
    _repo_cache.clear()
27+
28+
29+
def _resolve_commit(repo: pygit2.Repository, rev: str) -> pygit2.Commit:
    """Resolve the revision string ``rev`` to a commit object.

    Tag objects are peeled to the commit they point at.

    Raises:
        GitError: if the resolved object is not (and does not peel to) a commit.
    """
    resolved = repo.revparse_single(rev)
    target = resolved.peel(pygit2.Commit) if isinstance(resolved, pygit2.Tag) else resolved
    if not isinstance(target, pygit2.Commit):
        raise GitError(f"Cannot resolve {rev} to a commit")
    return target
36+
37+
38+
def _is_working_tree_diff(diff_range: str) -> bool:
39+
return ".." not in diff_range
40+
41+
42+
def _resolve_range(repo: pygit2.Repository, diff_range: str) -> tuple[pygit2.Commit, pygit2.Commit | None]:
    """Translate a diff range string into a ``(base, head)`` commit pair.

    Supported forms:

    * ``REV``      -> ``(REV, None)``; the caller diffs against the working tree.
    * ``A..B``     -> ``(A, B)``.
    * ``A...B``    -> ``(merge_base(A, B), B)``, matching ``git diff A...B``.

    An omitted endpoint (``A..``, ``..B``, ``A...``) defaults to ``HEAD``,
    as it does on the git command line.

    Raises:
        GitError: if the range cannot be parsed, an endpoint does not resolve
            to a commit, or a three-dot range has no merge base.
    """
    if _is_working_tree_diff(diff_range):
        return _resolve_commit(repo, diff_range), None

    # Check the three-dot form first: "A...B" also contains "..".
    three_dot = diff_range.split("...")
    if len(three_dot) == 2:
        left = _resolve_commit(repo, three_dot[0] or "HEAD")
        head = _resolve_commit(repo, three_dot[1] or "HEAD")
        base_oid = repo.merge_base(left.id, head.id)
        if base_oid is None:
            raise GitError(f"No merge base for diff range: {diff_range}")
        base = repo.get(base_oid)
        if not isinstance(base, pygit2.Commit):
            raise GitError(f"Cannot resolve merge base for diff range: {diff_range}")
        return base, head

    two_dot = diff_range.split("..")
    if len(two_dot) == 2:
        base = _resolve_commit(repo, two_dot[0] or "HEAD")
        head = _resolve_commit(repo, two_dot[1] or "HEAD")
        return base, head

    raise GitError(f"Cannot parse diff range: {diff_range}")
59+
60+
61+
def _get_diff(repo: pygit2.Repository, diff_range: str, context_lines: int = 3) -> pygit2.Diff:
    """Build a rename-aware diff for ``diff_range``.

    For a single revision, the tree-to-index diff is merged with the
    tree-to-workdir diff. For an ``A..B`` style range, the two commit trees
    are compared directly.

    Args:
        repo: Repository to diff in.
        diff_range: A single revision or a two-/three-dot range.
        context_lines: Number of context lines around each hunk.

    Returns:
        The resulting diff, after ``find_similar()`` has been run on it.
    """
    base, head = _resolve_range(repo, diff_range)
    flags = pygit2.GIT_DIFF_PATIENCE

    if head is None:
        # Working-tree diff. Refresh the index from disk first so staged
        # changes made outside this handle are seen.
        repo.index.read()
        # Pass the same options to the index diff: it is the diff that gets
        # merged onto and returned, so it must honour flags/context_lines too.
        diff = repo.index.diff_to_tree(base.tree, flags=flags, context_lines=context_lines)
        workdir_diff = repo.diff(a=base.tree, flags=flags, context_lines=context_lines)
        diff.merge(workdir_diff)
    else:
        diff = repo.diff(a=base.tree, b=head.tree, flags=flags, context_lines=context_lines)

    diff.find_similar()
    return diff
75+
76+
77+
def parse_diff(repo_root: Path, diff_range: str) -> list[DiffHunk]:
    """Return the hunks of ``diff_range`` as ``DiffHunk`` objects.

    The diff is generated with zero context lines and its patch text is
    scanned line by line. Path lines update the current old/new path; each
    hunk header is attributed to the new path, or to the old path when no
    new path is set.
    """
    patch_text = _get_diff(_get_repo(repo_root), diff_range, context_lines=0).patch or ""

    collected: list[DiffHunk] = []
    old_path: Path | None = None
    new_path: Path | None = None

    for raw_line in patch_text.splitlines():
        kind, parsed_path = _parse_path_line(raw_line, repo_root)
        if kind == "old":
            old_path = parsed_path
        elif kind == "new":
            new_path = parsed_path
        else:
            header = _HUNK_RE.match(raw_line)
            owner = new_path or old_path
            if header and owner:
                collected.append(_parse_hunk_header(header, owner))

    return collected
102+
103+
104+
def get_diff_text(repo_root: Path, diff_range: str) -> str:
    """Return the patch text for ``diff_range``, or an empty string if there is none."""
    return _get_diff(_get_repo(repo_root), diff_range).patch or ""
108+
109+
110+
def get_changed_files(repo_root: Path, diff_range: str) -> list[Path]:
    """List the new-side path of every delta in ``diff_range``, joined onto ``repo_root``.

    Deltas whose new-side path is empty are skipped.
    """
    diff = _get_diff(_get_repo(repo_root), diff_range)
    new_side_paths = (patch.delta.new_file.path for patch in diff)
    return [repo_root / rel for rel in new_side_paths if rel]
119+
120+
121+
def get_deleted_files(repo_root: Path, diff_range: str) -> set[Path]:
    """Return the resolved old-side paths of all deltas marked as deleted."""
    diff = _get_diff(_get_repo(repo_root), diff_range)
    return {
        (repo_root / patch.delta.old_file.path).resolve()
        for patch in diff
        if patch.delta.status == pygit2.GIT_DELTA_DELETED
    }
130+
131+
132+
def get_renamed_old_paths(repo_root: Path, diff_range: str) -> set[Path]:
    """Return the resolved pre-rename paths of all deltas marked as renamed."""
    diff = _get_diff(_get_repo(repo_root), diff_range)
    return {
        (repo_root / patch.delta.old_file.path).resolve()
        for patch in diff
        if patch.delta.status == pygit2.GIT_DELTA_RENAMED
    }
141+
142+
143+
def get_untracked_files(repo_root: Path) -> list[Path]:
    """List paths whose status has the ``GIT_STATUS_WT_NEW`` bit set, joined onto ``repo_root``."""
    status_by_path = _get_repo(repo_root).status()
    return [
        repo_root / rel
        for rel, status_bits in status_by_path.items()
        if status_bits & pygit2.GIT_STATUS_WT_NEW
    ]
150+
151+
152+
def show_file_at_revision(repo_root: Path, rev: str, rel_path: Path) -> str:
    """Return the text of ``rel_path`` as stored at revision ``rev``.

    Args:
        repo_root: Root of the repository to read from.
        rev: Revision string resolved to a commit.
        rel_path: Path relative to the repository root.

    Returns:
        The blob contents decoded as UTF-8; undecodable bytes are replaced.

    Raises:
        GitError: if the path is absent from the commit's tree or the entry
            is not a blob.
    """
    repo = _get_repo(repo_root)
    commit = _resolve_commit(repo, rev)
    try:
        # Tree lookups use forward slashes on every platform.
        entry = commit.tree[rel_path.as_posix()]
    except KeyError as exc:
        # Chain explicitly so the original lookup failure stays attached.
        raise GitError(f"Path {rel_path} not found at revision {rev}") from exc
    blob = repo.get(entry.id)
    if blob is None or not isinstance(blob, pygit2.Blob):
        raise GitError(f"Not a blob: {rel_path} at {rev}")
    return blob.data.decode("utf-8", errors="replace")
163+
164+
165+
def is_git_repo(path: Path) -> bool:
    """Report whether ``path`` can be opened as a repository by pygit2."""
    try:
        pygit2.Repository(str(path))
    except pygit2.GitError:
        return False
    else:
        return True
171+
172+
173+
def run_git(repo_root: Path, args: list[str]) -> str:
    """Stand-in for the subprocess-based ``run_git``; always raises.

    Raises:
        GitError: unconditionally, naming the arguments that were requested.
    """
    message = (
        f"run_git called with args {args} — all git operations should be handled by pygit2 backend. "
        "This indicates a missing pygit2 replacement."
    )
    raise GitError(message)
178+
179+
180+
class Pygit2Repo:
    """Test helper that creates and drives a git repository in-process via pygit2."""

    def __init__(self, path: Path) -> None:
        """Initialise a repository at ``path`` and register it in the repo cache.

        The directory (and any missing parents) is created if needed, and
        ``user.name`` / ``user.email`` are set in the repository config.
        """
        self.path = path
        path.mkdir(parents=True, exist_ok=True)
        self._repo = pygit2.init_repository(str(path))
        self._repo.config["user.name"] = "Test"
        self._repo.config["user.email"] = "test@test.com"
        # Share this handle with the module-level lookup helpers.
        _repo_cache[str(path)] = self._repo

    def add_file(self, rel_path: str, content: str) -> Path:
        """Write ``content`` as UTF-8 text to ``rel_path`` and return the full path.

        Missing parent directories are created. The file is not staged.
        """
        file_path = self.path / rel_path
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content, encoding="utf-8")
        return file_path

    def add_file_binary(self, rel_path: str, data: bytes) -> Path:
        """Write raw ``data`` to ``rel_path`` and return the full path.

        Missing parent directories are created. The file is not staged.
        """
        file_path = self.path / rel_path
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_bytes(data)
        return file_path

    def remove_file(self, rel_path: str) -> None:
        """Delete ``rel_path`` from the working directory if it exists."""
        file_path = self.path / rel_path
        if file_path.exists():
            file_path.unlink()

    def stage_file(self, rel_path: str) -> None:
        """Add ``rel_path`` to the index and write the index to disk."""
        self._repo.index.read()
        self._repo.index.add(rel_path)
        self._repo.index.write()

    def commit(self, message: str) -> str:
        """Stage everything in the working directory and create a commit.

        The first commit creates ``refs/heads/main`` and points HEAD at that
        branch; later commits are written through ``HEAD``.

        Returns:
            The new commit id as a string.
        """
        self._repo.index.read()
        self._repo.index.add_all()
        self._repo.index.write()
        tree_oid = self._repo.index.write_tree()

        try:
            parents = [self._repo.head.peel(pygit2.Commit).id]
        except pygit2.GitError:
            # HEAD cannot be resolved yet, so this is the first commit.
            parents = []
        is_initial = not parents

        oid = self._repo.create_commit(
            "refs/heads/main" if is_initial else "HEAD",
            _SIGNATURE,
            _SIGNATURE,
            message,
            tree_oid,
            parents,
        )

        if is_initial:
            # Pass the reference *name*: giving set_head an Oid would detach
            # HEAD, and later commits through "HEAD" would then never advance
            # refs/heads/main.
            self._repo.set_head("refs/heads/main")

        return str(oid)

tests/framework/runner.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from __future__ import annotations
22

3-
import subprocess
43
from pathlib import Path
54

65
from tests.conftest import GARBAGE_FILES, GARBAGE_MARKERS
6+
from tests.framework.pygit2_backend import Pygit2Repo
77
from tests.framework.scoring import (
88
ScoreBreakdown,
99
check_diff_coverage,
@@ -38,29 +38,13 @@ def _format_fragment_summary(context: dict) -> str:
3838
class YamlTestRunner:
3939
def __init__(self, tmp_path: Path):
4040
self.repo = tmp_path / "test_repo"
41-
self.repo.mkdir()
42-
subprocess.run(["git", "init"], cwd=self.repo, capture_output=True, check=True)
43-
subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=self.repo, capture_output=True, check=True)
44-
subprocess.run(["git", "config", "user.name", "Test"], cwd=self.repo, capture_output=True, check=True)
41+
self._git = Pygit2Repo(self.repo)
4542

4643
def add_file(self, path: str, content: str) -> Path:
47-
file_path = self.repo / path
48-
file_path.parent.mkdir(parents=True, exist_ok=True)
49-
file_path.write_text(content, encoding="utf-8")
50-
return file_path
44+
return self._git.add_file(path, content)
5145

5246
def commit(self, message: str) -> str:
53-
subprocess.run(["git", "add", "-A"], cwd=self.repo, capture_output=True, check=True)
54-
result = subprocess.run(
55-
["git", "commit", "-m", message, "--allow-empty"],
56-
cwd=self.repo,
57-
capture_output=True,
58-
text=True,
59-
)
60-
if result.returncode != 0:
61-
subprocess.run(["git", "commit", "-m", message], cwd=self.repo, capture_output=True, check=True)
62-
rev = subprocess.run(["git", "rev-parse", "HEAD"], cwd=self.repo, capture_output=True, text=True, check=True)
63-
return rev.stdout.strip()
47+
return self._git.commit(message)
6448

6549
def run_test_case(self, case: YamlTestCase) -> dict:
6650
from treemapper.diffctx import build_diff_context

0 commit comments

Comments
 (0)