Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions code_review_graph/tools/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

from __future__ import annotations

import os
import subprocess
import threading
import time
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -58,6 +62,123 @@ def _error_response(
}


_CHANGED_FILES_CACHE_LOCK = threading.Lock()
_CHANGED_FILES_CACHE: dict[tuple[str, str], dict[str, Any]] = {}
_CHANGED_FILES_CACHE_TTL = float(os.environ.get("CRG_CHANGED_FILES_CACHE_TTL", "3.0"))
_FAST_DIFF_TIMEOUT = float(os.environ.get("CRG_FAST_DIFF_TIMEOUT", "5.0"))


def _run_git(root: Path, args: list[str], timeout_s: float) -> subprocess.CompletedProcess:
    """Run a git subcommand in *root* and return the completed process.

    Output is captured as text (UTF-8, undecodable bytes replaced) and stdin
    is closed so git can never block waiting for a prompt.  A non-zero exit
    status is returned to the caller rather than raised (check=False); the
    call may still raise subprocess.TimeoutExpired or FileNotFoundError
    (git not installed).
    """
    cmd = ["git"] + list(args)
    return subprocess.run(
        cmd,
        cwd=str(root),
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        timeout=timeout_s,
        check=False,
    )


def _normalize_changed_files(files: list[str]) -> list[str]:
seen: set[str] = set()
out: list[str] = []
for f in files:
rel = f.strip().replace("\\", "/")
if not rel or rel in seen:
continue
seen.add(rel)
out.append(rel)
return out


def _parse_porcelain_paths(stdout: str) -> list[str]:
files: list[str] = []
for line in stdout.splitlines():
if not line.strip() or len(line) < 4:
continue
path_part = line[3:].strip()
# Handle rename lines: "old/path -> new/path"
if " -> " in path_part:
path_part = path_part.split(" -> ", 1)[1].strip()
files.append(path_part)
return files


def resolve_changed_files(
    root: Path,
    changed_files: list[str] | None,
    base: str,
) -> tuple[list[str], dict[str, Any]]:
    """Resolve changed files with fast auto-detect and a short-lived cache.

    Args:
        root: Repository root directory.
        changed_files: Explicit file list from the caller; when given, git
            is never invoked.
        base: Git ref to diff against during auto-detection.

    Returns:
        (files, meta)
        meta includes:
          - source: explicit | cache | auto
          - auto_detect_timed_out: bool (also True when git is not installed)
          - timeout_seconds: float
          - cache_hit: bool
    """
    # Explicit lists bypass git entirely — normalize and return immediately.
    if changed_files is not None:
        return _normalize_changed_files(changed_files), {
            "source": "explicit",
            "auto_detect_timed_out": False,
            "timeout_seconds": _FAST_DIFF_TIMEOUT,
            "cache_hit": False,
        }

    cache_key = (str(root.resolve()), base)
    with _CHANGED_FILES_CACHE_LOCK:
        cached = _CHANGED_FILES_CACHE.get(cache_key)
        if cached and (time.monotonic() - float(cached["ts"])) <= _CHANGED_FILES_CACHE_TTL:
            return list(cached["files"]), {
                "source": "cache",
                "auto_detect_timed_out": False,
                "timeout_seconds": _FAST_DIFF_TIMEOUT,
                "cache_hit": True,
            }

    timed_out = False
    files: list[str] = []
    try:
        diff = _run_git(root, ["diff", "--name-only", base, "--"], _FAST_DIFF_TIMEOUT)
        if diff.returncode == 0:
            files.extend(diff.stdout.splitlines())
        else:
            # Base ref may be unusable (shallow clone, unborn branch): fall
            # back to staged changes only.
            cached_only = _run_git(
                root,
                ["diff", "--name-only", "--cached"],
                _FAST_DIFF_TIMEOUT,
            )
            if cached_only.returncode == 0:
                files.extend(cached_only.stdout.splitlines())

        status = _run_git(
            root,
            ["status", "--porcelain", "--untracked-files=no"],
            _FAST_DIFF_TIMEOUT,
        )
        if status.returncode == 0:
            files.extend(_parse_porcelain_paths(status.stdout))
    except (FileNotFoundError, subprocess.TimeoutExpired):
        # FileNotFoundError means git itself is missing; it is reported via
        # the same flag so callers prompt for an explicit changed_files list.
        timed_out = True

    normalized = _normalize_changed_files(files)
    # Bug fix: only cache successful detections.  Caching after a timeout
    # would serve an empty "cache" result — with auto_detect_timed_out
    # cleared — for the whole TTL window, hiding the failure from callers.
    if not timed_out:
        with _CHANGED_FILES_CACHE_LOCK:
            # Bug fix: stamp the entry at write time rather than with a
            # timestamp captured before the (possibly slow) git calls ran,
            # so the entry receives its full TTL.
            _CHANGED_FILES_CACHE[cache_key] = {
                "ts": time.monotonic(),
                "files": normalized,
            }

    return normalized, {
        "source": "auto",
        "auto_detect_timed_out": timed_out,
        "timeout_seconds": _FAST_DIFF_TIMEOUT,
        "cache_hit": False,
    }


def _validate_repo_root(path: "Path | str") -> Path:
"""Validate that a path is a plausible project root.

Expand Down
70 changes: 24 additions & 46 deletions code_review_graph/tools/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,13 @@

import logging
import sqlite3
import subprocess
from pathlib import Path
from typing import Any

from ._common import _get_store, compact_response
from ._common import _get_store, compact_response, resolve_changed_files

logger = logging.getLogger(__name__)


def _has_git_changes(root: Path, base: str) -> bool:
"""Quick check for uncommitted or diffed changes."""
try:
result = subprocess.run(
["git", "diff", "--name-only", base, "--"],
capture_output=True, stdin=subprocess.DEVNULL, text=True,
cwd=str(root), timeout=10,
)
if result.returncode == 0 and result.stdout.strip():
return True
# Also check staged/unstaged
result2 = subprocess.run(
["git", "status", "--porcelain"],
capture_output=True, stdin=subprocess.DEVNULL, text=True,
cwd=str(root), timeout=10,
)
return bool(result2.stdout.strip())
except (FileNotFoundError, subprocess.TimeoutExpired):
return False


def get_minimal_context(
task: str = "",
changed_files: list[str] | None = None,
Expand Down Expand Up @@ -62,33 +39,29 @@ def get_minimal_context(
risk_score = 0.0
top_affected: list[str] = []
test_gap_count = 0
if changed_files or _has_git_changes(root, base):
files, detect_meta = resolve_changed_files(root, changed_files, base)
if files:
try:
from ..changes import analyze_changes
from ..incremental import get_changed_files as _get_changed

files = changed_files
if not files:
files = _get_changed(root, base)
if files:
abs_files = [str(root / f) for f in files]
analysis = analyze_changes(
store, abs_files, repo_root=str(root), base=base,
)
risk_score = analysis.get("risk_score", 0.0)
risk = (
"high" if risk_score > 0.7
else "medium" if risk_score > 0.4
else "low"
)
top_affected = [
f.get("name", "")
for f in analysis.get("changed_functions", [])[:5]
]
test_gap_count = len(analysis.get("test_gaps", []))
abs_files = [str(root / f) for f in files]
analysis = analyze_changes(
store, abs_files, repo_root=str(root), base=base,
)
risk_score = analysis.get("risk_score", 0.0)
risk = (
"high" if risk_score > 0.7
else "medium" if risk_score > 0.4
else "low"
)
top_affected = [
f.get("name", "")
for f in analysis.get("changed_functions", [])[:5]
]
test_gap_count = len(analysis.get("test_gaps", []))
except (
ImportError, OSError, ValueError,
sqlite3.Error, subprocess.SubprocessError,
sqlite3.Error,
):
logger.debug("Risk analysis failed in get_minimal_context", exc_info=True)

Expand Down Expand Up @@ -137,6 +110,11 @@ def get_minimal_context(
]
if risk != "unknown":
summary_parts.append(f"Risk: {risk} ({risk_score:.2f}).")
if detect_meta.get("auto_detect_timed_out"):
summary_parts.append(
"Changed-file auto-detection timed out. "
"Pass changed_files for faster review tools."
)
if test_gap_count:
summary_parts.append(f"{test_gap_count} test gaps.")

Expand Down
25 changes: 19 additions & 6 deletions code_review_graph/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from ..embeddings import EmbeddingStore
from ..graph import _sanitize_name, edge_to_dict, node_to_dict
from ..hints import generate_hints, get_session
from ..incremental import get_changed_files, get_db_path, get_staged_and_unstaged
from ..incremental import get_db_path
from ..search import hybrid_search
from ._common import _BUILTIN_CALL_NAMES, _get_store
from ._common import _BUILTIN_CALL_NAMES, _get_store, resolve_changed_files

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -56,12 +56,25 @@ def get_impact_radius(
"""
store, root = _get_store(repo_root)
try:
if changed_files is None:
changed_files = get_changed_files(root, base)
if not changed_files:
changed_files = get_staged_and_unstaged(root)
changed_files, detect_meta = resolve_changed_files(root, changed_files, base)

if not changed_files:
if detect_meta.get("auto_detect_timed_out"):
timeout_s = detect_meta.get("timeout_seconds", 0)
return {
"status": "ok",
"summary": (
"Auto-detection of changed files timed out "
f"after {timeout_s:.1f}s. Pass changed_files explicitly "
"to compute impact radius quickly."
),
"changed_nodes": [],
"impacted_nodes": [],
"impacted_files": [],
"truncated": False,
"total_impacted": 0,
"requires_changed_files": True,
}
return {
"status": "ok",
"summary": "No changed files detected.",
Expand Down
59 changes: 45 additions & 14 deletions code_review_graph/tools/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from ..flows import get_affected_flows as _get_affected_flows
from ..graph import edge_to_dict, node_to_dict
from ..hints import generate_hints, get_session
from ..incremental import get_changed_files, get_staged_and_unstaged
from ._common import _get_store
from ._common import _get_store, resolve_changed_files

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,12 +52,21 @@ def get_review_context(
store, root = _get_store(repo_root)
try:
# Get impact radius first
if changed_files is None:
changed_files = get_changed_files(root, base)
if not changed_files:
changed_files = get_staged_and_unstaged(root)
changed_files, detect_meta = resolve_changed_files(root, changed_files, base)

if not changed_files:
if detect_meta.get("auto_detect_timed_out"):
timeout_s = detect_meta.get("timeout_seconds", 0)
return {
"status": "ok",
"summary": (
"Auto-detection of changed files timed out "
f"after {timeout_s:.1f}s. Pass changed_files explicitly "
"for a fast and reliable review context."
),
"context": {},
"requires_changed_files": True,
}
return {
"status": "ok",
"summary": "No changes detected. Nothing to review.",
Expand Down Expand Up @@ -304,12 +312,22 @@ def get_affected_flows_func(
"""
store, root = _get_store(repo_root)
try:
if changed_files is None:
changed_files = get_changed_files(root, base)
if not changed_files:
changed_files = get_staged_and_unstaged(root)
changed_files, detect_meta = resolve_changed_files(root, changed_files, base)

if not changed_files:
if detect_meta.get("auto_detect_timed_out"):
timeout_s = detect_meta.get("timeout_seconds", 0)
return {
"status": "ok",
"summary": (
"Auto-detection of changed files timed out "
f"after {timeout_s:.1f}s. Pass changed_files explicitly "
"to compute affected flows quickly."
),
"affected_flows": [],
"total": 0,
"requires_changed_files": True,
}
return {
"status": "ok",
"summary": "No changed files detected.",
Expand Down Expand Up @@ -381,12 +399,25 @@ def detect_changes_func(
store, root = _get_store(repo_root)
try:
# Detect changed files if not provided.
if changed_files is None:
changed_files = get_changed_files(root, base)
if not changed_files:
changed_files = get_staged_and_unstaged(root)
changed_files, detect_meta = resolve_changed_files(root, changed_files, base)

if not changed_files:
if detect_meta.get("auto_detect_timed_out"):
timeout_s = detect_meta.get("timeout_seconds", 0)
return {
"status": "ok",
"summary": (
"Auto-detection of changed files timed out "
f"after {timeout_s:.1f}s. Pass changed_files explicitly "
"to run detect_changes without delay."
),
"risk_score": 0.0,
"changed_functions": [],
"affected_flows": [],
"test_gaps": [],
"review_priorities": [],
"requires_changed_files": True,
}
return {
"status": "ok",
"summary": "No changed files detected.",
Expand Down
Loading
Loading