diff --git a/code_review_graph/tools/_common.py b/code_review_graph/tools/_common.py index fd206a9b..59d3ff97 100644 --- a/code_review_graph/tools/_common.py +++ b/code_review_graph/tools/_common.py @@ -2,6 +2,10 @@ from __future__ import annotations +import os +import subprocess +import threading +import time from pathlib import Path from typing import Any @@ -58,6 +62,123 @@ def _error_response( } +_CHANGED_FILES_CACHE_LOCK = threading.Lock() +_CHANGED_FILES_CACHE: dict[tuple[str, str], dict[str, Any]] = {} +_CHANGED_FILES_CACHE_TTL = float(os.environ.get("CRG_CHANGED_FILES_CACHE_TTL", "3.0")) +_FAST_DIFF_TIMEOUT = float(os.environ.get("CRG_FAST_DIFF_TIMEOUT", "5.0")) + + +def _run_git(root: Path, args: list[str], timeout_s: float) -> subprocess.CompletedProcess: + return subprocess.run( + ["git", *args], + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + cwd=str(root), + timeout=timeout_s, + stdin=subprocess.DEVNULL, + check=False, + ) + + +def _normalize_changed_files(files: list[str]) -> list[str]: + seen: set[str] = set() + out: list[str] = [] + for f in files: + rel = f.strip().replace("\\", "/") + if not rel or rel in seen: + continue + seen.add(rel) + out.append(rel) + return out + + +def _parse_porcelain_paths(stdout: str) -> list[str]: + files: list[str] = [] + for line in stdout.splitlines(): + if not line.strip() or len(line) < 4: + continue + path_part = line[3:].strip() + # Handle rename lines: "old/path -> new/path" + if " -> " in path_part: + path_part = path_part.split(" -> ", 1)[1].strip() + files.append(path_part) + return files + + +def resolve_changed_files( + root: Path, + changed_files: list[str] | None, + base: str, +) -> tuple[list[str], dict[str, Any]]: + """Resolve changed files with fast auto-detect and short-lived cache. + + Returns: + (files, meta) + meta includes: + - source: explicit | cache | auto + - auto_detect_timed_out: bool + - timeout_seconds: float + - cache_hit: bool + """ + if changed_files is not None: + return _normalize_changed_files(changed_files), { + "source": "explicit", + "auto_detect_timed_out": False, + "timeout_seconds": _FAST_DIFF_TIMEOUT, + "cache_hit": False, + } + + cache_key = (str(root.resolve()), base) + now = time.monotonic() + with _CHANGED_FILES_CACHE_LOCK: + cached = _CHANGED_FILES_CACHE.get(cache_key) + if cached and (now - float(cached["ts"])) <= _CHANGED_FILES_CACHE_TTL: + return list(cached["files"]), { + "source": "cache", + "auto_detect_timed_out": False, + "timeout_seconds": _FAST_DIFF_TIMEOUT, + "cache_hit": True, + } + + timed_out = False + files: list[str] = [] + try: + diff = _run_git(root, ["diff", "--name-only", base, "--"], _FAST_DIFF_TIMEOUT) + if diff.returncode == 0: + files.extend(diff.stdout.splitlines()) + else: + cached_only = _run_git( + root, + ["diff", "--name-only", "--cached"], + _FAST_DIFF_TIMEOUT, + ) + if cached_only.returncode == 0: + files.extend(cached_only.stdout.splitlines()) + + status = _run_git( + root, + ["status", "--porcelain", "--untracked-files=no"], + _FAST_DIFF_TIMEOUT, + ) + if status.returncode == 0: + files.extend(_parse_porcelain_paths(status.stdout)) + except (FileNotFoundError, subprocess.TimeoutExpired): + timed_out = True + + normalized = _normalize_changed_files(files) + with _CHANGED_FILES_CACHE_LOCK: + _CHANGED_FILES_CACHE[cache_key] = {"ts": now, "files": normalized} + + return normalized, { + "source": "auto", + "auto_detect_timed_out": timed_out, + "timeout_seconds": _FAST_DIFF_TIMEOUT, + "cache_hit": False, + } + + def _validate_repo_root(path: "Path | str") -> Path: """Validate that a path is a plausible project root. diff --git a/code_review_graph/tools/context.py b/code_review_graph/tools/context.py index 5abb8f3f..0bc0e6ed 100644 --- a/code_review_graph/tools/context.py +++ b/code_review_graph/tools/context.py @@ -4,36 +4,13 @@ import logging import sqlite3 -import subprocess -from pathlib import Path from typing import Any -from ._common import _get_store, compact_response +from ._common import _get_store, compact_response, resolve_changed_files logger = logging.getLogger(__name__) -def _has_git_changes(root: Path, base: str) -> bool: - """Quick check for uncommitted or diffed changes.""" - try: - result = subprocess.run( - ["git", "diff", "--name-only", base, "--"], - capture_output=True, stdin=subprocess.DEVNULL, text=True, - cwd=str(root), timeout=10, - ) - if result.returncode == 0 and result.stdout.strip(): - return True - # Also check staged/unstaged - result2 = subprocess.run( - ["git", "status", "--porcelain"], - capture_output=True, stdin=subprocess.DEVNULL, text=True, - cwd=str(root), timeout=10, - ) - return bool(result2.stdout.strip()) - except (FileNotFoundError, subprocess.TimeoutExpired): - return False - - def get_minimal_context( task: str = "", changed_files: list[str] | None = None, @@ -62,33 +39,29 @@ def get_minimal_context( risk_score = 0.0 top_affected: list[str] = [] test_gap_count = 0 - if changed_files or _has_git_changes(root, base): + files, detect_meta = resolve_changed_files(root, changed_files, base) + if files: try: from ..changes import analyze_changes - from ..incremental import get_changed_files as _get_changed - files = changed_files - if not files: - files = _get_changed(root, base) - if files: - abs_files = [str(root / f) for f in files] - analysis = analyze_changes( - store, abs_files, repo_root=str(root), base=base, - ) - risk_score = analysis.get("risk_score", 0.0) - risk = ( - "high" if risk_score > 0.7 - else "medium" if risk_score > 0.4 - else "low" - ) - top_affected = [ - f.get("name", "") - for f in analysis.get("changed_functions", [])[:5] - ] - test_gap_count = len(analysis.get("test_gaps", [])) + abs_files = [str(root / f) for f in files] + analysis = analyze_changes( + store, abs_files, repo_root=str(root), base=base, + ) + risk_score = analysis.get("risk_score", 0.0) + risk = ( + "high" if risk_score > 0.7 + else "medium" if risk_score > 0.4 + else "low" + ) + top_affected = [ + f.get("name", "") + for f in analysis.get("changed_functions", [])[:5] + ] + test_gap_count = len(analysis.get("test_gaps", [])) except ( ImportError, OSError, ValueError, - sqlite3.Error, subprocess.SubprocessError, + sqlite3.Error, ): logger.debug("Risk analysis failed in get_minimal_context", exc_info=True) @@ -137,6 +110,11 @@ def get_minimal_context( ] if risk != "unknown": summary_parts.append(f"Risk: {risk} ({risk_score:.2f}).") + if detect_meta.get("auto_detect_timed_out"): + summary_parts.append( + "Changed-file auto-detection timed out. " + "Pass changed_files for faster review tools." + ) if test_gap_count: summary_parts.append(f"{test_gap_count} test gaps.") diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py index e3d8c3e1..d7e30228 100644 --- a/code_review_graph/tools/query.py +++ b/code_review_graph/tools/query.py @@ -9,9 +9,9 @@ from ..embeddings import EmbeddingStore from ..graph import _sanitize_name, edge_to_dict, node_to_dict from ..hints import generate_hints, get_session -from ..incremental import get_changed_files, get_db_path, get_staged_and_unstaged +from ..incremental import get_db_path from ..search import hybrid_search -from ._common import _BUILTIN_CALL_NAMES, _get_store +from ._common import _BUILTIN_CALL_NAMES, _get_store, resolve_changed_files logger = logging.getLogger(__name__) @@ -56,12 +56,25 @@ def get_impact_radius( """ store, root = _get_store(repo_root) try: - if changed_files is None: - changed_files = get_changed_files(root, base) - if not changed_files: - changed_files = get_staged_and_unstaged(root) + changed_files, detect_meta = resolve_changed_files(root, changed_files, base) if not changed_files: + if detect_meta.get("auto_detect_timed_out"): + timeout_s = detect_meta.get("timeout_seconds", 0) + return { + "status": "ok", + "summary": ( + "Auto-detection of changed files timed out " + f"after {timeout_s:.1f}s. Pass changed_files explicitly " + "to compute impact radius quickly." + ), + "changed_nodes": [], + "impacted_nodes": [], + "impacted_files": [], + "truncated": False, + "total_impacted": 0, + "requires_changed_files": True, + } return { "status": "ok", "summary": "No changed files detected.", diff --git a/code_review_graph/tools/review.py b/code_review_graph/tools/review.py index fcf205ff..1f9d3490 100644 --- a/code_review_graph/tools/review.py +++ b/code_review_graph/tools/review.py @@ -10,8 +10,7 @@ from ..flows import get_affected_flows as _get_affected_flows from ..graph import edge_to_dict, node_to_dict from ..hints import generate_hints, get_session -from ..incremental import get_changed_files, get_staged_and_unstaged -from ._common import _get_store +from ._common import _get_store, resolve_changed_files logger = logging.getLogger(__name__) @@ -53,12 +52,21 @@ def get_review_context( store, root = _get_store(repo_root) try: # Get impact radius first - if changed_files is None: - changed_files = get_changed_files(root, base) - if not changed_files: - changed_files = get_staged_and_unstaged(root) + changed_files, detect_meta = resolve_changed_files(root, changed_files, base) if not changed_files: + if detect_meta.get("auto_detect_timed_out"): + timeout_s = detect_meta.get("timeout_seconds", 0) + return { + "status": "ok", + "summary": ( + "Auto-detection of changed files timed out " + f"after {timeout_s:.1f}s. Pass changed_files explicitly " + "for a fast and reliable review context." + ), + "context": {}, + "requires_changed_files": True, + } return { "status": "ok", "summary": "No changes detected. Nothing to review.", @@ -304,12 +312,22 @@ def get_affected_flows_func( """ store, root = _get_store(repo_root) try: - if changed_files is None: - changed_files = get_changed_files(root, base) - if not changed_files: - changed_files = get_staged_and_unstaged(root) + changed_files, detect_meta = resolve_changed_files(root, changed_files, base) if not changed_files: + if detect_meta.get("auto_detect_timed_out"): + timeout_s = detect_meta.get("timeout_seconds", 0) + return { + "status": "ok", + "summary": ( + "Auto-detection of changed files timed out " + f"after {timeout_s:.1f}s. Pass changed_files explicitly " + "to compute affected flows quickly." + ), + "affected_flows": [], + "total": 0, + "requires_changed_files": True, + } return { "status": "ok", "summary": "No changed files detected.", @@ -381,12 +399,25 @@ def detect_changes_func( store, root = _get_store(repo_root) try: # Detect changed files if not provided. - if changed_files is None: - changed_files = get_changed_files(root, base) - if not changed_files: - changed_files = get_staged_and_unstaged(root) + changed_files, detect_meta = resolve_changed_files(root, changed_files, base) if not changed_files: + if detect_meta.get("auto_detect_timed_out"): + timeout_s = detect_meta.get("timeout_seconds", 0) + return { + "status": "ok", + "summary": ( + "Auto-detection of changed files timed out " + f"after {timeout_s:.1f}s. Pass changed_files explicitly " + "to run detect_changes without delay." + ), + "risk_score": 0.0, + "changed_functions": [], + "affected_flows": [], + "test_gaps": [], + "review_priorities": [], + "requires_changed_files": True, + } return { "status": "ok", "summary": "No changed files detected.", diff --git a/tests/test_changes.py b/tests/test_changes.py index 2e4c803c..7765cbc2 100644 --- a/tests/test_changes.py +++ b/tests/test_changes.py @@ -436,8 +436,10 @@ def test_detect_changes_tool_no_changes(self): # and get_changed_files/get_staged_and_unstaged to return empty. with ( patch("code_review_graph.tools.review._get_store") as mock_get_store, - patch("code_review_graph.tools.review.get_changed_files", return_value=[]), - patch("code_review_graph.tools.review.get_staged_and_unstaged", return_value=[]), + patch( + "code_review_graph.tools.review.resolve_changed_files", + return_value=([], {"auto_detect_timed_out": False}), + ), ): mock_get_store.return_value = (self.store, Path("/fake/repo")) # Prevent the store from being closed by the tool @@ -458,7 +460,10 @@ def test_detect_changes_tool_with_changes(self): with ( patch("code_review_graph.tools.review._get_store") as mock_get_store, - patch("code_review_graph.tools.review.get_changed_files", return_value=["app.py"]), + patch( + "code_review_graph.tools.review.resolve_changed_files", + return_value=(["app.py"], {"auto_detect_timed_out": False}), + ), patch( "code_review_graph.tools.review.parse_git_diff_ranges", return_value={"app.py": [(1, 10)]}, @@ -473,3 +478,22 @@ def test_detect_changes_tool_with_changes(self): assert "risk_score" in result assert "test_gaps" in result assert "review_priorities" in result + + def test_detect_changes_tool_timeout_requires_changed_files(self): + """Timeout in changed-file auto-detection returns explicit fast-path guidance.""" + from code_review_graph.tools import detect_changes_func + + with ( + patch("code_review_graph.tools.review._get_store") as mock_get_store, + patch( + "code_review_graph.tools.review.resolve_changed_files", + return_value=([], {"auto_detect_timed_out": True, "timeout_seconds": 5.0}), + ), + ): + mock_get_store.return_value = (self.store, Path("/fake/repo")) + self.store.close = lambda: None + + result = detect_changes_func(base="HEAD~1", repo_root="/fake/repo") + assert result["status"] == "ok" + assert result["requires_changed_files"] is True + assert "timed out" in result["summary"]