diff --git a/.agents/recovery-hints.yaml b/.agents/recovery-hints.yaml new file mode 100644 index 000000000..0a1795ccf --- /dev/null +++ b/.agents/recovery-hints.yaml @@ -0,0 +1,48 @@ +# Recovery hints for error classification (Issue #1330). +# +# Structure: top-level keys are sections. Each section is a list of +# {pattern, hint} entries. Patterns are case-insensitive regex. +# +# Naming convention: +# tool_ - hints scoped to a specific tool +# general - hints applied to all failures + +tool_gh: + - pattern: "GraphQL: Could not resolve" + hint: "Issue or PR number may not exist. Verify with gh issue list or gh pr list." + - pattern: "HTTP 403" + hint: "Rate limited or insufficient permissions. Wait 60s or use gh api --cache." + - pattern: "HTTP 404" + hint: "Resource not found. Verify the owner/repo/number are correct." + - pattern: "HTTP 422" + hint: "Validation failed. Check required fields and value constraints." + +tool_git: + - pattern: "not a git repository" + hint: "Current directory is not a git repo. Verify working directory." + - pattern: "CONFLICT" + hint: "Merge conflict detected. Resolve conflicts before continuing." + - pattern: "Permission denied" + hint: "File permission error. Check file ownership and access rights." + +tool_python3: + - pattern: "ModuleNotFoundError" + hint: "Missing Python dependency. Run uv pip install or check pyproject.toml." + - pattern: "SyntaxError" + hint: "Python syntax error. Check the file for typos or version incompatibilities." + +tool_npm: + - pattern: "ENOENT.*package\\.json" + hint: "No package.json found. Verify you are in the correct directory." + - pattern: "ERESOLVE" + hint: "Dependency resolution conflict. Try npm install --legacy-peer-deps." + +general: + - pattern: "rate limit" + hint: "API rate limit hit. Wait before retrying or use cached data." + - pattern: "ETIMEDOUT|ECONNRESET|ECONNREFUSED" + hint: "Network error. Check connectivity and retry." 
class ErrorType(enum.Enum):
    """Closed set of failure categories a classified error can carry.

    Each member's value is the lowercase string form of its name; that
    string is what ends up in serialized log entries.
    """

    # Failure reported by the tool process itself.
    TOOL_FAILURE = "tool_failure"
    # Remaining categories describe agent-level failure modes.
    REASONING_DRIFT = "reasoning_drift"
    INFINITE_LOOP = "infinite_loop"
    SCOPE_CREEP = "scope_creep"
    CONTEXT_OVERFLOW = "context_overflow"
def load_recovery_hints(
    hints_path: Path | None = None,
) -> dict[str, list[RecoveryHint]]:
    """Load recovery hints from YAML.

    The file is expected to be a mapping of section name to a list of
    ``{pattern, hint}`` entries. Anything that does not fit that shape is
    skipped with a warning instead of aborting the whole load, so a single
    bad entry cannot disable every other hint.

    Args:
        hints_path: Path to recovery-hints.yaml. Defaults to
            .agents/recovery-hints.yaml relative to the repo root.

    Returns:
        Dict mapping section names to lists of RecoveryHint objects.
        Sections that are not lists, or that end up with no valid entries,
        are omitted. Returns an empty dict when the file is missing, empty,
        or its top-level value is not a mapping.

    Note:
        YAML syntax errors raised by ``yaml.safe_load`` are not caught here
        and propagate to the caller.
    """
    if hints_path is None:
        # Default path: scripts/ is at repo root, so parents[1] is repo root.
        # Override via hints_path parameter for different layouts.
        hints_path = Path(__file__).resolve().parents[1] / ".agents" / "recovery-hints.yaml"

    if not hints_path.is_file():
        return {}

    raw: Any = yaml.safe_load(hints_path.read_text(encoding="utf-8")) or {}
    if not isinstance(raw, dict):
        # A top-level list or scalar has no sections to iterate over.
        _logger.warning(
            "Ignoring recovery hints in '%s': expected a mapping, got %s",
            hints_path,
            type(raw).__name__,
        )
        return {}

    result: dict[str, list[RecoveryHint]] = {}

    for section, entries in raw.items():
        if not isinstance(entries, list):
            continue
        hints: list[RecoveryHint] = []
        for entry in entries:
            # Both fields must be present and be strings: the pattern is fed
            # to re.compile and the hint is surfaced as text to the caller.
            if (
                not isinstance(entry, dict)
                or not isinstance(entry.get("pattern"), str)
                or not isinstance(entry.get("hint"), str)
            ):
                _logger.warning("Skipped malformed entry in section '%s': %s", section, entry)
                continue
            try:
                # RecoveryHint compiles the pattern on construction.
                hints.append(RecoveryHint(pattern=entry["pattern"], hint=entry["hint"]))
            except re.error as exc:
                _logger.warning(
                    "Skipped entry with invalid regex in section '%s': %s (%s)",
                    section,
                    entry,
                    exc,
                )
        if hints:
            result[section] = hints

    return result
@dataclass
class ErrorLogEntry:
    """One record of an error and the outcome of the recovery attempted.

    Instances are plain data; field order here is the key order produced by
    ``dataclasses.asdict``.
    """

    # When the entry was recorded, as a string.
    timestamp: str
    # String form of the error category.
    error_type: str
    # Name of the tool that failed.
    tool: str
    # Process exit code of the failed call.
    exit_code: int
    # Description of the recovery action taken.
    recovery: str
    # Whether that recovery action worked.
    success: bool
def get_graduation_candidates(
    log_path: Path | None = None,
    threshold: int = _GRADUATION_THRESHOLD,
) -> list[dict[str, Any]]:
    """Identify patterns eligible for graduation to MEMORY.md.

    Reads the JSONL error log and counts successful recoveries per
    ``(tool, error_type, recovery)`` combination. Combinations with
    `threshold` or more successes are candidates.

    Lines that cannot be used are skipped rather than raising: blank lines,
    invalid JSON, JSON values that are not objects, entries whose
    ``success`` is falsy or absent, entries missing a required key, and
    entries whose key fields are unhashable.

    Args:
        log_path: Path to errors.jsonl. Defaults to .agents/sessions/errors.jsonl.
        threshold: Minimum successful recoveries required (default: 3).

    Returns:
        List of dicts with tool, error_type, recovery, and count, sorted by
        count descending. Empty when the log file does not exist.
    """
    if log_path is None:
        log_path = _DEFAULT_ERROR_LOG

    if not log_path.is_file():
        return []

    success_counter: Counter[tuple[str, str, str]] = Counter()

    with log_path.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue
            # json.loads happily returns lists, numbers, strings or None for
            # valid JSON; only objects can be log entries.
            if not isinstance(entry, dict) or not entry.get("success"):
                continue
            try:
                key = (entry["tool"], entry["error_type"], entry["recovery"])
                success_counter[key] += 1
            except (KeyError, TypeError):
                # KeyError: a required field is missing.
                # TypeError: a field holds an unhashable value (list/dict).
                continue

    # most_common() yields items ordered by count, highest first, which is
    # the ordering callers expect from this function.
    return [
        {
            "tool": tool,
            "error_type": error_type,
            "recovery": recovery,
            "count": count,
        }
        for (tool, error_type, recovery), count in success_counter.most_common()
        if count >= threshold
    ]
class TestRecoveryHint:
    """RecoveryHint exposes a ready-to-use, case-insensitive compiled regex."""

    def test_compiled_pattern_matches(self) -> None:
        """The compiled pattern finds its text inside a longer message."""
        hint = RecoveryHint(pattern="HTTP 403", hint="rate limited")
        found = hint.compiled_pattern.search("Got HTTP 403 from API")
        assert found

    def test_compiled_pattern_case_insensitive(self) -> None:
        """A lowercase pattern still matches uppercase input."""
        hint = RecoveryHint(pattern="conflict", hint="merge issue")
        found = hint.compiled_pattern.search("CONFLICT in file.txt")
        assert found
class TestClassifyError:
    """Tests for classify_error() run against the repository hint file."""

    @pytest.fixture()
    def hints_db(self) -> HintsDB:
        """Hints parsed from the repository's recovery-hints.yaml."""
        return load_recovery_hints(HINTS_PATH)

    @staticmethod
    def _classify(
        db: HintsDB,
        tool: str,
        code: int,
        err: str,
        **extra: object,
    ) -> ClassifiedError:
        """Call classify_error with pre-loaded hints; extras pass through."""
        return classify_error(tool, code, err, hints_db=db, **extra)

    def test_loop_detection_three_identical_calls(self, hints_db: HintsDB) -> None:
        outcome = self._classify(
            hints_db, "gh", 1, "some error", call_history=["gh", "gh", "gh"]
        )
        assert outcome.error_type is ErrorType.INFINITE_LOOP
        assert not outcome.is_transient
        assert "Loop detected" in outcome.recovery_hints[0]

    def test_no_loop_with_mixed_history(self, hints_db: HintsDB) -> None:
        outcome = self._classify(
            hints_db, "gh", 1, "error", call_history=["git", "gh", "gh"]
        )
        assert outcome.error_type is not ErrorType.INFINITE_LOOP

    def test_exit_code_2_maps_to_tool_failure(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "unknown", 2, "")
        assert outcome.error_type is ErrorType.TOOL_FAILURE

    def test_exit_code_3_maps_to_tool_failure(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "unknown", 3, "")
        assert outcome.error_type is ErrorType.TOOL_FAILURE

    def test_exit_code_4_maps_to_tool_failure(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "unknown", 4, "")
        assert outcome.error_type is ErrorType.TOOL_FAILURE

    def test_transient_detection_rate_limit(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "gh", 1, "API rate limit exceeded")
        assert outcome.is_transient

    def test_transient_detection_timeout(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "curl", 1, "connect ETIMEDOUT 1.2.3.4:443")
        assert outcome.is_transient

    def test_not_transient_for_logic_error(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "python3", 1, "AssertionError: expected True")
        assert not outcome.is_transient

    def test_recovery_hints_match_tool_specific(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "gh", 1, "GraphQL: Could not resolve to a node")
        lowered = [text.lower() for text in outcome.recovery_hints]
        assert any("verify" in text for text in lowered)

    def test_recovery_hints_match_general(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "curl", 1, "ECONNREFUSED")
        lowered = [text.lower() for text in outcome.recovery_hints]
        assert any("network" in text for text in lowered)

    def test_classified_error_fields(self, hints_db: HintsDB) -> None:
        message = "HTTP 403 forbidden"
        outcome = self._classify(hints_db, "gh", 1, message)
        assert isinstance(outcome, ClassifiedError)
        assert outcome.tool_name == "gh"
        assert outcome.exit_code == 1
        assert outcome.stderr == "HTTP 403 forbidden"

    def test_empty_call_history(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "gh", 1, "error", call_history=[])
        assert outcome.error_type is ErrorType.TOOL_FAILURE

    def test_none_call_history(self, hints_db: HintsDB) -> None:
        outcome = self._classify(hints_db, "gh", 1, "error", call_history=None)
        assert outcome.error_type is ErrorType.TOOL_FAILURE
def _make_entry(
    tool: str,
    recovery: str,
    success: bool,
    timestamp: str = "2026-01-01T00:00:00Z",
) -> dict[str, str | int | bool]:
    """Build a log-entry dict for tests.

    The error type is always ``"tool_failure"`` and the exit code always 1;
    only the tool, recovery action, outcome and timestamp vary.
    """
    record: dict[str, str | int | bool] = dict(
        timestamp=timestamp,
        error_type="tool_failure",
        tool=tool,
        exit_code=1,
        recovery=recovery,
        success=success,
    )
    return record
+ log_path.write_text(content) + + # Should not raise, gracefully skips malformed lines. + candidates = get_graduation_candidates(log_path, threshold=1) + assert len(candidates) == 1 + + def test_sorts_by_count_descending(self, tmp_path: Path) -> None: + log_path = tmp_path / "errors.jsonl" + entries = [ + _make_entry("gh", "retry", True), + _make_entry("git", "reset", True), + _make_entry("git", "reset", True), + ] + log_path.write_text("\n".join(json.dumps(e) for e in entries) + "\n") + + candidates = get_graduation_candidates(log_path, threshold=1) + assert len(candidates) == 2 + assert candidates[0]["tool"] == "git" + assert candidates[0]["count"] == 2 + assert candidates[1]["tool"] == "gh" + assert candidates[1]["count"] == 1