VectorInstitute
diff --git a/‎.github/workflows/fix-remote-pr.yml‎
Lines changed: 69 additions & 45 deletions b/‎.github/workflows/fix-remote-pr.yml‎
Lines changed: 69 additions & 45 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 10 additions & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 26 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎scripts/agent_tracer.py‎
Lines changed: 3 additions & 3 deletions b/‎scripts/agent_tracer.py‎
Lines changed: 3 additions & 3 deletions
@@ -73,48 +73,6 @@ jobs:
           git config user.name "aieng-bot-maintain[bot]"
           git config user.email "aieng-bot@vectorinstitute.ai"
 
-      - name: Analyze failure type
-        id: analyze
-        working-directory: target-repo
-        run: |
-          FAILED_CHECKS='${{ steps.pr-details.outputs.failed-checks }}'
-
-          echo "Analyzing failures..."
-          echo "$FAILED_CHECKS" | jq -r '.[] | "\(.name): \(.conclusion)"'
-
-          # Check for merge conflicts first
-          if git status | grep -q "Unmerged paths\|merge conflict"; then
-            echo "primary-type=merge_conflict" >> $GITHUB_OUTPUT
-            echo "failed-names=merge-conflict" >> $GITHUB_OUTPUT
-            echo "Detected merge conflicts"
-            exit 0
-          fi
-
-          # Categorize failures
-          TEST_FAILURES=$(echo "$FAILED_CHECKS" | jq '[.[] | select(.name | test("test|spec|jest|pytest|unittest"; "i"))]')
-          LINT_FAILURES=$(echo "$FAILED_CHECKS" | jq '[.[] | select(.name | test("lint|format|pre-commit|eslint|prettier|black|flake8|ruff"; "i"))]')
-          SECURITY_FAILURES=$(echo "$FAILED_CHECKS" | jq '[.[] | select(.name | test("audit|security|snyk|dependabot|pip-audit"; "i"))]')
-          BUILD_FAILURES=$(echo "$FAILED_CHECKS" | jq '[.[] | select(.name | test("build|compile|webpack|vite|tsc"; "i"))]')
-          MERGE_CONFLICT_FAILURES=$(echo "$FAILED_CHECKS" | jq '[.[] | select(.name | test("merge|conflict"; "i"))]')
-
-          # Determine primary failure type
-          if [ "$(echo "$MERGE_CONFLICT_FAILURES" | jq 'length')" -gt 0 ]; then
-            echo "primary-type=merge_conflict" >> $GITHUB_OUTPUT
-          elif [ "$(echo "$TEST_FAILURES" | jq 'length')" -gt 0 ]; then
-            echo "primary-type=test" >> $GITHUB_OUTPUT
-          elif [ "$(echo "$LINT_FAILURES" | jq 'length')" -gt 0 ]; then
-            echo "primary-type=lint" >> $GITHUB_OUTPUT
-          elif [ "$(echo "$SECURITY_FAILURES" | jq 'length')" -gt 0 ]; then
-            echo "primary-type=security" >> $GITHUB_OUTPUT
-          elif [ "$(echo "$BUILD_FAILURES" | jq 'length')" -gt 0 ]; then
-            echo "primary-type=build" >> $GITHUB_OUTPUT
-          else
-            echo "primary-type=unknown" >> $GITHUB_OUTPUT
-          fi
-
-          FAILED_NAMES=$(echo "$FAILED_CHECKS" | jq -r '.[].name' | paste -sd "," -)
-          echo "failed-names=$FAILED_NAMES" >> $GITHUB_OUTPUT
-
       - name: Get failure logs
         id: get-logs
         run: |
@@ -142,19 +100,85 @@ jobs:
         env:
           GH_TOKEN: ${{ secrets.ORG_ACCESS_TOKEN }}
 
+      # Interpreter for the classifier script that a later step runs via python3.
+      - name: Setup Python for classification
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      # Editable install of the bot repository, run from inside its checkout.
+      - name: Install classifier dependencies
+        working-directory: bot-repo
+        run: pip install -e .
+
+      # Classify why the PR's checks failed. A local merge-conflict check
+      # short-circuits; otherwise scripts/classify_pr_failure.py is invoked and
+      # whatever it prints is appended to $GITHUB_OUTPUT as this step's outputs.
+      - name: Analyze failure type with Claude
+        id: analyze
+        working-directory: target-repo
+        run: |
+          # Every workflow value used here arrives through the env block of
+          # this step instead of being expanded inline into the script text.
+          # PR titles, author names, branch names and check names can hold
+          # arbitrary characters; inline expansion would let a quote or a
+          # command substitution in them run as shell code.
+          echo "Analyzing failures with Claude-based classifier..."
+          echo "$FAILED_CHECKS" | jq -r '.[] | "\(.name): \(.conclusion)"'
+
+          # Check for merge conflicts first (quick local check)
+          if git status | grep -q "Unmerged paths\|merge conflict"; then
+            {
+              echo "failure-type=merge_conflict"
+              echo "failed-check-names=merge-conflict"
+              echo "confidence=1.0"
+              echo "reasoning=Git merge conflicts detected in working tree"
+            } >> "$GITHUB_OUTPUT"
+            echo "Detected merge conflicts via git status"
+            exit 0
+          fi
+
+          # Prepare PR context JSON (jq --arg takes care of JSON escaping)
+          PR_INFO=$(jq -n \
+            --arg repo "$TARGET_REPO" \
+            --arg pr_number "$PR_NUMBER" \
+            --arg pr_title "$PR_TITLE" \
+            --arg pr_author "$PR_AUTHOR" \
+            --arg base_ref "$BASE_REF" \
+            --arg head_ref "$HEAD_REF" \
+            '{repo: $repo, pr_number: $pr_number, pr_title: $pr_title, pr_author: $pr_author, base_ref: $base_ref, head_ref: $head_ref}')
+
+          # Get failure logs (left empty when the log file was not produced)
+          FAILURE_LOGS=""
+          if [ -f /tmp/failure-logs.txt ]; then
+            FAILURE_LOGS=$(cat /tmp/failure-logs.txt)
+          fi
+
+          # Run Python classifier from the bot checkout, a sibling of target-repo
+          cd ../bot-repo
+          CLASSIFICATION=$(python3 scripts/classify_pr_failure.py \
+            --pr-info "$PR_INFO" \
+            --failed-checks "$FAILED_CHECKS" \
+            --failure-logs "$FAILURE_LOGS" \
+            --output-format github)
+
+          # Publish the classifier's output as step outputs and echo it to the log
+          echo "$CLASSIFICATION" >> "$GITHUB_OUTPUT"
+          echo "Classification results:"
+          echo "$CLASSIFICATION"
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          FAILED_CHECKS: ${{ steps.pr-details.outputs.failed-checks }}
+          TARGET_REPO: ${{ github.event.inputs.target_repo }}
+          PR_NUMBER: ${{ github.event.inputs.pr_number }}
+          PR_TITLE: ${{ steps.pr-details.outputs.pr-title }}
+          PR_AUTHOR: ${{ steps.pr-details.outputs.pr-author }}
+          BASE_REF: ${{ steps.pr-details.outputs.base-ref }}
+          HEAD_REF: ${{ steps.pr-details.outputs.head-ref }}
+
       - name: Load and customize prompt
         id: prepare-prompt
         run: |
-          FAILURE_TYPE="${{ steps.analyze.outputs.primary-type }}"
+          FAILURE_TYPE="${{ steps.analyze.outputs.failure-type }}"
+          CONFIDENCE="${{ steps.analyze.outputs.confidence }}"
+          REASONING="${{ steps.analyze.outputs.reasoning }}"
           REPO="${{ github.event.inputs.target_repo }}"
           PR_NUMBER="${{ github.event.inputs.pr_number }}"
           PR_TITLE="${{ steps.pr-details.outputs.pr-title }}"
           PR_AUTHOR="${{ steps.pr-details.outputs.pr-author }}"
-          FAILED_NAMES="${{ steps.analyze.outputs.failed-names }}"
+          FAILED_NAMES="${{ steps.analyze.outputs.failed-check-names }}"
+
+          echo "Classification: $FAILURE_TYPE (confidence: $CONFIDENCE)"
+          echo "Reasoning: $REASONING"
 
-          # Skip unknown failure types
+          # Skip unknown failure types (this check does not look at $CONFIDENCE)
           if [ "$FAILURE_TYPE" = "unknown" ]; then
             echo "Failure type is unknown - skipping automated fix attempt"
+            echo "Reasoning: $REASONING"
             echo "should-skip=true" >> $GITHUB_OUTPUT
             exit 0
           fi
 
@@ -33,6 +33,15 @@ repos:
     - id: ruff-format
       types_or: [python, jupyter]
 
+  # Type-check Python files with mypy. `language: system` together with the
+  # custom `entry` runs the python3/mypy already available on PATH instead of
+  # a pre-commit-managed environment, reading settings from pyproject.toml.
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.18.2
+    hooks:
+    - id: mypy
+      entry: python3 -m mypy --config-file pyproject.toml
+      language: system
+      types: [python]
+      # Regex matched against file paths: any path containing "tests" is skipped.
+      exclude: "tests"
+
   - repo: https://github.com/crate-ci/typos
     rev: v1  # v1.19.0
     hooks:
@@ -58,5 +67,5 @@ ci:
     autoupdate_branch: ''
     autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
     autoupdate_schedule: weekly
-    skip: [nextjs-lint]
+    skip: [nextjs-lint, mypy]
     submodules: false
@@ -8,6 +8,7 @@ license = "Apache-2.0"
 repository = "https://github.com/VectorInstitute/aieng-bot-maintain"
 requires-python = ">=3.12"
 dependencies = [
+    "anthropic>=0.42.0",
     "pyyaml>=6.0.2",
 ]
 
@@ -36,6 +37,31 @@ docs = [
     "ipython>=8.31.0",
 ]
 
+# uv settings: dependency groups that uv includes by default.
+[tool.uv]
+default-groups = ["dev"]
+
+# mypy configuration. The pre-commit mypy hook loads it explicitly with
+# `--config-file pyproject.toml`.
+[tool.mypy]
+follow_imports = "normal"
+# Unresolvable imports are reported as errors rather than silenced.
+ignore_missing_imports = false
+# With non_interactive below, missing stub packages are installed without prompting.
+install_types = true
+pretty = true
+non_interactive = true
+# The allow_* = false settings below require complete annotations on
+# definitions, calls and decorators.
+allow_untyped_defs = false
+no_implicit_optional = true
+check_untyped_defs = true
+namespace_packages = true
+explicit_package_bases = true
+warn_unused_configs = true
+allow_subclassing_any = false
+allow_untyped_calls = false
+allow_incomplete_defs = false
+allow_untyped_decorators = false
+warn_redundant_casts = true
+warn_unused_ignores = true
+implicit_reexport = false
+strict_equality = true
+extra_checks = true
+
 [tool.ruff]
 include = ["*.py", "pyproject.toml", "*.ipynb"]
 line-length = 88
 
@@ -321,7 +321,7 @@ def finalize(
         files_modified: list[str] | None = None,
         commit_sha: str | None = None,
         commit_url: str | None = None,
-    ):
+    ) -> None:
         """Finalize trace with execution results.
 
         Args:
@@ -348,7 +348,7 @@ def finalize(
             }
         )
 
-    def save_trace(self, filepath: str):
+    def save_trace(self, filepath: str) -> None:
         """Save trace to JSON file.
 
         Args:
@@ -405,7 +405,7 @@ def get_summary(self) -> str:
             Summary string for PR comments
 
         """
-        event_counts = {}
+        event_counts: dict[str, int] = {}
         for event in self.trace["events"]:
             event_type = event["type"]
             event_counts[event_type] = event_counts.get(event_type, 0) + 1