Skip to content

Commit 1456969

Browse files
authored
Merge pull request #31 from VectorInstitute/improve_classify_cmd
Improve classify workflow
2 parents cfdc117 + a2c1f26 commit 1456969

File tree

6 files changed

+1716
-1276
lines changed

6 files changed

+1716
-1276
lines changed

.github/workflows/fix-remote-pr.yml

Lines changed: 44 additions & 144 deletions
Original file line number | Diff line number | Diff line change
@@ -39,20 +39,20 @@ jobs:
3939
with:
4040
path: bot-repo
4141

42-
- name: Get PR details
42+
- name: Get PR basic info for checkout
4343
id: pr-details
4444
run: |
4545
REPO="${{ github.event.inputs.target_repo }}"
4646
PR_NUMBER="${{ github.event.inputs.pr_number }}"
4747
48-
# Get PR information including mergeable status
48+
# Get minimal PR info needed for checkout (full details fetched by CLI)
4949
PR_INFO=$(gh pr view $PR_NUMBER --repo "$REPO" --json \
50-
title,author,headRefName,headRepository,headRepositoryOwner,statusCheckRollup,baseRefName,mergeable)
50+
title,author,headRefName,baseRefName,mergeable)
5151
5252
echo "PR Info:"
5353
echo "$PR_INFO" | jq '.'
5454
55-
# Extract details
55+
# Extract details for checkout step
5656
HEAD_REF=$(echo "$PR_INFO" | jq -r '.headRefName')
5757
BASE_REF=$(echo "$PR_INFO" | jq -r '.baseRefName')
5858
PR_TITLE=$(echo "$PR_INFO" | jq -r '.title')
@@ -65,13 +65,8 @@ jobs:
6565
echo "pr-author=$PR_AUTHOR" >> $GITHUB_OUTPUT
6666
echo "mergeable=$MERGEABLE" >> $GITHUB_OUTPUT
6767
68-
# Get failed checks
69-
FAILED_CHECKS=$(echo "$PR_INFO" | jq -c '[.statusCheckRollup[] | select(.conclusion == "FAILURE")]')
70-
echo "failed-checks=$FAILED_CHECKS" >> $GITHUB_OUTPUT
71-
72-
FAILED_COUNT=$(echo "$FAILED_CHECKS" | jq 'length')
73-
echo "Found $FAILED_COUNT failed checks"
74-
echo "PR mergeable status: $MERGEABLE"
68+
echo "Branch: $HEAD_REF → $BASE_REF"
69+
echo "Mergeable: $MERGEABLE"
7570
env:
7671
GH_TOKEN: ${{ secrets.ORG_ACCESS_TOKEN }}
7772

@@ -122,63 +117,6 @@ jobs:
122117
fi
123118
continue-on-error: true
124119

125-
- name: Get failure logs
126-
id: get-logs
127-
run: |
128-
REPO="${{ github.event.inputs.target_repo }}"
129-
FAILED_CHECKS='${{ steps.pr-details.outputs.failed-checks }}'
130-
131-
echo "Extracting failure logs from failed checks..."
132-
133-
# Extract job URLs and fetch their logs
134-
> /tmp/failure-logs.txt
135-
136-
echo "$FAILED_CHECKS" | jq -r '.[].detailsUrl' | while read -r JOB_URL; do
137-
if [ -n "$JOB_URL" ]; then
138-
# Extract run ID and job ID from URL
139-
# Format: https://github.com/OWNER/REPO/actions/runs/RUN_ID/job/JOB_ID
140-
RUN_ID=$(echo "$JOB_URL" | grep -oP 'runs/\K[0-9]+')
141-
JOB_ID=$(echo "$JOB_URL" | grep -oP 'job/\K[0-9]+')
142-
143-
echo "Fetching logs for job $JOB_ID in run $RUN_ID"
144-
145-
# Fetch full run logs
146-
gh run view "$RUN_ID" --repo "$REPO" --log > /tmp/full-logs.txt 2>&1 || continue
147-
148-
# Extract relevant failure sections (errors, failures, security issues)
149-
grep -i -E "(error|fail|traceback|exception|exit code [^0]|vulnerability|vulnerabilities|CVE-|GHSA-|audit|found [0-9]+ known)" /tmp/full-logs.txt | tail -2000 >> /tmp/failure-logs.txt || true
150-
151-
# Also get the last 1000 lines which often contain the summary
152-
tail -1000 /tmp/full-logs.txt >> /tmp/failure-logs.txt
153-
154-
rm -f /tmp/full-logs.txt
155-
fi
156-
done
157-
158-
# If we got no logs, try fallback method
159-
if [ ! -s /tmp/failure-logs.txt ]; then
160-
echo "No logs from specific jobs, trying fallback..."
161-
HEAD_REF="${{ steps.pr-details.outputs.head-ref }}"
162-
RUN_ID=$(gh run list --repo "$REPO" --branch "$HEAD_REF" --limit 5 \
163-
--json databaseId,status,conclusion \
164-
--jq '.[] | select(.conclusion == "failure") | .databaseId' | head -1)
165-
166-
if [ -n "$RUN_ID" ]; then
167-
gh run view $RUN_ID --repo "$REPO" --log > /tmp/full-logs.txt 2>&1
168-
grep -i -E "(error|fail|traceback|exception|exit code [^0]|vulnerability|vulnerabilities|CVE-|GHSA-|audit|found [0-9]+ known)" /tmp/full-logs.txt | tail -2000 > /tmp/failure-logs.txt || true
169-
tail -1000 /tmp/full-logs.txt >> /tmp/failure-logs.txt
170-
rm -f /tmp/full-logs.txt
171-
fi
172-
fi
173-
174-
# Log the size of extracted logs
175-
if [ -f /tmp/failure-logs.txt ] && [ -s /tmp/failure-logs.txt ]; then
176-
echo "Extracted $(wc -l < /tmp/failure-logs.txt) lines of failure logs"
177-
else
178-
echo "No failure logs could be extracted" > /tmp/failure-logs.txt
179-
fi
180-
env:
181-
GH_TOKEN: ${{ secrets.ORG_ACCESS_TOKEN }}
182120

183121
- name: Setup Python for classification
184122
uses: actions/setup-python@v6
@@ -192,80 +130,54 @@ jobs:
192130
193131
- name: Analyze failure type with Claude
194132
id: analyze
195-
working-directory: target-repo
133+
working-directory: bot-repo
196134
run: |
197-
FAILED_CHECKS='${{ steps.pr-details.outputs.failed-checks }}'
198-
MERGEABLE='${{ steps.pr-details.outputs.mergeable }}'
199-
200-
echo "Analyzing failures with Claude-based classifier..."
201-
echo "$FAILED_CHECKS" | jq -r '.[] | "\(.name): \(.conclusion)"'
202-
203-
# Check for merge conflicts first via PR mergeable status
204-
if [ "$MERGEABLE" = "CONFLICTING" ]; then
205-
echo "failure-type=merge_conflict" >> $GITHUB_OUTPUT
206-
echo "failed-check-names=merge-conflict" >> $GITHUB_OUTPUT
207-
echo "confidence=1.0" >> $GITHUB_OUTPUT
208-
echo "reasoning=PR has merge conflicts with base branch (mergeable=CONFLICTING)" >> $GITHUB_OUTPUT
209-
echo "✓ Detected merge conflicts via PR mergeable status"
210-
exit 0
211-
fi
212-
213-
# Also check git status as fallback
214-
if git status | grep -q "Unmerged paths\|merge conflict"; then
215-
echo "failure-type=merge_conflict" >> $GITHUB_OUTPUT
216-
echo "failed-check-names=merge-conflict" >> $GITHUB_OUTPUT
217-
echo "confidence=1.0" >> $GITHUB_OUTPUT
218-
echo "reasoning=Git merge conflicts detected in working tree" >> $GITHUB_OUTPUT
219-
echo "✓ Detected merge conflicts via git status"
220-
exit 0
221-
fi
222-
223-
# Prepare PR context JSON
224-
PR_INFO=$(jq -n \
225-
--arg repo "${{ github.event.inputs.target_repo }}" \
226-
--arg pr_number "${{ github.event.inputs.pr_number }}" \
227-
--arg pr_title "${{ steps.pr-details.outputs.pr-title }}" \
228-
--arg pr_author "${{ steps.pr-details.outputs.pr-author }}" \
229-
--arg base_ref "${{ steps.pr-details.outputs.base-ref }}" \
230-
--arg head_ref "${{ steps.pr-details.outputs.head-ref }}" \
231-
'{repo: $repo, pr_number: $pr_number, pr_title: $pr_title, pr_author: $pr_author, base_ref: $base_ref, head_ref: $head_ref}')
232-
233-
# Check if failure logs file exists
234-
if [ ! -f /tmp/failure-logs.txt ]; then
235-
echo "⚠️ Warning: Failure logs file not found at /tmp/failure-logs.txt"
236-
echo "No failure logs could be extracted" > /tmp/failure-logs.txt
237-
fi
238-
239-
echo "Failure logs file size: $(wc -c < /tmp/failure-logs.txt) bytes"
240-
echo "Failure logs file lines: $(wc -l < /tmp/failure-logs.txt) lines"
135+
REPO="${{ github.event.inputs.target_repo }}"
136+
PR_NUMBER="${{ github.event.inputs.pr_number }}"
241137
242-
# Run Python classifier (using installed CLI entry point)
243-
# Pass file path instead of content to avoid bash variable size limits
244-
cd ../bot-repo
138+
echo "Analyzing PR $REPO#$PR_NUMBER with Claude-based classifier..."
139+
echo ""
245140
246-
echo "Running classifier..."
141+
# Run classify command and save JSON to file
247142
if ! aieng-bot classify \
248-
--pr-info "$PR_INFO" \
249-
--failed-checks "$FAILED_CHECKS" \
250-
--failure-logs-file /tmp/failure-logs.txt \
251-
--output-format github > /tmp/classification-output.txt 2> /tmp/classification-error.txt; then
143+
--repo "$REPO" \
144+
--pr "$PR_NUMBER" \
145+
--output /tmp/classification.json 2> /tmp/classification-error.txt; then
252146
echo "❌ Classification failed with exit code $?"
253-
echo "STDOUT:"
254-
cat /tmp/classification-output.txt || echo "(no stdout)"
255-
echo ""
256147
echo "STDERR:"
257148
cat /tmp/classification-error.txt || echo "(no stderr)"
258149
exit 1
259150
fi
260151
261-
CLASSIFICATION=$(cat /tmp/classification-output.txt)
262-
263-
# Parse and output results
264-
echo "$CLASSIFICATION" >> $GITHUB_OUTPUT
152+
# Parse JSON and output to GitHub Actions
265153
echo "Classification results:"
266-
echo "$CLASSIFICATION"
154+
cat /tmp/classification.json
155+
echo ""
156+
157+
# Extract fields from JSON
158+
FAILURE_TYPE=$(jq -r '.failure_type' /tmp/classification.json)
159+
CONFIDENCE=$(jq -r '.confidence' /tmp/classification.json)
160+
FAILED_CHECK_NAMES=$(jq -r '.failed_check_names | join(",")' /tmp/classification.json)
161+
REASONING=$(jq -r '.reasoning' /tmp/classification.json)
162+
RECOMMENDED_ACTION=$(jq -r '.recommended_action' /tmp/classification.json)
163+
164+
# Output to GitHub Actions
165+
echo "failure-type=$FAILURE_TYPE" >> $GITHUB_OUTPUT
166+
echo "confidence=$CONFIDENCE" >> $GITHUB_OUTPUT
167+
echo "failed-check-names=$FAILED_CHECK_NAMES" >> $GITHUB_OUTPUT
168+
{
169+
echo "reasoning<<EOF_REASONING"
170+
echo "$REASONING"
171+
echo "EOF_REASONING"
172+
} >> $GITHUB_OUTPUT
173+
{
174+
echo "recommended-action<<EOF_ACTION"
175+
echo "$RECOMMENDED_ACTION"
176+
echo "EOF_ACTION"
177+
} >> $GITHUB_OUTPUT
267178
env:
268179
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
180+
GH_TOKEN: ${{ secrets.ORG_ACCESS_TOKEN }}
269181

270182
- name: Prepare for agent execution
271183
id: prepare
@@ -303,21 +215,12 @@ jobs:
303215
cp -r bot-repo/.claude target-repo/.claude
304216
echo "✓ Skills copied to target-repo/.claude/"
305217
306-
# Copy failure logs to target-repo so agent can read it
307-
if [ -f /tmp/failure-logs.txt ]; then
308-
cp /tmp/failure-logs.txt target-repo/.failure-logs.txt
309-
echo "✓ Failure logs copied to target-repo/.failure-logs.txt"
310-
else
311-
echo "⚠️ Warning: No failure logs found at /tmp/failure-logs.txt"
312-
fi
313-
314218
# LAYER 1: Add bot files to git's local exclude list (safety net)
315219
echo "Excluding bot files from git..."
316220
{
317221
echo "# AI Engineering Bot temporary files - DO NOT COMMIT"
318222
echo ".claude/"
319223
echo ".pr-context.json"
320-
echo ".failure-logs.txt"
321224
} >> target-repo/.git/info/exclude
322225
echo "✓ Bot files added to .git/info/exclude"
323226
@@ -386,7 +289,7 @@ jobs:
386289
echo "Verifying bot temporary files are not staged or committed..."
387290
388291
# Check if bot files are in staging area
389-
BOT_FILES_STAGED=$(git diff --cached --name-only | grep -E "^\.claude/|^\.pr-context\.json$|^\.failure-logs\.txt$" || true)
292+
BOT_FILES_STAGED=$(git diff --cached --name-only | grep -E "^\.claude/|^\.pr-context\.json$" || true)
390293
391294
if [ -n "$BOT_FILES_STAGED" ]; then
392295
echo "❌ ERROR: Bot temporary files found in staging area!"
@@ -399,7 +302,7 @@ jobs:
399302
400303
# Check if bot files exist in any unpushed commits
401304
BASE_REF="${{ steps.pr-details.outputs.base-ref }}"
402-
BOT_FILES_COMMITTED=$(git diff --name-only "origin/$BASE_REF...HEAD" | grep -E "^\.claude/|^\.pr-context\.json$|^\.failure-logs\.txt$" || true)
305+
BOT_FILES_COMMITTED=$(git diff --name-only "origin/$BASE_REF...HEAD" | grep -E "^\.claude/|^\.pr-context\.json$" || true)
403306
404307
if [ -n "$BOT_FILES_COMMITTED" ]; then
405308
echo "⚠️ WARNING: Bot temporary files found in commits!"
@@ -413,7 +316,7 @@ jobs:
413316
414317
# Cleanup bot files from working directory
415318
echo "Cleaning up bot temporary files..."
416-
rm -rf .claude/ .pr-context.json .failure-logs.txt 2>/dev/null || true
319+
rm -rf .claude/ .pr-context.json 2>/dev/null || true
417320
echo "✓ Bot files cleaned up"
418321
419322
- name: Upload trace to GCS
@@ -548,9 +451,6 @@ jobs:
548451
id: push-fixes
549452
working-directory: target-repo
550453
run: |
551-
# Remove failure logs file (don't commit it to the repo)
552-
rm -f .failure-logs.txt
553-
554454
# Check if Agent SDK created a commit (check if HEAD differs from remote)
555455
git fetch origin ${{ steps.pr-details.outputs.head-ref }}
556456

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ dependencies = [
2626
"click>=8.0.0",
2727
"pyyaml>=6.0.2",
2828
"rich>=13.0.0",
29+
"python-dotenv>=1.0.0",
2930
]
3031

3132
[project.urls]

0 commit comments

Comments
 (0)