Skip to content

Commit a7a01cd

Browse files
OAGrclaude
andcommitted
fix(pr-patrol): reduce compute waste with issue-type budgets and timeout abandonment
PR Patrol was burning 90%+ of compute on failed attempts: - Timeouts (30 min each) didn't count toward abandonment, causing infinite retries - All issue types got the same 40-turn / 30-min budget regardless of complexity - No early-exit guidance in prompts, so Claude kept trying unfixable issues - Reflection used expensive Sonnet model for simple log analysis Changes: - Unify failure tracking: timeouts now count toward abandonment (2 failures = abandoned) - Add per-issue-type budgets: missing-issue-ref gets 5 turns/3 min, ci-failure gets 25/15, etc. - Add "when to stop early" section to prompts with clear unfixable-scenario detection - CI failure prompt now explicitly lists human-required checks to skip immediately - Reflection uses haiku model with 5-min timeout instead of sonnet/30-min - Include pr-patrol tests in vitest config - Add computeBudget tests Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3e54e82 commit a7a01cd

File tree

2 files changed

+152
-27
lines changed

2 files changed

+152
-27
lines changed

crux/pr-patrol/index.test.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
checkMergeEligibility,
44
findMergeCandidates,
55
detectIssues,
6+
computeBudget,
67
type GqlPrNode,
78
} from './index.ts';
89

@@ -383,6 +384,46 @@ describe('findMergeCandidates', () => {
383384
});
384385
});
385386

387+
// ── computeBudget ────────────────────────────────────────────────────────────
388+
389+
describe('computeBudget', () => {
390+
it('gives small budget for missing-issue-ref only', () => {
391+
const budget = computeBudget(['missing-issue-ref']);
392+
expect(budget.maxTurns).toBe(5);
393+
expect(budget.timeoutMinutes).toBe(3);
394+
});
395+
396+
it('gives small budget for missing-testplan only', () => {
397+
const budget = computeBudget(['missing-testplan']);
398+
expect(budget.maxTurns).toBe(8);
399+
expect(budget.timeoutMinutes).toBe(5);
400+
});
401+
402+
it('gives medium budget for ci-failure', () => {
403+
const budget = computeBudget(['ci-failure']);
404+
expect(budget.maxTurns).toBe(25);
405+
expect(budget.timeoutMinutes).toBe(15);
406+
});
407+
408+
it('gives full budget for conflict', () => {
409+
const budget = computeBudget(['conflict']);
410+
expect(budget.maxTurns).toBe(40);
411+
expect(budget.timeoutMinutes).toBe(30);
412+
});
413+
414+
it('uses highest budget when multiple issues present', () => {
415+
const budget = computeBudget(['missing-issue-ref', 'ci-failure']);
416+
expect(budget.maxTurns).toBe(25);
417+
expect(budget.timeoutMinutes).toBe(15);
418+
});
419+
420+
it('conflict dominates when mixed with smaller issues', () => {
421+
const budget = computeBudget(['missing-testplan', 'conflict', 'missing-issue-ref']);
422+
expect(budget.maxTurns).toBe(40);
423+
expect(budget.timeoutMinutes).toBe(30);
424+
});
425+
});
426+
386427
// ── detectIssues (regression test after refactor) ────────────────────────────
387428

388429
describe('detectIssues', () => {

crux/pr-patrol/index.ts

Lines changed: 111 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -185,20 +185,27 @@ function markProcessed(key: number | string): void {
185185
);
186186
}
187187

188-
function getMaxTurnsFailCount(key: number | string): number {
189-
const file = join(STATE_DIR, `max-turns-${key}`);
190-
if (!existsSync(file)) return 0;
191-
return parseInt(readFileSync(file, 'utf-8').trim(), 10) || 0;
188+
function getFailCount(key: number | string): number {
189+
// Check both new and legacy file names for backwards compat
190+
const newFile = join(STATE_DIR, `failures-${key}`);
191+
const legacyFile = join(STATE_DIR, `max-turns-${key}`);
192+
if (existsSync(newFile)) {
193+
return parseInt(readFileSync(newFile, 'utf-8').trim(), 10) || 0;
194+
}
195+
if (existsSync(legacyFile)) {
196+
return parseInt(readFileSync(legacyFile, 'utf-8').trim(), 10) || 0;
197+
}
198+
return 0;
192199
}
193200

194-
function recordMaxTurnsFailure(key: number | string): number {
195-
const count = getMaxTurnsFailCount(key) + 1;
196-
writeFileSync(join(STATE_DIR, `max-turns-${key}`), String(count));
201+
function recordFailure(key: number | string): number {
202+
const count = getFailCount(key) + 1;
203+
writeFileSync(join(STATE_DIR, `failures-${key}`), String(count));
197204
return count;
198205
}
199206

200207
function isAbandoned(key: number | string): boolean {
201-
return getMaxTurnsFailCount(key) >= 2;
208+
return getFailCount(key) >= 2;
202209
}
203210

204211
// ── Main Branch CI Check ────────────────────────────────────────────────────
@@ -334,21 +341,27 @@ async function fixMainBranch(status: MainBranchStatus, config: PatrolConfig): Pr
334341
const elapsedS = Math.floor((Date.now() - startTime) / 1000);
335342

336343
if (result.timedOut) {
344+
const failCount = recordFailure(MAIN_BRANCH_KEY);
337345
outcome = 'timeout';
338-
reason = `Killed after ${config.timeoutMinutes}m timeout`;
339-
log(`${cl.red}✗ Main branch fix timed out after ${config.timeoutMinutes}m${cl.reset}`);
346+
reason = `Killed after ${config.timeoutMinutes}m timeout — attempt ${failCount}`;
347+
log(`${cl.red}✗ Main branch fix timed out after ${config.timeoutMinutes}m (attempt ${failCount})${cl.reset}`);
348+
349+
if (failCount >= 2) {
350+
reason = `Abandoned after ${failCount} failures (timeout)`;
351+
log(`${cl.red}✗ Main branch fix abandoned after ${failCount} failures${cl.reset}`);
352+
}
340353
} else if (result.exitCode === 0 && !result.hitMaxTurns) {
341354
outcome = 'fixed';
342355
log(`${cl.green}✓ Main branch CI fix processed${cl.reset} (${elapsedS}s)`);
343356
} else if (result.hitMaxTurns) {
344-
const failCount = recordMaxTurnsFailure(MAIN_BRANCH_KEY);
357+
const failCount = recordFailure(MAIN_BRANCH_KEY);
345358
outcome = 'max-turns';
346359
reason = `Hit max turns (${config.maxTurns}) — attempt ${failCount}`;
347360
log(`${cl.yellow}⚠ Main branch fix hit max turns after ${elapsedS}s${cl.reset}`);
348361

349362
if (failCount >= 2) {
350-
reason = `Abandoned after ${failCount} max-turns failures`;
351-
log(`${cl.red}✗ Main branch fix abandoned after ${failCount} max-turns failures${cl.reset}`);
363+
reason = `Abandoned after ${failCount} failures`;
364+
log(`${cl.red}✗ Main branch fix abandoned after ${failCount} failures${cl.reset}`);
352365
}
353366
} else {
354367
outcome = 'error';
@@ -821,6 +834,37 @@ const ISSUE_SCORES: Record<PrIssueType, number> = {
821834
'bot-review-nitpick': 15,
822835
};
823836

837+
// ── Issue-type-specific resource limits ──────────────────────────────────────
838+
// Scale max-turns and timeout based on the hardest issue in a PR.
839+
// This prevents trivial issues from consuming the full 40-turn / 30-min budget.
840+
841+
interface IssueBudget {
842+
maxTurns: number;
843+
timeoutMinutes: number;
844+
}
845+
846+
const ISSUE_BUDGETS: Record<PrIssueType, IssueBudget> = {
847+
conflict: { maxTurns: 40, timeoutMinutes: 30 },
848+
'ci-failure': { maxTurns: 25, timeoutMinutes: 15 },
849+
'bot-review-major': { maxTurns: 25, timeoutMinutes: 15 },
850+
'missing-issue-ref': { maxTurns: 5, timeoutMinutes: 3 },
851+
stale: { maxTurns: 10, timeoutMinutes: 5 },
852+
'missing-testplan': { maxTurns: 8, timeoutMinutes: 5 },
853+
'bot-review-nitpick':{ maxTurns: 8, timeoutMinutes: 5 },
854+
};
855+
856+
/** Compute the budget for a PR based on its hardest issue. */
857+
export function computeBudget(issues: PrIssueType[]): IssueBudget {
858+
let maxTurns = 5;
859+
let timeoutMinutes = 3;
860+
for (const issue of issues) {
861+
const budget = ISSUE_BUDGETS[issue];
862+
if (budget.maxTurns > maxTurns) maxTurns = budget.maxTurns;
863+
if (budget.timeoutMinutes > timeoutMinutes) timeoutMinutes = budget.timeoutMinutes;
864+
}
865+
return { maxTurns, timeoutMinutes };
866+
}
867+
824868
/** Pure function — computes priority score for a detected PR. */
825869
export function computeScore(pr: DetectedPr): number {
826870
let score = 0;
@@ -1045,9 +1089,12 @@ ${issues.join(', ')}
10451089
### CI Failure
10461090
- Check CI status: gh pr checks ${num} --repo ${repo}
10471091
- Read the failing check logs to understand the failure
1048-
- Fix the issue (build error, test failure, lint error)
1049-
- Run locally to verify: pnpm build and/or pnpm test
1050-
- Commit and push the fix`);
1092+
- **STOP IMMEDIATELY and report** if ANY of these apply:
1093+
- The check requires a human action (adding a label like \`rules-change-reviewed\`, manual approval, etc.)
1094+
- The failure is in a Vercel deployment or external service (not a code issue)
1095+
- The same check is also failing on the \`main\` branch (pre-existing, not caused by this PR)
1096+
- The failure is a permissions or authentication issue
1097+
- If the failure IS a code issue you can fix: fix it, run locally to verify (pnpm build / pnpm test), commit and push`);
10511098
}
10521099

10531100
if (issues.includes('missing-testplan')) {
@@ -1097,7 +1144,14 @@ ${issues.join(', ')}
10971144
- Use git push --force-with-lease (never --force) when pushing rebased branches
10981145
- Do not modify files unrelated to the fix
10991146
- Do NOT run /agent-session-start or /agent-session-ready-PR — this is a targeted fix, not a full session
1100-
- Do NOT create new branches — work on the existing PR branch`);
1147+
- Do NOT create new branches — work on the existing PR branch
1148+
1149+
## When to stop early
1150+
- **If the issue requires human intervention** (adding labels, approvals, external service fixes): output a clear summary of why and stop immediately. Do not attempt workarounds.
1151+
- **If the issue is pre-existing** (also failing on main, not introduced by this PR): state that and stop.
1152+
- **If you've tried 2+ approaches and none worked**: stop and summarize what you tried. Do not keep cycling through the same strategies.
1153+
- **If the fix is "no action needed"** (e.g., no matching issue exists for missing-issue-ref): say so and stop. Not every detected issue requires a code change.
1154+
- Stopping early with a clear explanation is BETTER than burning through all turns without progress.`);
11011155

11021156
return sections.join('\n');
11031157
}
@@ -1226,20 +1280,48 @@ async function fixPr(pr: ScoredPr, config: PatrolConfig): Promise<void> {
12261280

12271281
await claimPr(pr.number, config.repo);
12281282

1283+
// Compute issue-specific budget (capped by global config)
1284+
const budget = computeBudget(pr.issues);
1285+
const effectiveMaxTurns = Math.min(budget.maxTurns, config.maxTurns);
1286+
const effectiveTimeout = Math.min(budget.timeoutMinutes, config.timeoutMinutes);
1287+
1288+
log(` Budget: ${effectiveMaxTurns} max-turns, ${effectiveTimeout}m timeout (based on: ${pr.issues.join(', ')})`);
1289+
12291290
const prompt = buildPrompt(pr, config.repo);
12301291
const startTime = Date.now();
12311292

12321293
let outcome: FixOutcome = 'fixed';
12331294
let reason = '';
12341295

12351296
try {
1236-
const result = await spawnClaude(prompt, config);
1297+
const result = await spawnClaude(prompt, {
1298+
...config,
1299+
maxTurns: effectiveMaxTurns,
1300+
timeoutMinutes: effectiveTimeout,
1301+
});
12371302
const elapsedS = Math.floor((Date.now() - startTime) / 1000);
12381303

12391304
if (result.timedOut) {
1305+
// Timeouts count toward abandonment — a PR that times out repeatedly
1306+
// is likely unfixable and should not keep burning compute.
1307+
const failCount = recordFailure(pr.number);
12401308
outcome = 'timeout';
1241-
reason = `Killed after ${config.timeoutMinutes}m timeout`;
1242-
log(`${cl.red}✗ PR #${pr.number} timed out after ${config.timeoutMinutes}m${cl.reset}`);
1309+
reason = `Killed after ${effectiveTimeout}m timeout — attempt ${failCount}`;
1310+
log(`${cl.red}✗ PR #${pr.number} timed out after ${effectiveTimeout}m (attempt ${failCount})${cl.reset}`);
1311+
1312+
if (failCount >= 2) {
1313+
reason = `Abandoned after ${failCount} failures (timeout)`;
1314+
log(`${cl.red}✗ PR #${pr.number} abandoned after ${failCount} consecutive failures${cl.reset}`);
1315+
await githubApi(
1316+
`/repos/${config.repo}/issues/${pr.number}/comments`,
1317+
{
1318+
method: 'POST',
1319+
body: {
1320+
body: `🤖 **PR Patrol**: Abandoning automatic fix after ${failCount} failed attempts (timed out each time).\n\n**Issues detected**: ${pr.issues.join(', ')}\n**Last attempt**: ${elapsedS}s, ${effectiveTimeout}m timeout\n\nThis PR likely needs human intervention to resolve.`,
1321+
},
1322+
},
1323+
).catch(() => log(' Warning: could not post abandonment comment'));
1324+
}
12431325
} else if (result.exitCode === 0 && !result.hitMaxTurns) {
12441326
log(`${cl.green}✓ PR #${pr.number} processed successfully${cl.reset} (${elapsedS}s)`);
12451327
outcome = 'fixed';
@@ -1250,27 +1332,27 @@ async function fixPr(pr: ScoredPr, config: PatrolConfig): Promise<void> {
12501332
await githubApi(`/repos/${config.repo}/issues/${pr.number}/comments`, {
12511333
method: 'POST',
12521334
body: {
1253-
body: `🤖 **PR Patrol** ran for ${elapsedS}s (${config.maxTurns} max turns, model: ${config.model}).\n\n**Issues detected**: ${pr.issues.join(', ')}\n\n**Result**:\n${summary}`,
1335+
body: `🤖 **PR Patrol** ran for ${elapsedS}s (${effectiveMaxTurns} max turns, model: ${config.model}).\n\n**Issues detected**: ${pr.issues.join(', ')}\n\n**Result**:\n${summary}`,
12541336
},
12551337
}).catch(() => log(' Warning: could not post summary comment'));
12561338
}
12571339
} else if (result.hitMaxTurns) {
1258-
const failCount = recordMaxTurnsFailure(pr.number);
1340+
const failCount = recordFailure(pr.number);
12591341
outcome = 'max-turns';
1260-
reason = `Hit max turns (${config.maxTurns}) — attempt ${failCount}`;
1261-
log(`${cl.yellow}⚠ PR #${pr.number} hit max turns after ${elapsedS}s${cl.reset}`);
1342+
reason = `Hit max turns (${effectiveMaxTurns}) — attempt ${failCount}`;
1343+
log(`${cl.yellow}⚠ PR #${pr.number} hit max turns after ${elapsedS}s (attempt ${failCount})${cl.reset}`);
12621344

12631345
if (failCount >= 2) {
1264-
reason = `Abandoned after ${failCount} max-turns failures`;
1346+
reason = `Abandoned after ${failCount} failures`;
12651347
log(
1266-
`${cl.red}✗ PR #${pr.number} abandoned after ${failCount} max-turns failures${cl.reset}`,
1348+
`${cl.red}✗ PR #${pr.number} abandoned after ${failCount} consecutive failures${cl.reset}`,
12671349
);
12681350
await githubApi(
12691351
`/repos/${config.repo}/issues/${pr.number}/comments`,
12701352
{
12711353
method: 'POST',
12721354
body: {
1273-
body: `🤖 **PR Patrol**: Abandoning automatic fix after ${failCount} failed attempts (hit max turns each time).\n\n**Issues detected**: ${pr.issues.join(', ')}\n**Last attempt**: ${elapsedS}s, ${config.maxTurns} turns\n\nThis PR likely needs human intervention to resolve.`,
1355+
body: `🤖 **PR Patrol**: Abandoning automatic fix after ${failCount} failed attempts.\n\n**Issues detected**: ${pr.issues.join(', ')}\n**Last attempt**: ${elapsedS}s, ${effectiveMaxTurns} turns\n\nThis PR likely needs human intervention to resolve.`,
12741356
},
12751357
},
12761358
).catch(() => log(' Warning: could not post abandonment comment'));
@@ -1364,6 +1446,8 @@ ${recentEntries}
13641446
const result = await spawnClaude(prompt, {
13651447
...config,
13661448
maxTurns: 10, // Reflection needs fewer turns
1449+
model: 'haiku', // Reflection is log analysis — doesn't need sonnet
1450+
timeoutMinutes: 5, // Should complete quickly
13671451
});
13681452
const elapsedS = Math.floor((Date.now() - startTime) / 1000);
13691453
const filedIssue = /Created issue #|created.*#\d/.test(result.output);

0 commit comments

Comments
 (0)