Skip to content

Commit cdf5dba

Browse files
committed
Fix manual rebase polling
1 parent 3dde946 commit cdf5dba

File tree

6 files changed

+289
-78
lines changed

6 files changed

+289
-78
lines changed

src/aieng_bot_maintain/auto_merger/pr_processor.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def _trigger_rebase(self, pr: PRQueueItem) -> bool:
131131
1. Dependabot comments ("already up-to-date", error messages)
132132
2. PR head SHA changes (indicates successful force-push)
133133
134-
For pre-commit.ci PRs, skips rebase (not supported) and proceeds directly.
134+
For pre-commit.ci PRs, manual rebase is synchronous - no polling needed.
135135
136136
Parameters
137137
----------
@@ -156,7 +156,7 @@ def _trigger_rebase(self, pr: PRQueueItem) -> bool:
156156
pr.last_updated = datetime.now(UTC).isoformat()
157157
return False
158158

159-
# Get current head SHA before triggering rebase
159+
# Get current head SHA before triggering rebase (for Dependabot polling)
160160
initial_head_sha = self.workflow_client.get_pr_head_sha(pr)
161161
if not initial_head_sha:
162162
pr.error_message = "Failed to get PR head SHA"
@@ -167,7 +167,9 @@ def _trigger_rebase(self, pr: PRQueueItem) -> bool:
167167
log_info(f"Current head SHA: {initial_head_sha[:7]}")
168168

169169
# Trigger bot-specific rebase
170-
if not self.workflow_client.trigger_rebase(pr):
170+
success, new_sha, sha_changed = self.workflow_client.trigger_rebase(pr)
171+
172+
if not success:
171173
pr.error_message = "Failed to trigger rebase"
172174
pr.status = PRStatus.FAILED
173175
pr.last_updated = datetime.now(UTC).isoformat()
@@ -177,9 +179,25 @@ def _trigger_rebase(self, pr: PRQueueItem) -> bool:
177179
pr.rebase_started_at = datetime.now(UTC).isoformat()
178180
pr.last_updated = datetime.now(UTC).isoformat()
179181

180-
# Poll for rebase completion
181-
# - Dependabot: polls for comments + SHA changes
182-
# - pre-commit.ci: polls for SHA changes (manual rebase via git)
182+
# For manual rebases (pre-commit.ci), rebase completed synchronously
183+
if new_sha is not None:
184+
if sha_changed:
185+
log_success(
186+
f"Rebase completed (SHA changed: {initial_head_sha[:7]}{new_sha[:7]})"
187+
)
188+
# Wait longer for CI to start checks after new commits
189+
log_info("Waiting 45s for CI to trigger checks after rebase...")
190+
time.sleep(45)
191+
else:
192+
log_success(
193+
"Branch already up-to-date with base, proceeding to check monitoring"
194+
)
195+
# Brief wait for API to update, then check existing checks
196+
log_info("Waiting 15s for GitHub API to update...")
197+
time.sleep(15)
198+
return False # Proceed to check monitoring
199+
200+
# For async rebases (Dependabot), poll for completion
183201
return self._poll_rebase_completion(pr, initial_head_sha)
184202

185203
def _poll_rebase_completion(self, pr: PRQueueItem, initial_head_sha: str) -> bool:

src/aieng_bot_maintain/auto_merger/status_poller.py

Lines changed: 122 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,97 @@ def is_check_failed(check: dict) -> bool:
286286
log_warning(" Mergeable status still UNKNOWN after retries")
287287
return all_passed, has_failures, "UNKNOWN"
288288

289+
def _handle_no_checks(
290+
self, rollup: list[dict], attempt: int, check_interval: int
291+
) -> CheckStatus | None:
292+
"""Handle case when no checks are found.
293+
294+
Parameters
295+
----------
296+
rollup : list[dict]
297+
Raw check rollup from GitHub API.
298+
attempt : int
299+
Current attempt number.
300+
check_interval : int
301+
Seconds to wait between checks.
302+
303+
Returns
304+
-------
305+
CheckStatus | None
306+
"NO_CHECKS" if no checks found after initial attempts, None to continue.
307+
308+
"""
309+
if not rollup:
310+
if attempt > 2: # Give checks time to start
311+
log_warning(" No checks found")
312+
return "NO_CHECKS"
313+
time.sleep(check_interval)
314+
return None
315+
316+
# Filter out phantom checks
317+
relevant_checks = [c for c in rollup if self._should_check_be_counted(c)]
318+
if not relevant_checks:
319+
if attempt > 2:
320+
log_warning(" No relevant checks found")
321+
return "NO_CHECKS"
322+
time.sleep(check_interval)
323+
return None
324+
325+
# Continue with relevant checks
326+
return None
327+
328+
def _evaluate_check_status(
329+
self,
330+
relevant_checks: list[dict],
331+
stable_count_duration: int,
332+
min_stable_duration: int,
333+
) -> CheckStatus | None:
334+
"""Evaluate status of relevant checks.
335+
336+
Parameters
337+
----------
338+
relevant_checks : list[dict]
339+
Filtered checks to evaluate.
340+
stable_count_duration : int
341+
How long check count has been stable (seconds).
342+
min_stable_duration : int
343+
Minimum stability duration before declaring failure (seconds).
344+
345+
Returns
346+
-------
347+
CheckStatus | None
348+
Final status if determined, None to continue waiting.
349+
350+
"""
351+
any_running = any(self._is_check_running(c) for c in relevant_checks)
352+
any_failed = any(self._is_check_failed(c) for c in relevant_checks)
353+
all_passed = all(self._is_check_passed(c) for c in relevant_checks)
354+
all_finalized = all(self._has_finalized_conclusion(c) for c in relevant_checks)
355+
356+
if not any_running and all_finalized:
357+
if any_failed:
358+
# Wait for check count to stabilize before declaring failure
359+
if stable_count_duration >= min_stable_duration:
360+
log_error(" Checks failed")
361+
return "FAILED"
362+
log_info(
363+
f" ⏳ Some checks failed, but waiting for check count to "
364+
f"stabilize ({stable_count_duration}s/{min_stable_duration}s) "
365+
"to ensure all checks have appeared..."
366+
)
367+
elif all_passed:
368+
log_success(" Checks completed successfully")
369+
return "COMPLETED"
370+
else:
371+
log_info(" ⏳ Checks finalized but not all passed, waiting...")
372+
elif not any_running and not all_finalized:
373+
log_info(
374+
" ⏳ Checks appear done but conclusions not finalized yet, "
375+
"waiting..."
376+
)
377+
378+
return None
379+
289380
def wait_for_checks_completion(
290381
self,
291382
pr: PRQueueItem,
@@ -296,6 +387,10 @@ def wait_for_checks_completion(
296387
Polls every 30 seconds up to timeout_minutes.
297388
Similar to fix-remote-pr.yml:603-673.
298389
390+
To prevent premature failure detection, we track check count stability.
391+
We only return "FAILED" after the check count has been stable for 60s,
392+
ensuring all checks have appeared in GitHub's API.
393+
299394
Parameters
300395
----------
301396
pr : PRQueueItem
@@ -311,6 +406,11 @@ def wait_for_checks_completion(
311406
"""
312407
check_interval = 30
313408
max_attempts = (timeout_minutes * 60) // check_interval
409+
min_stable_duration = 60
410+
411+
# Track check count stability to ensure all checks have appeared
412+
last_check_count = 0
413+
stable_count_duration = 0
314414

315415
log_info(
316416
f" ⏳ Waiting up to {timeout_minutes} minutes for checks to complete..."
@@ -335,48 +435,37 @@ def wait_for_checks_completion(
335435
data = json.loads(status_json)
336436
rollup = data.get("statusCheckRollup") or []
337437

338-
if not rollup:
339-
if attempt > 2: # Give checks time to start
340-
log_warning(" No checks found")
341-
return "NO_CHECKS"
342-
time.sleep(check_interval)
438+
# Handle no checks case
439+
result = self._handle_no_checks(rollup, attempt, check_interval)
440+
if result == "NO_CHECKS":
441+
return result
442+
if result is None and not rollup:
343443
continue
344444

345-
# Filter out checks that should be ignored (phantom entries)
445+
# Get relevant checks (already filtered by _handle_no_checks)
346446
relevant_checks = [c for c in rollup if self._should_check_be_counted(c)]
347-
348447
if not relevant_checks:
349-
if attempt > 2: # Give checks time to start
350-
log_warning(" No relevant checks found")
351-
return "NO_CHECKS"
352-
time.sleep(check_interval)
353448
continue
354449

355-
# Check status of relevant checks
356-
any_running = any(self._is_check_running(c) for c in relevant_checks)
357-
any_failed = any(self._is_check_failed(c) for c in relevant_checks)
358-
all_passed = all(self._is_check_passed(c) for c in relevant_checks)
359-
all_finalized = all(
360-
self._has_finalized_conclusion(c) for c in relevant_checks
361-
)
362-
363-
if not any_running and all_finalized:
364-
if any_failed:
365-
log_error(" Checks failed")
366-
return "FAILED"
367-
if all_passed:
368-
log_success(" Checks completed successfully")
369-
return "COMPLETED"
370-
# Checks are finalized but not all passed - still waiting
371-
log_info(" ⏳ Checks finalized but not all passed, waiting...")
372-
373-
# Debug: Show why we're still waiting
374-
if not any_running and not all_finalized:
450+
# Track check count stability
451+
current_check_count = len(relevant_checks)
452+
if current_check_count == last_check_count:
453+
stable_count_duration += check_interval
454+
else:
455+
stable_count_duration = 0
456+
last_check_count = current_check_count
375457
log_info(
376-
" ⏳ Checks appear done but conclusions not finalized yet, "
377-
"waiting..."
458+
f" Check count changed to {current_check_count}, "
459+
"resetting stability timer"
378460
)
379461

462+
# Evaluate check status
463+
status = self._evaluate_check_status(
464+
relevant_checks, stable_count_duration, min_stable_duration
465+
)
466+
if status:
467+
return status
468+
380469
if attempt < max_attempts:
381470
time.sleep(check_interval)
382471

src/aieng_bot_maintain/auto_merger/workflow_client.py

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,11 @@ def get_pr_head_sha(self, pr: PRQueueItem) -> str | None:
157157
log_error(f"Failed to get PR head SHA: {e}")
158158
return None
159159

160-
def trigger_rebase(self, pr: PRQueueItem) -> bool:
160+
def trigger_rebase(self, pr: PRQueueItem) -> tuple[bool, str | None, bool]:
161161
"""Trigger bot-specific rebase command.
162162
163-
For Dependabot: Post @dependabot rebase comment
164-
For pre-commit.ci: Manually rebase via git operations
163+
For Dependabot: Post @dependabot rebase comment (async)
164+
For pre-commit.ci: Manually rebase via git operations (sync)
165165
166166
Parameters
167167
----------
@@ -170,11 +170,14 @@ def trigger_rebase(self, pr: PRQueueItem) -> bool:
170170
171171
Returns
172172
-------
173-
bool
174-
True on success, False on failure.
173+
tuple[bool, str | None, bool]
174+
(success, new_sha, sha_changed) where:
175+
- success: True if rebase triggered/succeeded, False on failure
176+
- new_sha: New head SHA (only for manual rebases, None for async)
177+
- sha_changed: True if SHA changed (only for manual rebases, True for async to trigger polling)
175178
176179
"""
177-
# Dependabot PRs use comment-based rebase
180+
# Dependabot PRs use comment-based rebase (async)
178181
if pr.pr_author == "app/dependabot":
179182
try:
180183
self._run_gh_command(
@@ -190,21 +193,21 @@ def trigger_rebase(self, pr: PRQueueItem) -> bool:
190193
]
191194
)
192195
log_success(f" Rebase triggered for {pr.repo}#{pr.pr_number}")
193-
return True
196+
return (True, None, True) # Async, will poll for SHA change
194197
except subprocess.CalledProcessError as e:
195198
log_error(f" Failed to trigger rebase: {e}")
196-
return False
199+
return (False, None, False)
197200

198-
# pre-commit.ci PRs use manual git rebase
201+
# pre-commit.ci PRs use manual git rebase (sync)
199202
if pr.pr_author == "app/pre-commit-ci":
200203
log_info(" Manually rebasing pre-commit.ci PR via git operations")
201204
return self._manual_rebase(pr)
202205

203206
# Unknown bot author
204207
log_error(f" Unknown bot author: {pr.pr_author}, cannot rebase")
205-
return False
208+
return (False, None, False)
206209

207-
def _manual_rebase(self, pr: PRQueueItem) -> bool:
210+
def _manual_rebase(self, pr: PRQueueItem) -> tuple[bool, str | None, bool]:
208211
"""Manually rebase a PR branch via git operations.
209212
210213
Clones the repo, checks out PR branch, rebases onto base, and force-pushes.
@@ -216,8 +219,11 @@ def _manual_rebase(self, pr: PRQueueItem) -> bool:
216219
217220
Returns
218221
-------
219-
bool
220-
True on success, False on failure.
222+
tuple[bool, str | None, bool]
223+
(success, new_sha, sha_changed) where:
224+
- success: True if rebase succeeded, False on failure
225+
- new_sha: New head SHA after rebase (None on failure)
226+
- sha_changed: True if rebase created new commits, False if already up-to-date
221227
222228
"""
223229
try:
@@ -313,6 +319,16 @@ def _manual_rebase(self, pr: PRQueueItem) -> bool:
313319
capture_output=True,
314320
)
315321

322+
# Get SHA before rebase
323+
result = subprocess.run(
324+
["git", "rev-parse", "HEAD"],
325+
cwd=repo_dir,
326+
check=True,
327+
capture_output=True,
328+
text=True,
329+
)
330+
sha_before = result.stdout.strip()
331+
316332
# Fetch latest base branch
317333
subprocess.run(
318334
["git", "fetch", "origin", base_ref],
@@ -330,7 +346,26 @@ def _manual_rebase(self, pr: PRQueueItem) -> bool:
330346
capture_output=True,
331347
)
332348

333-
# Force push to PR branch
349+
# Get SHA after rebase
350+
result = subprocess.run(
351+
["git", "rev-parse", "HEAD"],
352+
cwd=repo_dir,
353+
check=True,
354+
capture_output=True,
355+
text=True,
356+
)
357+
sha_after = result.stdout.strip()
358+
359+
# Check if rebase created new commits
360+
sha_changed = sha_after != sha_before
361+
362+
if not sha_changed:
363+
log_success(
364+
f" Branch already up-to-date with {base_ref}, no rebase needed"
365+
)
366+
return (True, sha_after, False)
367+
368+
# Force push to PR branch (only if SHA changed)
334369
# Using --force instead of --force-with-lease because the shallow clone
335370
# doesn't set up remote tracking properly, causing "stale info" errors.
336371
# This is safe because we just fetched and we're the only ones modifying
@@ -344,16 +379,16 @@ def _manual_rebase(self, pr: PRQueueItem) -> bool:
344379
)
345380

346381
log_success(f" Successfully rebased {pr.repo}#{pr.pr_number}")
347-
return True
382+
return (True, sha_after, True)
348383

349384
except subprocess.CalledProcessError as e:
350385
log_error(f" Failed to manually rebase: {e}")
351386
if e.stderr:
352387
log_error(f" Error output: {e.stderr.decode()}")
353-
return False
388+
return (False, None, False)
354389
except Exception as e:
355390
log_error(f" Unexpected error during manual rebase: {e}")
356-
return False
391+
return (False, None, False)
357392

358393
def trigger_fix_workflow(self, pr: PRQueueItem) -> str | None:
359394
"""Trigger fix-remote-pr.yml workflow.

0 commit comments

Comments
 (0)