45 changes: 38 additions & 7 deletions mod_ci/controllers.py
@@ -584,22 +584,28 @@ def mark_test_failed(db, test, repository, message: str) -> bool:
# Continue to try GitHub update even if DB fails

# Step 2: Try to update GitHub status (CRITICAL - must not be skipped)
# Use retry logic since this is critical to prevent stuck "pending" status
try:
gh_commit = repository.get_commit(test.commit)
# Include target_url so the status links to the test page
# Build target_url first (doesn't need retry)
from flask import url_for
try:
target_url = url_for('test.by_id', test_id=test.id, _external=True)
except RuntimeError:
# Outside of request context
target_url = f"https://sampleplatform.ccextractor.org/test/{test.id}"
update_status_on_github(gh_commit, Status.ERROR, message, f"CI - {test.platform.value}", target_url)

# Use retry_with_backoff for GitHub API calls
def update_github_status():
gh_commit = repository.get_commit(test.commit)
update_status_on_github(gh_commit, Status.ERROR, message, f"CI - {test.platform.value}", target_url)

retry_with_backoff(update_github_status, max_retries=3, initial_backoff=2.0)
github_success = True
log.info(f"Test {test.id}: GitHub status updated to ERROR: {message}")
except GithubException as e:
log.error(f"Test {test.id}: GitHub API error while updating status: {e.status} - {e.data}")
log.error(f"Test {test.id}: GitHub API error while updating status (after retries): {e.status} - {e.data}")
except Exception as e:
log.error(f"Test {test.id}: Failed to update GitHub status: {type(e).__name__}: {e}")
log.error(f"Test {test.id}: Failed to update GitHub status (after retries): {type(e).__name__}: {e}")

# Log final status
if db_success and github_success:
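
For context, a minimal sketch of what the retry_with_backoff helper used above might look like. This is an assumption based solely on the call site (func, max_retries=3, initial_backoff=2.0); the actual helper in mod_ci is not shown in this diff and may retry only specific exception types or log each attempt.

import time

def retry_with_backoff(func, max_retries=3, initial_backoff=2.0):
    """Call func, retrying with exponentially growing delays on failure (sketch only)."""
    backoff = initial_backoff
    for attempt in range(1, max_retries + 1):
        try:
            return func()
        except Exception:
            if attempt == max_retries:
                raise  # retries exhausted; let the caller's except blocks handle it
            time.sleep(backoff)
            backoff *= 2  # double the wait before the next attempt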
@@ -663,8 +669,33 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
f"'{workflow_run.conclusion}'. Check the GitHub Actions logs for details.")
return (message, False) # Not retryable
else:
# Build succeeded but artifact not found - may have expired
# This is a PERMANENT failure
# Build succeeded but artifact not found
# Check if the build completed very recently - if so, this might be
# GitHub API propagation delay (artifact exists but not visible yet)
# In that case, treat as retryable
ARTIFACT_PROPAGATION_GRACE_PERIOD = 300 # 5 minutes in seconds
try:
from datetime import datetime, timezone
now = datetime.now(timezone.utc)
# workflow_run.updated_at is when the run completed
if workflow_run.updated_at:
completed_at = workflow_run.updated_at
if completed_at.tzinfo is None:
completed_at = completed_at.replace(tzinfo=timezone.utc)
seconds_since_completion = (now - completed_at).total_seconds()
if seconds_since_completion < ARTIFACT_PROPAGATION_GRACE_PERIOD:
message = (f"Build completed recently ({int(seconds_since_completion)}s ago): "
f"'{expected_workflow}' succeeded but artifact not yet visible. "
f"Will retry (GitHub API propagation delay).")
log.info(f"Artifact not found but build completed {int(seconds_since_completion)}s ago - "
f"treating as retryable (possible API propagation delay)")
return (message, True) # Retryable - API propagation delay
except Exception as e:
log.warning(f"Could not check workflow completion time: {e}")
# Fall through to permanent failure

# Build completed more than 5 minutes ago - artifact should be visible
# This is a PERMANENT failure (artifact expired or not uploaded)
message = (f"Artifact not found: '{expected_workflow}' completed successfully, "
f"but no artifact was found. The artifact may have expired (GitHub deletes "
f"artifacts after a retention period) or was not uploaded properly.")
90 changes: 90 additions & 0 deletions tests/test_ci/test_controllers.py
@@ -3094,6 +3094,35 @@ def test_mark_test_failed_includes_target_url(self, mock_progress, mock_update_g
self.assertEqual(len(call_args[0]), 5) # 5 positional args including target_url
self.assertIn("456", call_args[0][4]) # target_url contains test ID

@mock.patch('mod_ci.controllers.update_status_on_github')
@mock.patch('mod_ci.controllers.TestProgress')
@mock.patch('mod_ci.controllers.retry_with_backoff')
def test_mark_test_failed_uses_retry_for_github(self, mock_retry, mock_progress, mock_update_github):
"""Test that mark_test_failed uses retry_with_backoff for GitHub status update."""
from mod_ci.controllers import mark_test_failed

db = MagicMock()
test = MagicMock()
test.id = 789
test.commit = "abc123"
test.platform.value = "linux"

repository = MagicMock()
gh_commit = MagicMock()
repository.get_commit.return_value = gh_commit

# Make retry_with_backoff call the function it receives
mock_retry.side_effect = lambda func, **kwargs: func()

with mock.patch('run.log'):
mark_test_failed(db, test, repository, "Test error")

# Verify retry_with_backoff was called with correct parameters
mock_retry.assert_called_once()
call_kwargs = mock_retry.call_args[1]
self.assertEqual(call_kwargs['max_retries'], 3)
self.assertEqual(call_kwargs['initial_backoff'], 2.0)


class TestParseGcpError(unittest.TestCase):
"""Tests for the parse_gcp_error helper function."""
@@ -3361,6 +3390,67 @@ def test_artifact_expired_is_not_retryable(self):
self.assertIn("Artifact not found", message)
self.assertIn("expired", message)

def test_recently_completed_build_is_retryable(self):
"""Test that recently completed build (within grace period) returns retryable=True."""
from datetime import datetime, timedelta, timezone

from mod_ci.controllers import _diagnose_missing_artifact

repository = MagicMock()
log = MagicMock()

# Mock workflow
workflow = MagicMock()
workflow.id = 1
workflow.name = "Build CCExtractor on Linux"
repository.get_workflows.return_value = [workflow]

# Mock workflow run - completed successfully but very recently (1 minute ago)
workflow_run = MagicMock()
workflow_run.workflow_id = 1
workflow_run.status = "completed"
workflow_run.conclusion = "success"
workflow_run.updated_at = datetime.now(timezone.utc) - timedelta(seconds=60)
repository.get_workflow_runs.return_value = [workflow_run]

message, is_retryable = _diagnose_missing_artifact(
repository, "abc123", TestPlatform.linux, log
)

self.assertTrue(is_retryable)
self.assertIn("Build completed recently", message)
self.assertIn("Will retry", message)

def test_old_completed_build_is_not_retryable(self):
"""Test that build completed more than grace period ago returns retryable=False."""
from datetime import datetime, timedelta, timezone

from mod_ci.controllers import _diagnose_missing_artifact

repository = MagicMock()
log = MagicMock()

# Mock workflow
workflow = MagicMock()
workflow.id = 1
workflow.name = "Build CCExtractor on Linux"
repository.get_workflows.return_value = [workflow]

# Mock workflow run - completed 10 minutes ago (beyond grace period)
workflow_run = MagicMock()
workflow_run.workflow_id = 1
workflow_run.status = "completed"
workflow_run.conclusion = "success"
workflow_run.updated_at = datetime.now(timezone.utc) - timedelta(seconds=600)
repository.get_workflow_runs.return_value = [workflow_run]

message, is_retryable = _diagnose_missing_artifact(
repository, "abc123", TestPlatform.linux, log
)

self.assertFalse(is_retryable)
self.assertIn("Artifact not found", message)

def test_no_workflow_run_is_retryable(self):
"""Test that missing workflow run returns retryable=True."""
from mod_ci.controllers import _diagnose_missing_artifact