Skip to content

Commit 2b66f57

Browse files
Retry on Fedora CI Koji build submission failure (#3040)
Retry on Fedora CI Koji build submission failure

Fixes #3003

Assisted-by: Claude Opus 4.6 <noreply@anthropic.com>

RELEASE NOTES BEGIN
Packit now retries on build submission failure for Fedora CI Koji scratch builds.
RELEASE NOTES END

Reviewed-by: gemini-code-assist[bot]
Reviewed-by: Nikola Forró
2 parents e04b0fa + 4341d54 commit 2b66f57

File tree

2 files changed: 74 additions and 1 deletion.

packit_service/worker/handlers/distgit.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
from packit_service import sentry_integration
3333
from packit_service.config import ServiceConfig
3434
from packit_service.constants import (
35+
BASE_RETRY_INTERVAL_IN_MINUTES_FOR_OUTAGES,
3536
CONTACTS_URL,
3637
DEFAULT_RETRY_BACKOFF,
3738
MSG_DOWNSTREAM_JOB_ERROR_HEADER,
@@ -937,6 +938,29 @@ def _run(self) -> TaskResults:
937938
)
938939
self.pushgateway.fedora_ci_koji_builds_queued.inc()
939940
except Exception as ex:
941+
if (
942+
isinstance(ex, PackitCommandFailedError)
943+
and self.celery_task
944+
and not self.celery_task.is_last_try()
945+
):
946+
koji_build.set_status("retry")
947+
interval = BASE_RETRY_INTERVAL_IN_MINUTES_FOR_OUTAGES * 2**self.celery_task.retries
948+
self.report(
949+
commit_status=BaseCommitStatus.pending,
950+
description="Failed to submit the build. The task will be"
951+
f" retried in {interval} {'minute' if interval == 1 else 'minutes'}.",
952+
url=get_koji_build_info_url(koji_build.id),
953+
)
954+
kargs = self.celery_task.task.request.kwargs.copy()
955+
self.celery_task.retry(delay=interval * 60, kargs=kargs)
956+
return TaskResults(
957+
success=True,
958+
details={
959+
"msg": "Task will be retried because of failure"
960+
f" when submitting the build: {ex}",
961+
},
962+
)
963+
940964
sentry_integration.send_to_sentry(ex)
941965
self.report(
942966
commit_status=BaseCommitStatus.error,

tests/integration/test_koji_build_cancel.py

Lines changed: 50 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from ogr.services.pagure import PagureProject
1111
from packit.api import PackitAPI
1212
from packit.config import Deployment, JobConfigTriggerType
13-
from packit.exceptions import PackitException
13+
from packit.exceptions import PackitCommandFailedError, PackitException
1414
from packit.local_project import LocalProjectBuilder
1515
from packit.utils import commands
1616

@@ -244,6 +244,55 @@ def test_downstream_koji_scratch_build_cancel_running(mock_distgit_pr_functional
244244
assert first_dict_value(results["job"])["success"]
245245

246246

247+
def test_downstream_koji_scratch_build_retry_on_submission_failure(
248+
mock_distgit_pr_functionality,
249+
):
250+
"""Test that DownstreamKojiScratchBuildHandler retries on build submission failure.
251+
252+
Simulates a Koji CLI failure (e.g. network issue, Koji outage) and verifies
253+
the handler sets retry status and schedules a retry via Celery.
254+
"""
255+
flexmock(PackitAPI).should_receive("init_kerberos_ticket")
256+
koji_build_target = flexmock(
257+
id=123,
258+
target="main",
259+
status="queued",
260+
)
261+
koji_build_target.should_receive("set_status").with_args("retry").once()
262+
koji_build_target.should_receive("set_task_id")
263+
koji_build_target.should_receive("set_web_url")
264+
koji_build_target.should_receive("set_build_logs_urls")
265+
koji_build_target.should_receive("set_data")
266+
koji_build_target.should_receive("set_build_submission_stdout")
267+
flexmock(KojiBuildTargetModel).should_receive("create").and_return(koji_build_target)
268+
flexmock(KojiBuildGroupModel).should_receive("create").and_return(
269+
flexmock(grouped_targets=[koji_build_target]),
270+
)
271+
272+
# Simulate koji CLI failure
273+
flexmock(commands).should_receive("run_command_remote").and_raise(
274+
PackitCommandFailedError,
275+
"Command failed",
276+
stdout_output="",
277+
stderr_output="koji: AuthError: unable to obtain a session",
278+
)
279+
280+
# Mock the Celery task's retry method to prevent actual retry and verify it's called
281+
flexmock(run_downstream_koji_scratch_build_handler).should_receive("retry").once()
282+
283+
processing_results = SteveJobs().process_message(mock_distgit_pr_functionality)
284+
event_dict, _, job_config, package_config = get_parameters_from_results(
285+
processing_results[:1],
286+
)
287+
results = run_downstream_koji_scratch_build_handler(
288+
package_config=package_config,
289+
event=event_dict,
290+
job_config=job_config,
291+
)
292+
293+
assert first_dict_value(results["job"])["success"]
294+
295+
247296
def test_downstream_koji_build_cancel_running(monkeypatch):
248297
"""Test that DownstreamKojiBuildHandler calls cancel_running_builds.
249298

0 commit comments

Comments
 (0)