diff --git a/.github/workflows/dev_container.yml b/.github/workflows/dev_container.yml
index 8540a2e5b..4f9f0c425 100644
--- a/.github/workflows/dev_container.yml
+++ b/.github/workflows/dev_container.yml
@@ -29,11 +29,35 @@ jobs:
           registry: quay.io
           username: ${{ secrets.QUAY_USERNAME }}
           password: ${{ secrets.QUAY_ROBOT_TOKEN }}
+      - name: Prepare image tag
+        id: tag
+        env:
+          # Hand the refs to the script through the environment instead of
+          # interpolating them into it: branch names are untrusted input.
+          EVENT_NAME: ${{ github.event_name }}
+          HEAD_REF: ${{ github.head_ref }}
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          # Derive a safe tag: prefer head ref for PRs, otherwise ref name.
+          if [ "$EVENT_NAME" = "pull_request" ]; then
+            raw_tag="$HEAD_REF"
+          else
+            raw_tag="$REF_NAME"
+          fi
+          # Replace any characters invalid in container tags with '-'
+          safe_tag=$(printf '%s' "$raw_tag" | sed -E 's#[^A-Za-z0-9_.-]#-#g')
+          # Tags may not start with '.' or '-' and are limited to 128 chars.
+          safe_tag=$(printf '%s' "$safe_tag" | sed -E 's#^[.-]+##' | cut -c1-128)
+          # Avoid empty tag; fallback to short SHA
+          if [ -z "$safe_tag" ]; then
+            safe_tag=${GITHUB_SHA::7}
+          fi
+          echo "safe_tag=$safe_tag" >> "$GITHUB_OUTPUT"
       - name: Build and push
         uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4
         env:
           QUAY_URI: quay.io/ceph-infra/teuthology-dev
-          QUAY_TAG: ${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}
+          QUAY_TAG: ${{ steps.tag.outputs.safe_tag }}
         with:
           context: .
file: containers/teuthology-dev/Dockerfile diff --git a/teuthology/kill.py b/teuthology/kill.py index 137e49080..29a664765 100755 --- a/teuthology/kill.py +++ b/teuthology/kill.py @@ -76,7 +76,7 @@ def kill_run(run_name, archive_base=None, owner=None, machine_type=None, targets = find_targets(run_name) names = list(targets.keys()) lock_ops.unlock_safe(names, owner, run_name) - report.try_mark_run_dead(run_name) + report.try_mark_run_dead(run_name, reason="killed by user") def kill_job(run_name, job_id, archive_base=None, owner=None, skip_unlock=False): @@ -93,7 +93,7 @@ def kill_job(run_name, job_id, archive_base=None, owner=None, skip_unlock=False) owner = job_info['owner'] if kill_processes(run_name, [job_info.get('pid')]): return - report.try_push_job_info(job_info, dict(status="dead")) + report.try_push_job_info(job_info, dict(status="dead", failure_reason="killed by user")) if 'machine_type' in job_info: teuthology.exporter.JobResults().record( machine_type=job_info["machine_type"], diff --git a/teuthology/report.py b/teuthology/report.py index f0a447201..3f0b3b089 100644 --- a/teuthology/report.py +++ b/teuthology/report.py @@ -566,7 +566,7 @@ def try_delete_job(job_id): try_delete_job(job_id) -def try_mark_run_dead(run_name): +def try_mark_run_dead(run_name, reason=None): """ Using the same error checking and retry mechanism as try_push_job_info(), mark any unfinished runs as dead. 
@@ -578,18 +578,26 @@ def try_mark_run_dead(run_name): if not reporter.base_uri: return - log.debug("Marking run as dead: {name}".format(name=run_name)) + log.debug("Marking run as dead: {name} reason={reason}".format(name=run_name, reason=reason)) jobs = reporter.get_jobs(run_name, fields=['status']) for job in jobs: if job['status'] not in ['pass', 'fail', 'dead']: job_id = job['job_id'] try: log.info("Marking job {job_id} as dead".format(job_id=job_id)) - reporter.report_job(run_name, job['job_id'], dead=True) - if "machine_type" in job: + # Load existing job_info from the archive, merge in our + # extra fields so the results server gets a useful + # failure_reason when a run is marked dead manually. + job_info = reporter.serializer.job_info(run_name, job_id) + job_info.update({'status': 'dead'}) + if reason: + job_info['failure_reason'] = reason + + reporter.report_job(run_name, job_id, job_info=job_info) + if "machine_type" in job_info: teuthology.exporter.JobResults().record( - machine_type=job["machine_type"], - status=job["status"], + machine_type=job_info["machine_type"], + status=job_info["status"], ) except report_exceptions: log.exception("Could not mark job as dead: {job_id}".format( diff --git a/teuthology/task/internal/__init__.py b/teuthology/task/internal/__init__.py index 15b8f81f5..a19828155 100644 --- a/teuthology/task/internal/__init__.py +++ b/teuthology/task/internal/__init__.py @@ -109,10 +109,9 @@ def check_packages(ctx, config): ver=package.sha1, ) log.error(msg) - # set the failure message and update paddles with the status ctx.summary["failure_reason"] = msg set_status(ctx.summary, "dead") - report.try_push_job_info(ctx.config, dict(status='dead')) + report.try_push_job_info(ctx.config, dict(status='dead', failure_reason=msg)) raise VersionNotFoundError(package.base_url) else: log.info( diff --git a/teuthology/test/test_report_dead_reason.py b/teuthology/test/test_report_dead_reason.py new file mode 100644 index 000000000..aee230198 --- 
/dev/null
+++ b/teuthology/test/test_report_dead_reason.py
@@ -0,0 +1,53 @@
+"""Tests for the ``reason`` argument of ``report.try_mark_run_dead``."""
+from unittest.mock import MagicMock, patch
+
+import teuthology.report as report
+
+
+def _fake_reporter(mock_reporter_cls):
+    """Return a mocked reporter exposing one unfinished job with archived info."""
+    reporter = MagicMock()
+    mock_reporter_cls.return_value = reporter
+    # get_jobs() yields a single job that is not yet pass/fail/dead.
+    reporter.get_jobs.return_value = [{'job_id': '1', 'status': 'running'}]
+    # serializer.job_info() stands in for the job info loaded from the archive.
+    reporter.serializer.job_info.return_value = {
+        'job_id': '1',
+        'machine_type': 'smithi',
+    }
+    return reporter
+
+
+def _reported_job_info(reporter):
+    """Check report_job() ran exactly once for job 1 and return its job_info."""
+    reporter.report_job.assert_called_once()
+    args, kwargs = reporter.report_job.call_args
+    # call signature: report_job(run_name, job_id, job_info=...)
+    assert args == ('fake-run', '1')
+    return kwargs['job_info']
+
+
+@patch('teuthology.report.ResultsReporter')
+def test_try_mark_run_dead_includes_reason(mock_reporter_cls):
+    reporter = _fake_reporter(mock_reporter_cls)
+
+    report.try_mark_run_dead('fake-run', reason='killed by user')
+
+    reporter.serializer.job_info.assert_called_once_with('fake-run', '1')
+    job_info = _reported_job_info(reporter)
+    assert job_info['status'] == 'dead'
+    assert job_info['failure_reason'] == 'killed by user'
+    # Fields loaded from the archive must survive the merge.
+    assert job_info['machine_type'] == 'smithi'
+
+
+@patch('teuthology.report.ResultsReporter')
+def test_try_mark_run_dead_without_reason(mock_reporter_cls):
+    reporter = _fake_reporter(mock_reporter_cls)
+
+    report.try_mark_run_dead('fake-run')
+
+    job_info = _reported_job_info(reporter)
+    assert job_info['status'] == 'dead'
+    # No reason given: failure_reason must not be invented.
+    assert 'failure_reason' not in job_info