diff --git a/apps/codecov-api/upload/serializers.py b/apps/codecov-api/upload/serializers.py index ce90c4b63..3af0c6f55 100644 --- a/apps/codecov-api/upload/serializers.py +++ b/apps/codecov-api/upload/serializers.py @@ -1,7 +1,9 @@ +import logging from typing import Any from django.conf import settings -from django.db.models import QuerySet +from django.db import IntegrityError, transaction +from django.db.models import Q, QuerySet from rest_framework import serializers from codecov_auth.models import Owner @@ -11,6 +13,8 @@ from shared.api_archive.archive import ArchiveService from shared.django_apps.upload_breadcrumbs.models import BreadcrumbData, Milestones +log = logging.getLogger(__name__) + class FlagListField(serializers.ListField): child = serializers.CharField() @@ -197,17 +201,60 @@ def validate_code(self, value): return None def create(self, validated_data: dict[str, Any]) -> tuple[CommitReport, bool]: - report = ( - CommitReport.objects.coverage_reports() - .filter( - code=validated_data.get("code"), - commit_id=validated_data.get("commit_id"), - ) - .first() + code = validated_data.get("code") + commit_id = validated_data.get("commit_id") + report_type = validated_data.get( + "report_type", CommitReport.ReportType.COVERAGE ) - if report: - if report.report_type is None: - report.report_type = CommitReport.ReportType.COVERAGE - report.save() - return report, False - return super().create(validated_data), True + + # Use atomic transaction with select_for_update to serialize concurrent access. + # This prevents race conditions where multiple requests create duplicate reports. + # We lock ALL matching reports (legacy and non-legacy) before checking/creating. + with transaction.atomic(): + # Lock all coverage reports for this commit/code to serialize concurrent access. + # Using .first() handles the case where duplicates already exist (returns one). + # The Q filter matches both legacy (report_type=None) and coverage reports. + existing_report = ( + CommitReport.objects.select_for_update() + .filter( + Q(report_type=None) | Q(report_type=report_type), + code=code, + commit_id=commit_id, + ) + .first() + ) + + if existing_report: + # If it's a legacy report, upgrade it to the correct type + if existing_report.report_type is None: + existing_report.report_type = report_type + existing_report.save() + return existing_report, False + + # No existing report found while holding the lock - safe to create. + # Wrap in try/except as a belt-and-suspenders approach in case of + # any edge cases we haven't considered. + try: + report = CommitReport.objects.create( + code=code, + commit_id=commit_id, + report_type=report_type, + ) + return report, True + except IntegrityError: + # Another request created a report between our check and create. + # This shouldn't happen with select_for_update but handle defensively. + log.warning( + "IntegrityError during CommitReport creation, fetching existing", + extra={"commit_id": commit_id, "code": code}, + ) + # Re-query to get the report that was created + existing = CommitReport.objects.filter( + Q(report_type=None) | Q(report_type=report_type), + code=code, + commit_id=commit_id, + ).first() + if existing: + return existing, False + # This should never happen, but re-raise if it does + raise diff --git a/apps/codecov-api/upload/tests/test_serializers.py b/apps/codecov-api/upload/tests/test_serializers.py index f14bd4b68..5da83f76e 100644 --- a/apps/codecov-api/upload/tests/test_serializers.py +++ b/apps/codecov-api/upload/tests/test_serializers.py @@ -4,6 +4,7 @@ from rest_framework.exceptions import ErrorDetail from billing.tests.mocks import mock_all_plans_and_tiers +from reports.models import CommitReport from reports.tests.factories import ( CommitReportFactory, RepositoryFlagFactory, @@ -266,3 +267,173 @@ def test_commit_report_serializer(db): "code": report.code, } assert serializer.data == expected_data + + +def test_commit_report_serializer_creates_new_report(db): + """Test that create() returns (report, True) for new reports.""" + commit = CommitFactory.create() + serializer = CommitReportSerializer() + report, created = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + assert created is True + assert report.report_type == CommitReport.ReportType.COVERAGE + assert report.commit_id == commit.id + assert report.code is None + + +def test_commit_report_serializer_upgrades_legacy_report(db): + """Test that create() upgrades legacy reports with report_type=None.""" + commit = CommitFactory.create() + legacy_report = CommitReport.objects.create( + commit=commit, + code=None, + report_type=None, # Legacy report + ) + + serializer = CommitReportSerializer() + report, created = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + + assert created is False + assert report.id == legacy_report.id + legacy_report.refresh_from_db() + assert legacy_report.report_type == CommitReport.ReportType.COVERAGE + + +def test_commit_report_serializer_returns_existing_report(db): + """Test that create() returns existing report without creating duplicate.""" + commit = CommitFactory.create() + existing_report = CommitReport.objects.create( + commit=commit, + code=None, + report_type=CommitReport.ReportType.COVERAGE, + ) + + serializer = CommitReportSerializer() + report, created = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + + assert created is False + assert report.id == existing_report.id + # Ensure no duplicate was created + assert CommitReport.objects.filter(commit=commit, code=None).count() == 1 + + +def test_commit_report_serializer_handles_concurrent_creation(db): + """Test that create() handles race condition by serializing access. + + This test verifies that calling create() twice for the same commit/code + does not create duplicate reports (simulating concurrent requests). + """ + commit = CommitFactory.create() + serializer = CommitReportSerializer() + + # First call - should create + report1, created1 = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + assert created1 is True + + # Second call - should return existing (simulates concurrent request that lost the race) + report2, created2 = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + assert created2 is False + assert report1.id == report2.id + + # Verify only one report exists + assert ( + CommitReport.objects.filter( + commit=commit, code=None, report_type=CommitReport.ReportType.COVERAGE + ).count() + == 1 + ) + + +def test_commit_report_serializer_handles_existing_duplicates(db): + """Regression test: create() handles pre-existing duplicate reports gracefully. + + This test verifies that if duplicate CommitReports already exist in the database + (from before the race condition fix), create() returns one of them without crashing. + Previously, get_or_create would raise MultipleObjectsReturned in this case. + """ + commit = CommitFactory.create() + + # Create duplicate reports directly in the database (simulating pre-existing duplicates) + report1 = CommitReport.objects.create( + commit=commit, + code=None, + report_type=CommitReport.ReportType.COVERAGE, + ) + report2 = CommitReport.objects.create( + commit=commit, + code=None, + report_type=CommitReport.ReportType.COVERAGE, + ) + assert report1.id != report2.id # Confirm we have duplicates + + # Call create() - should return one of the existing reports, not crash + serializer = CommitReportSerializer() + report, created = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + + assert created is False + assert report.id in (report1.id, report2.id) + + +def test_commit_report_serializer_handles_duplicate_legacy_reports(db): + """Regression test: create() handles duplicate legacy reports gracefully. + + This test verifies that if duplicate legacy CommitReports (report_type=None) + exist in the database, create() returns one of them and upgrades its type. + """ + commit = CommitFactory.create() + + # Create duplicate legacy reports + legacy1 = CommitReport.objects.create( + commit=commit, + code=None, + report_type=None, # Legacy + ) + legacy2 = CommitReport.objects.create( + commit=commit, + code=None, + report_type=None, # Legacy + ) + assert legacy1.id != legacy2.id # Confirm we have duplicates + + # Call create() - should return one of the existing reports + serializer = CommitReportSerializer() + report, created = serializer.create( + { + "code": None, + "commit_id": commit.id, + } + ) + + assert created is False + assert report.id in (legacy1.id, legacy2.id) + # The returned report should be upgraded + assert report.report_type == CommitReport.ReportType.COVERAGE