fix: FIT-1392: exported annotations cannot be reimported as predictions (#9396)

matt-bernstein · web-flow · commit 7ae709f9850e · 2026-02-13T19:22:37.000+04:00
diff --git a/label_studio/data_import/api.py b/label_studio/data_import/api.py
@@ -33,6 +33,7 @@
 from rest_framework.views import APIView
 from tasks.functions import update_tasks_counters
 from tasks.models import Prediction, Task
+from tasks.serializers import sanitize_prediction_import_payload
 from users.models import User
 from webhooks.models import WebhookAction
 from webhooks.utils import emit_webhooks_for_instance
@@ -537,6 +538,7 @@ def _create_memory_efficient(self, project):
             # Build predictions for this batch
             batch_predictions = []
             for item in batch_items:
+                item = sanitize_prediction_import_payload(item)
                 task_id = item.get('task')
 
                 if task_id not in existing_task_ids:
@@ -586,6 +588,7 @@ def _create_legacy(self, project):
         predictions = []
 
         for i, item in enumerate(self.request.data):
+            item = sanitize_prediction_import_payload(item)
             # Validate task ID
             if item.get('task') not in tasks_ids:
                 if flag_set('fflag_feat_utc_210_prediction_validation_15082025', user='auto'):
diff --git a/label_studio/data_import/functions.py b/label_studio/data_import/functions.py
@@ -12,6 +12,7 @@
 from projects.models import ProjectImport, ProjectReimport, ProjectSummary
 from rest_framework.exceptions import ValidationError
 from tasks.models import Task
+from tasks.serializers import sanitize_prediction_import_payload
 from users.models import User
 from webhooks.models import WebhookAction
 from webhooks.utils import emit_webhooks_for_instance
@@ -71,6 +72,7 @@ def async_import_background(
             if 'predictions' in task:
                 for j, prediction in enumerate(task['predictions']):
                     try:
+                        prediction = sanitize_prediction_import_payload(prediction)
                         validation_errors_list = li.validate_prediction(prediction, return_errors=True)
                         if validation_errors_list:
                             for error in validation_errors_list:
@@ -451,6 +453,7 @@ def _async_import_background_streaming(project_import, user):
                     if 'predictions' in task:
                         for j, prediction in enumerate(task['predictions']):
                             try:
+                                prediction = sanitize_prediction_import_payload(prediction)
                                 validation_errors_list = li.validate_prediction(prediction, return_errors=True)
                                 if validation_errors_list:
                                     for error in validation_errors_list:
diff --git a/label_studio/tasks/serializers.py b/label_studio/tasks/serializers.py
@@ -1,6 +1,7 @@
 """This file and its contents are licensed under the Apache License 2.0. Please see the included NOTICE for copyright information and LICENSE for a copy of the license."""
 
 import logging
+from collections.abc import MutableMapping
 
 import ujson as json
 from core.current_request import CurrentContext, get_current_request
@@ -32,6 +33,14 @@
 logger = logging.getLogger(__name__)
 
 
+def sanitize_prediction_import_payload(prediction):
+    """Pop FSM `state` from a prediction import payload in place; non-mapping input is returned unchanged."""
+    if not isinstance(prediction, MutableMapping):
+        return prediction  # nothing to strip; hand the value back as-is
+    prediction.pop('state', None)  # default of None avoids KeyError when `state` is absent
+    return prediction
+
+
 class PredictionQuerySerializer(serializers.Serializer):
     task = serializers.IntegerField(required=False, help_text='Task ID to filter predictions')
     project = serializers.IntegerField(required=False, help_text='Project ID to filter predictions')
@@ -515,6 +524,7 @@ def add_predictions(self, task_predictions):
                 # Validate prediction only when project label config is not default
                 if should_validate:
                     try:
+                        prediction = sanitize_prediction_import_payload(prediction)
                         li = LabelInterface(self.project.label_config) if should_validate else None
                         validation_errors_list = li.validate_prediction(prediction, return_errors=True)
 
diff --git a/label_studio/tests/test_prediction_validation.py b/label_studio/tests/test_prediction_validation.py
@@ -15,12 +15,14 @@
 from unittest.mock import patch
 
 import pytest
+from data_import.api import ImportPredictionsAPI
 from data_import.functions import reformat_predictions
 from data_import.serializers import ImportApiSerializer
 from django.contrib.auth import get_user_model
 from organizations.tests.factories import OrganizationFactory
 from projects.tests.factories import ProjectFactory
 from rest_framework.exceptions import ValidationError
+from rest_framework.test import APIRequestFactory, force_authenticate
 from tasks.models import Annotation, Prediction, Task
 from tasks.tests.factories import TaskFactory
 from users.tests.factories import UserFactory
@@ -95,6 +97,90 @@ def test_valid_prediction_creation(self):
         assert prediction.score == 0.95
         assert prediction.model_version == 'v1.0'
 
+    @patch('tasks.serializers.flag_set', return_value=True)
+    @patch('tasks.serializers.LabelInterface')
+    def test_import_tasks_sanitizes_prediction_before_validation(self, mock_li_cls, _mock_flag_set):
+        """ImportApiSerializer must drop the `state` key from predictions before validate_prediction() sees them."""
+        mock_li = mock_li_cls.return_value
+
+        def _validate_prediction(payload, return_errors=True):
+            if 'state' in payload:  # stand-in validator: reject payloads that still carry `state`
+                return ['Unexpected field: state']
+            return []
+
+        mock_li.validate_prediction.side_effect = _validate_prediction
+        tasks = [
+            {
+                'data': {'text': 'Sanitize before validate'},
+                'predictions': [
+                    {
+                        'state': 'CREATED',
+                        'id': 111,
+                        'result': [
+                            {
+                                'from_name': 'sentiment',
+                                'to_name': 'text',
+                                'type': 'choices',
+                                'value': {'choices': ['positive']},
+                            }
+                        ],
+                        'score': 0.9,
+                        'model_version': 'mv-sanitize',
+                    }
+                ],
+            }
+        ]
+
+        serializer = ImportApiSerializer(data=tasks, many=True, context={'project': self.project})
+        assert serializer.is_valid(), serializer.errors  # surface serializer errors in the failure message
+        created_tasks = serializer.save(project_id=self.project.id)
+        assert len(created_tasks) == 1
+
+    @patch(
+        'data_import.api.flag_set',
+        side_effect=lambda flag_name, user='auto', **kwargs: (
+            flag_name == 'fflag_feat_utc_210_prediction_validation_15082025'  # only this flag reads as enabled
+        ),
+    )
+    @patch('data_import.api.LabelInterface')
+    def test_import_predictions_endpoint_sanitizes_payload_before_validation(self, mock_li_cls, _mock_flag_set):
+        """Bulk predictions import API should drop `state` from each item before LabelInterface.validate_prediction()."""
+        mock_li = mock_li_cls.return_value
+
+        def _validate_prediction(payload, return_errors=True):
+            if 'state' in payload:  # stand-in validator: reject payloads that still carry `state`
+                return ['Unexpected field: state']
+            return []
+
+        mock_li.validate_prediction.side_effect = _validate_prediction
+        request_factory = APIRequestFactory()
+        payload = [
+            {
+                'state': 'CREATED',
+                'id': 222,
+                'result': [
+                    {
+                        'from_name': 'sentiment',
+                        'to_name': 'text',
+                        'type': 'choices',
+                        'value': {'choices': ['neutral']},
+                    }
+                ],
+                'score': 0.5,
+                'model_version': 'mv-sanitize-endpoint',
+                'task': self.task.id,
+            }
+        ]
+        request = request_factory.post(
+            f'/api/projects/{self.project.id}/import/predictions',
+            data=payload,
+            format='json',
+        )
+        force_authenticate(request, user=self.user)
+        response = ImportPredictionsAPI.as_view()(request, pk=self.project.id)  # invoke the view callable directly
+        assert response.status_code == 201
+        assert response.data['created'] == 1
+
     def test_invalid_prediction_missing_result(self):
         """Test validation fails when prediction is missing result field."""
         tasks = [