open-edge-platform
diff --git a/application/backend/app/models/dataset_item.py b/application/backend/app/models/dataset_item.py
Lines changed: 36 additions & 4 deletions
diff --git a/application/backend/app/services/data_collect/prediction_converter.py b/application/backend/app/services/data_collect/prediction_converter.py
Lines changed: 26 additions & 20 deletions
@@ -12,11 +12,15 @@
 
 
 class DatasetItemFormat(StrEnum):
+    """Format of the image related to the dataset item.
+
+    Members are ``StrEnum`` values, so each member is also a ``str`` equal to
+    its lowercase value (``"jpg"``, ``"png"``).
+    """
+
     JPG = "jpg"
     PNG = "png"
 
 
 class DatasetItemSubset(StrEnum):
+    """Subset of the dataset item."""
+
     UNASSIGNED = "unassigned"
     TRAINING = "training"
     VALIDATION = "validation"
@@ -25,24 +29,52 @@ class DatasetItemSubset(StrEnum):
 
 class DatasetItemAnnotation(BaseModel):
     """
-    Dataset item annotation
+    An individual annotation within a dataset item.
+
+    An annotation consists of a shape, the labels associated with that shape,
+    and optionally a confidence score for each label (if applicable, e.g., for model predictions).
+
+    Attributes:
+        shape: The geometric shape defining the annotation area.
+        labels: A list of references to labels associated with the annotation.
+        confidences: A list of confidence scores corresponding to each label, or None
+            (the default) when no scores are provided.
+
+    NOTE(review): nothing in this class checks that ``confidences`` and ``labels``
+    have the same length; presumably callers keep the two lists aligned -- confirm.
     """
 
-    labels: list[LabelReference]
     shape: Shape
-    confidence: float | None = None
+    labels: list[LabelReference]
+    confidences: list[float] | None = None
 
+    # The schema example shows only the required fields; the optional
+    # ``confidences`` field is omitted from it.
     model_config = {
         "json_schema_extra": {
             "example": {
-                "labels": [{"id": "d476573e-d43c-42a6-9327-199a9aa75c33"}],
                 "shape": {"type": "rectangle", "x": 10, "y": 20, "width": 100, "height": 200},
+                "labels": [{"id": "d476573e-d43c-42a6-9327-199a9aa75c33"}],
             }
         }
     }
 
 
 class DatasetItem(BaseEntity):
+    """
+    DatasetItem represents an individual item within a dataset.
+
+    Attributes:
+        id: Unique identifier for the dataset item.
+        project_id: Identifier of the project to which the dataset item belongs.
+        name: Name of the dataset item.
+        format: Format of the dataset item (e.g., JPG, PNG).
+        width: Width of the dataset item in pixels.
+        height: Height of the dataset item in pixels.
+        size: Size of the dataset item in bytes.
+        annotation_data: List of annotations associated with the dataset item.
+        user_reviewed: Indicates whether the dataset item has been reviewed by a user,
+            namely if its annotation has been created/accepted by a human or if it is just a raw model prediction.
+        prediction_model_id: Identifier of the model that generated predictions for this dataset item, if applicable.
+        source_id: Identifier of the source from which the dataset item was acquired, if applicable.
+        subset: Subset to which the dataset item belongs (e.g., training, validation, testing).
+        subset_assigned_at: Timestamp indicating when the dataset item was assigned to its subset.
+    """
+
     id: UUID
     project_id: UUID
     name: str
 
@@ -13,11 +13,32 @@
 logger = logging.getLogger(__name__)
 
 
+def _convert_classification_prediction(
+    labels: Sequence[Label], prediction: ClassificationResult
+) -> list[DatasetItemAnnotation]:
+    """
+    Convert a classification result into a single full-image annotation.
+
+    Each entry of ``prediction.top_labels`` is matched by name against ``labels``.
+    Entries with no matching project label are skipped with a warning. An entry
+    whose confidence is None is kept with a confidence of 1.0, also with a warning.
+
+    Args:
+        labels: Project labels used to resolve predicted label names.
+        prediction: Classification result whose ``top_labels`` are converted.
+
+    Returns:
+        A list holding exactly one annotation covering the full image. Its labels
+        and confidences are index-aligned, and both are empty when no predicted
+        label could be resolved.
+    """
+    label_refs: list[LabelReference] = []
+    scores: list[float] = []
+    for top_label in prediction.top_labels:
+        name = top_label.name
+        # First project label with a matching name wins.
+        project_label = next((candidate for candidate in labels if candidate.name == name), None)
+        if project_label:
+            score = top_label.confidence
+            if score is None:
+                logger.warning("The predicted label %s does not have a confidence score; assuming 1.0", name)
+                score = 1.0
+            # Append to both lists together so they stay index-aligned.
+            label_refs.append(LabelReference(id=project_label.id))
+            scores.append(score)
+        else:
+            logger.warning("Prediction label %s cannot be found in the project", name)
+    full_image_annotation = DatasetItemAnnotation(labels=label_refs, shape=FullImage(), confidences=scores)
+    return [full_image_annotation]
+
+
 def _convert_detection_prediction(labels: Sequence[Label], prediction: DetectionResult) -> list[DatasetItemAnnotation]:
     result = []
+    prediction_scores_list = prediction.scores.tolist()
     for idx, box in enumerate(prediction.bboxes):
         label_name = prediction.label_names[idx]
-        confidence = prediction.scores.tolist()[idx]
+        bbox_confidence = prediction_scores_list[idx]
         label = next((label for label in labels if label.name == label_name), None)
         if not label:
             logger.warning("Prediction label %s cannot be found in the project", label_name)
@@ -26,38 +47,23 @@ def _convert_detection_prediction(labels: Sequence[Label], prediction: Detection
         annotation = DatasetItemAnnotation(
             labels=[LabelReference(id=label.id)],
             shape=Rectangle(x=x1, y=y1, width=(x2 - x1), height=(y2 - y1)),
-            confidence=confidence,
+            confidences=[bbox_confidence],
         )
         result.append(annotation)
     return result
 
 
-def _convert_classification_prediction(
-    labels: Sequence[Label], prediction: ClassificationResult
-) -> list[DatasetItemAnnotation]:
-    annotation_labels: list[LabelReference] = []
-    confidence = 0
-    for predicted_label in prediction.top_labels:
-        label_name = predicted_label.name
-        confidence = predicted_label.confidence
-        label = next((label for label in labels if label.name == label_name), None)
-        if not label:
-            logger.warning("Prediction label %s cannot be found in the project", label_name)
-            continue
-        annotation_labels.append(LabelReference(id=label.id))
-    return [DatasetItemAnnotation(labels=annotation_labels, shape=FullImage(), confidence=confidence)]
-
-
 def _convert_segmentation_prediction(
     labels: Sequence[Label],
     frame_data: np.ndarray,
     prediction: InstanceSegmentationResult,
 ) -> list[DatasetItemAnnotation]:
     height, width, _ = frame_data.shape
     result = []
+    prediction_scores_list = prediction.scores.tolist()
     for idx, box in enumerate(prediction.bboxes):
         label_name = prediction.label_names[idx]
-        confidence = prediction.scores.tolist()[idx]
+        polygon_confidence = prediction_scores_list[idx]
         label = next((label for label in labels if label.name == label_name), None)
         if not label:
             logger.warning("Prediction label %s cannot be found in the project", label_name)
@@ -73,7 +79,7 @@ def _convert_segmentation_prediction(
                 continue
             polygon = Polygon(points=[Point(x=point[0][0], y=point[0][1]) for point in list(contour)])
             annotation = DatasetItemAnnotation(
-                labels=[LabelReference(id=label.id)], shape=polygon, confidence=confidence
+                labels=[LabelReference(id=label.id)], shape=polygon, confidences=[polygon_confidence]
             )
             result.append(annotation)
     return result