Skip to content

Commit 50705ba

Browse files
authored
Add confidence field to prediction dataset samples (#4990)
1 parent 804292b commit 50705ba

File tree

13 files changed

+379
-176
lines changed

13 files changed

+379
-176
lines changed

application/backend/app/models/dataset_item.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,15 @@
1212

1313

1414
class DatasetItemFormat(StrEnum):
15+
"""Format of the image related to the dataset item."""
16+
1517
JPG = "jpg"
1618
PNG = "png"
1719

1820

1921
class DatasetItemSubset(StrEnum):
22+
"""Subset of the dataset item."""
23+
2024
UNASSIGNED = "unassigned"
2125
TRAINING = "training"
2226
VALIDATION = "validation"
@@ -25,24 +29,52 @@ class DatasetItemSubset(StrEnum):
2529

2630
class DatasetItemAnnotation(BaseModel):
2731
"""
28-
Dataset item annotation
32+
DatasetItemAnnotation represents an individual annotation within a dataset item.
33+
34+
An annotation consists of a shape, one or more labels associated with that shape,
35+
and optionally a confidence score for each label (if applicable, e.g., for model predictions).
36+
37+
Attributes:
38+
labels: A list of references to labels associated with the annotation.
39+
shape: The geometric shape defining the annotation area.
40+
confidences: A list of confidence scores, one per entry in `labels` and in the same order; None when no scores are available.
2941
"""
3042

31-
labels: list[LabelReference]
3243
shape: Shape
33-
confidence: float | None = None
44+
labels: list[LabelReference]
45+
confidences: list[float] | None = None
3446

3547
model_config = {
3648
"json_schema_extra": {
3749
"example": {
38-
"labels": [{"id": "d476573e-d43c-42a6-9327-199a9aa75c33"}],
3950
"shape": {"type": "rectangle", "x": 10, "y": 20, "width": 100, "height": 200},
51+
"labels": [{"id": "d476573e-d43c-42a6-9327-199a9aa75c33"}],
4052
}
4153
}
4254
}
4355

4456

4557
class DatasetItem(BaseEntity):
58+
"""
59+
DatasetItem represents an individual item within a dataset.
60+
61+
Attributes:
62+
id: Unique identifier for the dataset item.
63+
project_id: Identifier of the project to which the dataset item belongs.
64+
name: Name of the dataset item.
65+
format: Format of the dataset item (e.g., JPG, PNG).
66+
width: Width of the dataset item in pixels.
67+
height: Height of the dataset item in pixels.
68+
size: Size of the dataset item in bytes.
69+
annotation_data: List of annotations associated with the dataset item.
70+
user_reviewed: Indicates whether the dataset item has been reviewed by a user,
71+
namely if its annotation has been created/accepted by a human or if it is just a raw model prediction.
72+
prediction_model_id: Identifier of the model that generated predictions for this dataset item, if applicable.
73+
source_id: Identifier of the source from which the dataset item was acquired, if applicable.
74+
subset: Subset to which the dataset item belongs (e.g., training, validation, testing).
75+
subset_assigned_at: Timestamp indicating when the dataset item was assigned to its subset.
76+
"""
77+
4678
id: UUID
4779
project_id: UUID
4880
name: str

application/backend/app/services/data_collect/prediction_converter.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,32 @@
1313
logger = logging.getLogger(__name__)
1414

1515

16+
def _convert_classification_prediction(
    labels: Sequence[Label], prediction: ClassificationResult
) -> list[DatasetItemAnnotation]:
    """
    Convert a classification prediction into a single full-image annotation.

    Every entry of ``prediction.top_labels`` is resolved by name against the project ``labels``.
    Predicted labels without a matching project label are skipped with a warning, and a missing
    confidence score is replaced by 1.0 (also with a warning).

    Args:
        labels: Labels of the project, used to map predicted label names to label ids.
        prediction: Classification result whose ``top_labels`` entries expose ``name`` and ``confidence``.

    Returns:
        A one-element list containing an annotation with a ``FullImage`` shape. Its ``labels`` and
        ``confidences`` lists are index-aligned; both are empty if no predicted label could be matched.
    """
    # Index the project labels by name once instead of rescanning them for every predicted label.
    # Iterating in reverse makes the first label win when a name is duplicated, exactly as a
    # front-to-back linear search would.
    labels_by_name = {label.name: label for label in reversed(labels)}

    predicted_labels: list[LabelReference] = []
    predicted_confidences: list[float] = []
    for predicted_label in prediction.top_labels:
        label_name = predicted_label.name
        label = labels_by_name.get(label_name)
        # Test for absence explicitly rather than relying on the truthiness of a label object.
        if label is None:
            logger.warning("Prediction label %s cannot be found in the project", label_name)
            continue
        confidence = predicted_label.confidence
        if confidence is None:
            logger.warning("The predicted label %s does not have a confidence score; assuming 1.0", label_name)
            confidence = 1.0
        # Append to both lists together so that confidences[i] always belongs to labels[i].
        predicted_labels.append(LabelReference(id=label.id))
        predicted_confidences.append(confidence)

    # NOTE(review): an annotation is returned even when no label matched (empty ``labels``);
    # confirm that consumers accept a label-less full-image annotation.
    return [DatasetItemAnnotation(labels=predicted_labels, shape=FullImage(), confidences=predicted_confidences)]
34+
35+
1636
def _convert_detection_prediction(labels: Sequence[Label], prediction: DetectionResult) -> list[DatasetItemAnnotation]:
1737
result = []
38+
prediction_scores_list = prediction.scores.tolist()
1839
for idx, box in enumerate(prediction.bboxes):
1940
label_name = prediction.label_names[idx]
20-
confidence = prediction.scores.tolist()[idx]
41+
bbox_confidence = prediction_scores_list[idx]
2142
label = next((label for label in labels if label.name == label_name), None)
2243
if not label:
2344
logger.warning("Prediction label %s cannot be found in the project", label_name)
@@ -26,38 +47,23 @@ def _convert_detection_prediction(labels: Sequence[Label], prediction: Detection
2647
annotation = DatasetItemAnnotation(
2748
labels=[LabelReference(id=label.id)],
2849
shape=Rectangle(x=x1, y=y1, width=(x2 - x1), height=(y2 - y1)),
29-
confidence=confidence,
50+
confidences=[bbox_confidence],
3051
)
3152
result.append(annotation)
3253
return result
3354

3455

35-
def _convert_classification_prediction(
36-
labels: Sequence[Label], prediction: ClassificationResult
37-
) -> list[DatasetItemAnnotation]:
38-
annotation_labels: list[LabelReference] = []
39-
confidence = 0
40-
for predicted_label in prediction.top_labels:
41-
label_name = predicted_label.name
42-
confidence = predicted_label.confidence
43-
label = next((label for label in labels if label.name == label_name), None)
44-
if not label:
45-
logger.warning("Prediction label %s cannot be found in the project", label_name)
46-
continue
47-
annotation_labels.append(LabelReference(id=label.id))
48-
return [DatasetItemAnnotation(labels=annotation_labels, shape=FullImage(), confidence=confidence)]
49-
50-
5156
def _convert_segmentation_prediction(
5257
labels: Sequence[Label],
5358
frame_data: np.ndarray,
5459
prediction: InstanceSegmentationResult,
5560
) -> list[DatasetItemAnnotation]:
5661
height, width, _ = frame_data.shape
5762
result = []
63+
prediction_scores_list = prediction.scores.tolist()
5864
for idx, box in enumerate(prediction.bboxes):
5965
label_name = prediction.label_names[idx]
60-
confidence = prediction.scores.tolist()[idx]
66+
polygon_confidence = prediction_scores_list[idx]
6167
label = next((label for label in labels if label.name == label_name), None)
6268
if not label:
6369
logger.warning("Prediction label %s cannot be found in the project", label_name)
@@ -73,7 +79,7 @@ def _convert_segmentation_prediction(
7379
continue
7480
polygon = Polygon(points=[Point(x=point[0][0], y=point[0][1]) for point in list(contour)])
7581
annotation = DatasetItemAnnotation(
76-
labels=[LabelReference(id=label.id)], shape=polygon, confidence=confidence
82+
labels=[LabelReference(id=label.id)], shape=polygon, confidences=[polygon_confidence]
7783
)
7884
result.append(annotation)
7985
return result

0 commit comments

Comments
 (0)