Skip to content

Commit cc32240

Browse files
authored
Adds annotation status filtering to dataset item endpoints (#4976)
1 parent be094bb commit cc32240

File tree

7 files changed

+362
-11
lines changed

7 files changed

+362
-11
lines changed

application/backend/app/api/routers/datasets.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
SetDatasetItemAnnotations,
1818
)
1919
from app.core.models import Pagination
20+
from app.models import DatasetItemAnnotationStatus
2021
from app.schemas import ProjectView
2122
from app.services import DatasetService, ResourceNotFoundError
2223
from app.services.dataset_service import AnnotationValidationError, InvalidImageError, SubsetAlreadyAssignedError
@@ -103,15 +104,23 @@ def list_dataset_items(
103104
offset: Annotated[int, Query(ge=0)] = 0,
104105
start_date: Annotated[datetime | None, Query()] = None,
105106
end_date: Annotated[datetime | None, Query()] = None,
107+
annotation_status: Annotated[DatasetItemAnnotationStatus | None, Query()] = None,
106108
) -> DatasetItemsWithPagination:
107109
"""List the available dataset items and their metadata. This endpoint supports pagination."""
108110
if start_date is not None and end_date is not None and start_date > end_date:
109111
raise HTTPException(
110112
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Start date must be before end date."
111113
)
112-
total = dataset_service.count_dataset_items(project=project, start_date=start_date, end_date=end_date)
114+
total = dataset_service.count_dataset_items(
115+
project=project, start_date=start_date, end_date=end_date, annotation_status=annotation_status
116+
)
113117
dataset_items = dataset_service.list_dataset_items(
114-
project=project, limit=limit, offset=offset, start_date=start_date, end_date=end_date
118+
project=project,
119+
limit=limit,
120+
offset=offset,
121+
start_date=start_date,
122+
end_date=end_date,
123+
annotation_status=annotation_status,
115124
)
116125
return DatasetItemsWithPagination(
117126
items=[DatasetItemView.model_validate(dataset_item, from_attributes=True) for dataset_item in dataset_items],

application/backend/app/models/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
# Copyright (C) 2025 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4-
from .dataset_item import DatasetItem, DatasetItemAnnotation, DatasetItemFormat, DatasetItemSubset
4+
from .dataset_item import (
5+
DatasetItem,
6+
DatasetItemAnnotation,
7+
DatasetItemAnnotationStatus,
8+
DatasetItemFormat,
9+
DatasetItemSubset,
10+
)
511
from .label import Label, LabelReference
612
from .shape import FullImage, Point, Polygon, Rectangle, Shape
713
from .sink import (
@@ -30,6 +36,7 @@
3036
__all__ = [
3137
"DatasetItem",
3238
"DatasetItemAnnotation",
39+
"DatasetItemAnnotationStatus",
3340
"DatasetItemFormat",
3441
"DatasetItemSubset",
3542
"DisconnectedSinkConfig",

application/backend/app/models/dataset_item.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ class DatasetItemSubset(StrEnum):
2727
TESTING = "testing"
2828

2929

30+
class DatasetItemAnnotationStatus(StrEnum):
31+
UNANNOTATED = "unannotated"
32+
REVIEWED = "reviewed"
33+
TO_REVIEW = "to_review"
34+
35+
3036
class DatasetItemAnnotation(BaseModel):
3137
"""
3238
DatasetItemAnnotation represents an individual annotation within a dataset item.

application/backend/app/repositories/dataset_item_repo.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from sqlalchemy.orm import Session
99

1010
from app.db.schema import DatasetItemDB, DatasetItemLabelDB
11-
from app.models import DatasetItemSubset
11+
from app.models import DatasetItemAnnotationStatus, DatasetItemSubset
1212

1313

1414
class UpdateDatasetItemAnnotation(NamedTuple):
@@ -28,8 +28,9 @@ def _base_select(self) -> Select:
2828
"""Create base select statement filtered by project_id."""
2929
return select(DatasetItemDB).where(DatasetItemDB.project_id == self.project_id)
3030

31+
@staticmethod
3132
def _apply_date_filters(
32-
self, stmt: Select, start_date: datetime | None = None, end_date: datetime | None = None
33+
stmt: Select, start_date: datetime | None = None, end_date: datetime | None = None
3334
) -> Select:
3435
"""Apply date range filters to a select statement."""
3536
if start_date:
@@ -38,22 +39,45 @@ def _apply_date_filters(
3839
stmt = stmt.where(DatasetItemDB.created_at < end_date)
3940
return stmt
4041

42+
@staticmethod
def _apply_annotation_status_filter(stmt: Select, annotation_status: str | None = None) -> Select:
    """Apply annotation status filter to a select statement.

    Args:
        stmt: Statement over ``DatasetItemDB`` to narrow down.
        annotation_status: A ``DatasetItemAnnotationStatus`` value (or its string
            form), or ``None`` to apply no filter.

    Returns:
        The statement with the matching ``WHERE`` conditions added. ``None`` and any
        unrecognised value return the statement unchanged.
    """
    if annotation_status is None:
        return stmt

    if annotation_status == DatasetItemAnnotationStatus.UNANNOTATED:
        # NOTE(review): this matches SQL NULL only — confirm that items without
        # annotations are never stored with a non-NULL empty/JSON-null payload.
        return stmt.where(DatasetItemDB.annotation_data.is_(None))

    if annotation_status == DatasetItemAnnotationStatus.REVIEWED:
        return stmt.where(DatasetItemDB.annotation_data.is_not(None), DatasetItemDB.user_reviewed.is_(True))

    if annotation_status == DatasetItemAnnotationStatus.TO_REVIEW:
        return stmt.where(DatasetItemDB.annotation_data.is_not(None), DatasetItemDB.user_reviewed.is_(False))

    # Unknown status values fall through without filtering.
    return stmt
52+
4153
def save(self, dataset_item_db: DatasetItemDB) -> DatasetItemDB:
    """Stamp the item with the current UTC time and flush it to the session.

    Args:
        dataset_item_db: The row object to add to the session.

    Returns:
        The same object, after ``updated_at`` was set and the session flushed.
    """
    dataset_item_db.updated_at = datetime.now(UTC)
    self.db.add(dataset_item_db)
    # Flush (not commit): pending changes are emitted; committing is left to the caller.
    self.db.flush()
    return dataset_item_db
4658

47-
def count(self, start_date: datetime | None = None, end_date: datetime | None = None) -> int:
59+
def count(
    self,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    annotation_status: str | None = None,
) -> int:
    """Count the dataset items of this repository's project that match the filters.

    Args:
        start_date: Optional bound passed to the date filter.
        end_date: Optional bound passed to the date filter.
        annotation_status: Optional annotation status passed to the status filter.

    Returns:
        The number of matching items, or ``0`` when the query yields no value.
    """
    stmt = select(func.count()).select_from(DatasetItemDB).where(DatasetItemDB.project_id == self.project_id)
    stmt = self._apply_date_filters(stmt, start_date, end_date)
    stmt = self._apply_annotation_status_filter(stmt, annotation_status)
    return self.db.scalar(stmt) or 0
5169

5270
def list_items(
    self,
    limit: int,
    offset: int,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    annotation_status: str | None = None,
) -> list[DatasetItemDB]:
    """Return one page of this project's dataset items, newest first.

    Args:
        limit: Maximum number of rows to return.
        offset: Number of rows to skip before the page starts.
        start_date: Optional bound passed to the date filter.
        end_date: Optional bound passed to the date filter.
        annotation_status: Optional annotation status passed to the status filter.

    Returns:
        The matching rows ordered by ``created_at`` descending.
    """
    stmt = self._base_select()
    stmt = self._apply_date_filters(stmt, start_date, end_date)
    stmt = self._apply_annotation_status_filter(stmt, annotation_status)
    # Filters are applied before ordering and pagination so the page is cut from the filtered set.
    stmt = stmt.order_by(DatasetItemDB.created_at.desc()).offset(offset).limit(limit)
    return list(self.db.scalars(stmt).all())
5983

application/backend/app/services/dataset_service.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -158,10 +158,11 @@ def count_dataset_items(
158158
project: ProjectView,
159159
start_date: datetime | None = None,
160160
end_date: datetime | None = None,
161+
annotation_status: str | None = None,
161162
) -> int:
162163
"""Get number of available dataset items (within date range if specified)"""
163164
repo = DatasetItemRepository(project_id=str(project.id), db=self._db_session)
164-
return repo.count(start_date=start_date, end_date=end_date)
165+
return repo.count(start_date=start_date, end_date=end_date, annotation_status=annotation_status)
165166

166167
def list_dataset_items(
167168
self,
@@ -170,12 +171,19 @@ def list_dataset_items(
170171
offset: int = 0,
171172
start_date: datetime | None = None,
172173
end_date: datetime | None = None,
174+
annotation_status: str | None = None,
173175
) -> list[DatasetItem]:
174176
"""Get information about available dataset items"""
175177
repo = DatasetItemRepository(project_id=str(project.id), db=self._db_session)
176178
return [
177179
DatasetItem.model_validate(db)
178-
for db in repo.list_items(limit=limit, offset=offset, start_date=start_date, end_date=end_date)
180+
for db in repo.list_items(
181+
limit=limit,
182+
offset=offset,
183+
start_date=start_date,
184+
end_date=end_date,
185+
annotation_status=annotation_status,
186+
)
179187
]
180188

181189
def get_dataset_item_by_id(self, project: ProjectView, dataset_item_id: UUID) -> DatasetItem:

0 commit comments

Comments
 (0)