Skip to content

Commit c5b12cd

Browse files
authored
4545: Create skeleton of dataset endpoints (#4651)
1 parent 4457618 commit c5b12cd

File tree

10 files changed

+541
-2
lines changed

10 files changed

+541
-2
lines changed

backend/app/api/dependencies.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,14 @@
88
from fastapi import Depends, HTTPException, Request, status
99

1010
from app.core import Scheduler
11-
from app.services import ActivePipelineService, ConfigurationService, ModelService, PipelineService, SystemService
11+
from app.services import (
12+
ActivePipelineService,
13+
ConfigurationService,
14+
DatasetService,
15+
ModelService,
16+
PipelineService,
17+
SystemService,
18+
)
1219
from app.services.metrics_service import MetricsService
1320
from app.webrtc.manager import WebRTCManager
1421

@@ -34,6 +41,13 @@ def get_source_id(source_id: str) -> UUID:
3441
return UUID(source_id)
3542

3643

44+
def get_project_id(project_id: str) -> UUID:
    """Validate the raw ``project_id`` path parameter and convert it to a UUID.

    Raises:
        HTTPException: 400 Bad Request when ``is_valid_uuid`` rejects the value.
    """
    if is_valid_uuid(project_id):
        return UUID(project_id)
    raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid project ID")
49+
50+
3751
def get_sink_id(sink_id: str) -> UUID:
3852
"""Initializes and validates a sink ID"""
3953
if not is_valid_uuid(sink_id):
@@ -48,6 +62,13 @@ def get_model_id(model_id: str) -> UUID:
4862
return UUID(model_id)
4963

5064

65+
def get_dataset_item_id(dataset_item_id: str) -> UUID:
    """Validate the raw ``dataset_item_id`` path parameter and convert it to a UUID.

    Raises:
        HTTPException: 400 Bad Request when ``is_valid_uuid`` rejects the value.
    """
    if is_valid_uuid(dataset_item_id):
        return UUID(dataset_item_id)
    raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid dataset item ID")
70+
71+
5172
def get_pipeline_id(pipeline_id: str) -> UUID:
5273
"""Initializes and validates a pipeline ID"""
5374
if not is_valid_uuid(pipeline_id):
@@ -114,6 +135,12 @@ def get_model_service(
114135
)
115136

116137

138+
@lru_cache
def get_dataset_service() -> DatasetService:
    """Provides a DatasetService instance.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    instance built on the first call is returned again on later calls.
    """
    service = DatasetService()
    return service
142+
143+
117144
def get_webrtc_manager(request: Request) -> WebRTCManager:
    """Return the WebRTCManager stored on the FastAPI application's state.

    The manager is read from ``request.app.state.webrtc_manager``.
    """
    app_state = request.app.state
    return app_state.webrtc_manager
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
from datetime import datetime
4+
from typing import Annotated
5+
from uuid import UUID
6+
7+
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile, status
8+
from starlette.responses import FileResponse
9+
10+
from app.api.dependencies import get_dataset_item_id, get_dataset_service, get_project_id
11+
from app.schemas import DatasetItem, DatasetItemsWithPagination
12+
from app.schemas.base import Pagination
13+
from app.services import DatasetService, ResourceNotFoundError
14+
15+
router = APIRouter(prefix="/api/projects/{project_id}/dataset/items", tags=["Datasets"])
16+
17+
DEFAULT_DATASET_ITEMS_NUMBER_RETURNED = 10
18+
MAX_DATASET_ITEMS_NUMBER_RETURNED = 100
19+
20+
21+
@router.post(
    "",
    status_code=status.HTTP_201_CREATED,
    responses={
        status.HTTP_201_CREATED: {"description": "Dataset item created", "model": DatasetItem},
    },
)
def add_dataset_item(
    project_id: Annotated[UUID, Depends(get_project_id)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
    file: Annotated[UploadFile, File()],
) -> DatasetItem:
    """Add a new item to the dataset by uploading an image"""
    # The service receives the underlying binary stream of the upload, not the UploadFile wrapper.
    uploaded_stream = file.file
    created_item = dataset_service.create_dataset_item(project_id=project_id, file=uploaded_stream)
    return created_item
33+
34+
35+
def _as_utc_for_comparison(moment: datetime) -> datetime:
    """Return ``moment`` unchanged when it is timezone-aware, otherwise tagged as UTC.

    Comparing a naive datetime with an aware one raises ``TypeError``. Query
    parameters may arrive in either form, so both bounds are normalised before
    being compared. The result is used for comparison only.
    """
    from datetime import timezone  # local import so the module-level import block stays untouched

    if moment.tzinfo is None or moment.utcoffset() is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


@router.get(
    "",
    responses={
        status.HTTP_200_OK: {"description": "List of available dataset items", "model": DatasetItemsWithPagination},
    },
)
def list_dataset_items(
    project_id: Annotated[UUID, Depends(get_project_id)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
    limit: Annotated[int, Query(ge=1, le=MAX_DATASET_ITEMS_NUMBER_RETURNED)] = DEFAULT_DATASET_ITEMS_NUMBER_RETURNED,
    offset: Annotated[int, Query(ge=0)] = 0,
    start_date: Annotated[datetime | None, Query()] = None,
    end_date: Annotated[datetime | None, Query()] = None,
) -> DatasetItemsWithPagination:
    """List the available dataset items and their metadata. This endpoint supports pagination."""
    # Reject an inverted date range. Naive bounds are treated as UTC for this check only, so a
    # mix of naive and aware query values yields a proper 422 instead of an unhandled TypeError.
    if start_date is not None and end_date is not None:
        if _as_utc_for_comparison(start_date) > _as_utc_for_comparison(end_date):
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Start date must be before end date."
            )
    # The caller's original (un-normalised) bounds are forwarded to the service.
    dataset_items = dataset_service.list_dataset_items(
        project_id=project_id, limit=limit, offset=offset, start_date=start_date, end_date=end_date
    )
    return DatasetItemsWithPagination(  # TODO: implement (``total`` is still a hard-coded placeholder)
        items=dataset_items,
        pagination=Pagination(
            limit=limit,
            offset=offset,
            total=0,
            count=len(dataset_items),
        ),
    )
66+
67+
68+
@router.get(
    "/{dataset_item_id}",
    responses={
        status.HTTP_200_OK: {"description": "Dataset item found", "model": DatasetItem},
        status.HTTP_400_BAD_REQUEST: {"description": "Invalid dataset item ID"},
        status.HTTP_404_NOT_FOUND: {"description": "Dataset item not found"},
    },
)
def get_dataset_item(
    project_id: Annotated[UUID, Depends(get_project_id)],
    dataset_item_id: Annotated[UUID, Depends(get_dataset_item_id)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> DatasetItem:
    """Get information about a specific dataset item"""
    try:
        return dataset_service.get_dataset_item_by_id(project_id=project_id, dataset_item_id=dataset_item_id)
    except ResourceNotFoundError as e:
        # Translate the service-level "not found" into a 404, keeping the original error as the cause.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
86+
87+
88+
@router.get(
    "/{dataset_item_id}/binary",
    responses={
        status.HTTP_200_OK: {"description": "Dataset item binary found"},
        status.HTTP_400_BAD_REQUEST: {"description": "Invalid dataset item ID"},
        status.HTTP_404_NOT_FOUND: {"description": "Dataset item binary not found"},
    },
)
def get_dataset_item_binary(
    project_id: Annotated[UUID, Depends(get_project_id)],
    dataset_item_id: Annotated[UUID, Depends(get_dataset_item_id)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> FileResponse:
    """Get dataset item binary content"""
    # Only the service lookup is guarded; building the response stays outside the try block.
    try:
        binary_path = dataset_service.get_dataset_item_binary_path_by_id(
            project_id=project_id, dataset_item_id=dataset_item_id
        )
    except ResourceNotFoundError as e:
        # Translate the service-level "not found" into a 404, keeping the original error as the cause.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    return FileResponse(path=binary_path)
109+
110+
111+
@router.get(
    "/{dataset_item_id}/thumbnail",
    responses={
        status.HTTP_200_OK: {"description": "Dataset item thumbnail found"},
        status.HTTP_400_BAD_REQUEST: {"description": "Invalid dataset item ID"},
        status.HTTP_404_NOT_FOUND: {"description": "Dataset item thumbnail not found"},
    },
)
def get_dataset_item_thumbnail(
    project_id: Annotated[UUID, Depends(get_project_id)],
    dataset_item_id: Annotated[UUID, Depends(get_dataset_item_id)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> FileResponse:
    """Get dataset item thumbnail binary content"""
    # Only the service lookup is guarded; building the response stays outside the try block.
    try:
        thumbnail_path = dataset_service.get_dataset_item_thumbnail_path_by_id(
            project_id=project_id, dataset_item_id=dataset_item_id
        )
    except ResourceNotFoundError as e:
        # Translate the service-level "not found" into a 404, keeping the original error as the cause.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
    return FileResponse(path=thumbnail_path)
132+
133+
134+
@router.delete(
    "/{dataset_item_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    responses={
        status.HTTP_204_NO_CONTENT: {"description": "Dataset item deleted"},
        status.HTTP_400_BAD_REQUEST: {"description": "Invalid dataset item ID"},
        status.HTTP_404_NOT_FOUND: {"description": "Dataset item not found"},
    },
)
def delete_dataset_item(
    project_id: Annotated[UUID, Depends(get_project_id)],
    dataset_item_id: Annotated[UUID, Depends(get_dataset_item_id)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> None:
    """Delete an item from the dataset"""
    try:
        dataset_service.delete_dataset_item(project_id=project_id, dataset_item_id=dataset_item_id)
    except ResourceNotFoundError as e:
        # Translate the service-level "not found" into a 404, keeping the original error as the cause.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e

backend/app/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from fastapi.middleware.cors import CORSMiddleware
1616
from fastapi.responses import FileResponse
1717

18-
from app.api.endpoints import models, pipelines, sinks, sources, system, webrtc
18+
from app.api.endpoints import datasets, models, pipelines, sinks, sources, system, webrtc
1919
from app.core import lifespan
2020
from app.settings import get_settings
2121

@@ -50,6 +50,7 @@
5050
app.include_router(sources.router)
5151
app.include_router(sinks.router)
5252
app.include_router(pipelines.router)
53+
app.include_router(datasets.router)
5354
app.include_router(models.router)
5455
app.include_router(system.router)
5556
app.include_router(webrtc.router)

backend/app/schemas/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
# Copyright (C) 2025 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4+
from app.schemas.dataset_item import DatasetItem, DatasetItemsWithPagination
45
from app.schemas.metrics import InferenceMetrics, LatencyMetrics, PipelineMetrics, TimeWindow
56
from app.schemas.model import Model, ModelFormat
67
from app.schemas.pipeline import Pipeline, PipelineStatus
78
from app.schemas.sink import DisconnectedSinkConfig, OutputFormat, Sink, SinkType
89
from app.schemas.source import DisconnectedSourceConfig, Source, SourceType
910

1011
__all__ = [
12+
"DatasetItem",
13+
"DatasetItemsWithPagination",
1114
"DisconnectedSinkConfig",
1215
"DisconnectedSourceConfig",
1316
"InferenceMetrics",

backend/app/schemas/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,12 @@ class BaseIDNameModel(ABC, BaseModel):
1818

1919
id: UUID = Field(default_factory=uuid4)
2020
name: str = "Default Name"
21+
22+
23+
class Pagination(ABC, BaseModel):
    """Pagination model."""

    # index of the first item returned (0-based)
    offset: int
    # number of items requested per page
    limit: int
    # number of items actually returned (may be less than limit if at the end)
    count: int
    # total number of items available
    total: int
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from enum import StrEnum
5+
from uuid import UUID
6+
7+
from pydantic import BaseModel
8+
9+
from app.schemas.base import BaseIDNameModel, Pagination
10+
11+
12+
class DatasetItemFormat(StrEnum):
13+
JPG = "jpg"
14+
PNG = "png"
15+
16+
17+
class DatasetItemSubset(StrEnum):
18+
UNASSIGNED = "unassigned"
19+
TRAINING = "training"
20+
VALIDATION = "validation"
21+
TESTING = "testing"
22+
23+
24+
class DatasetItem(BaseIDNameModel):
    """Dataset item"""

    # Format identifier, restricted to the DatasetItemFormat members.
    format: DatasetItemFormat
    width: int
    height: int
    size: int
    # Required field, but None is an accepted value.
    source_id: UUID | None
    # Subset label, restricted to the DatasetItemSubset members.
    subset: DatasetItemSubset

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "id": "7b073838-99d3-42ff-9018-4e901eb047fc",
                "name": "img-010203",
                "format": "jpg",
                "width": 1280,
                "height": 720,
                "size": 2211840,
                "source_id": "c1feaabc-da2b-442e-9b3e-55c11c2c2ff3",
                "subset": "unassigned",
            }
        }
    }
50+
51+
52+
class DatasetItemsWithPagination(BaseModel):
    """Dataset Items list with pagination info"""

    # The dataset items carried by this response.
    items: list[DatasetItem]
    # Offset / limit / count / total bookkeeping for the response.
    pagination: Pagination

backend/app/services/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .active_pipeline_service import ActivePipelineService
55
from .base import ResourceAlreadyExistsError, ResourceInUseError, ResourceNotFoundError, ResourceType
66
from .configuration_service import ConfigurationService
7+
from .dataset_service import DatasetService
78
from .dispatch_service import DispatchService
89
from .metrics_service import MetricsService
910
from .model_service import ModelAlreadyExistsError, ModelService
@@ -14,6 +15,7 @@
1415
__all__ = [
1516
"ActivePipelineService",
1617
"ConfigurationService",
18+
"DatasetService",
1719
"DispatchService",
1820
"MetricsService",
1921
"ModelAlreadyExistsError",

backend/app/services/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class ResourceType(StrEnum):
2424
SINK = "Sink"
2525
MODEL = "Model"
2626
PIPELINE = "Pipeline"
27+
DATASET_ITEM = "DatasetItem"
2728

2829

2930
class ResourceError(Exception):
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import logging
5+
from datetime import datetime
6+
from pathlib import Path
7+
from typing import BinaryIO
8+
from uuid import UUID
9+
10+
from app.schemas.dataset_item import DatasetItem
11+
from app.services.base import ResourceNotFoundError, ResourceType
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
class DatasetService:
    """Skeleton dataset service.

    Listing returns an empty list, creation is not implemented, and every
    lookup or deletion reports the requested item as not found.
    """

    @staticmethod
    def _item_not_found(dataset_item_id: UUID) -> Exception:
        """Build the not-found error for the given dataset item ID."""
        return ResourceNotFoundError(ResourceType.DATASET_ITEM, str(dataset_item_id))

    def create_dataset_item(self, project_id: UUID, file: BinaryIO) -> DatasetItem:
        """Create a new dataset item (not implemented yet)."""
        raise NotImplementedError

    def list_dataset_items(
        self,
        project_id: UUID,  # noqa: ARG002
        limit: int = 20,  # noqa: ARG002
        offset: int = 0,  # noqa: ARG002
        start_date: datetime | None = None,  # noqa: ARG002
        end_date: datetime | None = None,  # noqa: ARG002
    ) -> list[DatasetItem]:
        """Return the available dataset items; currently always an empty list."""
        no_items: list[DatasetItem] = []
        return no_items

    def get_dataset_item_by_id(
        self,
        project_id: UUID,  # noqa: ARG002
        dataset_item_id: UUID,
    ) -> DatasetItem:
        """Look up a dataset item by ID; currently always raises ResourceNotFoundError."""
        raise self._item_not_found(dataset_item_id)

    def get_dataset_item_binary_path_by_id(
        self,
        project_id: UUID,  # noqa: ARG002
        dataset_item_id: UUID,
    ) -> Path | str:
        """Look up the path of a dataset item's binary; currently always raises ResourceNotFoundError."""
        raise self._item_not_found(dataset_item_id)

    def get_dataset_item_thumbnail_path_by_id(
        self,
        project_id: UUID,  # noqa: ARG002
        dataset_item_id: UUID,
    ) -> Path | str:
        """Look up the path of a dataset item's thumbnail; currently always raises ResourceNotFoundError."""
        raise self._item_not_found(dataset_item_id)

    def delete_dataset_item(
        self,
        project_id: UUID,  # noqa: ARG002
        dataset_item_id: UUID,
    ) -> None:
        """Delete a dataset item by ID; currently always raises ResourceNotFoundError."""
        raise self._item_not_found(dataset_item_id)

0 commit comments

Comments
 (0)