Skip to content

Commit 8b3b5b8

Browse files
authored
Dataset revision endpoints (#5044)
1 parent edc6da3 commit 8b3b5b8

File tree

8 files changed

+797
-1
lines changed

8 files changed

+797
-1
lines changed

application/backend/app/api/dependencies.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
from fastapi import Depends, HTTPException, Request, UploadFile, status
99
from sqlalchemy.orm import Session
1010

11-
from app.api.validators import ProjectID, SinkID, SourceID
11+
from app.api.validators import DatasetRevisionID, ProjectID, SinkID, SourceID
1212
from app.core.jobs.control_plane import JobQueue
1313
from app.db import get_db_session
1414
from app.models import Project, Sink, Source
15+
from app.models.dataset_revision import DatasetRevision
1516
from app.scheduler import Scheduler
1617
from app.services import (
1718
BaseWeightsService,
@@ -214,3 +215,15 @@ def get_job_queue(request: Request) -> JobQueue:
214215
def get_training_configuration_service(db: Annotated[Session, Depends(get_db)]) -> TrainingConfigurationService:
    """Build a TrainingConfigurationService bound to the injected database session."""
    service = TrainingConfigurationService(db_session=db)
    return service
218+
219+
220+
def get_dataset_revision(
    project_id: ProjectID,
    dataset_revision_id: DatasetRevisionID,
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> DatasetRevision:
    """
    Provides the DatasetRevision identified by `project_id` and `dataset_revision_id`.

    Args:
        project_id: ID of the project used to scope the lookup.
        dataset_revision_id: ID of the dataset revision to load.
        dataset_service: Service used to look up the revision.

    Returns:
        The dataset revision returned by the service.

    Raises:
        HTTPException: With status 404 when the service raises ResourceNotFoundError.
    """
    try:
        return dataset_service.get_dataset_revision(project_id=project_id, revision_id=dataset_revision_id)
    except ResourceNotFoundError as e:
        # Chain the service error so the root cause stays attached to the HTTP error.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from typing import Annotated
5+
6+
from fastapi import APIRouter, Depends, Query, status
7+
from starlette.responses import FileResponse
8+
9+
from app.api.dependencies import get_dataset_revision, get_dataset_service, get_project
10+
from app.api.schemas.dataset_item import DatasetItemsWithPagination, DatasetItemView
11+
from app.api.validators import DatasetItemID, DatasetRevisionID
12+
from app.models import DatasetItemSubset, Project
13+
from app.models.dataset_revision import DatasetRevision
14+
from app.services import DatasetService
15+
16+
# Every route registered on this router is nested under one dataset revision of one project.
router = APIRouter(
    tags=["Dataset Revisions"],
    prefix="/api/projects/{project_id}/dataset_revisions/{dataset_revision_id}",
)
20+
21+
# Page-size bounds for the item listing: the upper limit accepted for `limit`,
# and the value used when the client does not send one.
MAX_DATASET_ITEMS_NUMBER_RETURNED = 100
DEFAULT_DATASET_ITEMS_NUMBER_RETURNED = 10
23+
24+
25+
@router.get(
    "/items",
    responses={
        status.HTTP_200_OK: {
            "description": "List of dataset items in the revision",
            "model": DatasetItemsWithPagination,
        },
        status.HTTP_400_BAD_REQUEST: {"description": "Invalid revision ID"},
        status.HTTP_404_NOT_FOUND: {"description": "Dataset revision or project not found"},
    },
)
def list_dataset_revision_items(
    _project: Annotated[Project, Depends(get_project)],
    _dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
    _dataset_revision: Annotated[DatasetRevision, Depends(get_dataset_revision)],
    # FastAPI names query parameters after the function argument, so without an alias the
    # public API would expose `_limit`, `_offset` and `_subset`. The leading underscore only
    # marks the arguments as unused while the endpoint is a stub.
    _limit: Annotated[
        int, Query(alias="limit", ge=1, le=MAX_DATASET_ITEMS_NUMBER_RETURNED)
    ] = DEFAULT_DATASET_ITEMS_NUMBER_RETURNED,
    _offset: Annotated[int, Query(alias="offset", ge=0)] = 0,
    _subset: Annotated[DatasetItemSubset | None, Query(alias="subset")] = None,
) -> DatasetItemsWithPagination:
    """
    List the items in a dataset revision. This endpoint supports pagination.

    Query parameters:
        limit: Maximum number of items to return (1 to MAX_DATASET_ITEMS_NUMBER_RETURNED).
        offset: Number of items to skip (0 or more).
        subset: Optional subset filter.

    Raises:
        NotImplementedError: Always; the endpoint is currently a placeholder.
    """
    raise NotImplementedError
45+
46+
47+
@router.get(
    "/items/{dataset_item_id}",
    responses={
        status.HTTP_200_OK: {
            "description": "Dataset item found",
            "model": DatasetItemView,
        },
        status.HTTP_400_BAD_REQUEST: {
            "description": "Invalid dataset item ID or revision ID",
        },
        status.HTTP_404_NOT_FOUND: {
            "description": "Dataset item, revision, or project not found",
        },
    },
)
def get_dataset_revision_item(
    _project: Annotated[Project, Depends(get_project)],
    _dataset_revision: Annotated[DatasetRevision, Depends(get_dataset_revision)],
    _dataset_item_id: DatasetItemID,
    _dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> DatasetItemView:
    """
    Return the details of a single item of the dataset revision.

    Placeholder: the handler body is not implemented yet and always raises NotImplementedError.
    """
    raise NotImplementedError()
63+
64+
65+
@router.get(
    "/items/{dataset_item_id}/binary",
    responses={
        status.HTTP_200_OK: {
            "description": "Dataset item binary found",
        },
        status.HTTP_400_BAD_REQUEST: {
            "description": "Invalid dataset item ID or revision ID",
        },
        status.HTTP_404_NOT_FOUND: {
            "description": "Dataset item, binary, revision, or project not found",
        },
    },
)
def get_dataset_revision_item_binary(
    _project: Annotated[Project, Depends(get_project)],
    _dataset_revision: Annotated[DatasetRevision, Depends(get_dataset_revision)],
    _dataset_item_id: DatasetItemID,
    _dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> FileResponse:
    """
    Return the image data of an item of the dataset revision.

    Placeholder: the handler body is not implemented yet and always raises NotImplementedError.
    """
    raise NotImplementedError()
81+
82+
83+
@router.get(
    "/items/{dataset_item_id}/thumbnail",
    responses={
        status.HTTP_200_OK: {
            "description": "Dataset item thumbnail found",
        },
        status.HTTP_400_BAD_REQUEST: {
            "description": "Invalid dataset item ID or revision ID",
        },
        status.HTTP_404_NOT_FOUND: {
            "description": "Dataset item, thumbnail, revision, or project not found",
        },
    },
)
def get_dataset_revision_item_thumbnail(
    _project: Annotated[Project, Depends(get_project)],
    _dataset_revision: Annotated[DatasetRevision, Depends(get_dataset_revision)],
    _dataset_item_id: DatasetItemID,
    _dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> FileResponse:
    """
    Return the thumbnail of an item of the dataset revision.

    Placeholder: the handler body is not implemented yet and always raises NotImplementedError.
    """
    raise NotImplementedError()
99+
100+
101+
@router.delete(
    "",
    status_code=status.HTTP_204_NO_CONTENT,
    responses={
        status.HTTP_204_NO_CONTENT: {
            "description": "Dataset revision files deleted",
        },
        status.HTTP_400_BAD_REQUEST: {
            "description": "Invalid revision ID",
        },
        status.HTTP_404_NOT_FOUND: {
            "description": "Dataset revision or project not found",
        },
    },
)
def delete_dataset_revision_files(
    project: Annotated[Project, Depends(get_project)],
    dataset_revision_id: DatasetRevisionID,
    _dataset_revision: Annotated[DatasetRevision, Depends(get_dataset_revision)],
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
) -> None:
    """
    Remove the files that belong to a dataset revision.

    The `_dataset_revision` dependency is declared only so the revision is resolved before
    the handler runs; the deletion itself is delegated to the dataset service.
    """
    remove_files = dataset_service.delete_dataset_revision_files
    remove_files(project_id=project.id, revision_id=dataset_revision_id)

application/backend/app/api/validators.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,6 @@ def validate_uuid_param(value: str, param_name: str) -> UUID:
5050
DatasetItemID = Annotated[
5151
UUID, Depends(lambda dataset_item_id: validate_uuid_param(dataset_item_id, "dataset_item_id"))
5252
]
53+
# UUID parameter type for `dataset_revision_id`, validated through `validate_uuid_param`.
DatasetRevisionID = Annotated[
    UUID,
    Depends(
        lambda dataset_revision_id: validate_uuid_param(
            value=dataset_revision_id,
            param_name="dataset_revision_id",
        )
    ),
]
application/backend/app/models/dataset_revision.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright (C) 2025 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
from uuid import UUID
4+
5+
from app.models.base import BaseEntity
6+
7+
8+
class DatasetRevision(BaseEntity):
    """
    A single revision of a project's dataset.

    Each revision records the dataset as it was at one point in time, which makes it possible to version the
    dataset and follow how it changes.

    Attributes:
        id: Unique identifier of this revision.
        project_id: Identifier of the project that owns this revision.
        files_deleted: True once the files associated with this revision have been deleted.
    """

    id: UUID
    project_id: UUID
    files_deleted: bool

application/backend/app/services/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class ResourceType(StrEnum):
1717
PIPELINE = "Pipeline"
1818
PROJECT = "Project"
1919
DATASET_ITEM = "DatasetItem"
20+
DATASET_REVISION = "DatasetRevision"
2021
LABEL = "Label"
2122

2223

application/backend/app/services/dataset_service.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import os
55
import os.path
6+
import shutil
67
from collections.abc import Sequence
78
from dataclasses import dataclass
89
from datetime import datetime
@@ -32,6 +33,7 @@
3233
Task,
3334
TaskType,
3435
)
36+
from app.models.dataset_revision import DatasetRevision
3537
from app.repositories import DatasetItemRepository, DatasetRevisionRepository
3638
from app.services.datumaro_converter import convert_dataset
3739
from app.utils.images import crop_to_thumbnail
@@ -405,3 +407,56 @@ def save_revision(self, project_id: UUID, dataset: dm.Dataset) -> UUID:
405407
as_zip=True,
406408
)
407409
return UUID(revision_db.id)
410+
411+
def get_dataset_revision(self, project_id: UUID, revision_id: UUID) -> DatasetRevision:
    """
    Fetch a dataset revision and check that it belongs to the given project.

    Args:
        project_id: The UUID of the project the revision must belong to.
        revision_id: The UUID of the dataset revision to fetch.

    Returns:
        DatasetRevision: The revision converted from its database record.

    Raises:
        ResourceNotFoundError: If no revision has this ID, or it is stored under another project.
    """
    revision_key = str(revision_id)
    record = DatasetRevisionRepository(db=self.db_session).get_by_id(revision_key)

    # A revision stored under another project is reported exactly like a missing one.
    missing = record is None or record.project_id != str(project_id)
    if missing:
        raise ResourceNotFoundError(ResourceType.DATASET_REVISION, revision_key)

    return self._to_dataset_revision(dataset_db=record)
430+
431+
def delete_dataset_revision_files(self, project_id: UUID, revision_id: UUID) -> None:
    """
    Remove the on-disk files of a dataset revision and flag the revision as cleaned up.

    Does nothing beyond logging when the revision is already flagged as having its files deleted.

    Args:
        project_id: The UUID of the project.
        revision_id: The UUID of the dataset revision.

    Raises:
        ResourceNotFoundError: If the revision is not found.
    """
    if self.get_dataset_revision(project_id, revision_id).files_deleted:
        logger.info("Files for dataset revision '{}' already deleted", revision_id)
        return

    revisions_root = self.projects_dir / str(project_id) / "dataset_revisions"
    revision_path = revisions_root / str(revision_id)
    if revision_path.exists():
        shutil.rmtree(revision_path)
        logger.info("Deleted dataset revision files at '{}'", revision_path)

    # Record the deletion on the database row; skipped if the row can no longer be loaded.
    repo = DatasetRevisionRepository(db=self.db_session)
    stored = repo.get_by_id(str(revision_id))
    if not stored:
        return
    stored.files_deleted = True
    repo.save(stored)
458+
459+
@staticmethod
def _to_dataset_revision(dataset_db: DatasetRevisionDB) -> DatasetRevision:
    """Build a DatasetRevision by validating the attributes of its database record."""
    revision = DatasetRevision.model_validate(dataset_db, from_attributes=True)
    return revision

0 commit comments

Comments
 (0)