diff --git a/data_rentgen/db/repositories/dataset.py b/data_rentgen/db/repositories/dataset.py index c8f76f0f..8d49316e 100644 --- a/data_rentgen/db/repositories/dataset.py +++ b/data_rentgen/db/repositories/dataset.py @@ -5,6 +5,7 @@ from sqlalchemy import ( ColumnElement, CompoundSelect, + Row, Select, SQLColumnExpression, any_, @@ -102,6 +103,24 @@ async def list_by_ids(self, dataset_ids: Sequence[int]) -> list[Dataset]: result = await self._session.scalars(query) return list(result.all()) + async def get_stats_by_location_ids(self, location_ids: Sequence[int]) -> dict[int, Row]: + if not location_ids: + return {} + + query = ( + select( + Dataset.location_id.label("location_id"), + func.count(Dataset.id.distinct()).label("total_datasets"), + ) + .where( + Dataset.location_id == any_(location_ids), # type: ignore[arg-type] + ) + .group_by(Dataset.location_id) + ) + + query_result = await self._session.execute(query) + return {row.location_id: row for row in query_result.all()} + async def _get(self, dataset: DatasetDTO) -> Dataset | None: statement = select(Dataset).where(Dataset.location_id == dataset.location.id, Dataset.name == dataset.name) return await self._session.scalar(statement) diff --git a/data_rentgen/db/repositories/job.py b/data_rentgen/db/repositories/job.py index b5ef1ef7..2802bbd5 100644 --- a/data_rentgen/db/repositories/job.py +++ b/data_rentgen/db/repositories/job.py @@ -5,6 +5,7 @@ from sqlalchemy import ( ColumnElement, CompoundSelect, + Row, Select, SQLColumnExpression, any_, @@ -101,6 +102,24 @@ async def list_by_ids(self, job_ids: Sequence[int]) -> list[Job]: result = await self._session.scalars(query) return list(result.all()) + async def get_stats_by_location_ids(self, location_ids: Sequence[int]) -> dict[int, Row]: + if not location_ids: + return {} + + query = ( + select( + Job.location_id.label("location_id"), + func.count(Job.id.distinct()).label("total_jobs"), + ) + .where( + Job.location_id == any_(location_ids), # type: 
ignore[arg-type] + ) + .group_by(Job.location_id) + ) + + query_result = await self._session.execute(query) + return {row.location_id: row for row in query_result.all()} + async def _get(self, job: JobDTO) -> Job | None: statement = select(Job).where(Job.location_id == job.location.id, Job.name == job.name) return await self._session.scalar(statement) diff --git a/data_rentgen/server/api/v1/router/location.py b/data_rentgen/server/api/v1/router/location.py index d754c623..72da7eb1 100644 --- a/data_rentgen/server/api/v1/router/location.py +++ b/data_rentgen/server/api/v1/router/location.py @@ -8,43 +8,45 @@ from data_rentgen.server.errors import get_error_responses from data_rentgen.server.errors.schemas import InvalidRequestSchema, NotFoundSchema from data_rentgen.server.schemas.v1 import ( + LocationDetailedResponseV1, LocationPaginateQueryV1, - LocationResponseV1, PageResponseV1, UpdateLocationRequestV1, ) -from data_rentgen.server.services import get_user -from data_rentgen.services import UnitOfWork +from data_rentgen.server.services import LocationService, get_user router = APIRouter( prefix="/locations", tags=["Locations"], - responses=get_error_responses(include={InvalidRequestSchema, NotFoundSchema}), + responses=get_error_responses(include={InvalidRequestSchema}), ) @router.get("", summary="Paginated list of Locations") async def paginate_locations( query_args: Annotated[LocationPaginateQueryV1, Depends()], - unit_of_work: Annotated[UnitOfWork, Depends()], + location_service: Annotated[LocationService, Depends()], current_user: User = Depends(get_user()), -) -> PageResponseV1[LocationResponseV1]: - pagination = await unit_of_work.location.paginate( +) -> PageResponseV1[LocationDetailedResponseV1]: + pagination = await location_service.paginate( page=query_args.page, page_size=query_args.page_size, location_ids=query_args.location_id, location_type=query_args.location_type, search_query=query_args.search_query, ) - return 
PageResponseV1[LocationResponseV1].from_pagination(pagination) + return PageResponseV1[LocationDetailedResponseV1].from_pagination(pagination) -@router.patch("/{location_id}") +@router.patch( + "/{location_id}", + responses=get_error_responses(include={InvalidRequestSchema, NotFoundSchema}), +) async def update_location( location_id: int, location_data: UpdateLocationRequestV1, - unit_of_work: Annotated[UnitOfWork, Depends()], + location_service: Annotated[LocationService, Depends()], current_user: User = Depends(get_user()), -) -> LocationResponseV1: - location = await unit_of_work.location.update_external_id(location_id, location_data.external_id) - return LocationResponseV1.model_validate(location) +) -> LocationDetailedResponseV1: + location = await location_service.update_external_id(location_id, location_data.external_id) + return LocationDetailedResponseV1.model_validate(location) diff --git a/data_rentgen/server/schemas/v1/__init__.py b/data_rentgen/server/schemas/v1/__init__.py index afedf1ec..cbec5c6a 100644 --- a/data_rentgen/server/schemas/v1/__init__.py +++ b/data_rentgen/server/schemas/v1/__init__.py @@ -21,6 +21,7 @@ RunLineageQueryV1, ) from data_rentgen.server.schemas.v1.location import ( + LocationDetailedResponseV1, LocationPaginateQueryV1, LocationResponseV1, UpdateLocationRequestV1, @@ -68,6 +69,7 @@ "LineageOutputRelationV1", "LineageParentRelationV1", "LineageSymlinkRelationV1", + "LocationDetailedResponseV1", "LocationPaginateQueryV1", "LocationResponseV1", "UpdateLocationRequestV1", diff --git a/data_rentgen/server/schemas/v1/location.py b/data_rentgen/server/schemas/v1/location.py index 3dcac389..02b52038 100644 --- a/data_rentgen/server/schemas/v1/location.py +++ b/data_rentgen/server/schemas/v1/location.py @@ -17,6 +17,38 @@ class LocationResponseV1(BaseModel): model_config = ConfigDict(from_attributes=True) +class LocationDatasetStatisticsResponseV1(BaseModel): + """Location dataset statistics response.""" + + total_datasets: int = 
Field(description="Total number of datasets bound to this location") + + model_config = ConfigDict(from_attributes=True) + + +class LocationJobStatisticsResponseV1(BaseModel): + """Location job statistics response.""" + + total_jobs: int = Field(description="Total number of jobs bound to this location") + + model_config = ConfigDict(from_attributes=True) + + +class LocationStatisticsResponseV1(BaseModel): + """Location statistics response.""" + + datasets: LocationDatasetStatisticsResponseV1 = Field(description="Dataset statistics") + jobs: LocationJobStatisticsResponseV1 = Field(description="Job statistics") + + model_config = ConfigDict(from_attributes=True) + + +class LocationDetailedResponseV1(BaseModel): + data: LocationResponseV1 = Field(description="Location data") + statistics: LocationStatisticsResponseV1 = Field(description="Location statistics") + + model_config = ConfigDict(from_attributes=True) + + class LocationPaginateQueryV1(PaginateQueryV1): """Query params for Location paginate request.""" diff --git a/data_rentgen/server/services/__init__.py b/data_rentgen/server/services/__init__.py index 83b2f663..35d4c046 100644 --- a/data_rentgen/server/services/__init__.py +++ b/data_rentgen/server/services/__init__.py @@ -2,12 +2,14 @@ # SPDX-License-Identifier: Apache-2.0 from data_rentgen.server.services.get_user import get_user from data_rentgen.server.services.lineage import LineageService +from data_rentgen.server.services.location import LocationService from data_rentgen.server.services.operation import OperationService from data_rentgen.server.services.run import RunService __all__ = [ "get_user", "LineageService", + "LocationService", "OperationService", "RunService", ] diff --git a/data_rentgen/server/services/location.py b/data_rentgen/server/services/location.py new file mode 100644 index 00000000..cfe74879 --- /dev/null +++ b/data_rentgen/server/services/location.py @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: 2024-2025 MTS PJSC +# 
SPDX-License-Identifier: Apache-2.0 +from dataclasses import dataclass +from typing import Annotated + +from fastapi import Depends +from sqlalchemy import Row + +from data_rentgen.db.models.location import Location +from data_rentgen.dto.pagination import PaginationDTO +from data_rentgen.services.uow import UnitOfWork + + +@dataclass +class LocationServiceDatasetStatistics: + total_datasets: int = 0 + + @classmethod + def from_row(cls, row: Row | None): + if not row: + return cls() + + return cls(total_datasets=row.total_datasets) + + +@dataclass +class LocationServiceJobStatistics: + total_jobs: int = 0 + + @classmethod + def from_row(cls, row: Row | None): + if not row: + return cls() + + return cls(total_jobs=row.total_jobs) + + +@dataclass +class LocationServiceStatistics: + datasets: LocationServiceDatasetStatistics + jobs: LocationServiceJobStatistics + + +@dataclass +class LocationServiceResult: + data: Location + statistics: LocationServiceStatistics + + +class LocationServicePaginatedResult(PaginationDTO[LocationServiceResult]): + pass + + +class LocationService: + def __init__(self, uow: Annotated[UnitOfWork, Depends()]): + self._uow = uow + + async def paginate( + self, + page: int, + page_size: int, + location_ids: list[int], + location_type: str | None, + search_query: str | None, + ) -> LocationServicePaginatedResult: + pagination = await self._uow.location.paginate( + page=page, + page_size=page_size, + location_ids=location_ids, + location_type=location_type, + search_query=search_query, + ) + location_ids = [item.id for item in pagination.items] + dataset_stats = await self._uow.dataset.get_stats_by_location_ids(location_ids) + job_stats = await self._uow.job.get_stats_by_location_ids(location_ids) + + return LocationServicePaginatedResult( + page=pagination.page, + page_size=pagination.page_size, + total_count=pagination.total_count, + items=[ + LocationServiceResult( + data=location, + statistics=LocationServiceStatistics( + 
datasets=LocationServiceDatasetStatistics.from_row(dataset_stats.get(location.id)), + jobs=LocationServiceJobStatistics.from_row(job_stats.get(location.id)), + ), + ) + for location in pagination.items + ], + ) + + async def update_external_id( + self, + location_id: int, + external_id: str | None, + ) -> LocationServiceResult: + location = await self._uow.location.update_external_id(location_id, external_id) + dataset_stats = await self._uow.dataset.get_stats_by_location_ids([location.id]) + job_stats = await self._uow.job.get_stats_by_location_ids([location.id]) + return LocationServiceResult( + data=location, + statistics=LocationServiceStatistics( + datasets=LocationServiceDatasetStatistics.from_row(dataset_stats.get(location.id)), + jobs=LocationServiceJobStatistics.from_row(job_stats.get(location.id)), + ), + ) diff --git a/docs/changelog/next_release/160.breaking.rst b/docs/changelog/next_release/160.breaking.rst new file mode 100644 index 00000000..0d91a0d4 --- /dev/null +++ b/docs/changelog/next_release/160.breaking.rst @@ -0,0 +1,63 @@ +Change response schema of ``GET /locations`` from: + +.. code:: python + + { + "meta": {...}, + "items": [ + { + "kind": "LOCATION", + "id": ..., + # ... + } + ], + } + +to: + +.. code:: python + + { + "meta": {...}, + "items": [ + { + "data": { + "kind": "LOCATION", + "id": ..., + # ... + }, + "statistics": { + "datasets": {"total_datasets": 2}, + "jobs": {"total_jobs": 0}, + }, + } + ], + } + +Same for ``PATCH /locations/:id`` - before: + +.. code:: python + + { + "kind": "LOCATION", + "id": ..., + # ... + } + +after: + +.. code:: python + + { + "data": { + "kind": "LOCATION", + "id": ..., + # ... + }, + "statistics": { + "datasets": {"total_datasets": 2}, + "jobs": {"total_jobs": 0}, + }, + } + +This allows showing location statistics in the UI. 
diff --git a/tests/test_server/test_locations/test_get_locations.py b/tests/test_server/test_locations/test_get_locations.py index a4a34a80..1209a588 100644 --- a/tests/test_server/test_locations/test_get_locations.py +++ b/tests/test_server/test_locations/test_get_locations.py @@ -37,11 +37,21 @@ async def test_get_locations_no_filters( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } for location in sorted(locations, key=lambda x: x.name) ], @@ -78,11 +88,21 @@ async def test_get_locations_with_type_filter( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } for location in sorted(locations, key=lambda x: x.name) ], diff --git a/tests/test_server/test_locations/test_get_locations_by_id.py b/tests/test_server/test_locations/test_get_locations_by_id.py index 46afbe2b..d1f6d11f 100644 --- a/tests/test_server/test_locations/test_get_locations_by_id.py +++ b/tests/test_server/test_locations/test_get_locations_by_id.py @@ -5,6 +5,8 @@ from sqlalchemy.ext.asyncio import AsyncSession from data_rentgen.db.models import Location +from data_rentgen.db.models.dataset import Dataset 
+from data_rentgen.db.models.job import Job from tests.fixtures.mocks import MockedUser from tests.test_server.utils.enrich import enrich_locations @@ -66,11 +68,21 @@ async def test_get_locations_by_one_id( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, }, ], } @@ -105,11 +117,76 @@ async def test_get_locations_by_multiple_ids( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, + } + for location in sorted(selected_locations, key=lambda x: x.name) + ], + } + + +async def test_get_locations_by_multiple_ids_with_stats( + test_client: AsyncClient, + datasets: list[Dataset], + jobs: list[Job], + async_session: AsyncSession, + mocked_user: MockedUser, +): + dataset_location_ids = {dataset.location_id for dataset in datasets} + job_location_ids = {job.location_id for job in jobs} + location_ids = dataset_location_ids | job_location_ids + + locations = [Location(id=id) for id in location_ids] + selected_locations = await enrich_locations(locations, async_session) + + response = await test_client.get( + "v1/locations", + headers={"Authorization": f"Bearer 
{mocked_user.access_token}"}, + params={"location_id": list(location_ids)}, + ) + + assert response.status_code == HTTPStatus.OK, response.json() + assert response.json() == { + "meta": { + "page": 1, + "page_size": 20, + "total_count": 10 + 5, + "pages_count": 1, + "has_next": False, + "has_previous": False, + "next_page": None, + "previous_page": None, + }, + "items": [ + { + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": int(location.id in dataset_location_ids), + }, + "jobs": { + "total_jobs": int(location.id in job_location_ids), + }, + }, } for location in sorted(selected_locations, key=lambda x: x.name) ], diff --git a/tests/test_server/test_locations/test_patch_locations.py b/tests/test_server/test_locations/test_patch_locations.py index de19595f..58a06a25 100644 --- a/tests/test_server/test_locations/test_patch_locations.py +++ b/tests/test_server/test_locations/test_patch_locations.py @@ -18,7 +18,7 @@ ], indirect=True, ) -async def test_add_location_external_id( +async def test_set_location_external_id( test_client: AsyncClient, async_session: AsyncSession, location: Location, @@ -35,15 +35,25 @@ async def test_add_location_external_id( assert response.status_code == HTTPStatus.OK, response.json() assert response.json() == { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": "external_id", + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": "external_id", + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } -async def test_update_location_external_id( +async def 
test_change_location_external_id( test_client: AsyncClient, async_session: AsyncSession, location: Location, @@ -58,11 +68,54 @@ async def test_update_location_external_id( assert response.status_code == HTTPStatus.OK, response.json() assert response.json() == { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": "new_external_id", + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": "new_external_id", + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, + } + + +async def test_reset_location_external_id( + test_client: AsyncClient, + async_session: AsyncSession, + location: Location, + mocked_user: MockedUser, +): + response = await test_client.patch( + f"v1/locations/{location.id}", + headers={"Authorization": f"Bearer {mocked_user.access_token}"}, + json={"external_id": None}, + ) + + [location] = await enrich_locations([location], async_session=async_session) + assert response.status_code == HTTPStatus.OK, response.json() + assert response.json() == { + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": None, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } @@ -91,29 +144,6 @@ async def test_update_location_not_found( } -async def test_update_location_writing_null_to_external_id( - test_client: AsyncClient, - async_session: AsyncSession, - location: Location, - mocked_user: MockedUser, -): - response = await test_client.patch( - f"v1/locations/{location.id}", - headers={"Authorization": f"Bearer {mocked_user.access_token}"}, - json={"external_id": None}, - ) - - [location] = await 
enrich_locations([location], async_session=async_session) - assert response.status_code == HTTPStatus.OK, response.json() - assert response.json() == { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": None, - } - - async def test_patch_location_without_auth( test_client: AsyncClient, async_session: AsyncSession, diff --git a/tests/test_server/test_locations/test_search_locations.py b/tests/test_server/test_locations/test_search_locations.py index ca0faa11..24dce254 100644 --- a/tests/test_server/test_locations/test_search_locations.py +++ b/tests/test_server/test_locations/test_search_locations.py @@ -42,11 +42,21 @@ async def test_search_locations_by_address_url( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } for location in locations ], @@ -85,11 +95,21 @@ async def test_search_locations_by_location_name( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } for location in locations ], @@ -133,11 +153,21 @@ async def 
test_search_locations_by_location_name_and_address_url( }, "items": [ { - "id": location.id, - "name": location.name, - "type": location.type, - "addresses": [{"url": address.url} for address in location.addresses], - "external_id": location.external_id, + "data": { + "id": location.id, + "name": location.name, + "type": location.type, + "addresses": [{"url": address.url} for address in location.addresses], + "external_id": location.external_id, + }, + "statistics": { + "datasets": { + "total_datasets": 0, + }, + "jobs": { + "total_jobs": 0, + }, + }, } for location in locations ],