Skip to content

Commit 00e2ea6

Browse files
committed
[DOP-22530] Implement GET /v1/jobs?location_type=...
1 parent b8e039f commit 00e2ea6

File tree

10 files changed

+276
-167
lines changed

10 files changed

+276
-167
lines changed

data_rentgen/db/repositories/job.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,35 +77,40 @@ async def paginate(
7777
job_ids: Collection[int],
7878
search_query: str | None,
7979
location_id: int | None,
80+
location_type: Collection[str],
8081
job_type: Collection[str],
8182
) -> PaginationDTO[Job]:
8283
where = []
84+
location_join_clause = Location.id == Job.location_id
8385
if job_ids:
8486
where.append(Job.id == any_(list(job_ids))) # type: ignore[arg-type]
85-
86-
query: Select | CompoundSelect
87-
order_by: list[ColumnElement | SQLColumnExpression]
8887
if job_type:
8988
where.append(Job.type == any_(list(job_type))) # type: ignore[arg-type]
9089
if location_id:
9190
where.append(Job.location_id == location_id) # type: ignore[arg-type]
91+
if location_type:
92+
location_type_lower = [location_type.lower() for location_type in location_type]
93+
where.append(Location.type == any_(location_type_lower)) # type: ignore[arg-type]
9294

95+
query: Select | CompoundSelect
96+
order_by: list[ColumnElement | SQLColumnExpression]
9397
if search_query:
9498
tsquery = make_tsquery(search_query)
9599

96-
job_stmt = select(Job, ts_rank(Job.search_vector, tsquery).label("search_rank")).where(
97-
ts_match(Job.search_vector, tsquery),
98-
*where,
100+
job_stmt = (
101+
select(Job, ts_rank(Job.search_vector, tsquery).label("search_rank"))
102+
.join(Location, location_join_clause)
103+
.where(ts_match(Job.search_vector, tsquery), *where)
99104
)
100105
location_stmt = (
101106
select(Job, ts_rank(Location.search_vector, tsquery).label("search_rank"))
102-
.join(Job, Location.id == Job.location_id)
107+
.join(Location, location_join_clause)
103108
.where(ts_match(Location.search_vector, tsquery), *where)
104109
)
105110
address_stmt = (
106111
select(Job, func.max(ts_rank(Address.search_vector, tsquery).label("search_rank")))
107-
.join(Location, Address.location_id == Location.id)
108-
.join(Job, Location.id == Job.location_id)
112+
.join(Location, location_join_clause)
113+
.join(Address, Address.location_id == Job.location_id)
109114
.where(ts_match(Address.search_vector, tsquery), *where)
110115
.group_by(Job.id, Location.id, Address.id)
111116
)
@@ -120,7 +125,7 @@ async def paginate(
120125
).group_by(*job_columns)
121126
order_by = [desc("search_rank"), asc("name")]
122127
else:
123-
query = select(Job).where(*where)
128+
query = select(Job).join(Location, location_join_clause).where(*where)
124129
order_by = [Job.name]
125130

126131
options = [selectinload(Job.location).selectinload(Location.addresses)]

data_rentgen/server/api/v1/router/job.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ async def paginate_jobs(
4444
job_ids=query_args.job_id,
4545
search_query=query_args.search_query,
4646
location_id=query_args.location_id,
47+
location_type=query_args.location_type,
4748
job_type=query_args.job_type,
4849
)
4950
return PageResponseV1[JobDetailedResponseV1].from_pagination(pagination)

data_rentgen/server/schemas/v1/job.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ class JobDetailedResponseV1(BaseModel):
2929
class JobTypesResponseV1(BaseModel):
3030
"""Job types"""
3131

32-
job_types: list[str] = Field(description="List of distinct job types")
32+
job_types: list[str] = Field(
33+
description="List of distinct job types",
34+
examples=[["SPARK_APPLICATION", "AIRFLOW_DAG"]],
35+
)
3336

3437
model_config = ConfigDict(from_attributes=True)
3538

@@ -42,14 +45,22 @@ class JobPaginateQueryV1(PaginateQueryV1):
4245
default=None,
4346
min_length=3,
4447
description="Search query",
48+
examples=["my job"],
4549
)
4650
job_type: list[str] = Field(
4751
default_factory=list,
4852
description="Specify job types",
53+
examples=[["SPARK_APPLICATION", "AIRFLOW_DAG"]],
4954
)
5055
location_id: int | None = Field(
5156
default=None,
5257
description="The location id which jobs belong",
58+
examples=[123],
59+
)
60+
location_type: list[str] = Field(
61+
default_factory=list,
62+
description="Specify location types",
63+
examples=[["yarn"]],
5364
)
5465

5566
model_config = ConfigDict(extra="forbid")

data_rentgen/server/services/job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ async def paginate(
3232
job_ids: Collection[int],
3333
search_query: str | None,
3434
location_id: int | None,
35+
location_type: Collection[str],
3536
job_type: Collection[str],
3637
) -> JobServicePaginatedResult:
3738
pagination = await self._uow.job.paginate(
@@ -40,6 +41,7 @@ async def paginate(
4041
job_ids=job_ids,
4142
search_query=search_query,
4243
location_id=location_id,
44+
location_type=location_type,
4345
job_type=job_type,
4446
)
4547

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add a new filter to ``GET /v1/jobs``:

- ``location_type``: ``list[str]``

tests/test_server/fixtures/factories/job.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,20 +235,20 @@ async def jobs_search(
235235
@pytest_asyncio.fixture
236236
async def jobs_with_locations_and_types(
237237
async_session_maker: Callable[[], AbstractAsyncContextManager[AsyncSession]],
238-
) -> AsyncGenerator[tuple[Job]]:
238+
) -> AsyncGenerator[tuple[Job, ...]]:
239239
async with async_session_maker() as async_session:
240240
cluster_location = await create_location(async_session, location_kwargs={"name": "my-cluster", "type": "yarn"})
241241
airflow_location = await create_location(
242242
async_session,
243243
location_kwargs={"name": "airflow-host", "type": "http"},
244244
)
245-
cluster_type = await create_job_type(async_session, {"type": "SPARK_APPLICATION"})
245+
spark_type = await create_job_type(async_session, {"type": "SPARK_APPLICATION"})
246246
airflow_dag_type = await create_job_type(async_session, {"type": "AIRFLOW_DAG"})
247247
airflow_task_type = await create_job_type(async_session, {"type": "AIRFLOW_TASK"})
248-
cluster_job = await create_job(
248+
spark_job = await create_job(
249249
async_session,
250250
location_id=cluster_location.id,
251-
job_type_id=cluster_type.id,
251+
job_type_id=spark_type.id,
252252
job_kwargs={"name": "my-job_cluster"},
253253
)
254254
dag_job = await create_job(
@@ -266,7 +266,7 @@ async def jobs_with_locations_and_types(
266266

267267
async_session.expunge_all()
268268

269-
yield (cluster_job, dag_job, task_job)
269+
yield (spark_job, dag_job, task_job)
270270

271271
async with async_session_maker() as async_session:
272272
await clean_db(async_session)
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
from http import HTTPStatus

import pytest
from httpx import AsyncClient
from sqlalchemy.ext.asyncio import AsyncSession

from data_rentgen.db.models import Job
from tests.fixtures.mocks import MockedUser
from tests.test_server.utils.convert_to_json import job_to_json
from tests.test_server.utils.enrich import enrich_jobs

pytestmark = [pytest.mark.server, pytest.mark.asyncio]


async def test_get_jobs_by_location_id(
    test_client: AsyncClient,
    async_session: AsyncSession,
    jobs_with_locations_and_types: tuple[Job, ...],
    mocked_user: MockedUser,
) -> None:
    """Filtering by ``location_id`` returns only the jobs bound to that location."""
    jobs = await enrich_jobs(jobs_with_locations_and_types, async_session)

    # first job in jobs has a different location unlike two others
    [_, dag_job, task_job] = jobs
    response = await test_client.get(
        "/v1/jobs",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
        params={"location_id": dag_job.location_id},
    )

    assert response.status_code == HTTPStatus.OK, response.json()
    assert response.json() == {
        "meta": {
            "has_next": False,
            "has_previous": False,
            "next_page": None,
            "page": 1,
            "page_size": 20,
            "pages_count": 1,
            "previous_page": None,
            "total_count": 2,
        },
        "items": [
            {
                "id": str(job.id),
                "data": job_to_json(job),
            }
            for job in [dag_job, task_job]
        ],
    }


async def test_get_jobs_by_location_id_non_existent(
    test_client: AsyncClient,
    async_session: AsyncSession,
    jobs_with_locations_and_types: tuple[Job, ...],
    mocked_user: MockedUser,
) -> None:
    """A ``location_id`` that matches nothing yields an empty, well-formed page."""
    response = await test_client.get(
        "/v1/jobs",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
        params={"location_id": -1},
    )

    assert response.status_code == HTTPStatus.OK, response.json()
    assert response.json() == {
        "meta": {
            "has_next": False,
            "has_previous": False,
            "next_page": None,
            "page": 1,
            "page_size": 20,
            "pages_count": 1,
            "previous_page": None,
            "total_count": 0,
        },
        "items": [],
    }


# NOTE: renamed from test_get_lobs_by_location_type — "lobs" was a typo for "jobs".
async def test_get_jobs_by_location_type(
    test_client: AsyncClient,
    async_session: AsyncSession,
    jobs_with_locations_and_types: tuple[Job, ...],
    mocked_user: MockedUser,
) -> None:
    """Filtering by ``location_type`` matches case-insensitively (repository lowercases input)."""
    spark_job, *_ = await enrich_jobs(jobs_with_locations_and_types, async_session)
    response = await test_client.get(
        "/v1/jobs",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
        params={"location_type": ["YARN"]},  # case-insensitive
    )

    assert response.status_code == HTTPStatus.OK, response.json()
    assert response.json() == {
        "meta": {
            "has_next": False,
            "has_previous": False,
            "next_page": None,
            "page": 1,
            "page_size": 20,
            "pages_count": 1,
            "previous_page": None,
            "total_count": 1,
        },
        "items": [
            {
                "id": str(spark_job.id),
                "data": job_to_json(spark_job),
            },
        ],
    }


async def test_get_jobs_by_location_type_non_existent(
    test_client: AsyncClient,
    async_session: AsyncSession,
    jobs_with_locations_and_types: tuple[Job, ...],
    mocked_user: MockedUser,
) -> None:
    """An unknown ``location_type`` yields an empty, well-formed page."""
    response = await test_client.get(
        "/v1/jobs",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
        params={"location_type": "non_existing_location_type"},
    )

    assert response.status_code == HTTPStatus.OK, response.json()
    assert response.json() == {
        "meta": {
            "has_next": False,
            "has_previous": False,
            "next_page": None,
            "page": 1,
            "page_size": 20,
            "pages_count": 1,
            "previous_page": None,
            "total_count": 0,
        },
        "items": [],
    }
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
from http import HTTPStatus

import pytest
from httpx import AsyncClient
from sqlalchemy.ext.asyncio import AsyncSession

from data_rentgen.db.models import Job, JobType
from tests.fixtures.mocks import MockedUser
from tests.test_server.utils.convert_to_json import job_to_json
from tests.test_server.utils.enrich import enrich_jobs

pytestmark = [pytest.mark.server, pytest.mark.asyncio]


async def test_get_job_types(
    test_client: AsyncClient,
    job_types: list[JobType],
    mocked_user: MockedUser,
) -> None:
    """The types endpoint returns every distinct job type, sorted."""
    unique_job_type = {item.type for item in job_types}
    response = await test_client.get(
        "/v1/jobs/types",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
    )

    assert response.status_code == HTTPStatus.OK, response.json()
    assert response.json() == {"job_types": sorted(unique_job_type)}


async def test_get_jobs_by_job_type(
    test_client: AsyncClient,
    async_session: AsyncSession,
    jobs_with_locations_and_types: tuple[Job, ...],
    mocked_user: MockedUser,
) -> None:
    """Passing several ``job_type`` values returns the union of matching jobs."""
    _, dag_job, task_job = await enrich_jobs(jobs_with_locations_and_types, async_session)

    response = await test_client.get(
        "/v1/jobs",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
        params={"job_type": ["AIRFLOW_DAG", "AIRFLOW_TASK"]},
    )
    assert response.status_code == HTTPStatus.OK, response.json()

    expected_items = [
        {
            "id": str(job.id),
            "data": job_to_json(job),
        }
        for job in (dag_job, task_job)
    ]
    assert response.json() == {
        "meta": {
            "has_next": False,
            "has_previous": False,
            "next_page": None,
            "page": 1,
            "page_size": 20,
            "pages_count": 1,
            "previous_page": None,
            "total_count": 2,
        },
        "items": expected_items,
    }


async def test_get_jobs_by_non_existent_type(
    test_client: AsyncClient,
    async_session: AsyncSession,
    jobs_with_locations_and_types: tuple[Job, ...],
    mocked_user: MockedUser,
) -> None:
    """An unknown ``job_type`` filter yields an empty, well-formed page."""
    response = await test_client.get(
        "/v1/jobs",
        headers={"Authorization": f"Bearer {mocked_user.access_token}"},
        params={"job_type": "NO_EXISTENT_TYPE"},
    )
    assert response.status_code == HTTPStatus.OK, response.json()

    assert response.json() == {
        "meta": {
            "has_next": False,
            "has_previous": False,
            "next_page": None,
            "page": 1,
            "page_size": 20,
            "pages_count": 1,
            "previous_page": None,
            "total_count": 0,
        },
        "items": [],
    }

0 commit comments

Comments
 (0)