From 024bec7a1aa481f3b6ee95531f7fc4fd6cbeeb78 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Thu, 6 Mar 2025 15:59:43 +0100 Subject: [PATCH 001/131] start adding links to output schemas --- .../api_schemas_webserver/storage.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index f192ba51355e..9865f4e3ea9e 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -1,11 +1,12 @@ from pathlib import Path from typing import Annotated, Any +from aiohttp import web from models_library.api_schemas_storage.storage_schemas import ( DEFAULT_NUMBER_OF_PATHS_PER_PAGE, MAX_NUMBER_OF_PATHS_PER_PAGE, ) -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, HttpUrl from ..api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobGet, @@ -49,12 +50,33 @@ def to_rpc_schema(self, location_id: LocationID) -> DataExportTaskStartInput: ) +class AsyncJobLinks(OutputSchema): + status_href: HttpUrl + abort_href: HttpUrl + result_href: HttpUrl + + class StorageAsyncJobGet(OutputSchema): job_id: AsyncJobId + links: AsyncJobLinks @classmethod - def from_rpc_schema(cls, async_job_rpc_get: AsyncJobGet) -> "StorageAsyncJobGet": - return StorageAsyncJobGet(job_id=async_job_rpc_get.job_id) + def from_rpc_schema( + cls, app: web.Application, async_job_rpc_get: AsyncJobGet + ) -> "StorageAsyncJobGet": + job_id = f"{async_job_rpc_get.job_id}" + links = AsyncJobLinks( + status_href=HttpUrl( + f"{app.router['get_async_job_status'].url_for(job_id=job_id)}" + ), + abort_href=HttpUrl( + f"{app.router['abort_async_job'].url_for(job_id=job_id)}" + ), + result_href=HttpUrl( + f"{app.router['get_async_job_result'].url_for(job_id=job_id)}" + ), + ) + return StorageAsyncJobGet(job_id=async_job_rpc_get.job_id, links=links) class StorageAsyncJobStatus(OutputSchema): From 1e1a444ca55ac44cf414157f1b44ac9d5ae2537d Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 08:56:29 +0100 Subject: [PATCH 002/131] add links also to status response --- .../api_schemas_webserver/storage.py | 42 +++++++++++-------- .../storage/_rest.py | 13 ++++-- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index 9865f4e3ea9e..1ceed50a281d 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -6,7 +6,7 @@ DEFAULT_NUMBER_OF_PATHS_PER_PAGE, MAX_NUMBER_OF_PATHS_PER_PAGE, ) -from pydantic import BaseModel, Field, HttpUrl +from pydantic import BaseModel, Field from ..api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobGet, @@ -51,9 +51,17 @@ def to_rpc_schema(self, location_id: LocationID) -> DataExportTaskStartInput: class AsyncJobLinks(OutputSchema): - status_href: HttpUrl - abort_href: HttpUrl - result_href: HttpUrl + status_href: str + abort_href: str + result_href: str + + @classmethod + def from_job_id(cls, app: web.Application, job_id: str) -> "AsyncJobLinks": + return AsyncJobLinks( + status_href=f"{app.router['get_async_job_status'].url_for(job_id=job_id)}", + abort_href=f"{app.router['abort_async_job'].url_for(job_id=job_id)}", + 
result_href=f"{app.router['get_async_job_result'].url_for(job_id=job_id)}",
+        )


 class StorageAsyncJobGet(OutputSchema):
@@ -62,36 +70,36 @@ class StorageAsyncJobGet(OutputSchema):

     @classmethod
     def from_rpc_schema(
-        cls, app: web.Application, async_job_rpc_get: AsyncJobGet
+        cls, *, app: web.Application, async_job_rpc_get: AsyncJobGet
     ) -> "StorageAsyncJobGet":
         job_id = f"{async_job_rpc_get.job_id}"
-        links = AsyncJobLinks(
-            status_href=HttpUrl(
-                f"{app.router['get_async_job_status'].url_for(job_id=job_id)}"
-            ),
-            abort_href=HttpUrl(
-                f"{app.router['abort_async_job'].url_for(job_id=job_id)}"
-            ),
-            result_href=HttpUrl(
-                f"{app.router['get_async_job_result'].url_for(job_id=job_id)}"
-            ),
+        return StorageAsyncJobGet(
+            job_id=async_job_rpc_get.job_id,
+            links=AsyncJobLinks.from_job_id(app=app, job_id=job_id),
         )
-        return StorageAsyncJobGet(job_id=async_job_rpc_get.job_id, links=links)


 class StorageAsyncJobStatus(OutputSchema):
     job_id: AsyncJobId
     progress: ProgressReport
     done: bool
+    started: datetime
+    stopped: datetime | None
+    links: AsyncJobLinks

     @classmethod
     def from_rpc_schema(
-        cls, async_job_rpc_status: AsyncJobStatus
+        cls, *, app: web.Application, async_job_rpc_status: AsyncJobStatus
     ) -> "StorageAsyncJobStatus":
         return StorageAsyncJobStatus(
             job_id=async_job_rpc_status.job_id,
             progress=async_job_rpc_status.progress,
             done=async_job_rpc_status.done,
+            started=async_job_rpc_status.started,
+            stopped=async_job_rpc_status.stopped,
+            links=AsyncJobLinks.from_job_id(
+                app=app, job_id=f"{async_job_rpc_status.job_id}"
+            ),
         )
diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py
index 669bad63f218..efd55d379e00 100644
--- a/services/web/server/src/simcore_service_webserver/storage/_rest.py
+++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py
@@ -433,7 +433,9 @@ class _PathParams(BaseModel):
         ),
     )
     return create_data_response(
-        StorageAsyncJobGet.from_rpc_schema(async_job_rpc_get),
+        StorageAsyncJobGet.from_rpc_schema(
+            app=request.app, async_job_rpc_get=async_job_rpc_get
+        ),
         status=status.HTTP_202_ACCEPTED,
     )
@@ -459,7 +461,10 @@ async def get_async_jobs(request: web.Request) -> web.Response:
         filter_="",
     )
     return create_data_response(
-        [StorageAsyncJobGet.from_rpc_schema(job) for job in user_async_jobs],
+        [
+            StorageAsyncJobGet.from_rpc_schema(app=request.app, async_job_rpc_get=job)
+            for job in user_async_jobs
+        ],
         status=status.HTTP_200_OK,
     )
@@ -489,7 +494,9 @@ class _PathParams(BaseModel):
         ),
     )
     return create_data_response(
-        StorageAsyncJobStatus.from_rpc_schema(async_job_rpc_status),
+        StorageAsyncJobStatus.from_rpc_schema(
+            app=request.app, async_job_rpc_status=async_job_rpc_status
+        ),
         status=status.HTTP_200_OK,
     )
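[Editor's note] PATCH 001-002 attach hypermedia-style links to the async-job responses. As a rough sketch, a client that starts an async job (presumably via the data-export endpoint) should now receive an envelope like the one below. The job id and paths are made up for illustration; the shape assumes the usual {"data": ...} envelope from create_data_response and the camelCase aliasing that OutputSchema applies, which matches the openapi.yaml schema added in the next patch:

# Hypothetical enveloped payload for an accepted async job (Python literal)
expected_response_body = {
    "data": {
        "jobId": "7577f281-9c5e-4c3f-9554-4e8b9e489a5d",
        "links": {
            "statusHref": "/v0/storage/async-jobs/7577f281-9c5e-4c3f-9554-4e8b9e489a5d/status",
            "abortHref": "/v0/storage/async-jobs/7577f281-9c5e-4c3f-9554-4e8b9e489a5d:abort",
            "resultHref": "/v0/storage/async-jobs/7577f281-9c5e-4c3f-9554-4e8b9e489a5d/result",
        },
    }
}

From 4f42d2c47ba3ce791c5fa85b7f7c1a19e10e756f Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Fri, 7 Mar 2025 09:08:32 +0100
Subject: [PATCH 003/131] fix signatures

---
 api/specs/web-server/_storage.py              |  8 ++---
 .../api_schemas_webserver/storage.py          |  9 +++--
 .../api/v0/openapi.yaml                       | 35 +++++++++++++++++++
 .../storage/_rest.py                          |  7 ++--
 4 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py
index 848fac10a7b0..66b9261d6f2e 100644
--- a/api/specs/web-server/_storage.py
+++ b/api/specs/web-server/_storage.py
@@ -5,9 +5,9 @@

 from typing import Annotated, TypeAlias
-from uuid import UUID

 from fastapi import APIRouter, Depends, Query, status
+from 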
models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobId from models_library.api_schemas_storage.storage_schemas import ( FileLocation, FileMetaDataGet, @@ -210,7 +210,7 @@ async def export_data(data_export: DataExportPost, location_id: LocationID): response_model=Envelope[StorageAsyncJobStatus], name="get_async_job_status", ) -async def get_async_job_status(job_id: UUID): +async def get_async_job_status(job_id: AsyncJobId): """Get async job status""" @@ -218,7 +218,7 @@ async def get_async_job_status(job_id: UUID): "/storage/async-jobs/{job_id}:abort", name="abort_async_job", ) -async def abort_async_job(job_id: UUID): +async def abort_async_job(job_id: AsyncJobId): """aborts execution of an async job""" @@ -227,7 +227,7 @@ async def abort_async_job(job_id: UUID): response_model=Envelope[StorageAsyncJobResult], name="get_async_job_result", ) -async def get_async_job_result(job_id: UUID): +async def get_async_job_result(job_id: AsyncJobId): """Get the result of the async job""" diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index 1ceed50a281d..d6d89416231e 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -64,6 +64,10 @@ def from_job_id(cls, app: web.Application, job_id: str) -> "AsyncJobLinks": ) +class StorageAsyncJobId(InputSchema): + job_id: AsyncJobId + + class StorageAsyncJobGet(OutputSchema): job_id: AsyncJobId links: AsyncJobLinks @@ -72,10 +76,11 @@ class StorageAsyncJobGet(OutputSchema): def from_rpc_schema( cls, *, app: web.Application, async_job_rpc_get: AsyncJobGet ) -> "StorageAsyncJobGet": - job_id = f"{async_job_rpc_get.job_id}" return StorageAsyncJobGet( job_id=async_job_rpc_get.job_id, - links=AsyncJobLinks.from_job_id(app=app, job_id=job_id), + links=AsyncJobLinks.from_job_id( + app=app, job_id=f"{async_job_rpc_get.job_id}" + ), ) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index a64a214e1f0e..93e6a3da85e0 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -7780,6 +7780,23 @@ components: - app_name - version title: AppStatusCheck + AsyncJobLinks: + properties: + statusHref: + type: string + title: Statushref + abortHref: + type: string + title: Aborthref + resultHref: + type: string + title: Resulthref + type: object + required: + - statusHref + - abortHref + - resultHref + title: AsyncJobLinks Author: properties: name: @@ -14873,9 +14890,12 @@ components: type: string format: uuid title: Jobid + links: + $ref: '#/components/schemas/AsyncJobLinks' type: object required: - jobId + - links title: StorageAsyncJobGet StorageAsyncJobResult: properties: @@ -14905,11 +14925,26 @@ components: done: type: boolean title: Done + started: + type: string + format: date-time + title: Started + stopped: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Stopped + links: + $ref: '#/components/schemas/AsyncJobLinks' type: object required: - jobId - progress - done + - started + - stopped + - links title: StorageAsyncJobStatus Structure: properties: diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py 
index efd55d379e00..d88ae0595556 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -21,6 +21,7 @@ from models_library.api_schemas_webserver.storage import ( DataExportPost, StorageAsyncJobGet, + StorageAsyncJobId, StorageAsyncJobResult, StorageAsyncJobStatus, ) @@ -484,7 +485,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(_PathParams, request) + async_job_get = parse_request_path_parameters_as(StorageAsyncJobId, request) async_job_rpc_status = await get_status( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, @@ -515,7 +516,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(_PathParams, request) + async_job_get = parse_request_path_parameters_as(StorageAsyncJobId, request) async_job_rpc_abort = await abort( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, @@ -547,7 +548,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(_PathParams, request) + async_job_get = parse_request_path_parameters_as(StorageAsyncJobId, request) async_job_rpc_result = await get_result( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, From 2022cc409361e061b2355aef24ada0f34bda0fa4 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 09:09:35 +0100 Subject: [PATCH 004/131] minor fix --- api/specs/web-server/_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index 66b9261d6f2e..7b2d74659406 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -237,4 +237,4 @@ async def get_async_job_result(job_id: AsyncJobId): name="get_async_jobs", ) async def get_async_jobs(user_id: UserID): - """Retrunsa list of async jobs for the user""" + """Returns a list of async jobs for the user""" From 1062013d109bad149c8688e143e39123ec8d7c04 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 11:04:38 +0100 Subject: [PATCH 005/131] fix tests --- .../api_schemas_webserver/storage.py | 4 ---- .../simcore_service_webserver/storage/_rest.py | 16 +++++++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index d6d89416231e..ba122471e61e 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -64,10 +64,6 @@ def from_job_id(cls, app: web.Application, job_id: str) -> "AsyncJobLinks": ) -class StorageAsyncJobId(InputSchema): - job_id: AsyncJobId - - class StorageAsyncJobGet(OutputSchema): job_id: AsyncJobId links: AsyncJobLinks diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index d88ae0595556..4cf8d5a7b7e8 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ 
b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -10,7 +10,10 @@ from uuid import UUID from aiohttp import ClientTimeout, web -from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobNameData +from models_library.api_schemas_rpc_async_jobs.async_jobs import ( + AsyncJobId, + AsyncJobNameData, +) from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE from models_library.api_schemas_storage.storage_schemas import ( FileUploadCompleteResponse, @@ -21,7 +24,6 @@ from models_library.api_schemas_webserver.storage import ( DataExportPost, StorageAsyncJobGet, - StorageAsyncJobId, StorageAsyncJobResult, StorageAsyncJobStatus, ) @@ -470,6 +472,10 @@ async def get_async_jobs(request: web.Request) -> web.Response: ) +class _StorageAsyncJobId(BaseModel): + job_id: AsyncJobId + + @routes.get( _storage_prefix + "/async-jobs/{job_id}/status", name="get_async_job_status", @@ -485,7 +491,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(StorageAsyncJobId, request) + async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) async_job_rpc_status = await get_status( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, @@ -516,7 +522,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(StorageAsyncJobId, request) + async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) async_job_rpc_abort = await abort( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, @@ -548,7 +554,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(StorageAsyncJobId, request) + async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) async_job_rpc_result = await get_result( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, From 85cb1576d10f9bff25ace7cfe454b0d2d7da22d6 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 13:02:01 +0100 Subject: [PATCH 006/131] add 404 in case result is not done --- api/specs/web-server/_storage.py | 6 ++++++ .../api/v0/openapi.yaml | 8 +++++++- .../simcore_service_webserver/storage/_rest.py | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index 7b2d74659406..f85c5a786c14 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -226,6 +226,12 @@ async def abort_async_job(job_id: AsyncJobId): "/storage/async-jobs/{job_id}/result", response_model=Envelope[StorageAsyncJobResult], name="get_async_job_result", + responses={ + status.HTTP_404_NOT_FOUND: { + "description": "Result not found", + "model": StorageAsyncJobStatus, + } + }, ) async def get_async_job_result(job_id: AsyncJobId): """Get the result of the async job""" diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 93e6a3da85e0..32037e6f5e3e 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ 
b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml
@@ -6477,12 +6477,18 @@
           application/json:
             schema:
               $ref: '#/components/schemas/Envelope_StorageAsyncJobResult_'
+        '404':
+          description: Result not found
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/StorageAsyncJobStatus'
   /v0/storage/async-jobs:
     get:
       tags:
       - storage
       summary: Get Async Jobs
-      description: Retrunsa list of async jobs for the user
+      description: Returns a list of async jobs for the user
       operationId: get_async_jobs
       parameters:
       - name: user_id
diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py
index 4cf8d5a7b7e8..301ac5494d01 100644
--- a/services/web/server/src/simcore_service_webserver/storage/_rest.py
+++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py
@@ -555,6 +555,21 @@ class _PathParams(BaseModel):
     rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app)

     async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request)
+
+    async_job_rpc_status = await get_status(
+        rabbitmq_rpc_client=rabbitmq_rpc_client,
+        rpc_namespace=STORAGE_RPC_NAMESPACE,
+        job_id=async_job_get.job_id,
+        job_id_data=AsyncJobNameData(
+            user_id=_req_ctx.user_id, product_name=_req_ctx.product_name
+        ),
+    )
+    if not async_job_rpc_status.done:
+        return create_data_response(
+            async_job_rpc_status,
+            status=status.HTTP_404_NOT_FOUND,
+        )
+
     async_job_rpc_result = await get_result(
         rabbitmq_rpc_client=rabbitmq_rpc_client,
         rpc_namespace=STORAGE_RPC_NAMESPACE,
@@ -563,6 +578,7 @@
             user_id=_req_ctx.user_id, product_name=_req_ctx.product_name
         ),
     )
+
     return create_data_response(
         StorageAsyncJobResult.from_rpc_schema(async_job_rpc_result),
         status=status.HTTP_200_OK,
     )

From 1a0ae0af2a048f605c2fc36d2f3dd762e1953b94 Mon Sep 17 00:00:00 2001
From: Giancarlo Romeo
Date: Wed, 12 Feb 2025 11:22:28 +0100
Subject: [PATCH 007/131] add distributed task queue

---
 services/storage/requirements/_base.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/storage/requirements/_base.in b/services/storage/requirements/_base.in
index 44da01ea7897..0118f4320011 100644
--- a/services/storage/requirements/_base.in
+++ b/services/storage/requirements/_base.in
@@ -18,7 +18,7 @@ aioboto3 # s3 storage
 aiofiles # i/o
 asgi_lifespan
 asyncpg # database
-celery[redis]
+celery
 httpx
 opentelemetry-instrumentation-botocore
 packaging
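[Editor's note] Dropping the extra here looks like an oversight: the [redis] extra is what pulls in the redis client Celery needs to talk to a redis:// broker or result backend, and PATCH 009 below restores celery[redis]. A minimal sketch of the kind of app this dependency backs — DSNs are placeholders and the task body is a toy, chosen only to be consistent with the expectation in the test added in PATCH 020:

from celery import Celery

celery_app = Celery(
    broker="redis://localhost:6379/0",   # placeholder broker DSN
    backend="redis://localhost:6379/1",  # placeholder result-backend DSN
)


@celery_app.task(name="archive")
def archive(archive_id: str, files: list[str]) -> str:
    # toy stand-in: test_tasks.py expects archive(<uuid>, ["f1", "f2"]) == "f1_f2.zip"
    return "_".join(files) + ".zip"

From 52d001a19f256b47499ce95b7e0936b85288ea8e Mon Sep 17 00:00:00 2001
From: Giancarlo Romeo
Date: Wed, 12 Feb 2025 11:49:49 +0100
Subject: [PATCH 008/131] add settings

---
 .../simcore_service_storage/core/settings.py | 95 +++++++++++--------
 1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py
index 66142a82b3ee..49224d9e95e9 100644
--- a/services/storage/src/simcore_service_storage/core/settings.py
+++ b/services/storage/src/simcore_service_storage/core/settings.py
@@ -30,47 +30,60 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings):
     STORAGE_MONITORING_ENABLED: bool = False
     STORAGE_PROFILING: bool = False

-    STORAGE_POSTGRES: Annotated[
-        PostgresSettings | None,
-        Field(json_schema_extra={"auto_default_from_env": True}),
-    ]
-
-    STORAGE_REDIS: Annotated[
-        RedisSettings | None, Field(json_schema_extra={"auto_default_from_env": True})
-    ]
-
-    STORAGE_S3: Annotated[
-        S3Settings 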
| None, Field(json_schema_extra={"auto_default_from_env": True}) - ] - - STORAGE_CELERY: Annotated[ - CelerySettings | None, Field(json_schema_extra={"auto_default_from_env": True}) - ] - - STORAGE_TRACING: Annotated[ - TracingSettings | None, Field(json_schema_extra={"auto_default_from_env": True}) - ] - - DATCORE_ADAPTER: Annotated[ - DatcoreAdapterSettings, Field(json_schema_extra={"auto_default_from_env": True}) - ] - - STORAGE_SYNC_METADATA_TIMEOUT: Annotated[ - PositiveInt, Field(180, description="Timeout (seconds) for metadata sync task") - ] - - STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS: Annotated[ - int, - Field( - 3600, description="Default expiration time in seconds for presigned links" - ), - ] - - STORAGE_CLEANER_INTERVAL_S: Annotated[ - int | None, - Field( - 30, - description="Interval in seconds when task cleaning pending uploads runs. setting to NULL disables the cleaner.", + BF_API_KEY: str | None = Field( + None, description="Pennsieve API key ONLY for testing purposes" + ) + BF_API_SECRET: str | None = Field( + None, description="Pennsieve API secret ONLY for testing purposes" + ) + + STORAGE_POSTGRES: PostgresSettings | None = Field( + json_schema_extra={"auto_default_from_env": True} + ) + + STORAGE_REDIS: RedisSettings | None = Field( + json_schema_extra={"auto_default_from_env": True} + ) + + STORAGE_S3: S3Settings | None = Field( + json_schema_extra={"auto_default_from_env": True} + ) + + STORAGE_CELERY: CelerySettings | None = Field( + json_schema_extra={"auto_default_from_env": True} + ) + + STORAGE_TRACING: TracingSettings | None = Field( + json_schema_extra={"auto_default_from_env": True} + ) + + DATCORE_ADAPTER: DatcoreAdapterSettings = Field( + json_schema_extra={"auto_default_from_env": True} + ) + + STORAGE_SYNC_METADATA_TIMEOUT: PositiveInt = Field( + 180, description="Timeout (seconds) for metadata sync task" + ) + + STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS: int = Field( + 3600, description="Default expiration time in seconds for presigned links" + ) + + STORAGE_CLEANER_INTERVAL_S: int | None = Field( + 30, + description="Interval in seconds when task cleaning pending uploads runs. 
setting to NULL disables the cleaner.", + ) + + STORAGE_S3_CLIENT_MAX_TRANSFER_CONCURRENCY: int = Field( + 4, + description="Maximal amount of threads used by underlying S3 client to transfer data to S3 backend", + ) + + STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED: bool = Field( + default=False, + validation_alias=AliasChoices( + "STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED", + "LOG_FORMAT_LOCAL_DEV_ENABLED", ), ] From 6b2fb6fb0f20a51993f608f66c0cfbd4ee04a61c Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 12 Feb 2025 12:17:42 +0100 Subject: [PATCH 009/131] update reqs --- services/storage/requirements/_base.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/requirements/_base.in b/services/storage/requirements/_base.in index 0118f4320011..44da01ea7897 100644 --- a/services/storage/requirements/_base.in +++ b/services/storage/requirements/_base.in @@ -18,7 +18,7 @@ aioboto3 # s3 storage aiofiles # i/o asgi_lifespan asyncpg # database -celery +celery[redis] httpx opentelemetry-instrumentation-botocore packaging From eab1aa78c9bf1039dec9ead89d434a9666bc53d4 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 13 Feb 2025 00:25:38 +0100 Subject: [PATCH 010/131] add celery task --- .../core/application.py | 3 ++ .../modules/celery/celery.py | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 services/storage/src/simcore_service_storage/modules/celery/celery.py diff --git a/services/storage/src/simcore_service_storage/core/application.py b/services/storage/src/simcore_service_storage/core/application.py index 13082a00fb06..8723f473cfbb 100644 --- a/services/storage/src/simcore_service_storage/core/application.py +++ b/services/storage/src/simcore_service_storage/core/application.py @@ -32,6 +32,7 @@ from ..dsm import setup_dsm from ..dsm_cleaner import setup_dsm_cleaner from ..exceptions.handlers import set_exception_handlers +from ..modules.celery.celery import setup_celery from ..modules.db import setup_db from ..modules.long_running_tasks import setup_rest_api_long_running_tasks_for_uploads from ..modules.rabbitmq import setup as setup_rabbitmq @@ -95,6 +96,8 @@ def create_app(settings: ApplicationSettings) -> FastAPI: if settings.STORAGE_CLEANER_INTERVAL_S and not settings.STORAGE_WORKER_MODE: setup_dsm_cleaner(app) + setup_celery(app) + if settings.STORAGE_PROFILING: app.add_middleware(ProfilerMiddleware) diff --git a/services/storage/src/simcore_service_storage/modules/celery/celery.py b/services/storage/src/simcore_service_storage/modules/celery/celery.py new file mode 100644 index 000000000000..10397fdd5625 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/celery.py @@ -0,0 +1,48 @@ +import logging +from multiprocessing import Process +from typing import cast + +from celery import Celery +from celery.apps.worker import Worker +from fastapi import FastAPI +from settings_library.redis import RedisDatabase +from simcore_service_storage.modules.celery.tasks import setup_celery_tasks + +from ...core.settings import get_application_settings + +_log = logging.getLogger(__name__) + + +def setup_celery(app: FastAPI) -> None: + async def on_startup() -> None: + settings = get_application_settings(app) + assert settings.STORAGE_REDIS + + redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( + RedisDatabase.CELERY_TASKS, + ) + + app.state.celery_app = Celery( + broker=redis_dsn, + backend=redis_dsn, + ) + + setup_celery_tasks(app.state.celery_app) + + # FIXME: Experiment: to start worker in a separate process 
+ def worker_process(): + worker = Worker(app=app.state.celery_app) + worker.start() + + worker_proc = Process(target=worker_process) + worker_proc.start() + + async def on_shutdown() -> None: + _log.warning("Implementing shutdown of celery app") + + app.add_event_handler("startup", on_startup) + app.add_event_handler("shutdown", on_shutdown) + + +def get_celery_app(app: FastAPI) -> Celery: + return cast(Celery, app.state.celery_app) From 6767d144a26fd2ccb286f4ebf21dbd132ae85b5d Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 13 Feb 2025 10:23:03 +0100 Subject: [PATCH 011/131] add celery task queue class --- .../modules/celery/celery.py | 46 ++++++++++++++----- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/celery.py b/services/storage/src/simcore_service_storage/modules/celery/celery.py index 10397fdd5625..59b451661a0f 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/celery.py +++ b/services/storage/src/simcore_service_storage/modules/celery/celery.py @@ -4,30 +4,52 @@ from celery import Celery from celery.apps.worker import Worker +from celery.result import AsyncResult from fastapi import FastAPI from settings_library.redis import RedisDatabase -from simcore_service_storage.modules.celery.tasks import setup_celery_tasks +from simcore_service_storage.modules.celery.tasks import archive -from ...core.settings import get_application_settings +from ...core.settings import ApplicationSettings, get_application_settings _log = logging.getLogger(__name__) -def setup_celery(app: FastAPI) -> None: - async def on_startup() -> None: - settings = get_application_settings(app) - assert settings.STORAGE_REDIS - - redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( +class CeleryTaskQueue: + def __init__(self, app_settings: ApplicationSettings): + assert app_settings.STORAGE_REDIS + redis_dsn = app_settings.STORAGE_REDIS.build_redis_dsn( RedisDatabase.CELERY_TASKS, ) - app.state.celery_app = Celery( + self._celery_app = Celery( broker=redis_dsn, backend=redis_dsn, ) - setup_celery_tasks(app.state.celery_app) + @property + def celery_app(self): + return self._celery_app + + def create_task(self, task): + self._celery_app.task()(task) + + def send_task(self, name: str, **kwargs) -> AsyncResult: + return self._celery_app.send_task(name, **kwargs) + + def cancel_task(self, task_id: str): + self._celery_app.control.revoke(task_id) + + +# TODO: use new FastAPI lifespan +def setup_celery(app: FastAPI) -> None: + async def on_startup() -> None: + settings = get_application_settings(app) + assert settings.STORAGE_REDIS + + task_queue = CeleryTaskQueue(settings) + task_queue.create_task(archive) + + app.state.task_queue = task_queue # FIXME: Experiment: to start worker in a separate process def worker_process(): @@ -44,5 +66,5 @@ async def on_shutdown() -> None: app.add_event_handler("shutdown", on_shutdown) -def get_celery_app(app: FastAPI) -> Celery: - return cast(Celery, app.state.celery_app) +def get_celery_task_queue(app: FastAPI) -> CeleryTaskQueue: + return cast(CeleryTaskQueue, app.state.task_queue) From f7ab399f128bc15f8ebee0368fa220e71e8fc522 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 13 Feb 2025 10:25:55 +0100 Subject: [PATCH 012/131] rename --- .../src/simcore_service_storage/modules/celery/celery.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/celery.py 
b/services/storage/src/simcore_service_storage/modules/celery/celery.py index 59b451661a0f..1732e5fe3f75 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/celery.py +++ b/services/storage/src/simcore_service_storage/modules/celery/celery.py @@ -1,6 +1,6 @@ import logging from multiprocessing import Process -from typing import cast +from typing import Callable, cast from celery import Celery from celery.apps.worker import Worker @@ -30,8 +30,8 @@ def __init__(self, app_settings: ApplicationSettings): def celery_app(self): return self._celery_app - def create_task(self, task): - self._celery_app.task()(task) + def create_task(self, task_fn: Callable): + self._celery_app.task()(task_fn) def send_task(self, name: str, **kwargs) -> AsyncResult: return self._celery_app.send_task(name, **kwargs) From 9b8c7006497dc0b2dd3ca8a4e2a098faec34118d Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 13 Feb 2025 10:57:51 +0100 Subject: [PATCH 013/131] make testable --- .../core/application.py | 2 +- .../modules/celery/{celery.py => core.py} | 32 +++++++++---------- 2 files changed, 16 insertions(+), 18 deletions(-) rename services/storage/src/simcore_service_storage/modules/celery/{celery.py => core.py} (79%) diff --git a/services/storage/src/simcore_service_storage/core/application.py b/services/storage/src/simcore_service_storage/core/application.py index 8723f473cfbb..d31a7652c37f 100644 --- a/services/storage/src/simcore_service_storage/core/application.py +++ b/services/storage/src/simcore_service_storage/core/application.py @@ -32,7 +32,7 @@ from ..dsm import setup_dsm from ..dsm_cleaner import setup_dsm_cleaner from ..exceptions.handlers import set_exception_handlers -from ..modules.celery.celery import setup_celery +from ..modules.celery.core import setup_celery from ..modules.db import setup_db from ..modules.long_running_tasks import setup_rest_api_long_running_tasks_for_uploads from ..modules.rabbitmq import setup as setup_rabbitmq diff --git a/services/storage/src/simcore_service_storage/modules/celery/celery.py b/services/storage/src/simcore_service_storage/modules/celery/core.py similarity index 79% rename from services/storage/src/simcore_service_storage/modules/celery/celery.py rename to services/storage/src/simcore_service_storage/modules/celery/core.py index 1732e5fe3f75..0f70ec4d6f48 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/celery.py +++ b/services/storage/src/simcore_service_storage/modules/celery/core.py @@ -9,26 +9,14 @@ from settings_library.redis import RedisDatabase from simcore_service_storage.modules.celery.tasks import archive -from ...core.settings import ApplicationSettings, get_application_settings +from ...core.settings import get_application_settings _log = logging.getLogger(__name__) class CeleryTaskQueue: - def __init__(self, app_settings: ApplicationSettings): - assert app_settings.STORAGE_REDIS - redis_dsn = app_settings.STORAGE_REDIS.build_redis_dsn( - RedisDatabase.CELERY_TASKS, - ) - - self._celery_app = Celery( - broker=redis_dsn, - backend=redis_dsn, - ) - - @property - def celery_app(self): - return self._celery_app + def __init__(self, celery_app: Celery): + self._celery_app = celery_app def create_task(self, task_fn: Callable): self._celery_app.task()(task_fn) @@ -40,13 +28,23 @@ def cancel_task(self, task_id: str): self._celery_app.control.revoke(task_id) -# TODO: use new FastAPI lifespan +# TODO: move and use new FastAPI lifespan def setup_celery(app: FastAPI) -> None: async def on_startup() -> 
None: settings = get_application_settings(app) assert settings.STORAGE_REDIS - task_queue = CeleryTaskQueue(settings) + assert settings.STORAGE_REDIS + redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( + RedisDatabase.CELERY_TASKS, + ) + + celery_app = Celery( + broker=redis_dsn, + backend=redis_dsn, + ) + + task_queue = CeleryTaskQueue(celery_app) task_queue.create_task(archive) app.state.task_queue = task_queue From ac531692cfa84bb113366ee03dcf80b3312ed09c Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 13 Feb 2025 16:38:58 +0100 Subject: [PATCH 014/131] add storage worker --- .../modules/celery/configurator.py | 17 ++++++++++++++ .../modules/celery/core.py | 22 +++---------------- .../modules/celery/worker/__init__.py | 0 .../modules/celery/worker/main.py | 12 ++++++++++ 4 files changed, 32 insertions(+), 19 deletions(-) create mode 100644 services/storage/src/simcore_service_storage/modules/celery/configurator.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/__init__.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/main.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/configurator.py b/services/storage/src/simcore_service_storage/modules/celery/configurator.py new file mode 100644 index 000000000000..6477b25903ab --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/configurator.py @@ -0,0 +1,17 @@ +import logging + +from celery import Celery +from settings_library.redis import RedisDatabase + +from ...core.settings import ApplicationSettings + +_log = logging.getLogger(__name__) + + +def create_celery_app(settings: ApplicationSettings) -> Celery: + assert settings.STORAGE_REDIS + app = Celery( + broker=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), + backend=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), + ) + return app diff --git a/services/storage/src/simcore_service_storage/modules/celery/core.py b/services/storage/src/simcore_service_storage/modules/celery/core.py index 0f70ec4d6f48..af9080fdce5b 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/core.py +++ b/services/storage/src/simcore_service_storage/modules/celery/core.py @@ -1,13 +1,10 @@ import logging -from multiprocessing import Process -from typing import Callable, cast +from typing import cast from celery import Celery -from celery.apps.worker import Worker from celery.result import AsyncResult from fastapi import FastAPI from settings_library.redis import RedisDatabase -from simcore_service_storage.modules.celery.tasks import archive from ...core.settings import get_application_settings @@ -18,11 +15,8 @@ class CeleryTaskQueue: def __init__(self, celery_app: Celery): self._celery_app = celery_app - def create_task(self, task_fn: Callable): - self._celery_app.task()(task_fn) - - def send_task(self, name: str, **kwargs) -> AsyncResult: - return self._celery_app.send_task(name, **kwargs) + def send_task(self, name: str, *args, **kwargs) -> AsyncResult: + return self._celery_app.send_task(name, args=args, kwargs=kwargs) def cancel_task(self, task_id: str): self._celery_app.control.revoke(task_id) @@ -34,7 +28,6 @@ async def on_startup() -> None: settings = get_application_settings(app) assert settings.STORAGE_REDIS - assert settings.STORAGE_REDIS redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( RedisDatabase.CELERY_TASKS, ) @@ -45,18 +38,9 @@ async def on_startup() -> None: ) task_queue = 
CeleryTaskQueue(celery_app) - task_queue.create_task(archive) app.state.task_queue = task_queue - # FIXME: Experiment: to start worker in a separate process - def worker_process(): - worker = Worker(app=app.state.celery_app) - worker.start() - - worker_proc = Process(target=worker_process) - worker_proc.start() - async def on_shutdown() -> None: _log.warning("Implementing shutdown of celery app") diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/__init__.py b/services/storage/src/simcore_service_storage/modules/celery/worker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py new file mode 100644 index 000000000000..780f3483d4c9 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py @@ -0,0 +1,12 @@ +from ....core.settings import ApplicationSettings +from ...celery.tasks import archive +from ..configurator import create_celery_app + +settings = ApplicationSettings.create_from_envs() + +app = create_celery_app(settings) + +app.task(name="archive")(archive) + + +__all__ = ["app"] From 2cb3b0f59a1d54a8ecf10e914f4b3b4f401aca27 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 14 Feb 2025 16:40:02 +0100 Subject: [PATCH 015/131] continue working --- .../core/application.py | 3 -- .../simcore_service_storage/core/settings.py | 2 ++ .../src/simcore_service_storage/main.py | 1 + .../celery/{core.py => application.py} | 32 +++++++------------ .../modules/celery/configurator.py | 17 ---------- .../modules/celery/worker/main.py | 12 ------- 6 files changed, 14 insertions(+), 53 deletions(-) rename services/storage/src/simcore_service_storage/modules/celery/{core.py => application.py} (50%) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/configurator.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/main.py diff --git a/services/storage/src/simcore_service_storage/core/application.py b/services/storage/src/simcore_service_storage/core/application.py index d31a7652c37f..13082a00fb06 100644 --- a/services/storage/src/simcore_service_storage/core/application.py +++ b/services/storage/src/simcore_service_storage/core/application.py @@ -32,7 +32,6 @@ from ..dsm import setup_dsm from ..dsm_cleaner import setup_dsm_cleaner from ..exceptions.handlers import set_exception_handlers -from ..modules.celery.core import setup_celery from ..modules.db import setup_db from ..modules.long_running_tasks import setup_rest_api_long_running_tasks_for_uploads from ..modules.rabbitmq import setup as setup_rabbitmq @@ -96,8 +95,6 @@ def create_app(settings: ApplicationSettings) -> FastAPI: if settings.STORAGE_CLEANER_INTERVAL_S and not settings.STORAGE_WORKER_MODE: setup_dsm_cleaner(app) - setup_celery(app) - if settings.STORAGE_PROFILING: app.add_middleware(ProfilerMiddleware) diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index 49224d9e95e9..a878134cb7f2 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -129,6 +129,8 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): bool, Field(description="If True, run as a worker") ] = False + STORAGE_MODE: str + @field_validator("LOG_LEVEL", mode="before") 
@classmethod def _validate_loglevel(cls, value: str) -> str: diff --git a/services/storage/src/simcore_service_storage/main.py b/services/storage/src/simcore_service_storage/main.py index f0639c753685..83200e2fe850 100644 --- a/services/storage/src/simcore_service_storage/main.py +++ b/services/storage/src/simcore_service_storage/main.py @@ -1,5 +1,6 @@ """Main application to be deployed in for example uvicorn.""" +import asyncio import logging from servicelib.logging_utils import config_all_loggers diff --git a/services/storage/src/simcore_service_storage/modules/celery/core.py b/services/storage/src/simcore_service_storage/modules/celery/application.py similarity index 50% rename from services/storage/src/simcore_service_storage/modules/celery/core.py rename to services/storage/src/simcore_service_storage/modules/celery/application.py index af9080fdce5b..67eec70d8831 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/core.py +++ b/services/storage/src/simcore_service_storage/modules/celery/application.py @@ -6,7 +6,7 @@ from fastapi import FastAPI from settings_library.redis import RedisDatabase -from ...core.settings import get_application_settings +from ...core.settings import ApplicationSettings _log = logging.getLogger(__name__) @@ -23,29 +23,19 @@ def cancel_task(self, task_id: str): # TODO: move and use new FastAPI lifespan -def setup_celery(app: FastAPI) -> None: - async def on_startup() -> None: - settings = get_application_settings(app) - assert settings.STORAGE_REDIS +def create_celery_app(settings: ApplicationSettings) -> Celery: + assert settings.STORAGE_REDIS - redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( - RedisDatabase.CELERY_TASKS, - ) + redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( + RedisDatabase.CELERY_TASKS, + ) - celery_app = Celery( - broker=redis_dsn, - backend=redis_dsn, - ) + celery_app = Celery( + broker=redis_dsn, + backend=redis_dsn, + ) - task_queue = CeleryTaskQueue(celery_app) - - app.state.task_queue = task_queue - - async def on_shutdown() -> None: - _log.warning("Implementing shutdown of celery app") - - app.add_event_handler("startup", on_startup) - app.add_event_handler("shutdown", on_shutdown) + return celery_app def get_celery_task_queue(app: FastAPI) -> CeleryTaskQueue: diff --git a/services/storage/src/simcore_service_storage/modules/celery/configurator.py b/services/storage/src/simcore_service_storage/modules/celery/configurator.py deleted file mode 100644 index 6477b25903ab..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/configurator.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -from celery import Celery -from settings_library.redis import RedisDatabase - -from ...core.settings import ApplicationSettings - -_log = logging.getLogger(__name__) - - -def create_celery_app(settings: ApplicationSettings) -> Celery: - assert settings.STORAGE_REDIS - app = Celery( - broker=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), - backend=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), - ) - return app diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py deleted file mode 100644 index 780f3483d4c9..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py +++ /dev/null @@ -1,12 +0,0 @@ -from ....core.settings import ApplicationSettings -from ...celery.tasks import archive -from ..configurator import 
create_celery_app - -settings = ApplicationSettings.create_from_envs() - -app = create_celery_app(settings) - -app.task(name="archive")(archive) - - -__all__ = ["app"] From 5a9be830c60daaee44c5362979be20fb955ef935 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 17 Feb 2025 10:25:48 +0100 Subject: [PATCH 016/131] continue --- .../src/simcore_service_storage/main.py | 1 - .../modules/celery/application.py | 1 - .../modules/celery/configurator.py | 17 ++++ .../modules/celery/worker/main.py | 79 +++++++++++++++++++ 4 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 services/storage/src/simcore_service_storage/modules/celery/configurator.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/main.py diff --git a/services/storage/src/simcore_service_storage/main.py b/services/storage/src/simcore_service_storage/main.py index 83200e2fe850..f0639c753685 100644 --- a/services/storage/src/simcore_service_storage/main.py +++ b/services/storage/src/simcore_service_storage/main.py @@ -1,6 +1,5 @@ """Main application to be deployed in for example uvicorn.""" -import asyncio import logging from servicelib.logging_utils import config_all_loggers diff --git a/services/storage/src/simcore_service_storage/modules/celery/application.py b/services/storage/src/simcore_service_storage/modules/celery/application.py index 67eec70d8831..0a0d51c60d5b 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/application.py +++ b/services/storage/src/simcore_service_storage/modules/celery/application.py @@ -22,7 +22,6 @@ def cancel_task(self, task_id: str): self._celery_app.control.revoke(task_id) -# TODO: move and use new FastAPI lifespan def create_celery_app(settings: ApplicationSettings) -> Celery: assert settings.STORAGE_REDIS diff --git a/services/storage/src/simcore_service_storage/modules/celery/configurator.py b/services/storage/src/simcore_service_storage/modules/celery/configurator.py new file mode 100644 index 000000000000..6477b25903ab --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/configurator.py @@ -0,0 +1,17 @@ +import logging + +from celery import Celery +from settings_library.redis import RedisDatabase + +from ...core.settings import ApplicationSettings + +_log = logging.getLogger(__name__) + + +def create_celery_app(settings: ApplicationSettings) -> Celery: + assert settings.STORAGE_REDIS + app = Celery( + broker=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), + backend=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), + ) + return app diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py new file mode 100644 index 000000000000..89ae2e0af7f1 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py @@ -0,0 +1,79 @@ +"""Main application to be deployed in for example uvicorn.""" + +import asyncio +import logging +import threading + +from asgi_lifespan import LifespanManager +from celery.signals import worker_init, worker_shutdown +from servicelib.logging_utils import config_all_loggers +from simcore_service_storage.core.application import create_app +from simcore_service_storage.core.settings import ApplicationSettings +from simcore_service_storage.modules.celery.application import create_celery_app +from simcore_service_storage.modules.celery.tasks import archive + +_settings = 
ApplicationSettings.create_from_envs() + +# SEE https://github.com/ITISFoundation/osparc-simcore/issues/3148 +logging.basicConfig(level=_settings.log_level) # NOSONAR +logging.root.setLevel(_settings.log_level) +config_all_loggers( + log_format_local_dev_enabled=_settings.STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED, + logger_filter_mapping=_settings.STORAGE_LOG_FILTER_MAPPING, + tracing_settings=_settings.STORAGE_TRACING, +) + +_logger = logging.getLogger(__name__) + +fastapi_app = create_app(_settings) + +celery_app = create_celery_app(_settings) +celery_app.task(name="archive")(archive) + + +@worker_init.connect +def on_worker_init(**_kwargs): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + shutdown_event = asyncio.Event() + + async def lifespan(): + async with LifespanManager(fastapi_app): + _logger.error("FastAPI lifespan started") + try: + await shutdown_event.wait() + except asyncio.CancelledError: + _logger.error("Lifespan task cancelled") + _logger.error("FastAPI lifespan ended") + + lifespan_task = loop.create_task(lifespan()) + fastapi_app.state.lifespan_task = lifespan_task + fastapi_app.state.shutdown_event = shutdown_event + fastapi_app.state.loop = loop # Store the loop for shutdown + + def run_loop(): + loop.run_forever() + + thread = threading.Thread(target=run_loop, daemon=True) + thread.start() + + +@worker_shutdown.connect +def on_worker_shutdown(**_kwargs): + loop = fastapi_app.state.loop + + async def shutdown(): + fastapi_app.state.shutdown_event.set() + fastapi_app.state.lifespan_task.cancel() + try: + await fastapi_app.state.lifespan_task + except asyncio.CancelledError: + pass + + asyncio.run_coroutine_threadsafe(shutdown(), loop) + + _logger.error("FastAPI lifespan stopped.") + + +celery_app.conf.fastapi_app = fastapi_app +app = celery_app From 8bf39ffefa3bd12907a4949c326661239ad99c79 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 17 Feb 2025 11:27:20 +0100 Subject: [PATCH 017/131] use rabbit --- .../src/simcore_service_storage/modules/celery/configurator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/configurator.py b/services/storage/src/simcore_service_storage/modules/celery/configurator.py index 6477b25903ab..90d22023cc7e 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/configurator.py +++ b/services/storage/src/simcore_service_storage/modules/celery/configurator.py @@ -9,9 +9,10 @@ def create_celery_app(settings: ApplicationSettings) -> Celery: + assert settings.STORAGE_RABBITMQ assert settings.STORAGE_REDIS app = Celery( - broker=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), + broker=settings.STORAGE_RABBITMQ.dsn, backend=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), ) return app From 5c381849b9ff066d583f3e2afcd5bdcd7f9d970a Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 17 Feb 2025 13:07:11 +0100 Subject: [PATCH 018/131] continue --- .../modules/celery/tasks.py | 1 + .../modules/celery/worker/main.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/tasks.py b/services/storage/src/simcore_service_storage/modules/celery/tasks.py index b58a09a69361..3f295f6e65a5 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/tasks.py +++ b/services/storage/src/simcore_service_storage/modules/celery/tasks.py @@ -1,3 +1,4 @@ +import asyncio import logging import time diff 
--git a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py index 89ae2e0af7f1..61f2e3202cca 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py @@ -6,6 +6,7 @@ from asgi_lifespan import LifespanManager from celery.signals import worker_init, worker_shutdown +from servicelib.background_task import cancel_wait_task from servicelib.logging_utils import config_all_loggers from simcore_service_storage.core.application import create_app from simcore_service_storage.core.settings import ApplicationSettings @@ -25,7 +26,6 @@ _logger = logging.getLogger(__name__) -fastapi_app = create_app(_settings) celery_app = create_celery_app(_settings) celery_app.task(name="archive")(archive) @@ -37,6 +37,8 @@ def on_worker_init(**_kwargs): asyncio.set_event_loop(loop) shutdown_event = asyncio.Event() + fastapi_app = create_app(_settings) + async def lifespan(): async with LifespanManager(fastapi_app): _logger.error("FastAPI lifespan started") @@ -49,7 +51,9 @@ async def lifespan(): lifespan_task = loop.create_task(lifespan()) fastapi_app.state.lifespan_task = lifespan_task fastapi_app.state.shutdown_event = shutdown_event - fastapi_app.state.loop = loop # Store the loop for shutdown + + celery_app.conf.fastapi_app = fastapi_app + celery_app.conf.loop = loop def run_loop(): loop.run_forever() @@ -60,20 +64,17 @@ def run_loop(): @worker_shutdown.connect def on_worker_shutdown(**_kwargs): - loop = fastapi_app.state.loop + loop = celery_app.conf.loop + fastapi_app = celery_app.conf.fastapi_app async def shutdown(): fastapi_app.state.shutdown_event.set() - fastapi_app.state.lifespan_task.cancel() - try: - await fastapi_app.state.lifespan_task - except asyncio.CancelledError: - pass + + await cancel_wait_task(fastapi_app.state.lifespan_task, max_delay=5) asyncio.run_coroutine_threadsafe(shutdown(), loop) _logger.error("FastAPI lifespan stopped.") -celery_app.conf.fastapi_app = fastapi_app app = celery_app From 7395621214814de95515191212007bd7de0d899d Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 17 Feb 2025 15:20:46 +0100 Subject: [PATCH 019/131] continue --- services/storage/src/simcore_service_storage/core/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index a878134cb7f2..f6d7ec8b3723 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -129,7 +129,7 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): bool, Field(description="If True, run as a worker") ] = False - STORAGE_MODE: str + STORAGE_WORKER_MODE: bool | None = False @field_validator("LOG_LEVEL", mode="before") @classmethod From 6445728849357e73e8540c0d98ce5baf112f29c4 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 18 Feb 2025 12:16:14 +0100 Subject: [PATCH 020/131] add unit tests --- services/storage/tests/unit/modules/celery/test_tasks.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 services/storage/tests/unit/modules/celery/test_tasks.py diff --git a/services/storage/tests/unit/modules/celery/test_tasks.py b/services/storage/tests/unit/modules/celery/test_tasks.py new file mode 100644 index 000000000000..c6390b4dd196 --- 
/dev/null
+++ b/services/storage/tests/unit/modules/celery/test_tasks.py
@@ -0,0 +1,7 @@
+from faker import Faker
+from simcore_service_storage.modules.celery.tasks import archive
+
+
+def test_archive(celery_app, celery_worker, faker: Faker):
+    result = archive.apply(args=(faker.uuid4(), ["f1", "f2"]))
+    assert result.get() == "f1_f2.zip"

From 755771650f21b19d9d5614e5f2bea44f22a17f21 Mon Sep 17 00:00:00 2001
From: Giancarlo Romeo
Date: Tue, 18 Feb 2025 14:37:09 +0100
Subject: [PATCH 021/131] continue

---
 .../modules/celery/worker/main.py             | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py
index 61f2e3202cca..210cc711c9b7 100644
--- a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py
+++ b/services/storage/src/simcore_service_storage/modules/celery/worker/main.py
@@ -11,7 +11,6 @@
 from simcore_service_storage.core.application import create_app
 from simcore_service_storage.core.settings import ApplicationSettings
 from simcore_service_storage.modules.celery.application import create_celery_app
-from simcore_service_storage.modules.celery.tasks import archive

 _settings = ApplicationSettings.create_from_envs()

@@ -28,11 +27,10 @@
 _logger = logging.getLogger(__name__)

 celery_app = create_celery_app(_settings)
-celery_app.task(name="archive")(archive)


 @worker_init.connect
-def on_worker_init(**_kwargs):
+def on_worker_init(sender, **_kwargs):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     shutdown_event = asyncio.Event()
@@ -40,20 +38,22 @@ def on_worker_init(sender, **_kwargs):
     fastapi_app = create_app(_settings)

     async def lifespan():
-        async with LifespanManager(fastapi_app):
+        async with LifespanManager(
+            fastapi_app, startup_timeout=30, shutdown_timeout=30
+        ):
             _logger.error("FastAPI lifespan started")
             try:
                 await shutdown_event.wait()
-            except asyncio.CancelledError:
-                _logger.error("Lifespan task cancelled")
+            except asyncio.exceptions.CancelledError:
+                _logger.info("Lifespan task cancelled")
             _logger.error("FastAPI lifespan ended")

     lifespan_task = loop.create_task(lifespan())
     fastapi_app.state.lifespan_task = lifespan_task
     fastapi_app.state.shutdown_event = shutdown_event

-    celery_app.conf.fastapi_app = fastapi_app
-    celery_app.conf.loop = loop
+    sender.app.conf["fastapi_app"] = fastapi_app
+    sender.app.conf["loop"] = loop

     def run_loop():
         loop.run_forever()
@@ -63,9 +63,9 @@ def run_loop():

 @worker_shutdown.connect
-def on_worker_shutdown(**_kwargs):
-    loop = celery_app.conf.loop
-    fastapi_app = celery_app.conf.fastapi_app
+def on_worker_shutdown(sender, **_kwargs):
+    loop = sender.app.conf["loop"]
+    fastapi_app = sender.app.conf["fastapi_app"]

     async def shutdown():
         fastapi_app.state.shutdown_event.set()
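[Editor's note] The celery_app and celery_worker fixtures used by test_tasks.py above presumably come from Celery's pytest plugin (celery.contrib.pytest). PATCH 022 below then consolidates the client side behind a small CeleryClientInterface. A hedged usage sketch, using only names the patch introduces — the handler function itself is hypothetical:

from fastapi import FastAPI

from simcore_service_storage.modules.celery.client.client_utils import (
    get_celery_client_interface,
)


def start_archive(app: FastAPI, user_id: int) -> str:
    # submit() builds a task id of the form "<user_id>_<task name>_<uuid4>"
    client = get_celery_client_interface(app)
    return client.submit("archive", user_id=user_id, files=["f1", "f2"])

From fef45ef79d85aedbabb9c83ecd670edef2b5c319 Mon Sep 17 00:00:00 2001
From: Giancarlo Romeo
Date: Wed, 19 Feb 2025 11:49:02 +0100
Subject: [PATCH 022/131] base working tests

---
 .../modules/celery/application.py             | 41 -------------
 .../modules/celery/client/__init__.py         |  3 +
 .../modules/celery/client/_interface.py       | 33 ++++++++++
 .../modules/celery/client/client_utils.py     |  8 +++
 .../modules/celery/client/setup.py            | 34 +++++++++++
 .../modules/celery/configurator.py            | 18 ------
 .../modules/celery/worker/_interface.py       | 12 ++++
 .../celery/worker/{main.py => setup.py}       | 13 ++--
 .../modules/celery/worker/utils.py            | 21 +++++++
 .../tests/unit/modules/celery/test_core.py    | 61 +++++++++++++++++++
 10 files changed, 178 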
insertions(+), 66 deletions(-) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/application.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/__init__.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/_interface.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/client_utils.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/setup.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/configurator.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py rename services/storage/src/simcore_service_storage/modules/celery/worker/{main.py => setup.py} (86%) create mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/utils.py create mode 100644 services/storage/tests/unit/modules/celery/test_core.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/application.py b/services/storage/src/simcore_service_storage/modules/celery/application.py deleted file mode 100644 index 0a0d51c60d5b..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/application.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -from typing import cast - -from celery import Celery -from celery.result import AsyncResult -from fastapi import FastAPI -from settings_library.redis import RedisDatabase - -from ...core.settings import ApplicationSettings - -_log = logging.getLogger(__name__) - - -class CeleryTaskQueue: - def __init__(self, celery_app: Celery): - self._celery_app = celery_app - - def send_task(self, name: str, *args, **kwargs) -> AsyncResult: - return self._celery_app.send_task(name, args=args, kwargs=kwargs) - - def cancel_task(self, task_id: str): - self._celery_app.control.revoke(task_id) - - -def create_celery_app(settings: ApplicationSettings) -> Celery: - assert settings.STORAGE_REDIS - - redis_dsn = settings.STORAGE_REDIS.build_redis_dsn( - RedisDatabase.CELERY_TASKS, - ) - - celery_app = Celery( - broker=redis_dsn, - backend=redis_dsn, - ) - - return celery_app - - -def get_celery_task_queue(app: FastAPI) -> CeleryTaskQueue: - return cast(CeleryTaskQueue, app.state.task_queue) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/__init__.py b/services/storage/src/simcore_service_storage/modules/celery/client/__init__.py new file mode 100644 index 000000000000..3d3e1162977e --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/client/__init__.py @@ -0,0 +1,3 @@ +from ._interface import CeleryClientInterface + +__all__: tuple[str, ...] 
= ("CeleryClientInterface",) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py new file mode 100644 index 000000000000..6580fad704b0 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py @@ -0,0 +1,33 @@ +from typing import Any +from uuid import uuid4 + +from celery import Celery +from celery.result import AsyncResult +from models_library.users import UserID + +from ..models import TaskID + + +class CeleryClientInterface: + def __init__(self, celery_app: Celery): + self._celery_app = celery_app + + def submit(self, name: str, *, user_id: UserID, **kwargs) -> TaskID: + task_id = f"{user_id}_{name}_{uuid4()}" + return self._celery_app.send_task(name, task_id=task_id, kwargs=kwargs).id + + def _get_result(self, task_id: TaskID) -> AsyncResult: + return self._celery_app.AsyncResult(task_id) + + def get_state(self, task_id: TaskID) -> str: + # task_id , state, progress + return self._get_result(task_id).state + + def get_result(self, task_id: TaskID) -> Any: + return self._get_result(task_id).result + + def cancel(self, task_id: TaskID) -> None: + self._celery_app.control.revoke(task_id, terminate=True) + + def list(self, user_id: UserID) -> list[TaskID]: + return [] diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/client_utils.py b/services/storage/src/simcore_service_storage/modules/celery/client/client_utils.py new file mode 100644 index 000000000000..28fda45d3982 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/client/client_utils.py @@ -0,0 +1,8 @@ +from typing import cast + +from fastapi import FastAPI +from simcore_service_storage.modules.celery.client import CeleryClientInterface + + +def get_celery_client_interface(app: FastAPI) -> CeleryClientInterface: + return cast(CeleryClientInterface, app.state.celery.conf["client_interface"]) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/setup.py b/services/storage/src/simcore_service_storage/modules/celery/client/setup.py new file mode 100644 index 000000000000..245932e53776 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/client/setup.py @@ -0,0 +1,34 @@ +import logging + +from celery import Celery +from fastapi import FastAPI +from settings_library.redis import RedisDatabase + +from ....core.settings import ApplicationSettings +from ._interface import CeleryClientInterface + +_log = logging.getLogger(__name__) + + +def create_celery_app(settings: ApplicationSettings) -> Celery: + assert settings.STORAGE_RABBITMQ + assert settings.STORAGE_REDIS + + celery_app = Celery( + broker=settings.STORAGE_RABBITMQ.dsn, + backend=settings.STORAGE_REDIS.build_redis_dsn( + RedisDatabase.CELERY_TASKS, + ), + ) + celery_app.conf["client_interface"] = CeleryClientInterface(celery_app) + + return celery_app + + +def attach_to_fastapi(fastapi: FastAPI, celery: Celery) -> None: + fastapi.state.celery = celery + + +def get_celery_client(fastapi: FastAPI) -> CeleryClientInterface: + celery: Celery = fastapi.state.celery + return celery.conf.get("client_interface") diff --git a/services/storage/src/simcore_service_storage/modules/celery/configurator.py b/services/storage/src/simcore_service_storage/modules/celery/configurator.py deleted file mode 100644 index 90d22023cc7e..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/configurator.py +++ 
/dev/null @@ -1,18 +0,0 @@ -import logging - -from celery import Celery -from settings_library.redis import RedisDatabase - -from ...core.settings import ApplicationSettings - -_log = logging.getLogger(__name__) - - -def create_celery_app(settings: ApplicationSettings) -> Celery: - assert settings.STORAGE_RABBITMQ - assert settings.STORAGE_REDIS - app = Celery( - broker=settings.STORAGE_RABBITMQ.dsn, - backend=settings.STORAGE_REDIS.build_redis_dsn(RedisDatabase.CELERY_TASKS), - ) - return app diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py new file mode 100644 index 000000000000..b63490ea67bd --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py @@ -0,0 +1,12 @@ +from celery import Celery +from models_library.progress_bar import ProgressReport + +from ..models import TaskID + + +class CeleryWorkerInterface: + def __init__(self, celery_app: Celery) -> None: + self.celery_app = celery_app + + def set_progress(self, task_id: TaskID, report: ProgressReport) -> None: + pass diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py b/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py similarity index 86% rename from services/storage/src/simcore_service_storage/modules/celery/worker/main.py rename to services/storage/src/simcore_service_storage/modules/celery/worker/setup.py index 210cc711c9b7..63455deb8941 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/main.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py @@ -10,7 +10,9 @@ from servicelib.logging_utils import config_all_loggers from simcore_service_storage.core.application import create_app from simcore_service_storage.core.settings import ApplicationSettings -from simcore_service_storage.modules.celery.application import create_celery_app +from simcore_service_storage.modules.celery.client.setup import create_celery_app + +from ._interface import CeleryWorkerInterface _settings = ApplicationSettings.create_from_envs() @@ -41,12 +43,10 @@ async def lifespan(): async with LifespanManager( fastapi_app, startup_timeout=30, shutdown_timeout=30 ): - _logger.error("FastAPI lifespan started") try: await shutdown_event.wait() - except asyncio.exceptions.CancelledError: - _logger.info("Lifespan task cancelled") - _logger.error("FastAPI lifespan ended") + except asyncio.CancelledError: + _logger.warning("Lifespan task cancelled") lifespan_task = loop.create_task(lifespan()) fastapi_app.state.lifespan_task = lifespan_task @@ -54,6 +54,7 @@ async def lifespan(): sender.app.conf["fastapi_app"] = fastapi_app sender.app.conf["loop"] = loop + sender.app.conf["worker_interface"] = CeleryWorkerInterface(sender.app) def run_loop(): loop.run_forever() @@ -74,7 +75,5 @@ async def shutdown(): asyncio.run_coroutine_threadsafe(shutdown(), loop) - _logger.error("FastAPI lifespan stopped.") - app = celery_app diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/utils.py b/services/storage/src/simcore_service_storage/modules/celery/worker/utils.py new file mode 100644 index 000000000000..447093665e5b --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/utils.py @@ -0,0 +1,21 @@ +from asyncio import AbstractEventLoop + +from celery import Celery +from fastapi import FastAPI + +from ._interface import 
CeleryWorkerInterface + + +def get_fastapi_app(celery_app: Celery) -> FastAPI: + fast_api_app: FastAPI = celery_app.conf.get("fastapi_app") + return fast_api_app + + +def get_loop(celery_app: Celery) -> AbstractEventLoop: # nosec + loop: AbstractEventLoop = celery_app.conf.get("loop") + return loop + + +def get_worker_interface(celery_app: Celery) -> CeleryWorkerInterface: + worker_interface: CeleryWorkerInterface = celery_app.conf.get("worker_interface") + return worker_interface diff --git a/services/storage/tests/unit/modules/celery/test_core.py b/services/storage/tests/unit/modules/celery/test_core.py new file mode 100644 index 000000000000..99556d13a65f --- /dev/null +++ b/services/storage/tests/unit/modules/celery/test_core.py @@ -0,0 +1,61 @@ +import asyncio +import time +from typing import Callable + +import pytest +from celery import Celery, Task +from models_library.progress_bar import ProgressReport +from simcore_service_storage.modules.celery.client.client_utils import ( + get_celery_client_interface, +) +from simcore_service_storage.modules.celery.worker.utils import ( + get_fastapi_app, + get_loop, + get_worker_interface, +) + + +async def _async_archive( + celery_app: Celery, task_id: str, param1: int, values: list[str] +) -> str: + fastapi_app = get_fastapi_app(celery_app) + worker_interface = get_worker_interface(celery_app) + + worker_interface.set_progress(task_id, ProgressReport(actual_value=0)) + print(fastapi_app, task_id, param1, values) + + return "result" + + +def sync_archive(task: Task, param1: int, values: list[str]) -> str: + return asyncio.run_coroutine_threadsafe( + _async_archive(task.app, task.request.id, param1, values), get_loop(task.app) + ).result() + + +@pytest.fixture +def register_celery_tasks() -> Callable[[Celery], None]: + def _(celery_app: Celery) -> None: + celery_app.task(name="sync_archive", bind=True)(sync_archive) + + return _ + + +def test_slow_task_ends_successfully( + client_celery_app: Celery, worker_celery_app: Celery +): + from simcore_service_storage.main import fastapi_app + + client_interface = get_celery_client_interface(fastapi_app) + + task_id = client_interface.submit( + "sync_archive", user_id=1, param1=1, values=["a", "b"] + ) + assert client_interface.get_state(task_id) == "PENDING" + assert client_interface.get_result(task_id) is None + + # use tnaticyt to wait for resutl + time.sleep(2) + + assert client_interface.get_state(task_id) == "SUCCESS" + assert client_interface.get_result(task_id) == "result" From 4b08136eabee95e4e34b3126a67ea75d7918f343 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 20 Feb 2025 14:51:20 +0100 Subject: [PATCH 023/131] add progress --- .../modules/celery/client/_interface.py | 76 +++++++++++++++---- .../modules/celery/worker/_interface.py | 10 ++- .../tests/unit/modules/celery/test_core.py | 75 +++++++++++++----- .../tests/unit/modules/celery/test_tasks.py | 7 -- 4 files changed, 125 insertions(+), 43 deletions(-) delete mode 100644 services/storage/tests/unit/modules/celery/test_tasks.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py index 6580fad704b0..0ba96d2f639c 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py @@ -1,33 +1,79 @@ -from typing import Any +from typing import Any, Final, TypeAlias from uuid import uuid4 from 
celery import Celery from celery.result import AsyncResult -from models_library.users import UserID +from models_library.progress_bar import ProgressReport +from pydantic import ValidationError -from ..models import TaskID +from ..models import TaskID, TaskProgress + +_PREFIX: Final = "AJ" + +TaskIdComponents: TypeAlias = dict[str, Any] + + +def _get_task_id_components(task_id_components: TaskIdComponents) -> list[str]: + return [f"{v}" for _, v in sorted(task_id_components.items())] + + +def _get_components_prefix( + name: str, task_id_components: TaskIdComponents +) -> list[str]: + return [_PREFIX, name, *_get_task_id_components(task_id_components)] + + +def _get_task_id_prefix(name: str, task_id_components: TaskIdComponents) -> TaskID: + return "::".join(_get_components_prefix(name, task_id_components)) + + +def _get_task_id(name: str, task_id_components: TaskIdComponents) -> TaskID: + return "::".join([*_get_components_prefix(name, task_id_components), f"{uuid4()}"]) class CeleryClientInterface: def __init__(self, celery_app: Celery): self._celery_app = celery_app - def submit(self, name: str, *, user_id: UserID, **kwargs) -> TaskID: - task_id = f"{user_id}_{name}_{uuid4()}" - return self._celery_app.send_task(name, task_id=task_id, kwargs=kwargs).id + def submit( + self, task_name: str, *, task_id_components: TaskIdComponents, **task_params + ) -> TaskID: + task_id = _get_task_id(task_name, task_id_components) + task = self._celery_app.send_task( + task_name, task_id=task_id, kwargs=task_params + ) + return task.id - def _get_result(self, task_id: TaskID) -> AsyncResult: - return self._celery_app.AsyncResult(task_id) + def get(self, task_id: TaskID) -> Any: + return self._celery_app.tasks(task_id) - def get_state(self, task_id: TaskID) -> str: - # task_id , state, progress - return self._get_result(task_id).state + def cancel(self, task_id: TaskID) -> None: + self._celery_app.control.revoke(task_id, terminate=True) + + def _get_async_result(self, task_id: TaskID) -> AsyncResult: + return self._celery_app.AsyncResult(task_id) def get_result(self, task_id: TaskID) -> Any: - return self._get_result(task_id).result + # se manca il risultato o se va in FAILURE, ritorna error + return self._get_async_result(task_id).result - def cancel(self, task_id: TaskID) -> None: - self._celery_app.control.revoke(task_id, terminate=True) + def _get_progress_report(self, task_id: TaskID) -> ProgressReport | None: + result = self._get_async_result(task_id).result + if result: + try: + return ProgressReport.model_validate(result) + except ValidationError: + return None + + def get_progress(self, task_id: TaskID) -> TaskProgress: + return TaskProgress( + task_id=task_id, + task_state=self._get_async_result(task_id).state, + progress_report=self._get_progress_report(task_id), + ) - def list(self, user_id: UserID) -> list[TaskID]: + def list( + self, task_name: str, *, task_id_components: TaskIdComponents + ) -> list[TaskID]: + prefix_to_search_in_redis = _get_task_id_prefix(task_name, task_id_components) return [] diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py index b63490ea67bd..2c5afdf71e57 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py @@ -8,5 +8,11 @@ class CeleryWorkerInterface: def __init__(self, celery_app: Celery) -> None: self.celery_app = 
celery_app - def set_progress(self, task_id: TaskID, report: ProgressReport) -> None: - pass + def set_progress( + self, task_name: str, task_id: TaskID, report: ProgressReport + ) -> None: + self.celery_app.tasks[task_name].update_state( + task_id=task_id, + state="PROGRESS", + meta=report.model_dump(mode="json"), + ) diff --git a/services/storage/tests/unit/modules/celery/test_core.py b/services/storage/tests/unit/modules/celery/test_core.py index 99556d13a65f..cef22829369c 100644 --- a/services/storage/tests/unit/modules/celery/test_core.py +++ b/services/storage/tests/unit/modules/celery/test_core.py @@ -1,61 +1,98 @@ import asyncio -import time from typing import Callable import pytest from celery import Celery, Task from models_library.progress_bar import ProgressReport +from simcore_service_storage.main import fastapi_app +from simcore_service_storage.modules.celery.client._interface import TaskIdComponents from simcore_service_storage.modules.celery.client.client_utils import ( get_celery_client_interface, ) from simcore_service_storage.modules.celery.worker.utils import ( - get_fastapi_app, get_loop, get_worker_interface, ) +from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed async def _async_archive( - celery_app: Celery, task_id: str, param1: int, values: list[str] + celery_app: Celery, task_name: str, task_id: str, files: list[str] ) -> str: - fastapi_app = get_fastapi_app(celery_app) worker_interface = get_worker_interface(celery_app) - worker_interface.set_progress(task_id, ProgressReport(actual_value=0)) - print(fastapi_app, task_id, param1, values) + for n in range(len(files)): + worker_interface.set_progress( + task_name=task_name, + task_id=task_id, + report=ProgressReport(actual_value=n / len(files) * 100), + ) + await asyncio.sleep(0.1) - return "result" + return "archive.zip" -def sync_archive(task: Task, param1: int, values: list[str]) -> str: +def sync_archive(task: Task, files: list[str]) -> str: + assert task.name return asyncio.run_coroutine_threadsafe( - _async_archive(task.app, task.request.id, param1, values), get_loop(task.app) + _async_archive(task.app, task.name, task.request.id, files), get_loop(task.app) ).result() +def sync_error(task: Task) -> str: + raise ValueError("my error here") + + @pytest.fixture def register_celery_tasks() -> Callable[[Celery], None]: def _(celery_app: Celery) -> None: celery_app.task(name="sync_archive", bind=True)(sync_archive) + celery_app.task(name="sync_error", bind=True)(sync_error) return _ -def test_slow_task_ends_successfully( - client_celery_app: Celery, worker_celery_app: Celery +def test_archive( + client_celery_app: Celery, + worker_celery_app: Celery, ): - from simcore_service_storage.main import fastapi_app + client_interface = get_celery_client_interface(fastapi_app) + task_id_components = TaskIdComponents(user_id=1) + + task_id = client_interface.submit( + "sync_archive", + task_id_components=task_id_components, + files=[f"file{n}" for n in range(100)], + ) + + for attempt in Retrying( + retry=retry_if_exception_type(AssertionError), + wait=wait_fixed(1), + stop=stop_after_delay(30), + ): + with attempt: + progress = client_interface.get_progress(task_id) + assert progress.task_state == "SUCCESS" + + assert client_interface.get_progress(task_id).task_state == "SUCCESS" + + +def test_sync_error( + client_celery_app: Celery, + worker_celery_app: Celery, +): client_interface = get_celery_client_interface(fastapi_app) task_id = client_interface.submit( - "sync_archive", user_id=1, 
param1=1, values=["a", "b"] + "sync_error", task_id_components=TaskIdComponents(user_id=1) ) - assert client_interface.get_state(task_id) == "PENDING" - assert client_interface.get_result(task_id) is None - # use tnaticyt to wait for resutl - time.sleep(2) + for attempt in Retrying( + retry=retry_if_exception_type(AssertionError), wait=wait_fixed(1) + ): + with attempt: + result = client_interface.get_result(task_id) + assert isinstance(result, ValueError) - assert client_interface.get_state(task_id) == "SUCCESS" - assert client_interface.get_result(task_id) == "result" + assert f"{client_interface.get_result(task_id)}" == "my error here" diff --git a/services/storage/tests/unit/modules/celery/test_tasks.py b/services/storage/tests/unit/modules/celery/test_tasks.py deleted file mode 100644 index c6390b4dd196..000000000000 --- a/services/storage/tests/unit/modules/celery/test_tasks.py +++ /dev/null @@ -1,7 +0,0 @@ -from faker import Faker -from simcore_service_storage.modules.celery.tasks import archive - - -def test_archive(celery_app, celery_worker, faker: Faker): - result = archive.apply(args=(faker.uuid4(), ["f1", "f2"])) - assert result.get() == "f1_f2.zip" From 3e189b8886308a248b62028254c3b76afa839828 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 21 Feb 2025 14:28:51 +0100 Subject: [PATCH 024/131] continue fixing --- .../modules/celery/client/_interface.py | 42 ++++++-- .../client/{client_utils.py => utils.py} | 0 services/storage/tests/conftest.py | 1 + .../tests/unit/modules/celery/test_core.py | 98 ------------------- 4 files changed, 34 insertions(+), 107 deletions(-) rename services/storage/src/simcore_service_storage/modules/celery/client/{client_utils.py => utils.py} (100%) delete mode 100644 services/storage/tests/unit/modules/celery/test_core.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py index 0ba96d2f639c..f88750a9fb2e 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py @@ -2,13 +2,14 @@ from uuid import uuid4 from celery import Celery +from celery.contrib.abortable import AbortableAsyncResult from celery.result import AsyncResult from models_library.progress_bar import ProgressReport from pydantic import ValidationError -from ..models import TaskID, TaskProgress +from ..models import TaskID, TaskStatus -_PREFIX: Final = "AJ" +_PREFIX: Final = "ct" TaskIdComponents: TypeAlias = dict[str, Any] @@ -31,6 +32,9 @@ def _get_task_id(name: str, task_id_components: TaskIdComponents) -> TaskID: return "::".join([*_get_components_prefix(name, task_id_components), f"{uuid4()}"]) +_CELERY_TASK_META_PREFIX = "celery-task-meta-" + + class CeleryClientInterface: def __init__(self, celery_app: Celery): self._celery_app = celery_app @@ -48,13 +52,13 @@ def get(self, task_id: TaskID) -> Any: return self._celery_app.tasks(task_id) def cancel(self, task_id: TaskID) -> None: - self._celery_app.control.revoke(task_id, terminate=True) + AbortableAsyncResult(task_id, app=self._celery_app).abort() def _get_async_result(self, task_id: TaskID) -> AsyncResult: return self._celery_app.AsyncResult(task_id) def get_result(self, task_id: TaskID) -> Any: - # se manca il risultato o se va in FAILURE, ritorna error + # if the result is missing or if it goes into FAILURE, return error return self._get_async_result(task_id).result def 
_get_progress_report(self, task_id: TaskID) -> ProgressReport | None: @@ -63,17 +67,37 @@ def _get_progress_report(self, task_id: TaskID) -> ProgressReport | None: try: return ProgressReport.model_validate(result) except ValidationError: - return None + pass + return None - def get_progress(self, task_id: TaskID) -> TaskProgress: - return TaskProgress( + def get_status(self, task_id: TaskID) -> TaskStatus: + return TaskStatus( task_id=task_id, task_state=self._get_async_result(task_id).state, progress_report=self._get_progress_report(task_id), ) + def _get_completed_task_ids( + self, task_name: str, task_id_components: TaskIdComponents + ) -> list[TaskID]: + search_key = ( + _CELERY_TASK_META_PREFIX + + _get_task_id_prefix(task_name, task_id_components) + + "*" + ) + redis = self._celery_app.backend.client + keys = redis.keys(search_key) + if keys: + return [f"{key}".lstrip(_CELERY_TASK_META_PREFIX) for key in keys] + return [] + def list( self, task_name: str, *, task_id_components: TaskIdComponents ) -> list[TaskID]: - prefix_to_search_in_redis = _get_task_id_prefix(task_name, task_id_components) - return [] + all_task_ids = self._get_completed_task_ids(task_name, task_id_components) + + for task_type in ["active", "registered", "scheduled", "revoked"]: + if task_ids := getattr(self._celery_app.control.inspect(), task_type)(): + all_task_ids.extend(task_ids) + + return all_task_ids diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/client_utils.py b/services/storage/src/simcore_service_storage/modules/celery/client/utils.py similarity index 100% rename from services/storage/src/simcore_service_storage/modules/celery/client/client_utils.py rename to services/storage/src/simcore_service_storage/modules/celery/client/utils.py diff --git a/services/storage/tests/conftest.py b/services/storage/tests/conftest.py index b766c7655211..d6eeb538554c 100644 --- a/services/storage/tests/conftest.py +++ b/services/storage/tests/conftest.py @@ -92,6 +92,7 @@ "pytest_simcore.openapi_specs", "pytest_simcore.postgres_service", "pytest_simcore.pytest_global_environs", + "pytest_simcore.rabbit_service", "pytest_simcore.repository_paths", "pytest_simcore.simcore_storage_data_models", "pytest_simcore.simcore_storage_datcore_adapter", diff --git a/services/storage/tests/unit/modules/celery/test_core.py b/services/storage/tests/unit/modules/celery/test_core.py deleted file mode 100644 index cef22829369c..000000000000 --- a/services/storage/tests/unit/modules/celery/test_core.py +++ /dev/null @@ -1,98 +0,0 @@ -import asyncio -from typing import Callable - -import pytest -from celery import Celery, Task -from models_library.progress_bar import ProgressReport -from simcore_service_storage.main import fastapi_app -from simcore_service_storage.modules.celery.client._interface import TaskIdComponents -from simcore_service_storage.modules.celery.client.client_utils import ( - get_celery_client_interface, -) -from simcore_service_storage.modules.celery.worker.utils import ( - get_loop, - get_worker_interface, -) -from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed - - -async def _async_archive( - celery_app: Celery, task_name: str, task_id: str, files: list[str] -) -> str: - worker_interface = get_worker_interface(celery_app) - - for n in range(len(files)): - worker_interface.set_progress( - task_name=task_name, - task_id=task_id, - report=ProgressReport(actual_value=n / len(files) * 100), - ) - await asyncio.sleep(0.1) - - return "archive.zip" - - -def 
sync_archive(task: Task, files: list[str]) -> str: - assert task.name - return asyncio.run_coroutine_threadsafe( - _async_archive(task.app, task.name, task.request.id, files), get_loop(task.app) - ).result() - - -def sync_error(task: Task) -> str: - raise ValueError("my error here") - - -@pytest.fixture -def register_celery_tasks() -> Callable[[Celery], None]: - def _(celery_app: Celery) -> None: - celery_app.task(name="sync_archive", bind=True)(sync_archive) - celery_app.task(name="sync_error", bind=True)(sync_error) - - return _ - - -def test_archive( - client_celery_app: Celery, - worker_celery_app: Celery, -): - client_interface = get_celery_client_interface(fastapi_app) - - task_id_components = TaskIdComponents(user_id=1) - - task_id = client_interface.submit( - "sync_archive", - task_id_components=task_id_components, - files=[f"file{n}" for n in range(100)], - ) - - for attempt in Retrying( - retry=retry_if_exception_type(AssertionError), - wait=wait_fixed(1), - stop=stop_after_delay(30), - ): - with attempt: - progress = client_interface.get_progress(task_id) - assert progress.task_state == "SUCCESS" - - assert client_interface.get_progress(task_id).task_state == "SUCCESS" - - -def test_sync_error( - client_celery_app: Celery, - worker_celery_app: Celery, -): - client_interface = get_celery_client_interface(fastapi_app) - - task_id = client_interface.submit( - "sync_error", task_id_components=TaskIdComponents(user_id=1) - ) - - for attempt in Retrying( - retry=retry_if_exception_type(AssertionError), wait=wait_fixed(1) - ): - with attempt: - result = client_interface.get_result(task_id) - assert isinstance(result, ValueError) - - assert f"{client_interface.get_result(task_id)}" == "my error here" From 992da831efa1024a46f4dc27193ee286ee52faea Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 21 Feb 2025 14:45:08 +0100 Subject: [PATCH 025/131] continue fixing --- .../modules/celery/client/_interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py index f88750a9fb2e..ca317bf0e5a3 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py @@ -86,9 +86,9 @@ def _get_completed_task_ids( + "*" ) redis = self._celery_app.backend.client - keys = redis.keys(search_key) - if keys: - return [f"{key}".lstrip(_CELERY_TASK_META_PREFIX) for key in keys] + if hasattr(redis, "keys"): + if keys := redis.keys(search_key): + return [f"{key}".lstrip(_CELERY_TASK_META_PREFIX) for key in keys] return [] def list( From 52f113b9d285b8eb9a826873a8442ac256519fb1 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 24 Feb 2025 13:20:36 +0100 Subject: [PATCH 026/131] working --- .../modules/celery/client/_interface.py | 3 + .../modules/celery/client/setup.py | 34 ---------- .../modules/celery/client/utils.py | 17 +++-- .../modules/celery/worker/_interface.py | 9 +++ .../modules/celery/worker/setup.py | 65 ++++++++++--------- 5 files changed, 59 insertions(+), 69 deletions(-) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/setup.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py index ca317bf0e5a3..632dfe587340 100644 --- 
a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Final, TypeAlias from uuid import uuid4 @@ -13,6 +14,8 @@ TaskIdComponents: TypeAlias = dict[str, Any] +_logger = logging.getLogger(__name__) + def _get_task_id_components(task_id_components: TaskIdComponents) -> list[str]: return [f"{v}" for _, v in sorted(task_id_components.items())] diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/setup.py b/services/storage/src/simcore_service_storage/modules/celery/client/setup.py deleted file mode 100644 index 245932e53776..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/client/setup.py +++ /dev/null @@ -1,34 +0,0 @@ -import logging - -from celery import Celery -from fastapi import FastAPI -from settings_library.redis import RedisDatabase - -from ....core.settings import ApplicationSettings -from ._interface import CeleryClientInterface - -_log = logging.getLogger(__name__) - - -def create_celery_app(settings: ApplicationSettings) -> Celery: - assert settings.STORAGE_RABBITMQ - assert settings.STORAGE_REDIS - - celery_app = Celery( - broker=settings.STORAGE_RABBITMQ.dsn, - backend=settings.STORAGE_REDIS.build_redis_dsn( - RedisDatabase.CELERY_TASKS, - ), - ) - celery_app.conf["client_interface"] = CeleryClientInterface(celery_app) - - return celery_app - - -def attach_to_fastapi(fastapi: FastAPI, celery: Celery) -> None: - fastapi.state.celery = celery - - -def get_celery_client(fastapi: FastAPI) -> CeleryClientInterface: - celery: Celery = fastapi.state.celery - return celery.conf.get("client_interface") diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/utils.py b/services/storage/src/simcore_service_storage/modules/celery/client/utils.py index 28fda45d3982..94125cba93c6 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client/utils.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client/utils.py @@ -1,8 +1,17 @@ -from typing import cast +import logging +from celery import Celery from fastapi import FastAPI -from simcore_service_storage.modules.celery.client import CeleryClientInterface +from ._interface import CeleryClientInterface -def get_celery_client_interface(app: FastAPI) -> CeleryClientInterface: - return cast(CeleryClientInterface, app.state.celery.conf["client_interface"]) +_log = logging.getLogger(__name__) + + +def attach_to_fastapi(fastapi: FastAPI, celery: Celery) -> None: + fastapi.state.celery = celery + + +def get_celery_client(fastapi: FastAPI) -> CeleryClientInterface: + celery: Celery = fastapi.state.celery + return celery.conf.get("client_interface") diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py index 2c5afdf71e57..ec5e36d3a5d8 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py @@ -1,13 +1,22 @@ +import logging +from typing import Callable + from celery import Celery from models_library.progress_bar import ProgressReport from ..models import TaskID +_logger = logging.getLogger(__name__) + class CeleryWorkerInterface: def __init__(self, celery_app: Celery) -> None: self.celery_app = celery_app + def register_task(self, 
fn: Callable): + _logger.info("Registering %s task", fn.__name__) + self.celery_app.task(name=fn.__name__, bind=True)(fn) + def set_progress( self, task_name: str, task_id: TaskID, report: ProgressReport ) -> None: diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py b/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py index 63455deb8941..c87fcee9034f 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py @@ -8,10 +8,11 @@ from celery.signals import worker_init, worker_shutdown from servicelib.background_task import cancel_wait_task from servicelib.logging_utils import config_all_loggers -from simcore_service_storage.core.application import create_app -from simcore_service_storage.core.settings import ApplicationSettings -from simcore_service_storage.modules.celery.client.setup import create_celery_app +from ....core.application import create_app +from ....core.settings import ApplicationSettings +from ....modules.celery.tasks import sync_archive +from ....modules.celery.utils import create_celery_app from ._interface import CeleryWorkerInterface _settings = ApplicationSettings.create_from_envs() @@ -28,38 +29,37 @@ _logger = logging.getLogger(__name__) -celery_app = create_celery_app(_settings) - - @worker_init.connect def on_worker_init(sender, **_kwargs): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - shutdown_event = asyncio.Event() - - fastapi_app = create_app(_settings) - - async def lifespan(): - async with LifespanManager( - fastapi_app, startup_timeout=30, shutdown_timeout=30 - ): - try: - await shutdown_event.wait() - except asyncio.CancelledError: - _logger.warning("Lifespan task cancelled") - - lifespan_task = loop.create_task(lifespan()) - fastapi_app.state.lifespan_task = lifespan_task - fastapi_app.state.shutdown_event = shutdown_event - - sender.app.conf["fastapi_app"] = fastapi_app - sender.app.conf["loop"] = loop - sender.app.conf["worker_interface"] = CeleryWorkerInterface(sender.app) - - def run_loop(): + def shhsshhshs(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + shutdown_event = asyncio.Event() + + fastapi_app = create_app(_settings) + + async def lifespan(): + async with LifespanManager( + fastapi_app, startup_timeout=30, shutdown_timeout=30 + ): + try: + await shutdown_event.wait() + except asyncio.CancelledError: + _logger.warning("Lifespan task cancelled") + + lifespan_task = loop.create_task(lifespan()) + fastapi_app.state.lifespan_task = lifespan_task + fastapi_app.state.shutdown_event = shutdown_event + + celery_worker_interface = CeleryWorkerInterface(sender.app) + + sender.app.conf["fastapi_app"] = fastapi_app + sender.app.conf["loop"] = loop + sender.app.conf["worker_interface"] = celery_worker_interface + loop.run_forever() - thread = threading.Thread(target=run_loop, daemon=True) + thread = threading.Thread(target=shhsshhshs, daemon=True) thread.start() @@ -76,4 +76,7 @@ async def shutdown(): asyncio.run_coroutine_threadsafe(shutdown(), loop) +celery_app = create_celery_app(ApplicationSettings.create_from_envs()) +celery_app.task(name=sync_archive.__name__, bind=True)(sync_archive) + app = celery_app From 1955e9dbf7d6bbe5ca170967bc86264a1aedf1ab Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 24 Feb 2025 15:07:06 +0100 Subject: [PATCH 027/131] continue fix --- .../api/rest/_files.py | 10 ++ .../modules/celery/client/__init__.py 
| 3 - .../modules/celery/client/_interface.py | 106 ------------------ .../modules/celery/client/utils.py | 17 --- .../modules/celery/{utils.py => common.py} | 0 .../modules/celery/worker/__init__.py | 0 .../modules/celery/worker/_interface.py | 27 ----- .../modules/celery/worker/setup.py | 82 -------------- .../modules/celery/worker/utils.py | 21 ---- 9 files changed, 10 insertions(+), 256 deletions(-) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/__init__.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/_interface.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/client/utils.py rename services/storage/src/simcore_service_storage/modules/celery/{utils.py => common.py} (100%) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/__init__.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/setup.py delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/worker/utils.py diff --git a/services/storage/src/simcore_service_storage/api/rest/_files.py b/services/storage/src/simcore_service_storage/api/rest/_files.py index c0b6a4f4a7c9..2cb6b4428e7f 100644 --- a/services/storage/src/simcore_service_storage/api/rest/_files.py +++ b/services/storage/src/simcore_service_storage/api/rest/_files.py @@ -34,6 +34,7 @@ StorageQueryParamsBase, UploadLinks, ) +from ...modules.celery.client import get_client from ...modules.long_running_tasks import get_completed_upload_tasks from ...simcore_s3_dsm import SimcoreS3DataManager from ...utils.utils import create_upload_completion_task_name @@ -56,6 +57,15 @@ async def list_files_metadata( location_id: LocationID, request: Request, ): + c = get_client(request.app) + components = {"user_id": 1} + + task_id = c.submit("sync_archive", task_id_components=components, files=["aaa.xyz"]) + _logger.info("Submitted task: %s", task_id) + + task_ids = c.list("sync_archive", task_id_components=components) + _logger.info("%s", task_ids) + dsm = get_dsm_provider(request.app).get(location_id) data: list[FileMetaData] = await dsm.list_files( user_id=query_params.user_id, diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/__init__.py b/services/storage/src/simcore_service_storage/modules/celery/client/__init__.py deleted file mode 100644 index 3d3e1162977e..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/client/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from ._interface import CeleryClientInterface - -__all__: tuple[str, ...] 
= ("CeleryClientInterface",) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py deleted file mode 100644 index 632dfe587340..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/client/_interface.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -from typing import Any, Final, TypeAlias -from uuid import uuid4 - -from celery import Celery -from celery.contrib.abortable import AbortableAsyncResult -from celery.result import AsyncResult -from models_library.progress_bar import ProgressReport -from pydantic import ValidationError - -from ..models import TaskID, TaskStatus - -_PREFIX: Final = "ct" - -TaskIdComponents: TypeAlias = dict[str, Any] - -_logger = logging.getLogger(__name__) - - -def _get_task_id_components(task_id_components: TaskIdComponents) -> list[str]: - return [f"{v}" for _, v in sorted(task_id_components.items())] - - -def _get_components_prefix( - name: str, task_id_components: TaskIdComponents -) -> list[str]: - return [_PREFIX, name, *_get_task_id_components(task_id_components)] - - -def _get_task_id_prefix(name: str, task_id_components: TaskIdComponents) -> TaskID: - return "::".join(_get_components_prefix(name, task_id_components)) - - -def _get_task_id(name: str, task_id_components: TaskIdComponents) -> TaskID: - return "::".join([*_get_components_prefix(name, task_id_components), f"{uuid4()}"]) - - -_CELERY_TASK_META_PREFIX = "celery-task-meta-" - - -class CeleryClientInterface: - def __init__(self, celery_app: Celery): - self._celery_app = celery_app - - def submit( - self, task_name: str, *, task_id_components: TaskIdComponents, **task_params - ) -> TaskID: - task_id = _get_task_id(task_name, task_id_components) - task = self._celery_app.send_task( - task_name, task_id=task_id, kwargs=task_params - ) - return task.id - - def get(self, task_id: TaskID) -> Any: - return self._celery_app.tasks(task_id) - - def cancel(self, task_id: TaskID) -> None: - AbortableAsyncResult(task_id, app=self._celery_app).abort() - - def _get_async_result(self, task_id: TaskID) -> AsyncResult: - return self._celery_app.AsyncResult(task_id) - - def get_result(self, task_id: TaskID) -> Any: - # if the result is missing or if it goes into FAILURE, return error - return self._get_async_result(task_id).result - - def _get_progress_report(self, task_id: TaskID) -> ProgressReport | None: - result = self._get_async_result(task_id).result - if result: - try: - return ProgressReport.model_validate(result) - except ValidationError: - pass - return None - - def get_status(self, task_id: TaskID) -> TaskStatus: - return TaskStatus( - task_id=task_id, - task_state=self._get_async_result(task_id).state, - progress_report=self._get_progress_report(task_id), - ) - - def _get_completed_task_ids( - self, task_name: str, task_id_components: TaskIdComponents - ) -> list[TaskID]: - search_key = ( - _CELERY_TASK_META_PREFIX - + _get_task_id_prefix(task_name, task_id_components) - + "*" - ) - redis = self._celery_app.backend.client - if hasattr(redis, "keys"): - if keys := redis.keys(search_key): - return [f"{key}".lstrip(_CELERY_TASK_META_PREFIX) for key in keys] - return [] - - def list( - self, task_name: str, *, task_id_components: TaskIdComponents - ) -> list[TaskID]: - all_task_ids = self._get_completed_task_ids(task_name, task_id_components) - - for task_type in ["active", "registered", "scheduled", "revoked"]: - if task_ids := 
getattr(self._celery_app.control.inspect(), task_type)(): - all_task_ids.extend(task_ids) - - return all_task_ids diff --git a/services/storage/src/simcore_service_storage/modules/celery/client/utils.py b/services/storage/src/simcore_service_storage/modules/celery/client/utils.py deleted file mode 100644 index 94125cba93c6..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/client/utils.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -from celery import Celery -from fastapi import FastAPI - -from ._interface import CeleryClientInterface - -_log = logging.getLogger(__name__) - - -def attach_to_fastapi(fastapi: FastAPI, celery: Celery) -> None: - fastapi.state.celery = celery - - -def get_celery_client(fastapi: FastAPI) -> CeleryClientInterface: - celery: Celery = fastapi.state.celery - return celery.conf.get("client_interface") diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py b/services/storage/src/simcore_service_storage/modules/celery/common.py similarity index 100% rename from services/storage/src/simcore_service_storage/modules/celery/utils.py rename to services/storage/src/simcore_service_storage/modules/celery/common.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/__init__.py b/services/storage/src/simcore_service_storage/modules/celery/worker/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py b/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py deleted file mode 100644 index ec5e36d3a5d8..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/_interface.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging -from typing import Callable - -from celery import Celery -from models_library.progress_bar import ProgressReport - -from ..models import TaskID - -_logger = logging.getLogger(__name__) - - -class CeleryWorkerInterface: - def __init__(self, celery_app: Celery) -> None: - self.celery_app = celery_app - - def register_task(self, fn: Callable): - _logger.info("Registering %s task", fn.__name__) - self.celery_app.task(name=fn.__name__, bind=True)(fn) - - def set_progress( - self, task_name: str, task_id: TaskID, report: ProgressReport - ) -> None: - self.celery_app.tasks[task_name].update_state( - task_id=task_id, - state="PROGRESS", - meta=report.model_dump(mode="json"), - ) diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py b/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py deleted file mode 100644 index c87fcee9034f..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/setup.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Main application to be deployed in for example uvicorn.""" - -import asyncio -import logging -import threading - -from asgi_lifespan import LifespanManager -from celery.signals import worker_init, worker_shutdown -from servicelib.background_task import cancel_wait_task -from servicelib.logging_utils import config_all_loggers - -from ....core.application import create_app -from ....core.settings import ApplicationSettings -from ....modules.celery.tasks import sync_archive -from ....modules.celery.utils import create_celery_app -from ._interface import CeleryWorkerInterface - -_settings = ApplicationSettings.create_from_envs() - -# SEE https://github.com/ITISFoundation/osparc-simcore/issues/3148 
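The file being removed here carried the central pattern of this series: a Celery worker that hosts a FastAPI application on a private asyncio event loop running in a daemon thread, so that synchronous Celery task bodies can hand coroutines to it. A minimal, self-contained sketch of that lifecycle pattern (independent of the simcore factories; `celery_app` below is a bare Celery instance standing in for the service's own):

    import asyncio
    import threading

    from celery import Celery
    from celery.signals import worker_init, worker_shutdown

    celery_app = Celery()


    @worker_init.connect
    def on_worker_init(sender, **_kwargs):
        # One private loop per worker, kept alive on a daemon thread so
        # synchronous task bodies can schedule coroutines onto it.
        loop = asyncio.new_event_loop()

        def _run() -> None:
            asyncio.set_event_loop(loop)
            loop.run_forever()

        threading.Thread(target=_run, daemon=True).start()
        sender.app.conf["loop"] = loop


    @worker_shutdown.connect
    def on_worker_shutdown(sender, **_kwargs):
        # Stop the loop from another thread without blocking the worker.
        loop = sender.app.conf["loop"]
        loop.call_soon_threadsafe(loop.stop)

A task body then bridges into async code with asyncio.run_coroutine_threadsafe(coro, loop).result(), which is exactly how sync_archive drives _async_archive in the tests earlier in this series.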
-logging.basicConfig(level=_settings.log_level) # NOSONAR -logging.root.setLevel(_settings.log_level) -config_all_loggers( - log_format_local_dev_enabled=_settings.STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED, - logger_filter_mapping=_settings.STORAGE_LOG_FILTER_MAPPING, - tracing_settings=_settings.STORAGE_TRACING, -) - -_logger = logging.getLogger(__name__) - - -@worker_init.connect -def on_worker_init(sender, **_kwargs): - def shhsshhshs(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - shutdown_event = asyncio.Event() - - fastapi_app = create_app(_settings) - - async def lifespan(): - async with LifespanManager( - fastapi_app, startup_timeout=30, shutdown_timeout=30 - ): - try: - await shutdown_event.wait() - except asyncio.CancelledError: - _logger.warning("Lifespan task cancelled") - - lifespan_task = loop.create_task(lifespan()) - fastapi_app.state.lifespan_task = lifespan_task - fastapi_app.state.shutdown_event = shutdown_event - - celery_worker_interface = CeleryWorkerInterface(sender.app) - - sender.app.conf["fastapi_app"] = fastapi_app - sender.app.conf["loop"] = loop - sender.app.conf["worker_interface"] = celery_worker_interface - - loop.run_forever() - - thread = threading.Thread(target=shhsshhshs, daemon=True) - thread.start() - - -@worker_shutdown.connect -def on_worker_shutdown(sender, **_kwargs): - loop = sender.app.conf["loop"] - fastapi_app = sender.app.conf["fastapi_app"] - - async def shutdown(): - fastapi_app.state.shutdown_event.set() - - await cancel_wait_task(fastapi_app.state.lifespan_task, max_delay=5) - - asyncio.run_coroutine_threadsafe(shutdown(), loop) - - -celery_app = create_celery_app(ApplicationSettings.create_from_envs()) -celery_app.task(name=sync_archive.__name__, bind=True)(sync_archive) - -app = celery_app diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker/utils.py b/services/storage/src/simcore_service_storage/modules/celery/worker/utils.py deleted file mode 100644 index 447093665e5b..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/worker/utils.py +++ /dev/null @@ -1,21 +0,0 @@ -from asyncio import AbstractEventLoop - -from celery import Celery -from fastapi import FastAPI - -from ._interface import CeleryWorkerInterface - - -def get_fastapi_app(celery_app: Celery) -> FastAPI: - fast_api_app: FastAPI = celery_app.conf.get("fastapi_app") - return fast_api_app - - -def get_loop(celery_app: Celery) -> AbstractEventLoop: # nosec - loop: AbstractEventLoop = celery_app.conf.get("loop") - return loop - - -def get_worker_interface(celery_app: Celery) -> CeleryWorkerInterface: - worker_interface: CeleryWorkerInterface = celery_app.conf.get("worker_interface") - return worker_interface From fde73871e83b9751428386746f36f5d648918667 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 24 Feb 2025 15:39:40 +0100 Subject: [PATCH 028/131] fix files endpoint --- .../src/simcore_service_storage/api/rest/_files.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rest/_files.py b/services/storage/src/simcore_service_storage/api/rest/_files.py index 2cb6b4428e7f..c0b6a4f4a7c9 100644 --- a/services/storage/src/simcore_service_storage/api/rest/_files.py +++ b/services/storage/src/simcore_service_storage/api/rest/_files.py @@ -34,7 +34,6 @@ StorageQueryParamsBase, UploadLinks, ) -from ...modules.celery.client import get_client from ...modules.long_running_tasks import get_completed_upload_tasks from ...simcore_s3_dsm import 
SimcoreS3DataManager from ...utils.utils import create_upload_completion_task_name @@ -57,15 +56,6 @@ async def list_files_metadata( location_id: LocationID, request: Request, ): - c = get_client(request.app) - components = {"user_id": 1} - - task_id = c.submit("sync_archive", task_id_components=components, files=["aaa.xyz"]) - _logger.info("Submitted task: %s", task_id) - - task_ids = c.list("sync_archive", task_id_components=components) - _logger.info("%s", task_ids) - dsm = get_dsm_provider(request.app).get(location_id) data: list[FileMetaData] = await dsm.list_files( user_id=query_params.user_id, From 7cdf8a2f293a783fd0ee43b9893d5c2ed263d53b Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 24 Feb 2025 15:54:59 +0100 Subject: [PATCH 029/131] rename --- .../modules/celery/{tasks.py => example_tasks.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename services/storage/src/simcore_service_storage/modules/celery/{tasks.py => example_tasks.py} (100%) diff --git a/services/storage/src/simcore_service_storage/modules/celery/tasks.py b/services/storage/src/simcore_service_storage/modules/celery/example_tasks.py similarity index 100% rename from services/storage/src/simcore_service_storage/modules/celery/tasks.py rename to services/storage/src/simcore_service_storage/modules/celery/example_tasks.py From 582c53702c8b1546945fdb04e83fb30d464e382b Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 25 Feb 2025 10:51:58 +0100 Subject: [PATCH 030/131] add healthcheck --- .../storage/src/simcore_service_storage/core/settings.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index f6d7ec8b3723..4a88a9f3a392 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -129,7 +129,12 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): bool, Field(description="If True, run as a worker") ] = False - STORAGE_WORKER_MODE: bool | None = False + STORAGE_WORKER_MODE: Annotated[ + bool | None, Field(description="If True, run as a worker") + ] = False + STORAGE_CELERY_BROKER: RabbitSettings | None = Field( + json_schema_extra={"auto_default_from_env": True} + ) @field_validator("LOG_LEVEL", mode="before") @classmethod From 0c923d2dda7875fb7a6a5a89a4692a894135a61a Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 25 Feb 2025 11:46:39 +0100 Subject: [PATCH 031/131] add settings --- services/storage/src/simcore_service_storage/core/settings.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index 4a88a9f3a392..27d97a12ead4 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -132,9 +132,6 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): STORAGE_WORKER_MODE: Annotated[ bool | None, Field(description="If True, run as a worker") ] = False - STORAGE_CELERY_BROKER: RabbitSettings | None = Field( - json_schema_extra={"auto_default_from_env": True} - ) @field_validator("LOG_LEVEL", mode="before") @classmethod From ac932635e824bbe7a8a89515a9afaa8de8bb65f3 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 25 Feb 2025 11:53:35 +0100 Subject: [PATCH 032/131] remove unused pytest plugin
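The two settings patches above (PATCH 030/031) use the pydantic-settings idiom found throughout this repo: Annotated fields carrying a Field() with a description, populated from environment variables when the settings class is instantiated. A rough standalone sketch of that idiom with illustrative names (the repo's auto_default_from_env extra is a simcore convention and is not reproduced here):

    from typing import Annotated

    from pydantic import Field
    from pydantic_settings import BaseSettings


    class WorkerSettings(BaseSettings):
        # Illustrative counterpart of STORAGE_WORKER_MODE: read from the
        # WORKER_MODE environment variable, defaulting to False.
        WORKER_MODE: Annotated[
            bool | None, Field(description="If True, run as a worker")
        ] = False


    settings = WorkerSettings()  # e.g. WORKER_MODE=true -> settings.WORKER_MODE is True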
--- services/storage/tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/storage/tests/conftest.py b/services/storage/tests/conftest.py index d6eeb538554c..b766c7655211 100644 --- a/services/storage/tests/conftest.py +++ b/services/storage/tests/conftest.py @@ -92,7 +92,6 @@ "pytest_simcore.openapi_specs", "pytest_simcore.postgres_service", "pytest_simcore.pytest_global_environs", - "pytest_simcore.rabbit_service", "pytest_simcore.repository_paths", "pytest_simcore.simcore_storage_data_models", "pytest_simcore.simcore_storage_datcore_adapter", From a931d7ff94e8dd9dc8dec30298521954e2671f70 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 25 Feb 2025 14:17:38 +0100 Subject: [PATCH 033/131] fix tests --- .../simcore_service_storage/modules/celery/utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 services/storage/src/simcore_service_storage/modules/celery/utils.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py b/services/storage/src/simcore_service_storage/modules/celery/utils.py new file mode 100644 index 000000000000..6c5016f30937 --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/utils.py @@ -0,0 +1,12 @@ +from celery import Celery +from fastapi import FastAPI +from simcore_service_storage.main import CeleryTaskQueueClient + + +def get_celery_client(fastapi: FastAPI) -> CeleryTaskQueueClient: + celery = fastapi.state.celery_app + assert isinstance(celery, Celery) + + client = celery.conf["client"] + assert isinstance(client, CeleryTaskQueueClient) + return client From a2e7e9e384bfb596bd9c31c0433d5a55f134d525 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 25 Feb 2025 15:34:08 +0100 Subject: [PATCH 034/131] add utils --- .../modules/celery/utils.py | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py b/services/storage/src/simcore_service_storage/modules/celery/utils.py index 6c5016f30937..9c050d86bb38 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/utils.py +++ b/services/storage/src/simcore_service_storage/modules/celery/utils.py @@ -1,12 +1,38 @@ from celery import Celery from fastapi import FastAPI -from simcore_service_storage.main import CeleryTaskQueueClient +from .client import CeleryTaskQueueClient +from .worker import CeleryTaskQueueWorker -def get_celery_client(fastapi: FastAPI) -> CeleryTaskQueueClient: +_CLIENT_KEY = "client" +_WORKER_KEY = "worker" + + +def get_celery_app(fastapi: FastAPI) -> Celery: celery = fastapi.state.celery_app assert isinstance(celery, Celery) + return celery + - client = celery.conf["client"] +def set_celery_app(fastapi: FastAPI, celery: Celery) -> None: + fastapi.state.celery_app = celery + + +def get_celery_client(celery_app: Celery) -> CeleryTaskQueueClient: + client = celery_app.conf[_CLIENT_KEY] assert isinstance(client, CeleryTaskQueueClient) return client + + +def set_celery_client(celery_app: Celery, celery_client: CeleryTaskQueueClient) -> None: + celery_app.conf[_CLIENT_KEY] = celery_client + + +def get_celery_worker(celery_app: Celery) -> CeleryTaskQueueWorker: + worker = celery_app.conf[_WORKER_KEY] + assert isinstance(worker, CeleryTaskQueueWorker) + return worker + + +def set_celery_worker(celery_app: Celery, celery_worker: CeleryTaskQueueWorker) -> None: + celery_app.conf[_WORKER_KEY] = celery_worker From 94732a46c9cf8b71466ce4e373a3d2e9d6aefb10 Mon Sep 17 00:00:00 2001 From: 
Giancarlo Romeo Date: Tue, 25 Feb 2025 15:47:30 +0100 Subject: [PATCH 035/131] update utils --- .../src/simcore_service_storage/modules/celery/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py b/services/storage/src/simcore_service_storage/modules/celery/utils.py index 9c050d86bb38..2d8ddb3f7593 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/utils.py +++ b/services/storage/src/simcore_service_storage/modules/celery/utils.py @@ -18,13 +18,17 @@ def set_celery_app(fastapi: FastAPI, celery: Celery) -> None: fastapi.state.celery_app = celery -def get_celery_client(celery_app: Celery) -> CeleryTaskQueueClient: +def get_celery_client(fastapi_app: FastAPI) -> CeleryTaskQueueClient: + celery_app = get_celery_app(fastapi_app) client = celery_app.conf[_CLIENT_KEY] assert isinstance(client, CeleryTaskQueueClient) return client -def set_celery_client(celery_app: Celery, celery_client: CeleryTaskQueueClient) -> None: +def set_celery_client( + fastapi_app: FastAPI, celery_client: CeleryTaskQueueClient +) -> None: + celery_app = get_celery_app(fastapi_app) celery_app.conf[_CLIENT_KEY] = celery_client From 18452fc12e464d399cec313f9183229199597737 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 26 Feb 2025 10:56:47 +0100 Subject: [PATCH 036/131] add async interface --- .../src/common_library/async_utils.py | 22 +++++++++++++++++++ .../modules/celery/client.py | 7 +++++- 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 packages/common-library/src/common_library/async_utils.py diff --git a/packages/common-library/src/common_library/async_utils.py b/packages/common-library/src/common_library/async_utils.py new file mode 100644 index 000000000000..6ad0666a9260 --- /dev/null +++ b/packages/common-library/src/common_library/async_utils.py @@ -0,0 +1,22 @@ +import asyncio +import functools +from collections.abc import Callable, Coroutine +from typing import Any, TypeVar + +R = TypeVar("R") + + +def make_async( + executor=None, +) -> Callable[[Callable[..., R]], Callable[..., Coroutine[Any, Any, R]]]: + def decorator(func) -> Callable[..., Coroutine[Any, Any, R]]: + @functools.wraps(func) + async def wrapper(*args, **kwargs) -> R: + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + executor, functools.partial(func, *args, **kwargs) + ) + + return wrapper + + return decorator diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 300aadb66b2c..eb6e500e0c35 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -1,6 +1,6 @@ import contextlib import logging -from typing import Any, Final +from typing import Any, Coroutine, Final from uuid import uuid4 from celery import Celery # type: ignore[import-untyped] @@ -141,3 +141,8 @@ def get_task_uuids(self, task_context: TaskContext) -> set[TaskUUID]: all_task_ids.add(TaskUUID(value.removeprefix(search_key + _CELERY_TASK_ID_KEY_SEPARATOR))) return all_task_ids + + async def list_tasks( + self, task_name: str, *, task_id_parts: TaskIDParts + ) -> list[TaskID]: + return await self.__list_tasks(task_name, task_id_parts=task_id_parts) From 779e525f1db320d2b67213ef7735137dec0b3703 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 26 Feb 2025 11:01:12 +0100 Subject: [PATCH 037/131] add 
tests --- .../src/common_library/async_utils.py | 22 ------------------- 1 file changed, 22 deletions(-) delete mode 100644 packages/common-library/src/common_library/async_utils.py diff --git a/packages/common-library/src/common_library/async_utils.py b/packages/common-library/src/common_library/async_utils.py deleted file mode 100644 index 6ad0666a9260..000000000000 --- a/packages/common-library/src/common_library/async_utils.py +++ /dev/null @@ -1,22 +0,0 @@ -import asyncio -import functools -from collections.abc import Callable, Coroutine -from typing import Any, TypeVar - -R = TypeVar("R") - - -def make_async( - executor=None, -) -> Callable[[Callable[..., R]], Callable[..., Coroutine[Any, Any, R]]]: - def decorator(func) -> Callable[..., Coroutine[Any, Any, R]]: - @functools.wraps(func) - async def wrapper(*args, **kwargs) -> R: - loop = asyncio.get_running_loop() - return await loop.run_in_executor( - executor, functools.partial(func, *args, **kwargs) - ) - - return wrapper - - return decorator From 3374d22e50a047e8959de4c9deb3da1028192868 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 26 Feb 2025 12:36:15 +0100 Subject: [PATCH 038/131] improve typehint --- .../src/simcore_service_storage/modules/celery/client.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index eb6e500e0c35..300aadb66b2c 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -1,6 +1,6 @@ import contextlib import logging -from typing import Any, Coroutine, Final +from typing import Any, Final from uuid import uuid4 from celery import Celery # type: ignore[import-untyped] @@ -141,8 +141,3 @@ def get_task_uuids(self, task_context: TaskContext) -> set[TaskUUID]: all_task_ids.add(TaskUUID(value.removeprefix(search_key + _CELERY_TASK_ID_KEY_SEPARATOR))) return all_task_ids - - async def list_tasks( - self, task_name: str, *, task_id_parts: TaskIDParts - ) -> list[TaskID]: - return await self.__list_tasks(task_name, task_id_parts=task_id_parts) From 48bfc1d67d8eb855e34f529800ac6796dfa01321 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 26 Feb 2025 12:50:53 +0100 Subject: [PATCH 039/131] remove unused --- .../src/simcore_service_storage/modules/celery/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py b/services/storage/src/simcore_service_storage/modules/celery/utils.py index 2d8ddb3f7593..134e663eabd4 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/utils.py +++ b/services/storage/src/simcore_service_storage/modules/celery/utils.py @@ -1,3 +1,5 @@ +from asyncio import AbstractEventLoop + from celery import Celery from fastapi import FastAPI @@ -6,6 +8,7 @@ _CLIENT_KEY = "client" _WORKER_KEY = "worker" +_EVENT_LOOP_KEY = "loop" def get_celery_app(fastapi: FastAPI) -> Celery: @@ -40,3 +43,9 @@ def get_celery_worker(celery_app: Celery) -> CeleryTaskQueueWorker: def set_celery_worker(celery_app: Celery, celery_worker: CeleryTaskQueueWorker) -> None: celery_app.conf[_WORKER_KEY] = celery_worker + + +def get_event_loop(celery_app: Celery) -> AbstractEventLoop: # nosec + loop = celery_app.conf[_EVENT_LOOP_KEY] + assert isinstance(loop, AbstractEventLoop) + return loop From ae3e0e6009d161ea1c976053d4eb9c95c41611b5 Mon Sep 17 
00:00:00 2001 From: Giancarlo Romeo Date: Thu, 27 Feb 2025 11:59:15 +0100 Subject: [PATCH 040/131] add rabbit --- services/storage/tests/unit/modules/celery/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/storage/tests/unit/modules/celery/conftest.py b/services/storage/tests/unit/modules/celery/conftest.py index 3cd06195b286..adc6bf8a8f37 100644 --- a/services/storage/tests/unit/modules/celery/conftest.py +++ b/services/storage/tests/unit/modules/celery/conftest.py @@ -19,6 +19,7 @@ @pytest.fixture def app_environment( monkeypatch: pytest.MonkeyPatch, + rabbit_service: RabbitSettings, app_environment: EnvVarsDict, ) -> EnvVarsDict: return setenvs_from_dict( From 66c47f9afeabe8fe32545425081a26013952bbc3 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 27 Feb 2025 12:56:18 +0100 Subject: [PATCH 041/131] update interface --- services/storage/tests/unit/modules/celery/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/storage/tests/unit/modules/celery/conftest.py b/services/storage/tests/unit/modules/celery/conftest.py index adc6bf8a8f37..3cd06195b286 100644 --- a/services/storage/tests/unit/modules/celery/conftest.py +++ b/services/storage/tests/unit/modules/celery/conftest.py @@ -19,7 +19,6 @@ @pytest.fixture def app_environment( monkeypatch: pytest.MonkeyPatch, - rabbit_service: RabbitSettings, app_environment: EnvVarsDict, ) -> EnvVarsDict: return setenvs_from_dict( From 98590626058df23dfe8f4ce2e68bdf880f24e7e3 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 27 Feb 2025 13:15:10 +0100 Subject: [PATCH 042/131] adapt code --- .../src/models_library/api_schemas_rpc_async_jobs/async_jobs.py | 2 +- .../storage/src/simcore_service_storage/api/rpc/_async_jobs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py index 953cd1f819cc..bb410add1253 100644 --- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py +++ b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py @@ -11,7 +11,7 @@ class AsyncJobStatus(BaseModel): job_id: AsyncJobId - progress: ProgressReport + progress: ProgressReport | None done: bool diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index f901502dda8d..7a40f3c7589d 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -66,7 +66,7 @@ async def get_result( @router.expose() async def list_jobs( - app: FastAPI, filter_: str, job_id_data: AsyncJobNameData + app: FastAPI, filter_: str, job_id_data: AsyncJobNameData # TODO: implement filter ) -> list[AsyncJobGet]: assert app # nosec From 8ea7362c994e71b65c0771318bd5e7d3ea88e5ff Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 3 Mar 2025 10:00:20 +0100 Subject: [PATCH 043/131] refactor --- .../modules/celery/{utils.py => _utils.py} | 0 .../modules/celery/common.py | 27 ------------------- 2 files changed, 27 deletions(-) rename services/storage/src/simcore_service_storage/modules/celery/{utils.py => _utils.py} (100%) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/common.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py 
b/services/storage/src/simcore_service_storage/modules/celery/_utils.py similarity index 100% rename from services/storage/src/simcore_service_storage/modules/celery/utils.py rename to services/storage/src/simcore_service_storage/modules/celery/_utils.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/common.py b/services/storage/src/simcore_service_storage/modules/celery/common.py deleted file mode 100644 index 5f5186548204..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/common.py +++ /dev/null @@ -1,27 +0,0 @@ -from celery import Celery # type: ignore[import-untyped] -from fastapi import FastAPI - -from .worker import CeleryTaskQueueWorker - -_WORKER_KEY = "celery_worker" -_FASTAPI_APP_KEY = "fastapi_app" - - -def get_celery_worker(celery_app: Celery) -> CeleryTaskQueueWorker: - worker = celery_app.conf[_WORKER_KEY] - assert isinstance(worker, CeleryTaskQueueWorker) - return worker - - -def get_fastapi_app(celery_app: Celery) -> FastAPI: - fastapi_app = celery_app.conf[_FASTAPI_APP_KEY] - assert isinstance(fastapi_app, FastAPI) - return fastapi_app - - -def set_celery_worker(celery_app: Celery, worker: CeleryTaskQueueWorker) -> None: - celery_app.conf[_WORKER_KEY] = worker - - -def set_fastapi_app(celery_app: Celery, fastapi_app: FastAPI) -> None: - celery_app.conf[_FASTAPI_APP_KEY] = fastapi_app From 0b6c98616993887d571248098fd7b72b7b1b6825 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 3 Mar 2025 10:19:18 +0100 Subject: [PATCH 044/131] fix settings --- .../storage/src/simcore_service_storage/core/application.py | 3 ++- services/storage/src/simcore_service_storage/core/settings.py | 4 ---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/services/storage/src/simcore_service_storage/core/application.py b/services/storage/src/simcore_service_storage/core/application.py index 13082a00fb06..dfa8372a334b 100644 --- a/services/storage/src/simcore_service_storage/core/application.py +++ b/services/storage/src/simcore_service_storage/core/application.py @@ -89,7 +89,8 @@ def create_app(settings: ApplicationSettings) -> FastAPI: setup_rest_api_routes(app, API_VTAG) set_exception_handlers(app) - setup_redis(app) + if settings.STORAGE_WORKER_MODE: + setup_redis(app) setup_dsm(app) if settings.STORAGE_CLEANER_INTERVAL_S and not settings.STORAGE_WORKER_MODE: diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index 27d97a12ead4..95535efaa091 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -125,10 +125,6 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): ), ] - STORAGE_WORKER_MODE: Annotated[ - bool, Field(description="If True, run as a worker") - ] = False - STORAGE_WORKER_MODE: Annotated[ bool | None, Field(description="If True, run as a worker") ] = False From 8e5a6846a9b6d0af3a6901378fb979752203c051 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 3 Mar 2025 12:27:32 +0100 Subject: [PATCH 045/131] add enums --- .../src/simcore_service_storage/modules/celery/models.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/storage/src/simcore_service_storage/modules/celery/models.py b/services/storage/src/simcore_service_storage/modules/celery/models.py index 2f04c5b81329..ddae850de6a6 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/models.py +++ 
b/services/storage/src/simcore_service_storage/modules/celery/models.py @@ -24,6 +24,14 @@ class TaskState(StrEnum): _TASK_DONE = {TaskState.SUCCESS, TaskState.ERROR, TaskState.ABORTED} +class TaskState(StrEnum): + PENDING = auto() + STARTED = auto() + SUCCESS = auto() + FAILURE = auto() + ABORTED = auto() + + class TaskStatus(BaseModel): task_uuid: TaskUUID task_state: TaskState From ac63188f63ea6520972f73f11f9ea9768418ab63 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 3 Mar 2025 13:32:40 +0100 Subject: [PATCH 046/131] add redis on startup --- .../storage/src/simcore_service_storage/core/application.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/core/application.py b/services/storage/src/simcore_service_storage/core/application.py index dfa8372a334b..13082a00fb06 100644 --- a/services/storage/src/simcore_service_storage/core/application.py +++ b/services/storage/src/simcore_service_storage/core/application.py @@ -89,8 +89,7 @@ def create_app(settings: ApplicationSettings) -> FastAPI: setup_rest_api_routes(app, API_VTAG) set_exception_handlers(app) - if settings.STORAGE_WORKER_MODE: - setup_redis(app) + setup_redis(app) setup_dsm(app) if settings.STORAGE_CLEANER_INTERVAL_S and not settings.STORAGE_WORKER_MODE: From f9c3fdf152ac6644207de631df3445290efbb589 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 3 Mar 2025 13:46:57 +0100 Subject: [PATCH 047/131] add enum --- .../src/simcore_service_storage/modules/celery/models.py | 1 + .../src/simcore_service_storage/modules/celery/worker.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/models.py b/services/storage/src/simcore_service_storage/modules/celery/models.py index ddae850de6a6..dfa8148d70c2 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/models.py +++ b/services/storage/src/simcore_service_storage/modules/celery/models.py @@ -27,6 +27,7 @@ class TaskState(StrEnum): class TaskState(StrEnum): PENDING = auto() STARTED = auto() + PROGRESS = auto() SUCCESS = auto() FAILURE = auto() ABORTED = auto() diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker.py b/services/storage/src/simcore_service_storage/modules/celery/worker.py index 36456d887b5a..f2ff11225a6f 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker.py @@ -4,7 +4,7 @@ from models_library.progress_bar import ProgressReport from servicelib.logging_utils import log_context -from .models import TaskID +from .models import TaskID, TaskState _logger = logging.getLogger(__name__) From 09b37b42b4461d110f800483c62e800e123d8c10 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Mon, 3 Mar 2025 14:44:11 +0100 Subject: [PATCH 048/131] fix worker startup --- services/storage/docker/boot.sh | 2 +- .../simcore_service_storage/modules/celery/_utils.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/services/storage/docker/boot.sh b/services/storage/docker/boot.sh index 0fd1d2b4edcf..5641804fe650 100755 --- a/services/storage/docker/boot.sh +++ b/services/storage/docker/boot.sh @@ -59,7 +59,7 @@ else if [ "${STORAGE_WORKER_MODE}" = "true" ]; then exec celery \ --app=simcore_service_storage.modules.celery.worker_main:app \ - worker --pool=threads \ + worker \ --loglevel="${SERVER_LOG_LEVEL}" \ --hostname="${HOSTNAME}" \ 
--concurrency="${CELERY_CONCURRENCY}" diff --git a/services/storage/src/simcore_service_storage/modules/celery/_utils.py b/services/storage/src/simcore_service_storage/modules/celery/_utils.py index 134e663eabd4..37466adaab1a 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/_utils.py +++ b/services/storage/src/simcore_service_storage/modules/celery/_utils.py @@ -3,6 +3,8 @@ from celery import Celery from fastapi import FastAPI +from ...core.settings import ApplicationSettings +from ._common import create_app from .client import CeleryTaskQueueClient from .worker import CeleryTaskQueueWorker @@ -11,6 +13,12 @@ _EVENT_LOOP_KEY = "loop" +def create_celery_app_worker(settings: ApplicationSettings) -> Celery: + celery_app = create_app(settings) + celery_app.conf[_WORKER_KEY] = CeleryTaskQueueWorker(celery_app) + return celery_app + + def get_celery_app(fastapi: FastAPI) -> Celery: celery = fastapi.state.celery_app assert isinstance(celery, Celery) @@ -41,10 +49,6 @@ def get_celery_worker(celery_app: Celery) -> CeleryTaskQueueWorker: return worker -def set_celery_worker(celery_app: Celery, celery_worker: CeleryTaskQueueWorker) -> None: - celery_app.conf[_WORKER_KEY] = celery_worker - - def get_event_loop(celery_app: Celery) -> AbstractEventLoop: # nosec loop = celery_app.conf[_EVENT_LOOP_KEY] assert isinstance(loop, AbstractEventLoop) From d18d63e8b73e1f4b0b7db0b3ace0ffb0e3608317 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 4 Mar 2025 09:31:55 +0100 Subject: [PATCH 049/131] continue --- services/storage/docker/boot.sh | 2 +- .../src/simcore_service_storage/modules/celery/models.py | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/services/storage/docker/boot.sh b/services/storage/docker/boot.sh index 5641804fe650..0fd1d2b4edcf 100755 --- a/services/storage/docker/boot.sh +++ b/services/storage/docker/boot.sh @@ -59,7 +59,7 @@ else if [ "${STORAGE_WORKER_MODE}" = "true" ]; then exec celery \ --app=simcore_service_storage.modules.celery.worker_main:app \ - worker \ + worker --pool=threads \ --loglevel="${SERVER_LOG_LEVEL}" \ --hostname="${HOSTNAME}" \ --concurrency="${CELERY_CONCURRENCY}" diff --git a/services/storage/src/simcore_service_storage/modules/celery/models.py b/services/storage/src/simcore_service_storage/modules/celery/models.py index dfa8148d70c2..2f04c5b81329 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/models.py +++ b/services/storage/src/simcore_service_storage/modules/celery/models.py @@ -24,15 +24,6 @@ class TaskState(StrEnum): _TASK_DONE = {TaskState.SUCCESS, TaskState.ERROR, TaskState.ABORTED} -class TaskState(StrEnum): - PENDING = auto() - STARTED = auto() - PROGRESS = auto() - SUCCESS = auto() - FAILURE = auto() - ABORTED = auto() - - class TaskStatus(BaseModel): task_uuid: TaskUUID task_state: TaskState From 98731154ab6528a95478214408a063dd2396e640 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 4 Mar 2025 10:04:15 +0100 Subject: [PATCH 050/131] progress not nullable --- .../src/models_library/api_schemas_rpc_async_jobs/async_jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py index bb410add1253..953cd1f819cc 100644 --- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py +++ 
b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py @@ -11,7 +11,7 @@ class AsyncJobStatus(BaseModel): job_id: AsyncJobId - progress: ProgressReport | None + progress: ProgressReport done: bool From 4df9a9d14b41bed6bd72964d92bbbb717fca8da7 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 4 Mar 2025 10:30:30 +0100 Subject: [PATCH 051/131] removed for now --- .../src/models_library/api_schemas_webserver/storage.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index ba122471e61e..e8ce8f451c42 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -84,8 +84,6 @@ class StorageAsyncJobStatus(OutputSchema): job_id: AsyncJobId progress: ProgressReport done: bool - started: datetime - stopped: datetime | None links: AsyncJobLinks @classmethod @@ -96,8 +94,6 @@ def from_rpc_schema( job_id=async_job_rpc_status.job_id, progress=async_job_rpc_status.progress, done=async_job_rpc_status.done, - started=async_job_rpc_status.started, - stopped=async_job_rpc_status.stopped, links=AsyncJobLinks.from_job_id( app=app, job_id=f"{async_job_rpc_status.job_id}" ), From bb8ee4615e0a847b9b3a969383a190381b587782 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 4 Mar 2025 10:31:47 +0100 Subject: [PATCH 052/131] typecheck --- .../storage/src/simcore_service_storage/api/rpc/_async_jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 7a40f3c7589d..f901502dda8d 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -66,7 +66,7 @@ async def get_result( @router.expose() async def list_jobs( - app: FastAPI, filter_: str, job_id_data: AsyncJobNameData # TODO: implement filter + app: FastAPI, filter_: str, job_id_data: AsyncJobNameData ) -> list[AsyncJobGet]: assert app # nosec From 9484ccdd0e247d695c0594b143270dfa448b2ec8 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 4 Mar 2025 15:44:15 +0100 Subject: [PATCH 053/131] fix tests --- .../modules/celery/_utils.py | 55 ------------------- .../modules/celery/client.py | 4 ++ .../modules/celery/utils.py | 27 +++++++++ .../modules/celery/worker.py | 2 +- 4 files changed, 32 insertions(+), 56 deletions(-) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/_utils.py create mode 100644 services/storage/src/simcore_service_storage/modules/celery/utils.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/_utils.py b/services/storage/src/simcore_service_storage/modules/celery/_utils.py deleted file mode 100644 index 37466adaab1a..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/_utils.py +++ /dev/null @@ -1,55 +0,0 @@ -from asyncio import AbstractEventLoop - -from celery import Celery -from fastapi import FastAPI - -from ...core.settings import ApplicationSettings -from ._common import create_app -from .client import CeleryTaskQueueClient -from .worker import CeleryTaskQueueWorker - -_CLIENT_KEY = "client" -_WORKER_KEY = "worker" -_EVENT_LOOP_KEY = "loop" - - -def create_celery_app_worker(settings: 
ApplicationSettings) -> Celery: - celery_app = create_app(settings) - celery_app.conf[_WORKER_KEY] = CeleryTaskQueueWorker(celery_app) - return celery_app - - -def get_celery_app(fastapi: FastAPI) -> Celery: - celery = fastapi.state.celery_app - assert isinstance(celery, Celery) - return celery - - -def set_celery_app(fastapi: FastAPI, celery: Celery) -> None: - fastapi.state.celery_app = celery - - -def get_celery_client(fastapi_app: FastAPI) -> CeleryTaskQueueClient: - celery_app = get_celery_app(fastapi_app) - client = celery_app.conf[_CLIENT_KEY] - assert isinstance(client, CeleryTaskQueueClient) - return client - - -def set_celery_client( - fastapi_app: FastAPI, celery_client: CeleryTaskQueueClient -) -> None: - celery_app = get_celery_app(fastapi_app) - celery_app.conf[_CLIENT_KEY] = celery_client - - -def get_celery_worker(celery_app: Celery) -> CeleryTaskQueueWorker: - worker = celery_app.conf[_WORKER_KEY] - assert isinstance(worker, CeleryTaskQueueWorker) - return worker - - -def get_event_loop(celery_app: Celery) -> AbstractEventLoop: # nosec - loop = celery_app.conf[_EVENT_LOOP_KEY] - assert isinstance(loop, AbstractEventLoop) - return loop diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 300aadb66b2c..6a5e6c42aaef 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -103,6 +103,10 @@ def _get_state(self, task_context: TaskContext, task_uuid: TaskUUID) -> TaskStat task_id = _build_task_id(task_context, task_uuid) return _CELERY_STATES_MAPPING[self._celery_app.AsyncResult(task_id).state] + def _get_state(self, task_context: TaskContext, task_uuid: TaskUUID) -> TaskState: + task_id = _build_task_id(task_context, task_uuid) + return _CELERY_STATES_MAPPING[self._celery_app.AsyncResult(task_id).state] + @make_async() def get_task_status( self, task_context: TaskContext, task_uuid: TaskUUID diff --git a/services/storage/src/simcore_service_storage/modules/celery/utils.py b/services/storage/src/simcore_service_storage/modules/celery/utils.py new file mode 100644 index 000000000000..6eff8a9b081f --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/utils.py @@ -0,0 +1,27 @@ +from celery import Celery +from fastapi import FastAPI + +from .worker import CeleryTaskQueueWorker + +_WORKER_KEY = "celery_worker" +_FASTAPI_APP_KEY = "fastapi_app" + + +def get_celery_worker(celery_app: Celery) -> CeleryTaskQueueWorker: + worker = celery_app.conf[_WORKER_KEY] + assert isinstance(worker, CeleryTaskQueueWorker) + return worker + + +def get_fastapi_app(celery_app: Celery) -> FastAPI: + fastapi_app = celery_app.conf[_FASTAPI_APP_KEY] + assert isinstance(fastapi_app, FastAPI) + return fastapi_app + + +def set_celery_worker(celery_app: Celery, worker: CeleryTaskQueueWorker) -> None: + celery_app.conf[_WORKER_KEY] = worker + + +def set_fastapi_app(celery_app: Celery, fastapi_app: FastAPI) -> None: + celery_app.conf[_FASTAPI_APP_KEY] = fastapi_app diff --git a/services/storage/src/simcore_service_storage/modules/celery/worker.py b/services/storage/src/simcore_service_storage/modules/celery/worker.py index f2ff11225a6f..36456d887b5a 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/worker.py +++ b/services/storage/src/simcore_service_storage/modules/celery/worker.py @@ -4,7 +4,7 @@ from models_library.progress_bar import ProgressReport from 
servicelib.logging_utils import log_context -from .models import TaskID, TaskState +from .models import TaskID _logger = logging.getLogger(__name__) From 1369d8d3d790f0cefa3ded88a8040e5e3e0b2330 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 4 Mar 2025 21:26:56 +0100 Subject: [PATCH 054/131] fix tests --- .../modules/celery/tasks.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 services/storage/src/simcore_service_storage/modules/celery/tasks.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/tasks.py b/services/storage/src/simcore_service_storage/modules/celery/tasks.py new file mode 100644 index 000000000000..e5ca99aa771e --- /dev/null +++ b/services/storage/src/simcore_service_storage/modules/celery/tasks.py @@ -0,0 +1,27 @@ +import logging +import time + +from celery import Task +from models_library.progress_bar import ProgressReport +from models_library.projects_nodes_io import StorageFileID +from servicelib.logging_utils import log_context +from simcore_service_storage.modules.celery.utils import get_celery_worker + +_logger = logging.getLogger(__name__) + + +def export_data(task: Task, files: list[StorageFileID]): + for n, file in enumerate(files, start=1): + with log_context( + _logger, + logging.INFO, + msg=f"Exporting {file=} ({n}/{len(files)})", + ): + assert task.name + get_celery_worker(task.app).set_task_progress( + task_name=task.name, + task_id=task.request.id, + report=ProgressReport(actual_value=n / len(files) * 100), + ) + time.sleep(10) + return "done" From 7ec5e20b54a8867bf131096a3a7b2104248f6e8e Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 5 Mar 2025 10:48:35 +0100 Subject: [PATCH 055/131] progress --- .../api/v0/openapi.yaml | 3 --- .../simcore_service_webserver/storage/_rest.py | 17 +---------------- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 32037e6f5e3e..9cff91cc1247 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -14950,10 +14950,7 @@ components: - done - started - stopped - - links title: StorageAsyncJobStatus - Structure: - properties: key: anyOf: - type: string diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index 301ac5494d01..a81379207eec 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -554,22 +554,7 @@ class _PathParams(BaseModel): _req_ctx = RequestContext.model_validate(request) rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) - - async_job_rpc_status = await get_status( - rabbitmq_rpc_client=rabbitmq_rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - job_id=async_job_get.job_id, - job_id_data=AsyncJobNameData( - user_id=_req_ctx.user_id, product_name=_req_ctx.product_name - ), - ) - if not async_job_rpc_status.done: - return create_data_response( - async_job_rpc_status, - status=status.HTTP_404_NOT_FOUND, - ) - + async_job_get = parse_request_path_parameters_as(_PathParams, request) async_job_rpc_result = await get_result( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, From 
f7e16f28110084ed7378ba114f680555fc4027c9 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 6 Mar 2025 10:41:20 +0100 Subject: [PATCH 056/131] improve error handling --- .../simcore_service_storage/modules/celery/client.py | 10 ++++++---- .../simcore_service_storage/modules/celery/models.py | 4 ++-- .../storage/tests/unit/modules/celery/test_celery.py | 3 +++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 6a5e6c42aaef..3635372dadb0 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -1,16 +1,16 @@ import contextlib import logging -from typing import Any, Final +from typing import Any, Final, Type from uuid import uuid4 from celery import Celery # type: ignore[import-untyped] from celery.contrib.abortable import AbortableAsyncResult # type: ignore[import-untyped] from common_library.async_tools import make_async from models_library.progress_bar import ProgressReport -from pydantic import ValidationError +from pydantic import TypeAdapter, ValidationError from servicelib.logging_utils import log_context -from .models import TaskContext, TaskID, TaskState, TaskStatus, TaskUUID +from .models import TaskContext, TaskError, TaskID, TaskResult, TaskState, TaskStatus, TaskUUID _logger = logging.getLogger(__name__) @@ -79,7 +79,9 @@ def abort_task( # pylint: disable=R6301 @make_async() def get_task_result(self, task_context: TaskContext, task_uuid: TaskUUID) -> Any: task_id = _build_task_id(task_context, task_uuid) - return self._celery_app.AsyncResult(task_id).result + return TypeAdapter(TaskResult).validate_python( + self._celery_app.AsyncResult(task_id).result + ) def _get_progress_report( self, task_context: TaskContext, task_uuid: TaskUUID diff --git a/services/storage/src/simcore_service_storage/modules/celery/models.py b/services/storage/src/simcore_service_storage/modules/celery/models.py index 2f04c5b81329..c3566cc92313 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/models.py +++ b/services/storage/src/simcore_service_storage/modules/celery/models.py @@ -1,9 +1,9 @@ from enum import StrEnum, auto -from typing import Any, Final, Self, TypeAlias +from typing import Annotated, Any, Final, Self, TypeAlias from uuid import UUID from models_library.progress_bar import ProgressReport -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, Field, model_validator TaskContext: TypeAlias = dict[str, Any] TaskID: TypeAlias = str diff --git a/services/storage/tests/unit/modules/celery/test_celery.py b/services/storage/tests/unit/modules/celery/test_celery.py index 99c3cc34263a..35576af77b8a 100644 --- a/services/storage/tests/unit/modules/celery/test_celery.py +++ b/services/storage/tests/unit/modules/celery/test_celery.py @@ -104,6 +104,9 @@ async def test_sumitting_task_calling_async_function_results_with_success_state( status = await celery_client.get_task_status(task_context, task_uuid) assert status.task_state == TaskState.SUCCESS + assert ( + await celery_client.get_task_result(task_context, task_uuid) + ) == "archive.zip" assert ( await celery_client.get_task_status(task_context, task_uuid) ).task_state == TaskState.SUCCESS From 903e5b457f2e6b5932d37f19f270d087d0e6d82b Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 6 Mar 2025 10:44:34 +0100 Subject: [PATCH 
057/131] add task --- .../modules/celery/client.py | 4 ++-- .../modules/celery/tasks.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 3635372dadb0..53802caaf042 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -1,6 +1,6 @@ import contextlib import logging -from typing import Any, Final, Type +from typing import Any, Final from uuid import uuid4 from celery import Celery # type: ignore[import-untyped] @@ -10,7 +10,7 @@ from pydantic import TypeAdapter, ValidationError from servicelib.logging_utils import log_context -from .models import TaskContext, TaskError, TaskID, TaskResult, TaskState, TaskStatus, TaskUUID +from .models import TaskContext, TaskID, TaskResult, TaskState, TaskStatus, TaskUUID _logger = logging.getLogger(__name__) diff --git a/services/storage/src/simcore_service_storage/modules/celery/tasks.py b/services/storage/src/simcore_service_storage/modules/celery/tasks.py index e5ca99aa771e..3237dd5a587f 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/tasks.py +++ b/services/storage/src/simcore_service_storage/modules/celery/tasks.py @@ -1,16 +1,20 @@ import logging import time + from celery import Task +from common_library.errors_classes import OsparcErrorMixin from models_library.progress_bar import ProgressReport from models_library.projects_nodes_io import StorageFileID from servicelib.logging_utils import log_context -from simcore_service_storage.modules.celery.utils import get_celery_worker + +from .utils import get_celery_worker _logger = logging.getLogger(__name__) def export_data(task: Task, files: list[StorageFileID]): + _logger.info("Exporting files: %s", files) for n, file in enumerate(files, start=1): with log_context( _logger, @@ -25,3 +29,12 @@ def export_data(task: Task, files: list[StorageFileID]): ) time.sleep(10) return "done" + + +class MyError(OsparcErrorMixin, Exception): + msg_template = "Something strange happened: {msg}" + + +def export_data_with_error(task: Task, files: list[StorageFileID]): + msg = "BOOM!" 
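+    # deliberate failure: lets tests exercise how task errors propagate to the client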
+ raise MyError(msg=msg) From 89f1d98a7eea7732687fd46081542c419ed457a8 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 6 Mar 2025 11:10:55 +0100 Subject: [PATCH 058/131] update --- .../src/simcore_service_storage/modules/celery/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 53802caaf042..2ffaf3e6048c 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -79,9 +79,7 @@ def abort_task( # pylint: disable=R6301 @make_async() def get_task_result(self, task_context: TaskContext, task_uuid: TaskUUID) -> Any: task_id = _build_task_id(task_context, task_uuid) - return TypeAdapter(TaskResult).validate_python( - self._celery_app.AsyncResult(task_id).result - ) + return self._celery_app.AsyncResult(task_id).result def _get_progress_report( self, task_context: TaskContext, task_uuid: TaskUUID From 8ef8118c79bcf4c1836e469f83ead773abcf163f Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 6 Mar 2025 11:46:05 +0100 Subject: [PATCH 059/131] fix import --- .../src/simcore_service_storage/modules/celery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 2ffaf3e6048c..6a5e6c42aaef 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -7,10 +7,10 @@ from celery.contrib.abortable import AbortableAsyncResult # type: ignore[import-untyped] from common_library.async_tools import make_async from models_library.progress_bar import ProgressReport -from pydantic import TypeAdapter, ValidationError +from pydantic import ValidationError from servicelib.logging_utils import log_context -from .models import TaskContext, TaskID, TaskResult, TaskState, TaskStatus, TaskUUID +from .models import TaskContext, TaskID, TaskState, TaskStatus, TaskUUID _logger = logging.getLogger(__name__) From 9ac1d6e2060854be007594cf1ea7c43ae26d504e Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 6 Mar 2025 14:05:25 +0100 Subject: [PATCH 060/131] update --- .../src/simcore_service_storage/modules/celery/client.py | 4 ---- services/storage/tests/unit/modules/celery/test_celery.py | 3 --- 2 files changed, 7 deletions(-) diff --git a/services/storage/src/simcore_service_storage/modules/celery/client.py b/services/storage/src/simcore_service_storage/modules/celery/client.py index 6a5e6c42aaef..300aadb66b2c 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/client.py +++ b/services/storage/src/simcore_service_storage/modules/celery/client.py @@ -103,10 +103,6 @@ def _get_state(self, task_context: TaskContext, task_uuid: TaskUUID) -> TaskStat task_id = _build_task_id(task_context, task_uuid) return _CELERY_STATES_MAPPING[self._celery_app.AsyncResult(task_id).state] - def _get_state(self, task_context: TaskContext, task_uuid: TaskUUID) -> TaskState: - task_id = _build_task_id(task_context, task_uuid) - return _CELERY_STATES_MAPPING[self._celery_app.AsyncResult(task_id).state] - @make_async() def get_task_status( self, task_context: TaskContext, task_uuid: TaskUUID diff --git a/services/storage/tests/unit/modules/celery/test_celery.py 
b/services/storage/tests/unit/modules/celery/test_celery.py
index 35576af77b8a..99c3cc34263a 100644
--- a/services/storage/tests/unit/modules/celery/test_celery.py
+++ b/services/storage/tests/unit/modules/celery/test_celery.py
@@ -104,9 +104,6 @@ async def test_sumitting_task_calling_async_function_results_with_success_state(
         status = await celery_client.get_task_status(task_context, task_uuid)
         assert status.task_state == TaskState.SUCCESS

-    assert (
-        await celery_client.get_task_result(task_context, task_uuid)
-    ) == "archive.zip"
     assert (
         await celery_client.get_task_status(task_context, task_uuid)
     ).task_state == TaskState.SUCCESS

From 9277714db18a81b216abf897a46da89519dfb473 Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Fri, 7 Mar 2025 14:14:45 +0100
Subject: [PATCH 061/131] add proper exception handling

---
 .../api_schemas_rpc_async_jobs/async_jobs.py  |  3 +-
 .../api_schemas_rpc_async_jobs/exceptions.py  | 18 +++-
 .../api/rpc/_async_jobs.py                    | 83 ++++++++++++++-----
 .../with_dbs/01/storage/test_storage_rpc.py   | 10 ++-
 4 files changed, 85 insertions(+), 29 deletions(-)

diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py
index 953cd1f819cc..3fb24ae952dc 100644
--- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py
+++ b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/async_jobs.py
@@ -16,8 +16,7 @@ class AsyncJobStatus(BaseModel):

 class AsyncJobResult(BaseModel):
-    result: Any | None
-    error: Any | None
+    result: Any

 class AsyncJobGet(BaseModel):

diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py
index 5902bf317387..7d2509c24a8c 100644
--- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py
+++ b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py
@@ -5,9 +5,23 @@ class BaseAsyncjobRpcError(OsparcErrorMixin, RuntimeError):
     pass

+class JobSchedulerError(BaseAsyncjobRpcError):
+    msg_template: str = "Celery exception: {exc}"
+
+
 class StatusError(BaseAsyncjobRpcError):
     msg_template: str = "Could not get status of job {job_id}"

-class ResultError(BaseAsyncjobRpcError):
-    msg_template: str = "Could not get results of job {job_id}"
+class JobNotDoneError(BaseAsyncjobRpcError):
+    msg_template: str = "Job {job_id} not done"
+
+
+class JobAbortedError(BaseAsyncjobRpcError):
+    msg_template: str = "Job {job_id} aborted"
+
+
+class JobError(BaseAsyncjobRpcError):
+    msg_template: str = (
+        "Job {job_id} failed with exception type {exc_type} and message {exc_msg}"
+    )

diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py
index f901502dda8d..0f359c15d396 100644
--- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py
+++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py
@@ -1,5 +1,8 @@
 # pylint: disable=unused-argument

+import logging
+
+from celery.exceptions import CeleryError
 from fastapi import FastAPI
 from models_library.api_schemas_rpc_async_jobs.async_jobs import (
     AsyncJobAbort,
     AsyncJobGet,
     AsyncJobId,
     AsyncJobNameData,
@@ -10,14 +13,18 @@
     AsyncJobStatus,
 )
 from models_library.api_schemas_rpc_async_jobs.exceptions import (
-    ResultError,
-    StatusError,
+    JobAbortedError,
+    JobError,
+    JobNotDoneError,
+
JobSchedulerError, ) +from servicelib.logging_utils import log_catch from servicelib.rabbitmq import RPCRouter from ...modules.celery import get_celery_client -from ...modules.celery.models import TaskStatus +from ...modules.celery.models import TaskError, TaskState, TaskStatus +_logger = logging.getLogger(__name__) router = RPCRouter() @@ -30,17 +37,21 @@ async def abort( return AsyncJobAbort(result=True, job_id=job_id) -@router.expose(reraise_if_error_type=(StatusError,)) +@router.expose(reraise_if_error_type=(JobSchedulerError,)) async def get_status( app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData ) -> AsyncJobStatus: assert app # nosec assert job_id_data # nosec - task_status: TaskStatus = await get_celery_client(app).get_task_status( - task_context=job_id_data.model_dump(), - task_uuid=job_id, - ) + try: + task_status: TaskStatus = await get_celery_client(app).get_task_status( + task_context=job_id_data.model_dump(), + task_uuid=job_id, + ) + except CeleryError as exc: + raise JobSchedulerError(exc=f"{exc}") from exc + return AsyncJobStatus( job_id=job_id, progress=task_status.progress_report, @@ -48,7 +59,14 @@ async def get_status( ) -@router.expose(reraise_if_error_type=(ResultError,)) +@router.expose( + reraise_if_error_type=( + JobError, + JobNotDoneError, + JobAbortedError, + JobSchedulerError, + ) +) async def get_result( app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData ) -> AsyncJobResult: @@ -56,22 +74,45 @@ async def get_result( assert job_id # nosec assert job_id_data # nosec - result = await get_celery_client(app).get_task_result( - task_context=job_id_data.model_dump(), - task_uuid=job_id, - ) - - return AsyncJobResult(result=result, error=None) - - -@router.expose() + try: + status = await get_celery_client(app).get_task_status( + task_context=job_id_data.model_dump(), + task_uuid=job_id, + ) + if not status.is_done: + raise JobNotDoneError(job_id=job_id) + result = await get_celery_client(app).get_task_result( + task_context=job_id_data.model_dump(), + task_uuid=job_id, + ) + except CeleryError as exc: + raise JobSchedulerError(exc=f"{exc}") + + if status.task_state == TaskState.ABORTED: + raise JobAbortedError(job_id=job_id) + elif status.task_state == TaskState.ERROR: + exc_type = "" + exc_msg = "" + with log_catch(logger=_logger, reraise=False): + task_error = TaskError.model_validate_json(result) + exc_type = task_error.exc_type + exc_msg = task_error.exc_msg + raise JobError(job_id=job_id, exc_type=exc_type, exc_msg=exc_msg) + + return AsyncJobResult(result=result) + + +@router.expose(reraise_if_error_type=(JobSchedulerError,)) async def list_jobs( app: FastAPI, filter_: str, job_id_data: AsyncJobNameData ) -> list[AsyncJobGet]: assert app # nosec - task_uuids = await get_celery_client(app).get_task_uuids( - task_context=job_id_data.model_dump(), - ) + try: + task_uuids = await get_celery_client(app).get_task_uuids( + task_context=job_id_data.model_dump(), + ) + except CeleryError as exc: + raise JobSchedulerError(exc=f"{exc}") from exc return [AsyncJobGet(job_id=task_uuid) for task_uuid in task_uuids] diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 910836c02459..7c6061fff59e 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -1,8 +1,10 @@ +from collections.abc import Callable + # pylint: 
disable=redefined-outer-name # pylint: disable=unused-argument from datetime import datetime from pathlib import Path -from typing import Any, Callable +from typing import Any import pytest from aiohttp.test_utils import TestClient @@ -15,7 +17,7 @@ AsyncJobStatus, ) from models_library.api_schemas_rpc_async_jobs.exceptions import ( - ResultError, + JobError, StatusError, ) from models_library.api_schemas_storage.data_export_async_jobs import ( @@ -169,7 +171,7 @@ async def test_abort_async_jobs( "backend_result_or_exception", [ AsyncJobResult(result=None, error=_faker.text()), - ResultError(job_id=_faker.uuid4()), + JobError(job_id=_faker.uuid4()), ], ids=lambda x: type(x).__name__, ) @@ -188,7 +190,7 @@ async def test_get_async_job_result( if isinstance(backend_result_or_exception, AsyncJobResult): assert response.status == status.HTTP_200_OK - elif isinstance(backend_result_or_exception, ResultError): + elif isinstance(backend_result_or_exception, JobError): assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR else: pytest.fail("Incorrectly configured test") From a5d6827080ad59f0bf5fc3ea068e21c16cb8bd04 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 14:16:46 +0100 Subject: [PATCH 062/131] fix start task endpoint --- .../api/rpc/_data_export.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py index 424fbc2f0d0d..5f031e904bda 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py @@ -1,3 +1,4 @@ +from celery.exceptions import CeleryError from fastapi import FastAPI from models_library.api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobGet, @@ -10,6 +11,7 @@ InvalidFileIdentifierError, ) from servicelib.rabbitmq import RPCRouter +from simcore_service_storage.api.rpc._async_jobs import JobSchedulerError from ...datcore_dsm import DatCoreDataManager from ...dsm import get_dsm_provider @@ -26,6 +28,7 @@ InvalidFileIdentifierError, AccessRightError, DataExportError, + JobSchedulerError, ) ) async def start_data_export( @@ -51,12 +54,14 @@ async def start_data_export( location_id=data_export_start.location_id, ) from err - task_uuid = await get_celery_client(app).send_task( - "export_data_with_error", - task_context=job_id_data.model_dump(), - files=data_export_start.file_and_folder_ids, # ANE: adapt here your signature - ) - + try: + task_uuid = await get_celery_client(app).send_task( + "export_data_with_error", + task_context=job_id_data.model_dump(), + files=data_export_start.file_and_folder_ids, # ANE: adapt here your signature + ) + except CeleryError as exc: + raise JobSchedulerError(exc=f"{exc}") from exc return AsyncJobGet( job_id=task_uuid, ) From 1f558dc4f23089f2440702688cb4348ea70cfa9d Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 15:26:26 +0100 Subject: [PATCH 063/131] start enhancing tests --- .../data_export_async_jobs.py | 8 +-- .../rpc_interfaces/storage/data_export.py | 22 +++++++ .../api/rpc/_data_export.py | 2 - .../storage/tests/unit/test_db_data_export.py | 64 ++++++++++++++----- 4 files changed, 74 insertions(+), 22 deletions(-) create mode 100644 packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py diff --git a/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py 
b/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py
index a3db991452f9..44c796ad3e72 100644
--- a/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py
+++ b/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py
@@ -26,8 +26,6 @@ class InvalidFileIdentifierError(StorageRpcBaseError):

 class AccessRightError(StorageRpcBaseError):
-    msg_template: str = "User {user_id} does not have access to file {file_id} with location {location_id}"
-
-
-class DataExportError(StorageRpcBaseError):
-    msg_template: str = "Could not complete data export job with id {job_id}"
+    msg_template: str = (
+        "User {user_id} does not have access to file {file_id} with location {location_id}"
+    )

diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py
new file mode 100644
index 000000000000..ac9195d241cf
--- /dev/null
+++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py
@@ -0,0 +1,22 @@
+from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobGet
+from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobNameData
+from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE
+from models_library.rabbitmq_basic_types import RPCMethodName
+from pydantic import TypeAdapter
+
+from ... import RabbitMQRPCClient
+from ..async_jobs.async_jobs import submit_job
+
+_RPC_METHOD_NAME_ADAPTER = TypeAdapter(RPCMethodName)
+
+
+async def start_data_export(
+    rabbitmq_rpc_client: RabbitMQRPCClient, *, job_id_data: AsyncJobNameData, **kwargs
+) -> AsyncJobGet:
+    return await submit_job(
+        rabbitmq_rpc_client,
+        rpc_namespace=STORAGE_RPC_NAMESPACE,
+        method_name=_RPC_METHOD_NAME_ADAPTER.validate_python("start_data_export"),
+        job_id_data=job_id_data,
+        **kwargs,
+    )

diff --git a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py
index 5f031e904bda..b290ab64e5d5 100644
--- a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py
+++ b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py
@@ -6,7 +6,6 @@
 )
 from models_library.api_schemas_storage.data_export_async_jobs import (
     AccessRightError,
-    DataExportError,
     DataExportTaskStartInput,
     InvalidFileIdentifierError,
 )
@@ -27,7 +26,6 @@
     reraise_if_error_type=(
         InvalidFileIdentifierError,
         AccessRightError,
-        DataExportError,
         JobSchedulerError,
     )
 )

diff --git a/services/storage/tests/unit/test_db_data_export.py b/services/storage/tests/unit/test_db_data_export.py
index 0bf9db1a2edb..632c3bbb7c7a 100644
--- a/services/storage/tests/unit/test_db_data_export.py
+++ b/services/storage/tests/unit/test_db_data_export.py
@@ -2,12 +2,15 @@
 # pylint: disable=W0613
 # pylint: disable=R6301
 from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Literal, NamedTuple
+from uuid import UUID

 import pytest
 from faker import Faker
 from fastapi import FastAPI
+from models_library.api_schemas_long_running_tasks.tasks import TaskResult
 from models_library.api_schemas_rpc_async_jobs.async_jobs import (
     AsyncJobAbort,
     AsyncJobGet,
     AsyncJobId,
     AsyncJobResult,
@@ -19,7 +22,6 @@
 from models_library.api_schemas_storage.data_export_async_jobs import (
     DataExportTaskStartInput,
 )
-from models_library.progress_bar import ProgressReport
 from
models_library.projects_nodes_io import NodeID, SimcoreS3FileID from models_library.users import UserID from pydantic import ByteSize, TypeAdapter @@ -29,6 +31,7 @@ from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.rabbitmq import RabbitMQRPCClient from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs +from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from settings_library.rabbit import RabbitSettings from simcore_service_storage.api.rpc._async_jobs import AsyncJobNameData, TaskStatus from simcore_service_storage.api.rpc._data_export import AccessRightError @@ -56,27 +59,50 @@ async def mock_rabbit_setup(mocker: MockerFixture): pass +@dataclass class _MockCeleryClient: + send_task_object: UUID | Exception | None = None + get_task_status_object: TaskStatus | Exception | None = None + get_result_object: TaskResult | Exception | None = None + get_task_uuids_object: set[UUID] | Exception | None = None + async def send_task(self, *args, **kwargs) -> TaskUUID: - return _faker.uuid4() + assert self.send_task_object is not None + if isinstance(self.send_task_object, Exception): + raise self.send_task_object + return self.send_task_object async def get_task_status(self, *args, **kwargs) -> TaskStatus: - return TaskStatus( - task_uuid=_faker.uuid4(), - task_state=TaskState.RUNNING, - progress_report=ProgressReport(actual_value=42.0), - ) + assert self.get_task_status_object is not None + if isinstance(self.get_task_status_object, Exception): + raise self.get_task_status_object + return self.get_task_status_object - async def get_task_result(self, *args, **kwargs) -> Any: - return {} + async def get_result(self, *args, **kwargs) -> Any: + assert self.get_result_object is not None + if isinstance(self.get_result_object, Exception): + raise self.get_result_object + return self.get_result_object async def get_task_uuids(self, *args, **kwargs) -> set[TaskUUID]: - return {_faker.uuid4()} + assert self.get_task_uuids_object is not None + if isinstance(self.get_task_uuids_object, Exception): + raise self.get_task_uuids_object + return self.get_task_uuids_object @pytest.fixture -async def mock_celery_client(mocker: MockerFixture) -> MockerFixture: - _celery_client = _MockCeleryClient() +async def mock_celery_client( + mocker: MockerFixture, + request: pytest.FixtureRequest, +) -> MockerFixture: + params = request.param if hasattr(request, "param") else {} + _celery_client = _MockCeleryClient( + send_task_object=params.get("send_task_object", None), + get_task_status_object=params.get("get_task_status_object", None), + get_result_object=params.get("get_result_object", None), + get_task_uuids_object=params.get("get_task_uuids_object", None), + ) mocker.patch( "simcore_service_storage.api.rpc._async_jobs.get_celery_client", return_value=_celery_client, @@ -153,6 +179,13 @@ class UserWithFile(NamedTuple): ], ids=str, ) +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"send_task_object": TaskUUID(_faker.uuid4())}, + ], + indirect=True, +) async def test_start_data_export_success( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, @@ -181,10 +214,8 @@ async def test_start_data_export_success( else: pytest.fail("invalid parameter: to_check") - result = await async_jobs.submit_job( + result = await start_data_export( rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - method_name="start_data_export", job_id_data=AsyncJobNameData(user_id=user_id, product_name="osparc"), data_export_start=DataExportTaskStartInput( 
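+                # location_id 0 is assumed here to be the built-in simcore S3 storage location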
location_id=0, @@ -252,7 +283,10 @@ async def test_get_data_export_status( async def test_get_data_export_result( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, + mocker: MockerFixture, ): + mocker.patch("simcore_service_storage.api.rpc._async_jobs") + _job_id = AsyncJobId(_faker.uuid4()) result = await async_jobs.get_result( rpc_client, From 66bb3873146573ed40861b922e08f38489d25c30 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 15:57:26 +0100 Subject: [PATCH 064/131] add test in case of scheduler error --- .../storage/tests/unit/test_db_data_export.py | 62 ++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/services/storage/tests/unit/test_db_data_export.py b/services/storage/tests/unit/test_db_data_export.py index 632c3bbb7c7a..45a84245f607 100644 --- a/services/storage/tests/unit/test_db_data_export.py +++ b/services/storage/tests/unit/test_db_data_export.py @@ -8,6 +8,7 @@ from uuid import UUID import pytest +from celery.exceptions import CeleryError from faker import Faker from fastapi import FastAPI from models_library.api_schemas_long_running_tasks.tasks import TaskResult @@ -33,7 +34,11 @@ from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from settings_library.rabbit import RabbitSettings -from simcore_service_storage.api.rpc._async_jobs import AsyncJobNameData, TaskStatus +from simcore_service_storage.api.rpc._async_jobs import ( + AsyncJobNameData, + JobSchedulerError, + TaskStatus, +) from simcore_service_storage.api.rpc._data_export import AccessRightError from simcore_service_storage.core.settings import ApplicationSettings from simcore_service_storage.modules.celery.client import TaskUUID @@ -225,7 +230,60 @@ async def test_start_data_export_success( assert isinstance(result, AsyncJobGet) -async def test_start_data_export_fail( +@pytest.mark.parametrize( + "project_params", + [ + ProjectWithFilesParams( + num_nodes=1, + allowed_file_sizes=(TypeAdapter(ByteSize).validate_python("1b"),), + workspace_files_count=10, + ), + ], + ids=str, +) +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"send_task_object": CeleryError("error")}, + ], + indirect=True, +) +async def test_start_data_export_scheduler_error( + rpc_client: RabbitMQRPCClient, + mock_celery_client: MockerFixture, + with_random_project_with_files: tuple[ + dict[str, Any], + dict[NodeID, dict[SimcoreS3FileID, FileIDDict]], + ], + user_id: UserID, +): + + _, list_of_files = with_random_project_with_files + workspace_files = [ + p for p in list(list_of_files.values())[0].keys() if "/workspace/" in p + ] + assert len(workspace_files) > 0 + file_or_folder_id = workspace_files[0] + + with pytest.raises(JobSchedulerError): + _ = await start_data_export( + rpc_client, + job_id_data=AsyncJobNameData(user_id=user_id, product_name="osparc"), + data_export_start=DataExportTaskStartInput( + location_id=0, + file_and_folder_ids=[file_or_folder_id], + ), + ) + + +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"send_task_object": TaskUUID(_faker.uuid4())}, + ], + indirect=True, +) +async def test_start_data_export_access_error( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, user_id: UserID, From c46bc0a8356efb57f6b6fa07a54a7d61d60c42a8 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 16:17:07 +0100 Subject: [PATCH 065/131] add tests for get_export status --- .../rpc_interfaces/async_jobs/async_jobs.py | 7 +-- 
.../api/rpc/_async_jobs.py | 15 ++++--- .../storage/tests/unit/test_db_data_export.py | 45 +++++++++++++++++-- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py index a72463507735..239ab2bb75e2 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py @@ -1,7 +1,6 @@ from typing import Final from models_library.api_schemas_rpc_async_jobs.async_jobs import ( - AsyncJobAbort, AsyncJobGet, AsyncJobId, AsyncJobNameData, @@ -24,16 +23,14 @@ async def abort( rpc_namespace: RPCNamespace, job_id: AsyncJobId, job_id_data: AsyncJobNameData -) -> AsyncJobAbort: - result = await rabbitmq_rpc_client.request( +) -> None: + await rabbitmq_rpc_client.request( rpc_namespace, _RPC_METHOD_NAME_ADAPTER.validate_python("abort"), job_id=job_id, job_id_data=job_id_data, timeout_s=_DEFAULT_TIMEOUT_S, ) - assert isinstance(result, AsyncJobAbort) - return result async def get_status( diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 0f359c15d396..1e2d16e2b3c5 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -5,7 +5,6 @@ from celery.exceptions import CeleryError from fastapi import FastAPI from models_library.api_schemas_rpc_async_jobs.async_jobs import ( - AsyncJobAbort, AsyncJobGet, AsyncJobId, AsyncJobNameData, @@ -28,13 +27,17 @@ router = RPCRouter() -@router.expose() -async def abort( - app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData -) -> AsyncJobAbort: +@router.expose(reraise_if_error_type=(JobSchedulerError,)) +async def abort(app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData): assert app # nosec assert job_id_data # nosec - return AsyncJobAbort(result=True, job_id=job_id) + try: + await get_celery_client(app).abort_task( + task_context=job_id_data.model_dump(), + task_uuid=job_id, + ) + except CeleryError as exc: + raise JobSchedulerError(exc=f"{exc}") from exc @router.expose(reraise_if_error_type=(JobSchedulerError,)) diff --git a/services/storage/tests/unit/test_db_data_export.py b/services/storage/tests/unit/test_db_data_export.py index 45a84245f607..4593248a8413 100644 --- a/services/storage/tests/unit/test_db_data_export.py +++ b/services/storage/tests/unit/test_db_data_export.py @@ -13,7 +13,6 @@ from fastapi import FastAPI from models_library.api_schemas_long_running_tasks.tasks import TaskResult from models_library.api_schemas_rpc_async_jobs.async_jobs import ( - AsyncJobAbort, AsyncJobGet, AsyncJobId, AsyncJobResult, @@ -70,6 +69,7 @@ class _MockCeleryClient: get_task_status_object: TaskStatus | Exception | None = None get_result_object: TaskResult | Exception | None = None get_task_uuids_object: set[UUID] | Exception | None = None + abort_task_object: Exception | None = None async def send_task(self, *args, **kwargs) -> TaskUUID: assert self.send_task_object is not None @@ -95,6 +95,12 @@ async def get_task_uuids(self, *args, **kwargs) -> set[TaskUUID]: raise self.get_task_uuids_object return self.get_task_uuids_object + async def abort_task(self, *args, **kwargs) -> None: + if isinstance(self.abort_task_object, Exception): + 
raise self.abort_task_object + else: + return self.abort_task_object + @pytest.fixture async def mock_celery_client( @@ -107,6 +113,7 @@ async def mock_celery_client( get_task_status_object=params.get("get_task_status_object", None), get_result_object=params.get("get_result_object", None), get_task_uuids_object=params.get("get_task_uuids_object", None), + abort_task_object=params.get("abort_task_object", None), ) mocker.patch( "simcore_service_storage.api.rpc._async_jobs.get_celery_client", @@ -304,12 +311,19 @@ async def test_start_data_export_access_error( ) +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"abort_task_object": None}, + ], + indirect=True, +) async def test_abort_data_export( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, ): _job_id = AsyncJobId(_faker.uuid4()) - result = await async_jobs.abort( + await async_jobs.abort( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id_data=AsyncJobNameData( @@ -317,8 +331,31 @@ async def test_abort_data_export( ), job_id=_job_id, ) - assert isinstance(result, AsyncJobAbort) - assert result.job_id == _job_id + print("something") + + +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"abort_task_object": CeleryError("error")}, + ], + indirect=True, +) +async def test_abort_data_export_scheduler_error( + rpc_client: RabbitMQRPCClient, + mock_celery_client: MockerFixture, +): + _job_id = AsyncJobId(_faker.uuid4()) + with pytest.raises(JobSchedulerError): + _ = await async_jobs.abort( + rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id_data=AsyncJobNameData( + user_id=_faker.pyint(min_value=1, max_value=100), product_name="osparc" + ), + job_id=_job_id, + ) + print("something") async def test_get_data_export_status( From 8620972b43fd64be85476937b776131ed196974e Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 16:17:25 +0100 Subject: [PATCH 066/131] add tests for get_export status --- services/storage/tests/unit/test_db_data_export.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/storage/tests/unit/test_db_data_export.py b/services/storage/tests/unit/test_db_data_export.py index 4593248a8413..504823759032 100644 --- a/services/storage/tests/unit/test_db_data_export.py +++ b/services/storage/tests/unit/test_db_data_export.py @@ -331,7 +331,6 @@ async def test_abort_data_export( ), job_id=_job_id, ) - print("something") @pytest.mark.parametrize( @@ -355,7 +354,6 @@ async def test_abort_data_export_scheduler_error( ), job_id=_job_id, ) - print("something") async def test_get_data_export_status( From ba809d8a8ad2b1ab05d24d68c1667662365b8f28 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 16:28:36 +0100 Subject: [PATCH 067/131] add test for get_status method --- .../storage/tests/unit/test_db_data_export.py | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/services/storage/tests/unit/test_db_data_export.py b/services/storage/tests/unit/test_db_data_export.py index 504823759032..284ab25d71b5 100644 --- a/services/storage/tests/unit/test_db_data_export.py +++ b/services/storage/tests/unit/test_db_data_export.py @@ -22,6 +22,7 @@ from models_library.api_schemas_storage.data_export_async_jobs import ( DataExportTaskStartInput, ) +from models_library.progress_bar import ProgressReport from models_library.projects_nodes_io import NodeID, SimcoreS3FileID from models_library.users import UserID from pydantic import ByteSize, TypeAdapter @@ -42,7 +43,6 @@ from simcore_service_storage.core.settings import 
ApplicationSettings from simcore_service_storage.modules.celery.client import TaskUUID from simcore_service_storage.modules.celery.models import TaskState -from simcore_service_storage.simcore_s3_dsm import SimcoreS3DataManager pytest_plugins = [ "pytest_simcore.rabbit_service", @@ -356,6 +356,19 @@ async def test_abort_data_export_scheduler_error( ) +@pytest.mark.parametrize( + "mock_celery_client", + [ + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.RUNNING, + progress_report=ProgressReport(actual_value=0), + ) + }, + ], + indirect=True, +) async def test_get_data_export_status( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, @@ -373,6 +386,29 @@ async def test_get_data_export_status( assert result.job_id == _job_id +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"get_task_status_object": CeleryError("error")}, + ], + indirect=True, +) +async def test_get_data_export_status_scheduler_error( + rpc_client: RabbitMQRPCClient, + mock_celery_client: MockerFixture, +): + _job_id = AsyncJobId(_faker.uuid4()) + with pytest.raises(JobSchedulerError): + _ = await async_jobs.get_status( + rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id=_job_id, + job_id_data=AsyncJobNameData( + user_id=_faker.pyint(min_value=1, max_value=100), product_name="osparc" + ), + ) + + async def test_get_data_export_result( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, From 50a7bf1f8764a867d387d08397848007d79ca4cb Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 7 Mar 2025 16:34:03 +0100 Subject: [PATCH 068/131] remove db from filename --- .../tests/unit/{test_db_data_export.py => test_data_export.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename services/storage/tests/unit/{test_db_data_export.py => test_data_export.py} (100%) diff --git a/services/storage/tests/unit/test_db_data_export.py b/services/storage/tests/unit/test_data_export.py similarity index 100% rename from services/storage/tests/unit/test_db_data_export.py rename to services/storage/tests/unit/test_data_export.py From a1e094789ab3881072f79e036248795bbd4f37fb Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 09:35:32 +0100 Subject: [PATCH 069/131] cover all exception cases in result error endpoint --- .../storage/tests/unit/test_data_export.py | 128 +++++++++++++++--- 1 file changed, 110 insertions(+), 18 deletions(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 284ab25d71b5..3d1258e75a92 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -36,6 +36,9 @@ from settings_library.rabbit import RabbitSettings from simcore_service_storage.api.rpc._async_jobs import ( AsyncJobNameData, + JobAbortedError, + JobError, + JobNotDoneError, JobSchedulerError, TaskStatus, ) @@ -67,7 +70,7 @@ async def mock_rabbit_setup(mocker: MockerFixture): class _MockCeleryClient: send_task_object: UUID | Exception | None = None get_task_status_object: TaskStatus | Exception | None = None - get_result_object: TaskResult | Exception | None = None + get_task_result_object: TaskResult | Exception | None = None get_task_uuids_object: set[UUID] | Exception | None = None abort_task_object: Exception | None = None @@ -83,11 +86,11 @@ async def get_task_status(self, *args, **kwargs) -> TaskStatus: raise self.get_task_status_object return self.get_task_status_object - async def get_result(self, *args, 
**kwargs) -> Any: - assert self.get_result_object is not None - if isinstance(self.get_result_object, Exception): - raise self.get_result_object - return self.get_result_object + async def get_task_result(self, *args, **kwargs) -> Any: + assert self.get_task_result_object is not None + if isinstance(self.get_task_result_object, Exception): + raise self.get_task_result_object + return self.get_task_result_object async def get_task_uuids(self, *args, **kwargs) -> set[TaskUUID]: assert self.get_task_uuids_object is not None @@ -106,12 +109,12 @@ async def abort_task(self, *args, **kwargs) -> None: async def mock_celery_client( mocker: MockerFixture, request: pytest.FixtureRequest, -) -> MockerFixture: +) -> _MockCeleryClient: params = request.param if hasattr(request, "param") else {} _celery_client = _MockCeleryClient( send_task_object=params.get("send_task_object", None), get_task_status_object=params.get("get_task_status_object", None), - get_result_object=params.get("get_result_object", None), + get_task_result_object=params.get("get_task_result_object", None), get_task_uuids_object=params.get("get_task_uuids_object", None), abort_task_object=params.get("abort_task_object", None), ) @@ -123,7 +126,7 @@ async def mock_celery_client( "simcore_service_storage.api.rpc._data_export.get_celery_client", return_value=_celery_client, ) - return mocker + return _celery_client @pytest.fixture @@ -200,7 +203,7 @@ class UserWithFile(NamedTuple): ) async def test_start_data_export_success( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, with_random_project_with_files: tuple[ dict[str, Any], dict[NodeID, dict[SimcoreS3FileID, FileIDDict]], @@ -257,7 +260,7 @@ async def test_start_data_export_success( ) async def test_start_data_export_scheduler_error( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, with_random_project_with_files: tuple[ dict[str, Any], dict[NodeID, dict[SimcoreS3FileID, FileIDDict]], @@ -292,7 +295,7 @@ async def test_start_data_export_scheduler_error( ) async def test_start_data_export_access_error( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, user_id: UserID, faker: Faker, ): @@ -320,7 +323,7 @@ async def test_start_data_export_access_error( ) async def test_abort_data_export( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, ): _job_id = AsyncJobId(_faker.uuid4()) await async_jobs.abort( @@ -342,7 +345,7 @@ async def test_abort_data_export( ) async def test_abort_data_export_scheduler_error( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, ): _job_id = AsyncJobId(_faker.uuid4()) with pytest.raises(JobSchedulerError): @@ -371,7 +374,7 @@ async def test_abort_data_export_scheduler_error( ) async def test_get_data_export_status( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, ): _job_id = AsyncJobId(_faker.uuid4()) result = await async_jobs.get_status( @@ -395,7 +398,7 @@ async def test_get_data_export_status( ) async def test_get_data_export_status_scheduler_error( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, ): _job_id = AsyncJobId(_faker.uuid4()) with pytest.raises(JobSchedulerError): @@ -409,9 +412,23 @@ async def test_get_data_export_status_scheduler_error( ) 
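# ---------------------------------------------------------------------------
# [editor's note] The parametrized tests in this series configure the
# `mock_celery_client` fixture through pytest's *indirect* parametrization:
# each dict passed to @pytest.mark.parametrize(..., indirect=True) reaches the
# fixture as `request.param` instead of being handed to the test function.
# Below is a minimal, self-contained sketch of that mechanism; the names
# `fake_client` and `configured_value` are illustrative only and not part of
# this patch series.
import pytest


@pytest.fixture
def fake_client(request: pytest.FixtureRequest) -> dict:
    # With indirect=True the parameter set lands on request.param; fall back
    # to an empty configuration when a test does not parametrize the fixture.
    return getattr(request, "param", {})


@pytest.mark.parametrize("fake_client", [{"configured_value": 42}], indirect=True)
def test_fixture_receives_param(fake_client: dict) -> None:
    assert fake_client["configured_value"] == 42
# ---------------------------------------------------------------------------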
-async def test_get_data_export_result( +@pytest.mark.parametrize( + "mock_celery_client", + [ + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.SUCCESS, + progress_report=ProgressReport(actual_value=100), + ), + "get_task_result_object": "result", + }, + ], + indirect=True, +) +async def test_get_data_export_result_success( rpc_client: RabbitMQRPCClient, - mock_celery_client: MockerFixture, + mock_celery_client: _MockCeleryClient, mocker: MockerFixture, ): mocker.patch("simcore_service_storage.api.rpc._async_jobs") @@ -428,6 +445,81 @@ async def test_get_data_export_result( assert isinstance(result, AsyncJobResult) +@pytest.mark.parametrize( + "mock_celery_client", + [ + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.RUNNING, + progress_report=ProgressReport(actual_value=50), + ), + "get_task_result_object": "status", + }, + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.ABORTED, + progress_report=ProgressReport(actual_value=100), + ), + "get_task_result_object": "status", + }, + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.ERROR, + progress_report=ProgressReport(actual_value=100), + ), + "get_task_result_object": "status", + }, + { + "get_task_status_object": CeleryError("error"), + "get_task_result_object": "status", + }, + ], + indirect=True, +) +async def test_get_data_export_result_error( + rpc_client: RabbitMQRPCClient, + mock_celery_client: _MockCeleryClient, + mocker: MockerFixture, +): + mocker.patch("simcore_service_storage.api.rpc._async_jobs") + _job_id = AsyncJobId(_faker.uuid4()) + + exception_cls_raised: type[Exception] + task_status_object = mock_celery_client.get_task_status_object + if ( + isinstance(task_status_object, TaskStatus) + and task_status_object.task_state == TaskState.RUNNING + ): + exception_cls_raised = JobNotDoneError + elif ( + isinstance(task_status_object, TaskStatus) + and task_status_object.task_state == TaskState.ABORTED + ): + exception_cls_raised = JobAbortedError + elif ( + isinstance(task_status_object, TaskStatus) + and task_status_object.task_state == TaskState.ERROR + ): + exception_cls_raised = JobError + elif isinstance(task_status_object, CeleryError): + exception_cls_raised = JobSchedulerError + else: + pytest.fail("invalid parameters") + + with pytest.raises(exception_cls_raised): + _ = await async_jobs.get_result( + rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id=_job_id, + job_id_data=AsyncJobNameData( + user_id=_faker.pyint(min_value=1, max_value=100), product_name="osparc" + ), + ) + + async def test_list_jobs( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, From 41b3b90c56337c1188c292efb5e20328ca647c01 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 09:40:54 +0100 Subject: [PATCH 070/131] cleanup parametrization --- .../storage/tests/unit/test_data_export.py | 97 +++++++++---------- 1 file changed, 44 insertions(+), 53 deletions(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 3d1258e75a92..db46f5d452d8 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -446,70 +446,61 @@ async def test_get_data_export_result_success( @pytest.mark.parametrize( - "mock_celery_client", + "mock_celery_client, expected_exception", [ - { - 
"get_task_status_object": TaskStatus( - task_uuid=TaskUUID(_faker.uuid4()), - task_state=TaskState.RUNNING, - progress_report=ProgressReport(actual_value=50), - ), - "get_task_result_object": "status", - }, - { - "get_task_status_object": TaskStatus( - task_uuid=TaskUUID(_faker.uuid4()), - task_state=TaskState.ABORTED, - progress_report=ProgressReport(actual_value=100), - ), - "get_task_result_object": "status", - }, - { - "get_task_status_object": TaskStatus( - task_uuid=TaskUUID(_faker.uuid4()), - task_state=TaskState.ERROR, - progress_report=ProgressReport(actual_value=100), - ), - "get_task_result_object": "status", - }, - { - "get_task_status_object": CeleryError("error"), - "get_task_result_object": "status", - }, + ( + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.RUNNING, + progress_report=ProgressReport(actual_value=50), + ), + "get_task_result_object": _faker.text(), + }, + JobNotDoneError, + ), + ( + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.ABORTED, + progress_report=ProgressReport(actual_value=100), + ), + "get_task_result_object": _faker.text(), + }, + JobAbortedError, + ), + ( + { + "get_task_status_object": TaskStatus( + task_uuid=TaskUUID(_faker.uuid4()), + task_state=TaskState.ERROR, + progress_report=ProgressReport(actual_value=100), + ), + "get_task_result_object": _faker.text(), + }, + JobError, + ), + ( + { + "get_task_status_object": CeleryError("error"), + "get_task_result_object": _faker.text(), + }, + JobSchedulerError, + ), ], - indirect=True, + indirect=["mock_celery_client"], ) async def test_get_data_export_result_error( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, mocker: MockerFixture, + expected_exception: type[Exception], ): mocker.patch("simcore_service_storage.api.rpc._async_jobs") _job_id = AsyncJobId(_faker.uuid4()) - exception_cls_raised: type[Exception] - task_status_object = mock_celery_client.get_task_status_object - if ( - isinstance(task_status_object, TaskStatus) - and task_status_object.task_state == TaskState.RUNNING - ): - exception_cls_raised = JobNotDoneError - elif ( - isinstance(task_status_object, TaskStatus) - and task_status_object.task_state == TaskState.ABORTED - ): - exception_cls_raised = JobAbortedError - elif ( - isinstance(task_status_object, TaskStatus) - and task_status_object.task_state == TaskState.ERROR - ): - exception_cls_raised = JobError - elif isinstance(task_status_object, CeleryError): - exception_cls_raised = JobSchedulerError - else: - pytest.fail("invalid parameters") - - with pytest.raises(exception_cls_raised): + with pytest.raises(expected_exception): _ = await async_jobs.get_result( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, From 3aa4ef033cef75925e276063c8a2db56e3a9cb18 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 09:46:46 +0100 Subject: [PATCH 071/131] handle list_jobs endpoint --- .../storage/tests/unit/test_data_export.py | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index db46f5d452d8..7db6720c188b 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -511,7 +511,14 @@ async def test_get_data_export_result_error( ) -async def test_list_jobs( +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"get_task_uuids_object": [_faker.uuid4() 
for _ in range(_faker.pyint(1, 10))]}, + ], + indirect=True, +) +async def test_list_jobs_success( rpc_client: RabbitMQRPCClient, mock_celery_client: MockerFixture, ): @@ -525,3 +532,25 @@ async def test_list_jobs( ) assert isinstance(result, list) assert all(isinstance(elm, AsyncJobGet) for elm in result) + + +@pytest.mark.parametrize( + "mock_celery_client", + [ + {"get_task_uuids_object": CeleryError("error")}, + ], + indirect=True, +) +async def test_list_jobs_error( + rpc_client: RabbitMQRPCClient, + mock_celery_client: MockerFixture, +): + with pytest.raises(JobSchedulerError): + _ = await async_jobs.list_jobs( + rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id_data=AsyncJobNameData( + user_id=_faker.pyint(min_value=1, max_value=100), product_name="osparc" + ), + filter_="", + ) From 4a84997096a79fd6c602fb140f0f5fc3e1aae12a Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 11:02:46 +0100 Subject: [PATCH 072/131] propagate backend errors to webserver --- .../api_schemas_rpc_async_jobs/exceptions.py | 2 +- .../data_export_async_jobs.py | 4 --- .../storage/_exception_handlers.py | 32 ++++++++++++++++--- .../with_dbs/01/storage/test_storage_rpc.py | 6 ++-- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py index 7d2509c24a8c..5748eb698099 100644 --- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py +++ b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py @@ -9,7 +9,7 @@ class JobSchedulerError(BaseAsyncjobRpcError): msg_template: str = "Celery exception: {exc}" -class StatusError(BaseAsyncjobRpcError): +class JobStatusError(BaseAsyncjobRpcError): msg_template: str = "Could not get status of job {job_id}" diff --git a/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py b/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py index 44c796ad3e72..57a39c34ecbe 100644 --- a/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py +++ b/packages/models-library/src/models_library/api_schemas_storage/data_export_async_jobs.py @@ -17,10 +17,6 @@ class StorageRpcBaseError(OsparcErrorMixin, RuntimeError): pass -class InvalidLocationIdError(StorageRpcBaseError): - msg_template: str = "Invalid location_id {location_id}" - - class InvalidFileIdentifierError(StorageRpcBaseError): msg_template: str = "Could not find the file {file_id}" diff --git a/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py b/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py index 4c68394b6a20..e4becd6dbe2d 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py +++ b/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py @@ -1,6 +1,12 @@ +from models_library.api_schemas_rpc_async_jobs.exceptions import ( + JobAbortedError, + JobError, + JobNotDoneError, + JobSchedulerError, + JobStatusError, +) from models_library.api_schemas_storage.data_export_async_jobs import ( AccessRightError, - DataExportError, InvalidFileIdentifierError, ) from servicelib.aiohttp import status @@ -15,15 +21,31 @@ _TO_HTTP_ERROR_MAP: ExceptionToHttpErrorMap = { InvalidFileIdentifierError: HttpErrorInfo( status.HTTP_404_NOT_FOUND, - "Could not find file.", 
+        "Could not find file {file_id}",
     ),
     AccessRightError: HttpErrorInfo(
         status.HTTP_403_FORBIDDEN,
-        "Accessright error.",
+        "Access right error: user {user_id} does not have access to file {file_id} with location {location_id}",
+    ),
+    JobAbortedError: HttpErrorInfo(
+        status.HTTP_410_GONE,
+        "Job {job_id} is aborted",
+    ),
+    JobError: HttpErrorInfo(
+        status.HTTP_500_INTERNAL_SERVER_ERROR,
+        "Job {job_id} failed with exception type {exc_type} and message {exc_msg}",
+    ),
+    JobNotDoneError: HttpErrorInfo(
+        status.HTTP_409_CONFLICT,
+        "Job {job_id} is not done yet",
+    ),
+    JobSchedulerError: HttpErrorInfo(
+        status.HTTP_500_INTERNAL_SERVER_ERROR,
+        "Encountered an error with the job scheduling system",
     ),
-    DataExportError: HttpErrorInfo(
+    JobStatusError: HttpErrorInfo(
         status.HTTP_500_INTERNAL_SERVER_ERROR,
-        "Could not export data.",
+        "Encountered an error while getting the status of job {job_id}",
     ),
 }
diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py
index 7c6061fff59e..2cf4b70befdf 100644
--- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py
+++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py
@@ -18,7 +18,7 @@
 )
 from models_library.api_schemas_rpc_async_jobs.exceptions import (
     JobError,
-    StatusError,
+    JobStatusError,
 )
 from models_library.api_schemas_storage.data_export_async_jobs import (
     AccessRightError,
@@ -116,7 +116,7 @@ async def test_data_export(
             started=datetime.now(),
             stopped=None,
         ),
-        StatusError(job_id=_faker.uuid4()),
+        JobStatusError(job_id=_faker.uuid4()),
     ],
     ids=lambda x: type(x).__name__,
 )
@@ -137,7 +137,7 @@ async def test_get_async_jobs_status(
         Envelope[StorageAsyncJobGet].model_validate(await response.json()).data
     )
     assert response_body_data is not None
-    elif isinstance(backend_result_or_exception, StatusError):
+    elif isinstance(backend_result_or_exception, JobStatusError):
         assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR
     else:
         pytest.fail("Incorrectly configured test")

From bee9cf0b390d837b789fc8f5ab2289ca9d31f37e Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Mon, 10 Mar 2025 11:28:16 +0100
Subject: [PATCH 073/131] handle all exception types in test_data_export

---
 .../rpc_interfaces/storage/data_export.py     |  6 ++-
 .../storage/_rest.py                          |  6 +--
 .../with_dbs/01/storage/test_storage_rpc.py   | 41 +++++++++----------
 3 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py
index ac9195d241cf..7e5a67f52af3 100644
--- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py
+++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py
@@ -1,8 +1,10 @@
-from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobGet
+from models_library.api_schemas_rpc_async_jobs.async_jobs import (
+    AsyncJobGet,
+    AsyncJobNameData,
+)
 from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE
 from models_library.rabbitmq_basic_types import RPCMethodName
 from pydantic import TypeAdapter
-from simcore_service_storage.api.rpc._async_jobs import AsyncJobNameData
 
 from ... 
import RabbitMQRPCClient from ..async_jobs.async_jobs import submit_job diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index a81379207eec..f5d62dc79add 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -45,8 +45,8 @@ get_result, get_status, list_jobs, - submit_job, ) +from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from servicelib.request_keys import RQT_USERID_KEY from servicelib.rest_responses import unwrap_envelope from yarl import URL @@ -424,10 +424,8 @@ class _PathParams(BaseModel): data_export_post = await parse_request_body_as( model_schema_cls=DataExportPost, request=request ) - async_job_rpc_get = await submit_job( + async_job_rpc_get = await start_data_export( rabbitmq_rpc_client=rabbitmq_rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - method_name="start_data_export", job_id_data=AsyncJobNameData( user_id=_req_ctx.user_id, product_name=_req_ctx.product_name ), diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 2cf4b70befdf..b96a6f9c89e4 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -2,7 +2,6 @@ # pylint: disable=redefined-outer-name # pylint: disable=unused-argument -from datetime import datetime from pathlib import Path from typing import Any @@ -18,11 +17,11 @@ ) from models_library.api_schemas_rpc_async_jobs.exceptions import ( JobError, + JobSchedulerError, JobStatusError, ) from models_library.api_schemas_storage.data_export_async_jobs import ( AccessRightError, - DataExportError, InvalidFileIdentifierError, ) from models_library.api_schemas_webserver.storage import ( @@ -39,8 +38,8 @@ get_result, get_status, list_jobs, - submit_job, ) +from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from simcore_postgres_database.models.users import UserRole _faker = Faker() @@ -65,12 +64,20 @@ def side_effect(*args, **kwargs): @pytest.mark.parametrize("user_role", [UserRole.USER]) @pytest.mark.parametrize( - "backend_result_or_exception", + "backend_result_or_exception, expected_status", [ - AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), - InvalidFileIdentifierError(file_id=Path("/my/file")), - AccessRightError(user_id=_faker.pyint(min_value=0), file_id=Path("/my/file")), - DataExportError(job_id=_faker.pyint(min_value=0)), + (AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), status.HTTP_202_ACCEPTED), + ( + InvalidFileIdentifierError(file_id=Path("/my/file")), + status.HTTP_404_NOT_FOUND, + ), + ( + AccessRightError( + user_id=_faker.pyint(min_value=0), file_id=Path("/my/file") + ), + status.HTTP_403_FORBIDDEN, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), ], ids=lambda x: type(x).__name__, ) @@ -81,9 +88,10 @@ async def test_data_export( create_storage_rpc_client_mock: Callable[[str, Any], None], faker: Faker, backend_result_or_exception: Any, + expected_status: int, ): create_storage_rpc_client_mock( - submit_job.__name__, + start_data_export.__name__, backend_result_or_exception, ) @@ -93,16 +101,9 @@ async def test_data_export( response = await client.post( "/v0/storage/locations/0/export-data", data=_body.model_dump_json() ) - 
if isinstance(backend_result_or_exception, AsyncJobGet): - assert response.status == status.HTTP_202_ACCEPTED + assert response.status == expected_status + if response.status == status.HTTP_202_ACCEPTED: Envelope[StorageAsyncJobGet].model_validate(await response.json()) - elif isinstance(backend_result_or_exception, InvalidFileIdentifierError): - assert response.status == status.HTTP_404_NOT_FOUND - elif isinstance(backend_result_or_exception, AccessRightError): - assert response.status == status.HTTP_403_FORBIDDEN - else: - assert isinstance(backend_result_or_exception, DataExportError) - assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR @pytest.mark.parametrize("user_role", [UserRole.USER]) @@ -110,11 +111,9 @@ async def test_data_export( "backend_result_or_exception", [ AsyncJobStatus( - job_id=f"{_faker.uuid4()}", + job_id=AsyncJobId(f"{_faker.uuid4()}"), progress=ProgressReport(actual_value=0.5, total=1.0), done=False, - started=datetime.now(), - stopped=None, ), JobStatusError(job_id=_faker.uuid4()), ], From 4ef3e88473ac77e207812d1d7bbb0b7371fa469e Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 11:33:58 +0100 Subject: [PATCH 074/131] handle different exceptions in test_get_async_jobs_status --- .../with_dbs/01/storage/test_storage_rpc.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index b96a6f9c89e4..e05da1f6a406 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -18,7 +18,6 @@ from models_library.api_schemas_rpc_async_jobs.exceptions import ( JobError, JobSchedulerError, - JobStatusError, ) from models_library.api_schemas_storage.data_export_async_jobs import ( AccessRightError, @@ -108,14 +107,17 @@ async def test_data_export( @pytest.mark.parametrize("user_role", [UserRole.USER]) @pytest.mark.parametrize( - "backend_result_or_exception", + "backend_result_or_exception, expected_status", [ - AsyncJobStatus( - job_id=AsyncJobId(f"{_faker.uuid4()}"), - progress=ProgressReport(actual_value=0.5, total=1.0), - done=False, + ( + AsyncJobStatus( + job_id=AsyncJobId(f"{_faker.uuid4()}"), + progress=ProgressReport(actual_value=0.5, total=1.0), + done=False, + ), + status.HTTP_200_OK, ), - JobStatusError(job_id=_faker.uuid4()), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), ], ids=lambda x: type(x).__name__, ) @@ -125,21 +127,18 @@ async def test_get_async_jobs_status( client: TestClient, create_storage_rpc_client_mock: Callable[[str, Any], None], backend_result_or_exception: Any, + expected_status: int, ): _job_id = AsyncJobId(_faker.uuid4()) create_storage_rpc_client_mock(get_status.__name__, backend_result_or_exception) response = await client.get(f"/v0/storage/async-jobs/{_job_id}/status") - if isinstance(backend_result_or_exception, AsyncJobStatus): - assert response.status == status.HTTP_200_OK + assert response.status == expected_status + if response.status == status.HTTP_200_OK: response_body_data = ( Envelope[StorageAsyncJobGet].model_validate(await response.json()).data ) assert response_body_data is not None - elif isinstance(backend_result_or_exception, JobStatusError): - assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR - else: - pytest.fail("Incorrectly configured test") 
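# ---------------------------------------------------------------------------
# [editor's note] The expected_status values asserted in these webserver tests
# come from the exception-to-HTTP mapping added in patch 072
# (_TO_HTTP_ERROR_MAP in storage/_exception_handlers.py): each domain
# exception raised by the storage backend is translated into one HTTP error
# response. A minimal sketch of that translation, assuming a plain dataclass
# in place of servicelib's HttpErrorInfo; all names below are illustrative,
# not the actual webserver API.
from dataclasses import dataclass


@dataclass(frozen=True)
class _HttpErrorInfo:
    status_code: int
    msg_template: str


_ERROR_MAP: dict[type[Exception], _HttpErrorInfo] = {
    TimeoutError: _HttpErrorInfo(504, "The backend timed out"),
    PermissionError: _HttpErrorInfo(403, "Access denied"),
}


def _to_http_status(exc: Exception) -> int:
    # Exceptions without an entry surface as a generic 500; in the actual map,
    # JobSchedulerError and JobError are mapped to 500 explicitly.
    info = _ERROR_MAP.get(type(exc))
    return info.status_code if info is not None else 500


if __name__ == "__main__":
    assert _to_http_status(PermissionError("no access")) == 403
    assert _to_http_status(ValueError("boom")) == 500
# ---------------------------------------------------------------------------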
@pytest.mark.parametrize("user_role", [UserRole.USER]) From f6fc5b93b81e4535138d93c4fff6b7ea1f16e53d Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 11:37:48 +0100 Subject: [PATCH 075/131] Handle all exceptions in test_abort_async_jobs --- .../with_dbs/01/storage/test_storage_rpc.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index e05da1f6a406..0acbc2c8ef3a 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -142,26 +142,31 @@ async def test_get_async_jobs_status( @pytest.mark.parametrize("user_role", [UserRole.USER]) -@pytest.mark.parametrize("abort_success", [True, False]) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + ( + AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), + status.HTTP_200_OK, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + ], + ids=lambda x: type(x).__name__, +) async def test_abort_async_jobs( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, create_storage_rpc_client_mock: Callable[[str, Any], None], faker: Faker, - abort_success: bool, + backend_result_or_exception: Any, + expected_status: int, ): _job_id = AsyncJobId(faker.uuid4()) - create_storage_rpc_client_mock( - abort.__name__, AsyncJobAbort(result=abort_success, job_id=_job_id) - ) + create_storage_rpc_client_mock(abort.__name__, backend_result_or_exception) response = await client.post(f"/v0/storage/async-jobs/{_job_id}:abort") - - if abort_success: - assert response.status == status.HTTP_200_OK - else: - assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR + assert response.status == expected_status @pytest.mark.parametrize("user_role", [UserRole.USER]) From 723ae64659340f38caac7aef888f4f4a72780b5c Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 12:27:29 +0100 Subject: [PATCH 076/131] handle all exception types in test_get_async_job_result --- .../api_schemas_webserver/storage.py | 5 +--- .../storage/_rest.py | 8 +------ .../with_dbs/01/storage/test_storage_rpc.py | 24 +++++++++---------- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index e8ce8f451c42..8b64050afdcf 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -102,12 +102,9 @@ def from_rpc_schema( class StorageAsyncJobResult(OutputSchema): result: Any | None - error: Any | None @classmethod def from_rpc_schema( cls, async_job_rpc_result: AsyncJobResult ) -> "StorageAsyncJobResult": - return StorageAsyncJobResult( - result=async_job_rpc_result.result, error=async_job_rpc_result.error - ) + return StorageAsyncJobResult(result=async_job_rpc_result.result) diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index f5d62dc79add..86b6b0aa8693 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -529,13 +529,7 @@ class 
_PathParams(BaseModel): user_id=_req_ctx.user_id, product_name=_req_ctx.product_name ), ) - return web.Response( - status=( - status.HTTP_200_OK - if async_job_rpc_abort.result - else status.HTTP_500_INTERNAL_SERVER_ERROR - ) - ) + return web.Response(status=status.HTTP_200_OK) @routes.get( diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 0acbc2c8ef3a..55568034c245 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -16,7 +16,9 @@ AsyncJobStatus, ) from models_library.api_schemas_rpc_async_jobs.exceptions import ( + JobAbortedError, JobError, + JobNotDoneError, JobSchedulerError, ) from models_library.api_schemas_storage.data_export_async_jobs import ( @@ -171,10 +173,13 @@ async def test_abort_async_jobs( @pytest.mark.parametrize("user_role", [UserRole.USER]) @pytest.mark.parametrize( - "backend_result_or_exception", + "result_or_exception, expected_status", [ - AsyncJobResult(result=None, error=_faker.text()), - JobError(job_id=_faker.uuid4()), + (JobNotDoneError(job_id=_faker.uuid4()), status.HTTP_409_CONFLICT), + (AsyncJobResult(result=None), status.HTTP_200_OK), + (JobError(job_id=_faker.uuid4()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobAbortedError(job_id=_faker.uuid4()), status.HTTP_410_GONE), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), ], ids=lambda x: type(x).__name__, ) @@ -184,19 +189,14 @@ async def test_get_async_job_result( client: TestClient, create_storage_rpc_client_mock: Callable[[str, Any], None], faker: Faker, - backend_result_or_exception: Any, + result_or_exception: Any, + expected_status: int, ): _job_id = AsyncJobId(faker.uuid4()) - create_storage_rpc_client_mock(get_result.__name__, backend_result_or_exception) + create_storage_rpc_client_mock(get_result.__name__, result_or_exception) response = await client.get(f"/v0/storage/async-jobs/{_job_id}/result") - - if isinstance(backend_result_or_exception, AsyncJobResult): - assert response.status == status.HTTP_200_OK - elif isinstance(backend_result_or_exception, JobError): - assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR - else: - pytest.fail("Incorrectly configured test") + assert response.status == expected_status @pytest.mark.parametrize("user_role", [UserRole.USER]) From 2b6adda360960b24499684d66481e362240245d6 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 12:31:40 +0100 Subject: [PATCH 077/131] handle all exceptions in test_get_user_async_jobs --- .../with_dbs/01/storage/test_storage_rpc.py | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 55568034c245..2ac54c0b5981 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -26,6 +26,7 @@ InvalidFileIdentifierError, ) from models_library.api_schemas_webserver.storage import ( + AsyncJobLinks, DataExportPost, StorageAsyncJobGet, ) @@ -200,17 +201,37 @@ async def test_get_async_job_result( @pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + ( + [ + StorageAsyncJobGet( + 
job_id=AsyncJobId(_faker.uuid4()), + links=AsyncJobLinks( + status_href=_faker.uri(), + abort_href=_faker.uri(), + result_href=_faker.uri(), + ), + ) + ], + status.HTTP_200_OK, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + ], + ids=lambda x: type(x).__name__, +) async def test_get_user_async_jobs( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, create_storage_rpc_client_mock: Callable[[str, Any], None], + backend_result_or_exception: Any, + expected_status: int, ): - create_storage_rpc_client_mock( - list_jobs.__name__, [StorageAsyncJobGet(job_id=AsyncJobId(_faker.uuid4()))] - ) + create_storage_rpc_client_mock(list_jobs.__name__, backend_result_or_exception) response = await client.get("/v0/storage/async-jobs") - - assert response.status == status.HTTP_200_OK - Envelope[list[StorageAsyncJobGet]].model_validate(await response.json()) + assert response.status == expected_status + if response.status == status.HTTP_200_OK: + Envelope[list[StorageAsyncJobGet]].model_validate(await response.json()) From 54f3d586b445a1379cdc59bf239753bc083f6e92 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 13:23:14 +0100 Subject: [PATCH 078/131] parametrize user roles --- .../with_dbs/01/storage/test_storage_rpc.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 2ac54c0b5981..667be3f2de06 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -3,7 +3,7 @@ # pylint: disable=redefined-outer-name # pylint: disable=unused-argument from pathlib import Path -from typing import Any +from typing import Any, Final import pytest from aiohttp.test_utils import TestClient @@ -45,6 +45,13 @@ from simcore_postgres_database.models.users import UserRole _faker = Faker() +_user_roles: Final[list[UserRole]] = [ + UserRole.GUEST, + UserRole.USER, + UserRole.TESTER, + UserRole.PRODUCT_OWNER, + UserRole.ADMIN, +] @pytest.fixture @@ -64,7 +71,7 @@ def side_effect(*args, **kwargs): return _ -@pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( "backend_result_or_exception, expected_status", [ @@ -108,7 +115,7 @@ async def test_data_export( Envelope[StorageAsyncJobGet].model_validate(await response.json()) -@pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( "backend_result_or_exception, expected_status", [ @@ -144,7 +151,7 @@ async def test_get_async_jobs_status( assert response_body_data is not None -@pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( "backend_result_or_exception, expected_status", [ @@ -172,7 +179,7 @@ async def test_abort_async_jobs( assert response.status == expected_status -@pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( "result_or_exception, expected_status", [ @@ -200,7 +207,7 @@ async def test_get_async_job_result( assert response.status == expected_status -@pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( 
"backend_result_or_exception, expected_status", [ From 0708173b4d648f7e3f24ccc14c9a26ee62fead60 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 14:32:50 +0100 Subject: [PATCH 079/131] update openapi specs with status codes --- api/specs/web-server/_storage.py | 5 + .../api/v0/openapi.yaml | 510 +++++++++++++++++- 2 files changed, 509 insertions(+), 6 deletions(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index f85c5a786c14..aba249f5be4c 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -29,15 +29,20 @@ ) from models_library.generics import Envelope from models_library.projects_nodes_io import LocationID +from models_library.rest_error import EnvelopedError from models_library.users import UserID from pydantic import AnyUrl, ByteSize from servicelib.fastapi.rest_pagination import CustomizedPathsCursorPage from simcore_service_webserver._meta import API_VTAG +from simcore_service_webserver.storage._exception_handlers import _TO_HTTP_ERROR_MAP from simcore_service_webserver.storage.schemas import DatasetMetaData, FileMetaData router = APIRouter( prefix=f"/{API_VTAG}", tags=["storage"], + responses={ + i.status_code: {"model": EnvelopedError} for i in _TO_HTTP_ERROR_MAP.values() + }, ) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 9cff91cc1247..2e19c8306057 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -6010,6 +6010,36 @@ paths: $ref: '#/components/schemas/FileLocation' type: array title: Response List Storage Locations + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '410': + description: Gone + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '409': + description: Conflict + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' /v0/storage/locations/{location_id}/paths: get: tags: @@ -6058,6 +6088,36 @@ paths: application/json: schema: $ref: '#/components/schemas/CursorPage___T_Customized_PathMetaDataGet_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/datasets: get: tags: @@ -6079,6 +6139,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_DatasetMetaData__' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: 
'#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/files/metadata: get: tags: @@ -6118,6 +6208,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_DatasetMetaData__' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/datasets/{dataset_id}/metadata: get: tags: @@ -6156,6 +6276,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_FileMetaDataGet__' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/files/{file_id}/metadata: get: tags: @@ -6186,6 +6336,36 @@ paths: - $ref: '#/components/schemas/FileMetaData' - $ref: '#/components/schemas/Envelope_FileMetaDataGet_' title: Response Get File Metadata + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/files/{file_id}: get: tags: @@ -6219,6 +6399,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_PresignedLink_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + 
content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict put: tags: - storage @@ -6272,6 +6482,36 @@ paths: - $ref: '#/components/schemas/Envelope_FileUploadSchema_' - $ref: '#/components/schemas/Envelope_AnyUrl_' title: Response Upload File + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict delete: tags: - storage @@ -6294,6 +6534,36 @@ paths: responses: '204': description: Successful Response + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/files/{file_id}:abort: post: tags: @@ -6319,6 +6589,36 @@ paths: responses: '204': description: Successful Response + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/files/{file_id}:complete: post: tags: @@ -6352,6 +6652,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_FileUploadCompleteResponse_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/files/{file_id}:complete/futures/{future_id}: post: tags: @@ -6385,6 +6715,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_FileUploadCompleteFutureResponse_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: 
+ application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/locations/{location_id}/export-data: post: tags: @@ -6412,6 +6772,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_StorageAsyncJobGet_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/async-jobs/{job_id}/status: get: tags: @@ -6434,6 +6824,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_StorageAsyncJobStatus_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/async-jobs/{job_id}:abort: post: tags: @@ -6455,6 +6875,36 @@ paths: content: application/json: schema: {} + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/async-jobs/{job_id}/result: get: tags: @@ -6483,6 +6933,30 @@ paths: application/json: schema: $ref: '#/components/schemas/StorageAsyncJobStatus' + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/storage/async-jobs: get: tags: @@ -6506,6 +6980,36 @@ paths: application/json: schema: $ref: 
'#/components/schemas/Envelope_list_StorageAsyncJobGet__' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error + '409': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Conflict /v0/trash:empty: post: tags: @@ -14910,15 +15414,9 @@ components: - {} - type: 'null' title: Result - error: - anyOf: - - {} - - type: 'null' - title: Error type: object required: - result - - error title: StorageAsyncJobResult StorageAsyncJobStatus: properties: From 2f01ba4082e57be98f2565662bcf9ff9e88f61d6 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 10 Mar 2025 14:41:20 +0100 Subject: [PATCH 080/131] only add response types to data_export endpoints --- api/specs/web-server/_storage.py | 26 +- .../api/v0/openapi.yaml | 364 +----------------- 2 files changed, 17 insertions(+), 373 deletions(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index aba249f5be4c..366f45d223d6 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -4,7 +4,7 @@ # pylint: disable=too-many-arguments -from typing import Annotated, TypeAlias +from typing import Annotated, Any, TypeAlias from fastapi import APIRouter, Depends, Query, status from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobId @@ -34,15 +34,14 @@ from pydantic import AnyUrl, ByteSize from servicelib.fastapi.rest_pagination import CustomizedPathsCursorPage from simcore_service_webserver._meta import API_VTAG -from simcore_service_webserver.storage._exception_handlers import _TO_HTTP_ERROR_MAP +from simcore_service_webserver.storage._exception_handlers import ( + _TO_HTTP_ERROR_MAP as data_export_http_error_map, +) from simcore_service_webserver.storage.schemas import DatasetMetaData, FileMetaData router = APIRouter( prefix=f"/{API_VTAG}", tags=["storage"], - responses={ - i.status_code: {"model": EnvelopedError} for i in _TO_HTTP_ERROR_MAP.values() - }, ) @@ -200,11 +199,18 @@ async def is_completed_upload_file( # data export +_data_export_responses: dict[int | str, dict[str, Any]] = { + i.status_code: {"model": EnvelopedError} + for i in data_export_http_error_map.values() +} + + @router.post( "/storage/locations/{location_id}/export-data", response_model=Envelope[StorageAsyncJobGet], name="export_data", description="Export data", + responses=_data_export_responses, ) async def export_data(data_export: DataExportPost, location_id: LocationID): """Trigger data export. 
Returns async job id for getting status and results""" @@ -214,6 +220,7 @@ async def export_data(data_export: DataExportPost, location_id: LocationID): "/storage/async-jobs/{job_id}/status", response_model=Envelope[StorageAsyncJobStatus], name="get_async_job_status", + responses=_data_export_responses, ) async def get_async_job_status(job_id: AsyncJobId): """Get async job status""" @@ -222,6 +229,7 @@ async def get_async_job_status(job_id: AsyncJobId): @router.post( "/storage/async-jobs/{job_id}:abort", name="abort_async_job", + responses=_data_export_responses, ) async def abort_async_job(job_id: AsyncJobId): """aborts execution of an async job""" @@ -231,12 +239,7 @@ async def abort_async_job(job_id: AsyncJobId): "/storage/async-jobs/{job_id}/result", response_model=Envelope[StorageAsyncJobResult], name="get_async_job_result", - responses={ - status.HTTP_404_NOT_FOUND: { - "description": "Result not found", - "model": StorageAsyncJobStatus, - } - }, + responses=_data_export_responses, ) async def get_async_job_result(job_id: AsyncJobId): """Get the result of the async job""" @@ -246,6 +249,7 @@ async def get_async_job_result(job_id: AsyncJobId): "/storage/async-jobs", response_model=Envelope[list[StorageAsyncJobGet]], name="get_async_jobs", + responses=_data_export_responses, ) async def get_async_jobs(user_id: UserID): """Returns a list of async jobs for the user""" diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 2e19c8306057..24ad9f4d67dd 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -6010,36 +6010,6 @@ paths: $ref: '#/components/schemas/FileLocation' type: array title: Response List Storage Locations - '404': - description: Not Found - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '403': - description: Forbidden - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '410': - description: Gone - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '500': - description: Internal Server Error - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '409': - description: Conflict - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' /v0/storage/locations/{location_id}/paths: get: tags: @@ -6088,36 +6058,6 @@ paths: application/json: schema: $ref: '#/components/schemas/CursorPage___T_Customized_PathMetaDataGet_' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/datasets: get: tags: @@ -6139,36 +6079,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_DatasetMetaData__' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not 
Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/files/metadata: get: tags: @@ -6208,36 +6118,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_DatasetMetaData__' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/datasets/{dataset_id}/metadata: get: tags: @@ -6276,36 +6156,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_FileMetaDataGet__' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/files/{file_id}/metadata: get: tags: @@ -6336,36 +6186,6 @@ paths: - $ref: '#/components/schemas/FileMetaData' - $ref: '#/components/schemas/Envelope_FileMetaDataGet_' title: Response Get File Metadata - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/files/{file_id}: get: tags: @@ -6399,36 +6219,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_PresignedLink_' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: 
'#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict put: tags: - storage @@ -6482,36 +6272,6 @@ paths: - $ref: '#/components/schemas/Envelope_FileUploadSchema_' - $ref: '#/components/schemas/Envelope_AnyUrl_' title: Response Upload File - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict delete: tags: - storage @@ -6534,36 +6294,6 @@ paths: responses: '204': description: Successful Response - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/files/{file_id}:abort: post: tags: @@ -6589,36 +6319,6 @@ paths: responses: '204': description: Successful Response - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/files/{file_id}:complete: post: tags: @@ -6652,36 +6352,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_FileUploadCompleteResponse_' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/locations/{location_id}/files/{file_id}:complete/futures/{future_id}: post: tags: @@ -6715,36 +6385,6 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_FileUploadCompleteFutureResponse_' - '404': - content: - application/json: - schema: - 
$ref: '#/components/schemas/EnvelopedError'
-          description: Not Found
-        '403':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EnvelopedError'
-          description: Forbidden
-        '410':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EnvelopedError'
-          description: Gone
-        '500':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EnvelopedError'
-          description: Internal Server Error
-        '409':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EnvelopedError'
-          description: Conflict
   /v0/storage/locations/{location_id}/export-data:
     post:
       tags:
@@ -6928,6 +6568,11 @@
             schema:
               $ref: '#/components/schemas/Envelope_StorageAsyncJobResult_'
         '404':
-          description: Result not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/StorageAsyncJobStatus'
+                $ref: '#/components/schemas/EnvelopedError'
+          description: Not Found
         '403':
           content:
             application/json:

From 0b6ccfd042e0c544241c09b45981f952dbdbf94f Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Tue, 11 Mar 2025 09:27:02 +0100
Subject: [PATCH 081/131] add test for HATEOAS links

---
 .../with_dbs/01/storage/test_storage_rpc.py   | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py
index 667be3f2de06..263b31e4517a 100644
--- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py
+++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py
@@ -25,10 +25,12 @@
     AccessRightError,
     InvalidFileIdentifierError,
 )
+from models_library.api_schemas_webserver._base import OutputSchema
 from models_library.api_schemas_webserver.storage import (
     AsyncJobLinks,
     DataExportPost,
     StorageAsyncJobGet,
+    StorageAsyncJobResult,
 )
 from models_library.generics import Envelope
 from models_library.progress_bar import ProgressReport
@@ -43,6 +45,7 @@
 )
 from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export
 from simcore_postgres_database.models.users import UserRole
+from simcore_service_webserver.storage._rest import StorageAsyncJobStatus

 _faker = Faker()
 _user_roles: Final[list[UserRole]] = [
@@ -242,3 +245,73 @@ async def test_get_user_async_jobs(
     assert response.status == expected_status
     if response.status == status.HTTP_200_OK:
         Envelope[list[StorageAsyncJobGet]].model_validate(await response.json())
+
+
+@pytest.mark.parametrize("user_role", _user_roles)
+@pytest.mark.parametrize(
+    "http_method, href, backend_method, backend_object, return_schema",
+    [
+        (
+            "GET",
+            "status_href",
+            get_status.__name__,
+            AsyncJobStatus(
+                job_id=AsyncJobId(_faker.uuid4()),
+                progress=ProgressReport(actual_value=0.5, total=1.0),
+                done=False,
+            ),
+            StorageAsyncJobStatus,
+        ),
+        (
+            "POST",
+            "abort_href",
+            abort.__name__,
+            AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())),
+            None,
+        ),
+        (
+            "GET",
+            "result_href",
+            get_result.__name__,
+            AsyncJobResult(result=None),
+            StorageAsyncJobResult,
+        ),
+    ],
+)
+async def test_get_async_job_links(
+    user_role: UserRole,
+    logged_user: UserInfoDict,
+    client: TestClient,
+    create_storage_rpc_client_mock: Callable[[str, Any], None],
+    faker: Faker,
+    http_method: str,
+    href: str,
+    backend_method: str,
+    backend_object: Any,
+    return_schema: OutputSchema | None,
+):
+    create_storage_rpc_client_mock(
+        start_data_export.__name__,
+        AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")),
+    )
+
+    _body = DataExportPost(
paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] + ) + response = await client.post( + "/v0/storage/locations/0/export-data", data=_body.model_dump_json() + ) + assert response.status == status.HTTP_202_ACCEPTED + response_body_data = ( + Envelope[StorageAsyncJobGet].model_validate(await response.json()).data + ) + assert response_body_data is not None + + # Call the different links and check the correct model and return status + create_storage_rpc_client_mock(backend_method, backend_object) + response = await client.request( + http_method, getattr(response_body_data.links, href) + ) + assert response.status == status.HTTP_200_OK + if return_schema: + Envelope[return_schema].model_validate(await response.json()) From 8e44e9768c8cf47b618f3ee675fe7983f667a87f Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 11 Mar 2025 14:17:29 +0100 Subject: [PATCH 082/131] revert unwanted changes after merge --- .../modules/celery/example_tasks.py | 31 ------------------- .../modules/celery/tasks.py | 13 +------- 2 files changed, 1 insertion(+), 43 deletions(-) delete mode 100644 services/storage/src/simcore_service_storage/modules/celery/example_tasks.py diff --git a/services/storage/src/simcore_service_storage/modules/celery/example_tasks.py b/services/storage/src/simcore_service_storage/modules/celery/example_tasks.py deleted file mode 100644 index 3f295f6e65a5..000000000000 --- a/services/storage/src/simcore_service_storage/modules/celery/example_tasks.py +++ /dev/null @@ -1,31 +0,0 @@ -import asyncio -import logging -import time - - -from celery import Task # type: ignore[import-untyped] -from models_library.progress_bar import ProgressReport -from models_library.projects_nodes_io import StorageFileID -from servicelib.logging_utils import log_context - -from .utils import get_celery_worker - -_logger = logging.getLogger(__name__) - - -def export_data(task: Task, files: list[StorageFileID]): - _logger.info("Exporting files: %s", files) - for n, file in enumerate(files, start=1): - with log_context( - _logger, - logging.INFO, - msg=f"Exporting {file=} ({n}/{len(files)})", - ): - assert task.name - get_celery_worker(task.app).set_task_progress( - task_name=task.name, - task_id=task.request.id, - report=ProgressReport(actual_value=n / len(files) * 100), - ) - time.sleep(10) - return "done" diff --git a/services/storage/src/simcore_service_storage/modules/celery/tasks.py b/services/storage/src/simcore_service_storage/modules/celery/tasks.py index 3237dd5a587f..014151acd74a 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/tasks.py +++ b/services/storage/src/simcore_service_storage/modules/celery/tasks.py @@ -1,9 +1,7 @@ import logging import time - -from celery import Task -from common_library.errors_classes import OsparcErrorMixin +from celery import Task # type: ignore[import-untyped] from models_library.progress_bar import ProgressReport from models_library.projects_nodes_io import StorageFileID from servicelib.logging_utils import log_context @@ -29,12 +27,3 @@ def export_data(task: Task, files: list[StorageFileID]): ) time.sleep(10) return "done" - - -class MyError(OsparcErrorMixin, Exception): - msg_template = "Something strange happened: {msg}" - - -def export_data_with_error(task: Task, files: list[StorageFileID]): - msg = "BOOM!" 
-    raise MyError(msg=msg)

From 359199a3253353a7dc81e1f7923350d6931282b1 Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Tue, 11 Mar 2025 14:21:54 +0100
Subject: [PATCH 083/131] fix storage settings after rebase

---
 .../src/simcore_service_storage/core/settings.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py
index 95535efaa091..3051358c1598 100644
--- a/services/storage/src/simcore_service_storage/core/settings.py
+++ b/services/storage/src/simcore_service_storage/core/settings.py
@@ -79,11 +79,15 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings):
         description="Maximal amount of threads used by underlying S3 client to transfer data to S3 backend",
     )

-    STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED: bool = Field(
-        default=False,
-        validation_alias=AliasChoices(
-            "STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED",
-            "LOG_FORMAT_LOCAL_DEV_ENABLED",
+    STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED: Annotated[
+        bool,
+        Field(
+            default=False,
+            validation_alias=AliasChoices(
+                "STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED",
+                "LOG_FORMAT_LOCAL_DEV_ENABLED",
+            ),
+            description="Enables local development _logger format. WARNING: make sure it is disabled if you want to have structured logs!",
         ),
     ]

     STORAGE_RABBITMQ: Annotated[
         RabbitSettings | None,
         Field(
             json_schema_extra={"auto_default_from_env": True},
-        )
+        ),
     ]

     STORAGE_S3_CLIENT_MAX_TRANSFER_CONCURRENCY: Annotated[

From bc1d070fa4082e57be98f2565662bcf9ff9e88f61d6 Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Tue, 11 Mar 2025 14:23:13 +0100
Subject: [PATCH 084/131] regenerate openapi specs

---
 .../simcore_service_webserver/api/v0/openapi.yaml | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml
index 24ad9f4d67dd..29c67e61571c 100644
--- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml
+++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml
@@ -15069,16 +15069,6 @@ components:
       done:
         type: boolean
        title: Done
-      started:
-        type: string
-        format: date-time
-        title: Started
-      stopped:
-        anyOf:
-        - type: string
-          format: date-time
-        - type: 'null'
-        title: Stopped
       links:
         $ref: '#/components/schemas/AsyncJobLinks'
     type: object
@@ -15086,9 +15076,10 @@ components:
      - jobId
      - progress
      - done
-      - started
-      - stopped
+      - links
      title: StorageAsyncJobStatus
+    Structure:
+      properties:
        key:
          anyOf:
          - type: string

From fb3cf829905b73f0cd3725c907dcff753e07 Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Tue, 11 Mar 2025 14:36:23 +0100
Subject: [PATCH 085/131] add __init__ file

---
 .../src/servicelib/rabbitmq/rpc_interfaces/storage/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/__init__.py

diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/__init__.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1

From 8efe566ebcde20c9ee341beb22c0626346c3914a Mon Sep 17 00:00:00 2001
From: Mads Bisgaard
Date: Tue, 11 Mar 2025 14:48:49 +0100
Subject: [PATCH 086/131] minor fix

---
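Note: the "minor fix" below drops an unused assignment in the abort handler.
The confirmation object returned by the abort RPC is never read, so (arguments
elided for brevity):

    # before
    async_job_rpc_abort = await abort(...)
    # after
    await abort(...)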
.../web/server/src/simcore_service_webserver/storage/_rest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index 86b6b0aa8693..291a7a39876e 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -521,7 +521,7 @@ class _PathParams(BaseModel): rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) - async_job_rpc_abort = await abort( + await abort( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=async_job_get.job_id, From 20439bae6e31fb07f14f0b1945b357ed32b9e5b6 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 11 Mar 2025 14:54:21 +0100 Subject: [PATCH 087/131] add import of SimcoreS3Dsm --- services/storage/tests/unit/test_data_export.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 7db6720c188b..cc15706d96cd 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -46,6 +46,7 @@ from simcore_service_storage.core.settings import ApplicationSettings from simcore_service_storage.modules.celery.client import TaskUUID from simcore_service_storage.modules.celery.models import TaskState +from simcore_service_storage.simcore_s3_dsm import SimcoreS3DataManager pytest_plugins = [ "pytest_simcore.rabbit_service", From 59c90b39130cbd64c70f103d1d36498a55a68d33 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 11 Mar 2025 16:14:27 +0100 Subject: [PATCH 088/131] make pylint happy --- .../src/simcore_service_storage/api/rpc/_async_jobs.py | 4 ++-- .../storage/src/simcore_service_storage/core/settings.py | 5 ----- .../src/simcore_service_storage/modules/celery/models.py | 4 ++-- services/storage/tests/unit/test_data_export.py | 3 +-- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 1e2d16e2b3c5..507ee8d6b359 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -89,11 +89,11 @@ async def get_result( task_uuid=job_id, ) except CeleryError as exc: - raise JobSchedulerError(exc=f"{exc}") + raise JobSchedulerError(exc=f"{exc}") from exc if status.task_state == TaskState.ABORTED: raise JobAbortedError(job_id=job_id) - elif status.task_state == TaskState.ERROR: + if status.task_state == TaskState.ERROR: exc_type = "" exc_msg = "" with log_catch(logger=_logger, reraise=False): diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index 3051358c1598..7e11973864a3 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -74,11 +74,6 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): description="Interval in seconds when task cleaning pending uploads runs. 
setting to NULL disables the cleaner.", ) - STORAGE_S3_CLIENT_MAX_TRANSFER_CONCURRENCY: int = Field( - 4, - description="Maximal amount of threads used by underlying S3 client to transfer data to S3 backend", - ) - STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED: Annotated[ bool, Field( diff --git a/services/storage/src/simcore_service_storage/modules/celery/models.py b/services/storage/src/simcore_service_storage/modules/celery/models.py index c3566cc92313..2f04c5b81329 100644 --- a/services/storage/src/simcore_service_storage/modules/celery/models.py +++ b/services/storage/src/simcore_service_storage/modules/celery/models.py @@ -1,9 +1,9 @@ from enum import StrEnum, auto -from typing import Annotated, Any, Final, Self, TypeAlias +from typing import Any, Final, Self, TypeAlias from uuid import UUID from models_library.progress_bar import ProgressReport -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, model_validator TaskContext: TypeAlias = dict[str, Any] TaskID: TypeAlias = str diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index cc15706d96cd..8f84497c5c58 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -102,8 +102,7 @@ async def get_task_uuids(self, *args, **kwargs) -> set[TaskUUID]: async def abort_task(self, *args, **kwargs) -> None: if isinstance(self.abort_task_object, Exception): raise self.abort_task_object - else: - return self.abort_task_object + return self.abort_task_object @pytest.fixture From 6010f6167a359a240a043fe17f0e1b8e1b53332c Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 12 Mar 2025 10:18:49 +0100 Subject: [PATCH 089/131] @pcrespov remove aiohttp from models library --- .../api_schemas_webserver/storage.py | 24 ++-------- .../storage/_rest.py | 44 +++++++++++++++++-- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index 8b64050afdcf..d9d9fd50a84b 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -1,7 +1,6 @@ from pathlib import Path from typing import Annotated, Any -from aiohttp import web from models_library.api_schemas_storage.storage_schemas import ( DEFAULT_NUMBER_OF_PATHS_PER_PAGE, MAX_NUMBER_OF_PATHS_PER_PAGE, @@ -55,14 +54,6 @@ class AsyncJobLinks(OutputSchema): abort_href: str result_href: str - @classmethod - def from_job_id(cls, app: web.Application, job_id: str) -> "AsyncJobLinks": - return AsyncJobLinks( - status_href=f"{app.router['get_async_job_status'].url_for(job_id=job_id)}", - abort_href=f"{app.router['abort_async_job'].url_for(job_id=job_id)}", - result_href=f"{app.router['get_async_job_result'].url_for(job_id=job_id)}", - ) - class StorageAsyncJobGet(OutputSchema): job_id: AsyncJobId @@ -70,14 +61,9 @@ class StorageAsyncJobGet(OutputSchema): @classmethod def from_rpc_schema( - cls, *, app: web.Application, async_job_rpc_get: AsyncJobGet + cls, *, async_job_rpc_get: AsyncJobGet, links: AsyncJobLinks ) -> "StorageAsyncJobGet": - return StorageAsyncJobGet( - job_id=async_job_rpc_get.job_id, - links=AsyncJobLinks.from_job_id( - app=app, job_id=f"{async_job_rpc_get.job_id}" - ), - ) + return StorageAsyncJobGet(job_id=async_job_rpc_get.job_id, links=links) class 
StorageAsyncJobStatus(OutputSchema): @@ -88,15 +74,13 @@ class StorageAsyncJobStatus(OutputSchema): @classmethod def from_rpc_schema( - cls, *, app: web.Application, async_job_rpc_status: AsyncJobStatus + cls, *, async_job_rpc_status: AsyncJobStatus, links: AsyncJobLinks ) -> "StorageAsyncJobStatus": return StorageAsyncJobStatus( job_id=async_job_rpc_status.job_id, progress=async_job_rpc_status.progress, done=async_job_rpc_status.done, - links=AsyncJobLinks.from_job_id( - app=app, job_id=f"{async_job_rpc_status.job_id}" - ), + links=links, ) diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index 291a7a39876e..fc4973d2fcb0 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -22,6 +22,7 @@ LinkType, ) from models_library.api_schemas_webserver.storage import ( + AsyncJobLinks, DataExportPost, StorageAsyncJobGet, StorageAsyncJobResult, @@ -433,9 +434,21 @@ class _PathParams(BaseModel): location_id=_path_params.location_id, ), ) + _job_id = f"{async_job_rpc_get.job_id}" return create_data_response( StorageAsyncJobGet.from_rpc_schema( - app=request.app, async_job_rpc_get=async_job_rpc_get + async_job_rpc_get=async_job_rpc_get, + links=AsyncJobLinks( + status_href=request.app.router["get_async_job_status"].url_for( + job_id=_job_id + ), + abort_href=request.app.router["abort_async_job"].url_for( + job_id=_job_id + ), + result_href=request.app.router["get_async_job_result"].url_for( + job_id=_job_id + ), + ), ), status=status.HTTP_202_ACCEPTED, ) @@ -463,7 +476,20 @@ async def get_async_jobs(request: web.Request) -> web.Response: ) return create_data_response( [ - StorageAsyncJobGet.from_rpc_schema(app=request.app, async_job_rpc_get=job) + StorageAsyncJobGet.from_rpc_schema( + async_job_rpc_get=job, + links=AsyncJobLinks( + status_href=request.app.router["get_async_job_status"].url_for( + job_id=f"{job.job_id}" + ), + abort_href=request.app.router["abort_async_job"].url_for( + job_id=f"{job.job_id}" + ), + result_href=request.app.router["get_async_job_result"].url_for( + job_id=f"{job.job_id}" + ), + ), + ) for job in user_async_jobs ], status=status.HTTP_200_OK, @@ -498,9 +524,21 @@ class _PathParams(BaseModel): user_id=_req_ctx.user_id, product_name=_req_ctx.product_name ), ) + _job_id = f"{async_job_rpc_status.job_id}" return create_data_response( StorageAsyncJobStatus.from_rpc_schema( - app=request.app, async_job_rpc_status=async_job_rpc_status + async_job_rpc_status=async_job_rpc_status, + links=AsyncJobLinks( + status_href=request.app.router["get_async_job_status"].url_for( + job_id=_job_id + ), + abort_href=request.app.router["abort_async_job"].url_for( + job_id=_job_id + ), + result_href=request.app.router["get_async_job_result"].url_for( + job_id=_job_id + ), + ), ), status=status.HTTP_200_OK, ) From c20806f55cdff445a9cf0c58eb190f24ca33ff1c Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 12 Mar 2025 10:32:21 +0100 Subject: [PATCH 090/131] fix validation errors --- .../storage/_rest.py | 36 +++++-------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index fc4973d2fcb0..e754c50dc554 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ 
b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -439,15 +439,9 @@ class _PathParams(BaseModel): StorageAsyncJobGet.from_rpc_schema( async_job_rpc_get=async_job_rpc_get, links=AsyncJobLinks( - status_href=request.app.router["get_async_job_status"].url_for( - job_id=_job_id - ), - abort_href=request.app.router["abort_async_job"].url_for( - job_id=_job_id - ), - result_href=request.app.router["get_async_job_result"].url_for( - job_id=_job_id - ), + status_href=f"{request.app.router['get_async_job_status'].url_for(job_id=_job_id)}", + abort_href=f"{request.app.router['abort_async_job'].url_for(job_id=_job_id)}", + result_href=f"{request.app.router['get_async_job_result'].url_for(job_id=_job_id)}", ), ), status=status.HTTP_202_ACCEPTED, @@ -479,15 +473,9 @@ async def get_async_jobs(request: web.Request) -> web.Response: StorageAsyncJobGet.from_rpc_schema( async_job_rpc_get=job, links=AsyncJobLinks( - status_href=request.app.router["get_async_job_status"].url_for( - job_id=f"{job.job_id}" - ), - abort_href=request.app.router["abort_async_job"].url_for( - job_id=f"{job.job_id}" - ), - result_href=request.app.router["get_async_job_result"].url_for( - job_id=f"{job.job_id}" - ), + status_href=f"{request.app.router['get_async_job_status'].url_for(job_id=str(job.job_id))}", + abort_href=f"{request.app.router['abort_async_job'].url_for(job_id=str(job.job_id))}", + result_href=f"{request.app.router['get_async_job_result'].url_for(job_id=str(job.job_id))}", ), ) for job in user_async_jobs @@ -529,15 +517,9 @@ class _PathParams(BaseModel): StorageAsyncJobStatus.from_rpc_schema( async_job_rpc_status=async_job_rpc_status, links=AsyncJobLinks( - status_href=request.app.router["get_async_job_status"].url_for( - job_id=_job_id - ), - abort_href=request.app.router["abort_async_job"].url_for( - job_id=_job_id - ), - result_href=request.app.router["get_async_job_result"].url_for( - job_id=_job_id - ), + status_href=f"{request.app.router['get_async_job_status'].url_for(job_id=_job_id)}", + abort_href=f"{request.app.router['abort_async_job'].url_for(job_id=_job_id)}", + result_href=f"{request.app.router['get_async_job_result'].url_for(job_id=_job_id)}", ), ), status=status.HTTP_200_OK, From 6fdf75366ac84d97c990bd856cc083ff776abfa3 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 12 Mar 2025 10:36:10 +0100 Subject: [PATCH 091/131] @pcrespov revert changes in storage settings --- .../simcore_service_storage/core/settings.py | 92 ++++++++----------- 1 file changed, 40 insertions(+), 52 deletions(-) diff --git a/services/storage/src/simcore_service_storage/core/settings.py b/services/storage/src/simcore_service_storage/core/settings.py index 7e11973864a3..4d246a89eeb0 100644 --- a/services/storage/src/simcore_service_storage/core/settings.py +++ b/services/storage/src/simcore_service_storage/core/settings.py @@ -30,59 +30,47 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): STORAGE_MONITORING_ENABLED: bool = False STORAGE_PROFILING: bool = False - BF_API_KEY: str | None = Field( - None, description="Pennsieve API key ONLY for testing purposes" - ) - BF_API_SECRET: str | None = Field( - None, description="Pennsieve API secret ONLY for testing purposes" - ) - - STORAGE_POSTGRES: PostgresSettings | None = Field( - json_schema_extra={"auto_default_from_env": True} - ) - - STORAGE_REDIS: RedisSettings | None = Field( - json_schema_extra={"auto_default_from_env": True} - ) - - STORAGE_S3: S3Settings | None = Field( - json_schema_extra={"auto_default_from_env": True} 
- ) - - STORAGE_CELERY: CelerySettings | None = Field( - json_schema_extra={"auto_default_from_env": True} - ) - - STORAGE_TRACING: TracingSettings | None = Field( - json_schema_extra={"auto_default_from_env": True} - ) - - DATCORE_ADAPTER: DatcoreAdapterSettings = Field( - json_schema_extra={"auto_default_from_env": True} - ) - - STORAGE_SYNC_METADATA_TIMEOUT: PositiveInt = Field( - 180, description="Timeout (seconds) for metadata sync task" - ) - - STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS: int = Field( - 3600, description="Default expiration time in seconds for presigned links" - ) - - STORAGE_CLEANER_INTERVAL_S: int | None = Field( - 30, - description="Interval in seconds when task cleaning pending uploads runs. setting to NULL disables the cleaner.", - ) + STORAGE_POSTGRES: Annotated[ + PostgresSettings | None, + Field(json_schema_extra={"auto_default_from_env": True}), + ] - STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED: Annotated[ - bool, + STORAGE_REDIS: Annotated[ + RedisSettings | None, Field(json_schema_extra={"auto_default_from_env": True}) + ] + + STORAGE_S3: Annotated[ + S3Settings | None, Field(json_schema_extra={"auto_default_from_env": True}) + ] + + STORAGE_CELERY: Annotated[ + CelerySettings | None, Field(json_schema_extra={"auto_default_from_env": True}) + ] + + STORAGE_TRACING: Annotated[ + TracingSettings | None, Field(json_schema_extra={"auto_default_from_env": True}) + ] + + DATCORE_ADAPTER: Annotated[ + DatcoreAdapterSettings, Field(json_schema_extra={"auto_default_from_env": True}) + ] + + STORAGE_SYNC_METADATA_TIMEOUT: Annotated[ + PositiveInt, Field(180, description="Timeout (seconds) for metadata sync task") + ] + + STORAGE_DEFAULT_PRESIGNED_LINK_EXPIRATION_SECONDS: Annotated[ + int, Field( - default=False, - validation_alias=AliasChoices( - "STORAGE_LOG_FORMAT_LOCAL_DEV_ENABLED", - "LOG_FORMAT_LOCAL_DEV_ENABLED", - ), - description="Enables local development _logger format. WARNING: make sure it is disabled if you want to have structured logs!", + 3600, description="Default expiration time in seconds for presigned links" + ), + ] + + STORAGE_CLEANER_INTERVAL_S: Annotated[ + int | None, + Field( + 30, + description="Interval in seconds when task cleaning pending uploads runs. 
setting to NULL disables the cleaner.", ), ] @@ -125,7 +113,7 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): ] STORAGE_WORKER_MODE: Annotated[ - bool | None, Field(description="If True, run as a worker") + bool, Field(description="If True, run as a worker") ] = False @field_validator("LOG_LEVEL", mode="before") From 10aab9c031c86b31daaa890f35117f18f32342b9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard <126242332+bisgaard-itis@users.noreply.github.com> Date: Fri, 14 Mar 2025 08:51:18 +0100 Subject: [PATCH 092/131] Update packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py Co-authored-by: Sylvain <35365065+sanderegg@users.noreply.github.com> --- .../src/models_library/api_schemas_rpc_async_jobs/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py index 5748eb698099..c39af000e90b 100644 --- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py +++ b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py @@ -18,7 +18,7 @@ class JobNotDoneError(BaseAsyncjobRpcError): class JobAbortedError(BaseAsyncjobRpcError): - msg_template: str = "Job {job_id} not done" + msg_template: str = "Job {job_id} aborted" class JobError(BaseAsyncjobRpcError): From 8af006806a586c14b0da534c0133e809167cbd21 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 14 Mar 2025 09:20:41 +0100 Subject: [PATCH 093/131] @sanderegg align status codes between long running tasks and async jobs --- .../simcore_service_webserver/storage/_exception_handlers.py | 2 +- .../server/tests/unit/with_dbs/01/storage/test_storage_rpc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py b/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py index e4becd6dbe2d..8e710f883ecc 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py +++ b/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py @@ -36,7 +36,7 @@ "Job {job_id} failed with exception type {exc_type} and message {exc_msg}", ), JobNotDoneError: HttpErrorInfo( - status.HTTP_409_CONFLICT, + status.HTTP_404_NOT_FOUND, "Job {job_id} is not done yet", ), JobSchedulerError: HttpErrorInfo( diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 263b31e4517a..9eb21f9ac4a0 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -186,7 +186,7 @@ async def test_abort_async_jobs( @pytest.mark.parametrize( "result_or_exception, expected_status", [ - (JobNotDoneError(job_id=_faker.uuid4()), status.HTTP_409_CONFLICT), + (JobNotDoneError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), (AsyncJobResult(result=None), status.HTTP_200_OK), (JobError(job_id=_faker.uuid4()), status.HTTP_500_INTERNAL_SERVER_ERROR), (JobAbortedError(job_id=_faker.uuid4()), status.HTTP_410_GONE), From cc1e57c1443620798aad8abddbc8e7c1b13b37b9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 14 Mar 2025 10:31:20 +0100 Subject: [PATCH 094/131] fix task name --- 
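Note: the name handed to send_task has to match a task registered on the
Celery worker. "export_data_with_error" was the debugging task deleted in
PATCH 082, so the scheduler is pointed back at the real task, as the hunk
below shows:

    task_uuid = await get_celery_client(app).send_task(
        "export_data",  # must match the worker-side task name
        task_context=job_id_data.model_dump(),
        files=data_export_start.file_and_folder_ids,
    )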
.../storage/src/simcore_service_storage/api/rpc/_data_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py index b290ab64e5d5..14abde05c43f 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py @@ -54,7 +54,7 @@ async def start_data_export( try: task_uuid = await get_celery_client(app).send_task( - "export_data_with_error", + "export_data", task_context=job_id_data.model_dump(), files=data_export_start.file_and_folder_ids, # ANE: adapt here your signature ) From 2e7bd10f6af695293cce6d6d2873fa9ed1c5fd63 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 14 Mar 2025 10:35:18 +0100 Subject: [PATCH 095/131] fix async job listing signature --- api/specs/web-server/_storage.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index 366f45d223d6..c44a6957eded 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -30,7 +30,6 @@ from models_library.generics import Envelope from models_library.projects_nodes_io import LocationID from models_library.rest_error import EnvelopedError -from models_library.users import UserID from pydantic import AnyUrl, ByteSize from servicelib.fastapi.rest_pagination import CustomizedPathsCursorPage from simcore_service_webserver._meta import API_VTAG @@ -251,5 +250,5 @@ async def get_async_job_result(job_id: AsyncJobId): name="get_async_jobs", responses=_data_export_responses, ) -async def get_async_jobs(user_id: UserID): - """Returns a list of async jobs for the user""" +async def get_async_jobs(): + """Returns the user's async jobs""" From 02b44f0655cf4c959d575afea8ec9a1e86b973e9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 14 Mar 2025 10:37:04 +0100 Subject: [PATCH 096/131] update webserver openapi specs --- .../api/v0/openapi.yaml | 47 ++----------------- 1 file changed, 4 insertions(+), 43 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index bf7873a2f1f6..124904f6fc2d 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -6436,12 +6436,6 @@ paths: schema: $ref: '#/components/schemas/EnvelopedError' description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/async-jobs/{job_id}/status: get: tags: @@ -6488,12 +6482,6 @@ paths: schema: $ref: '#/components/schemas/EnvelopedError' description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/async-jobs/{job_id}:abort: post: tags: @@ -6539,12 +6527,6 @@ paths: schema: $ref: '#/components/schemas/EnvelopedError' description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/async-jobs/{job_id}/result: get: tags: @@ -6591,28 +6573,13 @@ paths: schema: $ref: '#/components/schemas/EnvelopedError' description: Internal Server Error - '409': - content: - application/json: - schema: - $ref: 
'#/components/schemas/EnvelopedError' - description: Conflict /v0/storage/async-jobs: get: tags: - storage summary: Get Async Jobs - description: Returns a list of async jobs for the user + description: Returns the user's async jobs operationId: get_async_jobs - parameters: - - name: user_id - in: query - required: true - schema: - type: integer - exclusiveMinimum: true - title: User Id - minimum: 0 responses: '200': description: Successful Response @@ -6621,35 +6588,29 @@ paths: schema: $ref: '#/components/schemas/Envelope_list_StorageAsyncJobGet__' '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' description: Not Found - '403': content: application/json: schema: $ref: '#/components/schemas/EnvelopedError' + '403': description: Forbidden - '410': content: application/json: schema: $ref: '#/components/schemas/EnvelopedError' + '410': description: Gone - '500': content: application/json: schema: $ref: '#/components/schemas/EnvelopedError' + '500': description: Internal Server Error - '409': content: application/json: schema: $ref: '#/components/schemas/EnvelopedError' - description: Conflict /v0/trash:empty: post: tags: From 6e5b186d6e196031192003817c543808c16e4f2c Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Fri, 14 Mar 2025 11:56:19 +0100 Subject: [PATCH 097/131] remove cast --- .../src/simcore_service_storage/api/rpc/_async_jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 507ee8d6b359..836fd0524599 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -21,7 +21,7 @@ from servicelib.rabbitmq import RPCRouter from ...modules.celery import get_celery_client -from ...modules.celery.models import TaskError, TaskState, TaskStatus +from ...modules.celery.models import TaskError, TaskState _logger = logging.getLogger(__name__) router = RPCRouter() @@ -48,7 +48,7 @@ async def get_status( assert job_id_data # nosec try: - task_status: TaskStatus = await get_celery_client(app).get_task_status( + task_status = await get_celery_client(app).get_task_status( task_context=job_id_data.model_dump(), task_uuid=job_id, ) From dd449b21595b29259925e3d614e4d4b85a507ea9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 17 Mar 2025 16:08:00 +0100 Subject: [PATCH 098/131] adhere to long running tasks interface --- .../api_schemas_long_running_tasks/tasks.py | 2 +- .../storage/_rest.py | 51 +++++++++---------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_long_running_tasks/tasks.py b/packages/models-library/src/models_library/api_schemas_long_running_tasks/tasks.py index b5a8d8443b93..acd73831b22f 100644 --- a/packages/models-library/src/models_library/api_schemas_long_running_tasks/tasks.py +++ b/packages/models-library/src/models_library/api_schemas_long_running_tasks/tasks.py @@ -10,7 +10,7 @@ class TaskStatus(BaseModel): task_progress: TaskProgress done: bool - started: datetime + started: datetime | None class TaskResult(BaseModel): diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index 24749e7a0b2f..b33db692a21e 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ 
b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -10,6 +10,12 @@ from uuid import UUID from aiohttp import ClientTimeout, web +from models_library.api_schemas_long_running_tasks.base import TaskProgress +from models_library.api_schemas_long_running_tasks.tasks import ( + TaskGet, + TaskResult, + TaskStatus, +) from models_library.api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobId, AsyncJobNameData, @@ -22,11 +28,7 @@ LinkType, ) from models_library.api_schemas_webserver.storage import ( - AsyncJobLinks, DataExportPost, - StorageAsyncJobGet, - StorageAsyncJobResult, - StorageAsyncJobStatus, StoragePathComputeSizeParams, ) from models_library.projects_nodes_io import LocationID @@ -467,13 +469,12 @@ class _PathParams(BaseModel): ) _job_id = f"{async_job_rpc_get.job_id}" return create_data_response( - StorageAsyncJobGet.from_rpc_schema( - async_job_rpc_get=async_job_rpc_get, - links=AsyncJobLinks( - status_href=f"{request.app.router['get_async_job_status'].url_for(job_id=_job_id)}", - abort_href=f"{request.app.router['abort_async_job'].url_for(job_id=_job_id)}", - result_href=f"{request.app.router['get_async_job_result'].url_for(job_id=_job_id)}", - ), + TaskGet( + task_id=_job_id, + task_name=_job_id, + status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(job_id=_job_id)))}", + abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(job_id=_job_id)))}", + result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(job_id=_job_id)))}", ), status=status.HTTP_202_ACCEPTED, ) @@ -501,13 +502,12 @@ async def get_async_jobs(request: web.Request) -> web.Response: ) return create_data_response( [ - StorageAsyncJobGet.from_rpc_schema( - async_job_rpc_get=job, - links=AsyncJobLinks( - status_href=f"{request.app.router['get_async_job_status'].url_for(job_id=str(job.job_id))}", - abort_href=f"{request.app.router['abort_async_job'].url_for(job_id=str(job.job_id))}", - result_href=f"{request.app.router['get_async_job_result'].url_for(job_id=str(job.job_id))}", - ), + TaskGet( + task_id=f"{job.job_id}", + task_name=f"{job.job_id}", + status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(job_id=str(job.job_id))))}", + abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(job_id=str(job.job_id))))}", + result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(job_id=str(job.job_id))))}", ) for job in user_async_jobs ], @@ -544,13 +544,12 @@ class _PathParams(BaseModel): ) _job_id = f"{async_job_rpc_status.job_id}" return create_data_response( - StorageAsyncJobStatus.from_rpc_schema( - async_job_rpc_status=async_job_rpc_status, - links=AsyncJobLinks( - status_href=f"{request.app.router['get_async_job_status'].url_for(job_id=_job_id)}", - abort_href=f"{request.app.router['abort_async_job'].url_for(job_id=_job_id)}", - result_href=f"{request.app.router['get_async_job_result'].url_for(job_id=_job_id)}", + TaskStatus( + task_progress=TaskProgress( + task_id=_job_id, percent=async_job_rpc_status.progress.actual_value ), + done=async_job_rpc_status.done, + started=None, ), status=status.HTTP_200_OK, ) @@ -579,7 +578,7 @@ class _PathParams(BaseModel): user_id=_req_ctx.user_id, product_name=_req_ctx.product_name ), ) - return web.Response(status=status.HTTP_200_OK) + return web.Response(status=status.HTTP_204_NO_CONTENT) @routes.get( @@ -607,6 +606,6 @@ class _PathParams(BaseModel): ) 
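+    # NOTE: only the happy-path result is enveloped here; failures are not
+    # encoded in TaskResult because get_result raises (JobError,
+    # JobAbortedError, ...) and the storage exception handlers translate
+    # those into HTTP statuses, hence error=None below.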
return create_data_response( - StorageAsyncJobResult.from_rpc_schema(async_job_rpc_result), + TaskResult(result=async_job_rpc_result.result, error=None), status=status.HTTP_200_OK, ) From 842ab75e706b41ac48627c01fe4cf24dcd950065 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 17 Mar 2025 16:42:34 +0100 Subject: [PATCH 099/131] fix tests --- .../with_dbs/01/storage/test_storage_rpc.py | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 59cc4f4320ad..1a667aa91293 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -8,6 +8,11 @@ import pytest from aiohttp.test_utils import TestClient from faker import Faker +from models_library.api_schemas_long_running_tasks.tasks import ( + TaskGet, + TaskResult, + TaskStatus, +) from models_library.api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobAbort, AsyncJobGet, @@ -30,7 +35,6 @@ AsyncJobLinks, DataExportPost, StorageAsyncJobGet, - StorageAsyncJobResult, ) from models_library.generics import Envelope from models_library.progress_bar import ProgressReport @@ -45,7 +49,7 @@ ) from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from simcore_postgres_database.models.users import UserRole -from simcore_service_webserver.storage._rest import StorageAsyncJobStatus +from yarl import URL _faker = Faker() _user_roles: Final[list[UserRole]] = [ @@ -113,7 +117,7 @@ async def test_data_export( ) assert response.status == expected_status if response.status == status.HTTP_202_ACCEPTED: - Envelope[StorageAsyncJobGet].model_validate(await response.json()) + Envelope[TaskGet].model_validate(await response.json()) @pytest.mark.parametrize("user_role", _user_roles) @@ -147,7 +151,7 @@ async def test_get_async_jobs_status( assert response.status == expected_status if response.status == status.HTTP_200_OK: response_body_data = ( - Envelope[StorageAsyncJobGet].model_validate(await response.json()).data + Envelope[TaskStatus].model_validate(await response.json()).data ) assert response_body_data is not None @@ -158,7 +162,7 @@ async def test_get_async_jobs_status( [ ( AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), - status.HTTP_200_OK, + status.HTTP_204_NO_CONTENT, ), (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), ], @@ -242,12 +246,12 @@ async def test_get_user_async_jobs( response = await client.get("/v0/storage/async-jobs") assert response.status == expected_status if response.status == status.HTTP_200_OK: - Envelope[list[StorageAsyncJobGet]].model_validate(await response.json()) + Envelope[list[TaskGet]].model_validate(await response.json()) @pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( - "http_method, href, backend_method, backend_object, return_schema", + "http_method, href, backend_method, backend_object, return_status, return_schema", [ ( "GET", @@ -258,13 +262,15 @@ async def test_get_user_async_jobs( progress=ProgressReport(actual_value=0.5, total=1.0), done=False, ), - StorageAsyncJobStatus, + status.HTTP_200_OK, + TaskStatus, ), ( "POST", "abort_href", abort.__name__, AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), + status.HTTP_204_NO_CONTENT, None, ), ( @@ -272,7 +278,8 @@ async def test_get_user_async_jobs( "result_href", 
get_result.__name__, AsyncJobResult(result=None), - StorageAsyncJobResult, + status.HTTP_200_OK, + TaskResult, ), ], ) @@ -286,6 +293,7 @@ async def test_get_async_job_links( href: str, backend_method: str, backend_object: Any, + return_status: int, return_schema: OutputSchema | None, ): create_storage_rpc_client_mock( @@ -300,16 +308,14 @@ async def test_get_async_job_links( "/v0/storage/locations/0/export-data", data=_body.model_dump_json() ) assert response.status == status.HTTP_202_ACCEPTED - response_body_data = ( - Envelope[StorageAsyncJobGet].model_validate(await response.json()).data - ) + response_body_data = Envelope[TaskGet].model_validate(await response.json()).data assert response_body_data is not None # Call the different links and check the correct model and return status create_storage_rpc_client_mock(backend_method, backend_object) response = await client.request( - http_method, getattr(response_body_data.links, href) + http_method, URL(getattr(response_body_data, href)).path ) - assert response.status == status.HTTP_200_OK + assert response.status == return_status if return_schema: Envelope[return_schema].model_validate(await response.json()) From 2240282a7377110a4bdf3f64330cdcd37a3461f5 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 17 Mar 2025 16:48:10 +0100 Subject: [PATCH 100/131] update openapi specs --- api/specs/web-server/_storage.py | 16 +- .../api/v0/openapi.yaml | 175 +++++------------- 2 files changed, 57 insertions(+), 134 deletions(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index 02ed681287bf..7718b1fc3c44 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -7,6 +7,11 @@ from typing import Annotated, Any, TypeAlias from fastapi import APIRouter, Depends, Query, status +from models_library.api_schemas_long_running_tasks.tasks import ( + TaskGet, + TaskResult, + TaskStatus, +) from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobId from models_library.api_schemas_storage.storage_schemas import ( FileLocation, @@ -23,8 +28,6 @@ DataExportPost, ListPathsQueryParams, StorageAsyncJobGet, - StorageAsyncJobResult, - StorageAsyncJobStatus, StorageLocationPathParams, StoragePathComputeSizeParams, ) @@ -216,7 +219,7 @@ async def is_completed_upload_file( @router.post( "/storage/locations/{location_id}/export-data", - response_model=Envelope[StorageAsyncJobGet], + response_model=Envelope[TaskGet], name="export_data", description="Export data", responses=_data_export_responses, @@ -227,7 +230,7 @@ async def export_data(data_export: DataExportPost, location_id: LocationID): @router.get( "/storage/async-jobs/{job_id}/status", - response_model=Envelope[StorageAsyncJobStatus], + response_model=Envelope[TaskStatus], name="get_async_job_status", responses=_data_export_responses, ) @@ -239,6 +242,7 @@ async def get_async_job_status(job_id: AsyncJobId): "/storage/async-jobs/{job_id}:abort", name="abort_async_job", responses=_data_export_responses, + status_code=status.HTTP_204_NO_CONTENT, ) async def abort_async_job(job_id: AsyncJobId): """aborts execution of an async job""" @@ -246,7 +250,7 @@ async def abort_async_job(job_id: AsyncJobId): @router.get( "/storage/async-jobs/{job_id}/result", - response_model=Envelope[StorageAsyncJobResult], + response_model=Envelope[TaskResult], name="get_async_job_result", responses=_data_export_responses, ) @@ -256,7 +260,7 @@ async def get_async_job_result(job_id: AsyncJobId): @router.get( "/storage/async-jobs", - 
response_model=Envelope[list[StorageAsyncJobGet]], + response_model=Envelope[list[TaskResult]], name="get_async_jobs", responses=_data_export_responses, ) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 14ab5fcec858..754f22bf731a 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -6439,7 +6439,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Envelope_StorageAsyncJobGet_' + $ref: '#/components/schemas/Envelope_TaskGet_' '404': content: application/json: @@ -6485,7 +6485,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Envelope_StorageAsyncJobStatus_' + $ref: '#/components/schemas/Envelope_TaskStatus_' '404': content: application/json: @@ -6526,11 +6526,8 @@ paths: format: uuid title: Job Id responses: - '200': + '204': description: Successful Response - content: - application/json: - schema: {} '404': content: application/json: @@ -6576,7 +6573,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Envelope_StorageAsyncJobResult_' + $ref: '#/components/schemas/Envelope_TaskResult_' '404': content: application/json: @@ -6614,7 +6611,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Envelope_list_StorageAsyncJobGet__' + $ref: '#/components/schemas/Envelope_list_TaskResult__' '404': description: Not Found content: @@ -9610,24 +9607,11 @@ components: title: Error type: object title: Envelope[StorageAsyncJobGet] - Envelope_StorageAsyncJobResult_: - properties: - data: - anyOf: - - $ref: '#/components/schemas/StorageAsyncJobResult' - - type: 'null' - error: - anyOf: - - {} - - type: 'null' - title: Error - type: object - title: Envelope[StorageAsyncJobResult] - Envelope_StorageAsyncJobStatus_: + Envelope_TagGet_: properties: data: anyOf: - - $ref: '#/components/schemas/StorageAsyncJobStatus' + - $ref: '#/components/schemas/TagGet' - type: 'null' error: anyOf: @@ -9635,12 +9619,12 @@ components: - type: 'null' title: Error type: object - title: Envelope[StorageAsyncJobStatus] - Envelope_TagGet_: + title: Envelope[TagGet] + Envelope_TaskGet_: properties: data: anyOf: - - $ref: '#/components/schemas/TagGet' + - $ref: '#/components/schemas/TaskGet' - type: 'null' error: anyOf: @@ -9648,12 +9632,12 @@ components: - type: 'null' title: Error type: object - title: Envelope[TagGet] - Envelope_TaskGet_: + title: Envelope[TaskGet] + Envelope_TaskResult_: properties: data: anyOf: - - $ref: '#/components/schemas/TaskGet' + - $ref: '#/components/schemas/TaskResult' - type: 'null' error: anyOf: @@ -9661,7 +9645,7 @@ components: - type: 'null' title: Error type: object - title: Envelope[TaskGet] + title: Envelope[TaskResult] Envelope_TaskStatus_: properties: data: @@ -10255,12 +10239,12 @@ components: title: Error type: object title: Envelope[list[ServiceOutputGet]] - Envelope_list_StorageAsyncJobGet__: + Envelope_list_TagGet__: properties: data: anyOf: - items: - $ref: '#/components/schemas/StorageAsyncJobGet' + $ref: '#/components/schemas/TagGet' type: array - type: 'null' title: Data @@ -10270,13 +10254,13 @@ components: - type: 'null' title: Error type: object - title: Envelope[list[StorageAsyncJobGet]] - Envelope_list_TagGet__: + title: Envelope[list[TagGet]] + Envelope_list_TagGroupGet__: properties: data: anyOf: - items: - $ref: '#/components/schemas/TagGet' + $ref: 
'#/components/schemas/TagGroupGet' type: array - type: 'null' title: Data @@ -10286,13 +10270,13 @@ components: - type: 'null' title: Error type: object - title: Envelope[list[TagGet]] - Envelope_list_TagGroupGet__: + title: Envelope[list[TagGroupGet]] + Envelope_list_TaskGet__: properties: data: anyOf: - items: - $ref: '#/components/schemas/TagGroupGet' + $ref: '#/components/schemas/TaskGet' type: array - type: 'null' title: Data @@ -10302,13 +10286,13 @@ components: - type: 'null' title: Error type: object - title: Envelope[list[TagGroupGet]] - Envelope_list_TaskGet__: + title: Envelope[list[TaskGet]] + Envelope_list_TaskResult__: properties: data: anyOf: - items: - $ref: '#/components/schemas/TaskGet' + $ref: '#/components/schemas/TaskResult' type: array - type: 'null' title: Data @@ -10318,7 +10302,7 @@ components: - type: 'null' title: Error type: object - title: Envelope[list[TaskGet]] + title: Envelope[list[TaskResult]] Envelope_list_UserForAdminGet__: properties: data: @@ -13451,59 +13435,6 @@ components: - productName - ui title: ProductUIGet - ProgressReport: - properties: - actual_value: - type: number - title: Actual Value - total: - type: number - title: Total - default: 1.0 - attempt: - type: integer - title: Attempt - default: 0 - unit: - anyOf: - - type: string - const: Byte - - type: 'null' - title: Unit - message: - anyOf: - - $ref: '#/components/schemas/ProgressStructuredMessage' - - type: 'null' - type: object - required: - - actual_value - title: ProgressReport - ProgressStructuredMessage: - properties: - description: - type: string - title: Description - current: - type: number - title: Current - total: - type: integer - title: Total - unit: - anyOf: - - type: string - - type: 'null' - title: Unit - sub: - anyOf: - - $ref: '#/components/schemas/ProgressStructuredMessage' - - type: 'null' - type: object - required: - - description - - current - - total - title: ProgressStructuredMessage ProjectCopyOverride: properties: name: @@ -15168,37 +15099,6 @@ components: - jobId - links title: StorageAsyncJobGet - StorageAsyncJobResult: - properties: - result: - anyOf: - - {} - - type: 'null' - title: Result - type: object - required: - - result - title: StorageAsyncJobResult - StorageAsyncJobStatus: - properties: - jobId: - type: string - format: uuid - title: Jobid - progress: - $ref: '#/components/schemas/ProgressReport' - done: - type: boolean - title: Done - links: - $ref: '#/components/schemas/AsyncJobLinks' - type: object - required: - - jobId - - progress - - done - - links - title: StorageAsyncJobStatus Structure: properties: key: @@ -15495,6 +15395,23 @@ components: to be defined as a float bound between 0.0 and 1.0' + TaskResult: + properties: + result: + anyOf: + - {} + - type: 'null' + title: Result + error: + anyOf: + - {} + - type: 'null' + title: Error + type: object + required: + - result + - error + title: TaskResult TaskStatus: properties: task_progress: @@ -15503,8 +15420,10 @@ components: type: boolean title: Done started: - type: string - format: date-time + anyOf: + - type: string + format: date-time + - type: 'null' title: Started type: object required: From 9010342248cee2e82f0097251689ac8b12a2b1ce Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Mon, 17 Mar 2025 17:46:33 +0100 Subject: [PATCH 101/131] move async jobs endpoints to tasks --- api/specs/web-server/_storage.py | 4 +- .../long_running_tasks.py | 2 +- .../storage/_rest.py | 145 +------------ .../{storage => tasks}/_exception_handlers.py | 0 .../simcore_service_webserver/tasks/_rest.py | 190 
++++++++++++++++++ 5 files changed, 194 insertions(+), 147 deletions(-) rename services/web/server/src/simcore_service_webserver/{storage => tasks}/_exception_handlers.py (100%) create mode 100644 services/web/server/src/simcore_service_webserver/tasks/_rest.py diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index 7718b1fc3c44..6fc15ac6d121 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -37,10 +37,10 @@ from pydantic import AnyUrl, ByteSize from servicelib.fastapi.rest_pagination import CustomizedPathsCursorPage from simcore_service_webserver._meta import API_VTAG -from simcore_service_webserver.storage._exception_handlers import ( +from simcore_service_webserver.storage.schemas import DatasetMetaData, FileMetaData +from simcore_service_webserver.tasks._exception_handlers import ( _TO_HTTP_ERROR_MAP as data_export_http_error_map, ) -from simcore_service_webserver.storage.schemas import DatasetMetaData, FileMetaData router = APIRouter( prefix=f"/{API_VTAG}", diff --git a/services/web/server/src/simcore_service_webserver/long_running_tasks.py b/services/web/server/src/simcore_service_webserver/long_running_tasks.py index 29dd8d7caec9..c2f842eab7a4 100644 --- a/services/web/server/src/simcore_service_webserver/long_running_tasks.py +++ b/services/web/server/src/simcore_service_webserver/long_running_tasks.py @@ -29,7 +29,7 @@ async def _test_task_context_decorator( def setup_long_running_tasks(app: web.Application) -> None: setup( app, - router_prefix=f"/{API_VTAG}/tasks", + router_prefix=f"/{API_VTAG}/tasks-legacy", handler_check_decorator=login_required, task_request_context_decorator=_webserver_request_context_decorator, ) diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index b33db692a21e..c9fe87f8fa61 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -7,20 +7,14 @@ import urllib.parse from typing import Any, Final, NamedTuple from urllib.parse import quote, unquote -from uuid import UUID from aiohttp import ClientTimeout, web -from models_library.api_schemas_long_running_tasks.base import TaskProgress from models_library.api_schemas_long_running_tasks.tasks import ( TaskGet, - TaskResult, - TaskStatus, ) from models_library.api_schemas_rpc_async_jobs.async_jobs import ( - AsyncJobId, AsyncJobNameData, ) -from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE from models_library.api_schemas_storage.storage_schemas import ( FileUploadCompleteResponse, FileUploadCompletionBody, @@ -44,12 +38,6 @@ ) from servicelib.aiohttp.rest_responses import create_data_response from servicelib.common_headers import X_FORWARDED_PROTO -from servicelib.rabbitmq.rpc_interfaces.async_jobs.async_jobs import ( - abort, - get_result, - get_status, - list_jobs, -) from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from servicelib.rabbitmq.rpc_interfaces.storage.paths import ( compute_path_size as remote_compute_path_size, @@ -63,7 +51,7 @@ from ..models import RequestContext from ..rabbitmq import get_rabbitmq_rpc_client from ..security.decorators import permission_required -from ._exception_handlers import handle_data_export_exceptions +from ..tasks._exception_handlers import handle_data_export_exceptions from .schemas import StorageFileIDStr from .settings import StorageSettings, 
get_plugin_settings @@ -478,134 +466,3 @@ class _PathParams(BaseModel): ), status=status.HTTP_202_ACCEPTED, ) - - -@routes.get( - _storage_prefix + "/async-jobs", - name="get_async_jobs", -) -@login_required -@permission_required("storage.files.*") -@handle_data_export_exceptions -async def get_async_jobs(request: web.Request) -> web.Response: - _req_ctx = RequestContext.model_validate(request) - - rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - - user_async_jobs = await list_jobs( - rabbitmq_rpc_client=rabbitmq_rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - job_id_data=AsyncJobNameData( - user_id=_req_ctx.user_id, product_name=_req_ctx.product_name - ), - filter_="", - ) - return create_data_response( - [ - TaskGet( - task_id=f"{job.job_id}", - task_name=f"{job.job_id}", - status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(job_id=str(job.job_id))))}", - abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(job_id=str(job.job_id))))}", - result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(job_id=str(job.job_id))))}", - ) - for job in user_async_jobs - ], - status=status.HTTP_200_OK, - ) - - -class _StorageAsyncJobId(BaseModel): - job_id: AsyncJobId - - -@routes.get( - _storage_prefix + "/async-jobs/{job_id}/status", - name="get_async_job_status", -) -@login_required -@permission_required("storage.files.*") -@handle_data_export_exceptions -async def get_async_job_status(request: web.Request) -> web.Response: - class _PathParams(BaseModel): - job_id: UUID - - _req_ctx = RequestContext.model_validate(request) - rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - - async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) - async_job_rpc_status = await get_status( - rabbitmq_rpc_client=rabbitmq_rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - job_id=async_job_get.job_id, - job_id_data=AsyncJobNameData( - user_id=_req_ctx.user_id, product_name=_req_ctx.product_name - ), - ) - _job_id = f"{async_job_rpc_status.job_id}" - return create_data_response( - TaskStatus( - task_progress=TaskProgress( - task_id=_job_id, percent=async_job_rpc_status.progress.actual_value - ), - done=async_job_rpc_status.done, - started=None, - ), - status=status.HTTP_200_OK, - ) - - -@routes.post( - _storage_prefix + "/async-jobs/{job_id}:abort", - name="abort_async_job", -) -@login_required -@permission_required("storage.files.*") -@handle_data_export_exceptions -async def abort_async_job(request: web.Request) -> web.Response: - class _PathParams(BaseModel): - job_id: UUID - - _req_ctx = RequestContext.model_validate(request) - - rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) - await abort( - rabbitmq_rpc_client=rabbitmq_rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - job_id=async_job_get.job_id, - job_id_data=AsyncJobNameData( - user_id=_req_ctx.user_id, product_name=_req_ctx.product_name - ), - ) - return web.Response(status=status.HTTP_204_NO_CONTENT) - - -@routes.get( - _storage_prefix + "/async-jobs/{job_id}/result", - name="get_async_job_result", -) -@login_required -@permission_required("storage.files.*") -@handle_data_export_exceptions -async def get_async_job_result(request: web.Request) -> web.Response: - class _PathParams(BaseModel): - job_id: UUID - - _req_ctx = RequestContext.model_validate(request) - - rabbitmq_rpc_client = 
get_rabbitmq_rpc_client(request.app) - async_job_get = parse_request_path_parameters_as(_PathParams, request) - async_job_rpc_result = await get_result( - rabbitmq_rpc_client=rabbitmq_rpc_client, - rpc_namespace=STORAGE_RPC_NAMESPACE, - job_id=async_job_get.job_id, - job_id_data=AsyncJobNameData( - user_id=_req_ctx.user_id, product_name=_req_ctx.product_name - ), - ) - - return create_data_response( - TaskResult(result=async_job_rpc_result.result, error=None), - status=status.HTTP_200_OK, - ) diff --git a/services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py b/services/web/server/src/simcore_service_webserver/tasks/_exception_handlers.py similarity index 100% rename from services/web/server/src/simcore_service_webserver/storage/_exception_handlers.py rename to services/web/server/src/simcore_service_webserver/tasks/_exception_handlers.py diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py new file mode 100644 index 000000000000..0cb72d53384e --- /dev/null +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -0,0 +1,190 @@ +"""Handlers exposed by storage subsystem + +Mostly resolves and redirect to storage API +""" + +import logging +from uuid import UUID + +from aiohttp import web +from models_library.api_schemas_long_running_tasks.base import TaskProgress +from models_library.api_schemas_long_running_tasks.tasks import ( + TaskGet, + TaskResult, + TaskStatus, +) +from models_library.api_schemas_rpc_async_jobs.async_jobs import ( + AsyncJobId, + AsyncJobNameData, +) +from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE +from models_library.generics import Envelope +from pydantic import BaseModel +from servicelib.aiohttp import status +from servicelib.aiohttp.client_session import get_client_session +from servicelib.aiohttp.requests_validation import ( + parse_request_path_parameters_as, +) +from servicelib.aiohttp.rest_responses import create_data_response +from servicelib.rabbitmq.rpc_interfaces.async_jobs.async_jobs import ( + abort, + get_result, + get_status, + list_jobs, +) + +from ..login.decorators import login_required +from ..models import RequestContext +from ..rabbitmq import get_rabbitmq_rpc_client +from ..security.decorators import permission_required +from ._exception_handlers import handle_data_export_exceptions + +log = logging.getLogger(__name__) + + +routes = web.RouteTableDef() + + +@routes.get( + "", + name="get_async_jobs", +) +@login_required +@permission_required("storage.files.*") +@handle_data_export_exceptions +async def get_async_jobs(request: web.Request) -> web.Response: + session = get_client_session(request.app) + async with session.request( + "GET", + request.url.with_path(str(request.app.router["list_tasks"])), + ssl=False, + cookies=request.cookies, + ) as resp: + if resp.status != status.HTTP_200_OK: + return web.Response( + status=resp.status, + body=await resp.read(), + content_type=resp.content_type, + ) + inprocess_tasks = ( + Envelope[list[TaskGet]].model_validate_json(await resp.json()).data + ) + assert inprocess_tasks is not None # nosec + + _req_ctx = RequestContext.model_validate(request) + + rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) + + user_async_jobs = await list_jobs( + rabbitmq_rpc_client=rabbitmq_rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id_data=AsyncJobNameData( + user_id=_req_ctx.user_id, product_name=_req_ctx.product_name + ), + filter_="", + ) + 
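# merge the RPC-backed storage jobs with the in-process long-running
+    # tasks collected above, so clients receive one unified task list
+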
return create_data_response( + [ + TaskGet( + task_id=f"{job.job_id}", + task_name=f"{job.job_id}", + status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(job_id=str(job.job_id))))}", + abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(job_id=str(job.job_id))))}", + result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(job_id=str(job.job_id))))}", + ) + for job in user_async_jobs + ] + + inprocess_tasks, + status=status.HTTP_200_OK, + ) + + +class _StorageAsyncJobId(BaseModel): + task_id: AsyncJobId + + +@routes.get( + "/status", + name="get_async_job_status", +) +@login_required +@handle_data_export_exceptions +async def get_async_job_status(request: web.Request) -> web.Response: + + _req_ctx = RequestContext.model_validate(request) + rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) + + async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) + async_job_rpc_status = await get_status( + rabbitmq_rpc_client=rabbitmq_rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id=async_job_get.task_id, + job_id_data=AsyncJobNameData( + user_id=_req_ctx.user_id, product_name=_req_ctx.product_name + ), + ) + _task_id = f"{async_job_rpc_status.job_id}" + return create_data_response( + TaskStatus( + task_progress=TaskProgress( + task_id=_task_id, percent=async_job_rpc_status.progress.actual_value + ), + done=async_job_rpc_status.done, + started=None, + ), + status=status.HTTP_200_OK, + ) + + +@routes.post( + "/{task_id}:abort", + name="abort_async_job", +) +@login_required +@permission_required("storage.files.*") +@handle_data_export_exceptions +async def abort_async_job(request: web.Request) -> web.Response: + + _req_ctx = RequestContext.model_validate(request) + + rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) + async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) + await abort( + rabbitmq_rpc_client=rabbitmq_rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id=async_job_get.task_id, + job_id_data=AsyncJobNameData( + user_id=_req_ctx.user_id, product_name=_req_ctx.product_name + ), + ) + return web.Response(status=status.HTTP_204_NO_CONTENT) + + +@routes.get( + "/{task_id}/result", + name="get_async_job_result", +) +@login_required +@permission_required("storage.files.*") +@handle_data_export_exceptions +async def get_async_job_result(request: web.Request) -> web.Response: + class _PathParams(BaseModel): + job_id: UUID + + _req_ctx = RequestContext.model_validate(request) + + rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) + async_job_get = parse_request_path_parameters_as(_PathParams, request) + async_job_rpc_result = await get_result( + rabbitmq_rpc_client=rabbitmq_rpc_client, + rpc_namespace=STORAGE_RPC_NAMESPACE, + job_id=async_job_get.job_id, + job_id_data=AsyncJobNameData( + user_id=_req_ctx.user_id, product_name=_req_ctx.product_name + ), + ) + + return create_data_response( + TaskResult(result=async_job_rpc_result.result, error=None), + status=status.HTTP_200_OK, + ) From 902c152013aad4d888cfce8efb629864eab96b66 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 08:28:24 +0100 Subject: [PATCH 102/131] make initial tests pass --- .../simcore_service_webserver/application.py | 6 +- .../storage/_rest.py | 6 +- .../simcore_service_webserver/tasks/_rest.py | 16 ++++-- .../simcore_service_webserver/tasks/plugin.py | 16 ++++++ .../with_dbs/01/storage/test_storage_rpc.py | 57 
+++++++++++++------ 5 files changed, 71 insertions(+), 30 deletions(-) create mode 100644 services/web/server/src/simcore_service_webserver/tasks/plugin.py diff --git a/services/web/server/src/simcore_service_webserver/application.py b/services/web/server/src/simcore_service_webserver/application.py index abb11c29ba16..073ebb0c08b1 100644 --- a/services/web/server/src/simcore_service_webserver/application.py +++ b/services/web/server/src/simcore_service_webserver/application.py @@ -1,6 +1,4 @@ -""" Main application - -""" +"""Main application""" import logging from pprint import pformat @@ -8,6 +6,7 @@ from aiohttp import web from servicelib.aiohttp.application import create_safe_application +from simcore_service_webserver.tasks.plugin import setup_tasks from ._meta import WELCOME_DB_LISTENER_MSG, WELCOME_GC_MSG, WELCOME_MSG, info from .activity.plugin import setup_activity @@ -121,6 +120,7 @@ def create_application() -> web.Application: setup_director_v2(app) setup_dynamic_scheduler(app) setup_storage(app) + setup_tasks(app) setup_catalog(app) # resource management diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index c9fe87f8fa61..172c14baa0fa 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -460,9 +460,9 @@ class _PathParams(BaseModel): TaskGet( task_id=_job_id, task_name=_job_id, - status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(job_id=_job_id)))}", - abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(job_id=_job_id)))}", - result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(job_id=_job_id)))}", + status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(task_id=_job_id)))}", + abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(task_id=_job_id)))}", + result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(task_id=_job_id)))}", ), status=status.HTTP_202_ACCEPTED, ) diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py index 0cb72d53384e..d56528e502e5 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_rest.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -4,6 +4,7 @@ """ import logging +from typing import Final from uuid import UUID from aiohttp import web @@ -33,6 +34,7 @@ list_jobs, ) +from .._meta import API_VTAG from ..login.decorators import login_required from ..models import RequestContext from ..rabbitmq import get_rabbitmq_rpc_client @@ -44,9 +46,11 @@ routes = web.RouteTableDef() +_task_prefix: Final[str] = f"/{API_VTAG}/tasks" + @routes.get( - "", + _task_prefix, name="get_async_jobs", ) @login_required @@ -104,7 +108,7 @@ class _StorageAsyncJobId(BaseModel): @routes.get( - "/status", + _task_prefix + "/{task_id}/status", name="get_async_job_status", ) @login_required @@ -137,7 +141,7 @@ async def get_async_job_status(request: web.Request) -> web.Response: @routes.post( - "/{task_id}:abort", + _task_prefix + "/{task_id}:abort", name="abort_async_job", ) @login_required @@ -161,7 +165,7 @@ async def abort_async_job(request: web.Request) -> web.Response: @routes.get( - "/{task_id}/result", + _task_prefix + 
"/{task_id}/result", name="get_async_job_result", ) @login_required @@ -169,7 +173,7 @@ async def abort_async_job(request: web.Request) -> web.Response: @handle_data_export_exceptions async def get_async_job_result(request: web.Request) -> web.Response: class _PathParams(BaseModel): - job_id: UUID + task_id: UUID _req_ctx = RequestContext.model_validate(request) @@ -178,7 +182,7 @@ class _PathParams(BaseModel): async_job_rpc_result = await get_result( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, - job_id=async_job_get.job_id, + job_id=async_job_get.task_id, job_id_data=AsyncJobNameData( user_id=_req_ctx.user_id, product_name=_req_ctx.product_name ), diff --git a/services/web/server/src/simcore_service_webserver/tasks/plugin.py b/services/web/server/src/simcore_service_webserver/tasks/plugin.py new file mode 100644 index 000000000000..541155f785d1 --- /dev/null +++ b/services/web/server/src/simcore_service_webserver/tasks/plugin.py @@ -0,0 +1,16 @@ +import logging + +from aiohttp import web + +from ..rest.plugin import setup_rest +from . import _rest + +_logger = logging.getLogger(__name__) + + +# @app_module_setup( +# __name__, ModuleCategory.ADDON, logger=_logger +# ) +def setup_tasks(app: web.Application): + setup_rest(app) + app.router.add_routes(_rest.routes) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 1a667aa91293..a0d8067214a1 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -61,16 +61,21 @@ ] +API_VERSION: Final[str] = "v0" + + @pytest.fixture -def create_storage_rpc_client_mock(mocker: MockerFixture) -> Callable[[str, Any], None]: - def _(method: str, result_or_exception: Any): +def create_storage_rpc_client_mock( + mocker: MockerFixture, +) -> Callable[[str, str, Any], None]: + def _(module: str, method: str, result_or_exception: Any): def side_effect(*args, **kwargs): if isinstance(result_or_exception, Exception): raise result_or_exception return result_or_exception - for fct in (f"simcore_service_webserver.storage._rest.{method}",): + for fct in (f"{module}.{method}",): mocker.patch(fct, side_effect=side_effect) return _ @@ -99,12 +104,13 @@ async def test_data_export( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, - create_storage_rpc_client_mock: Callable[[str, Any], None], + create_storage_rpc_client_mock: Callable[[str, str, Any], None], faker: Faker, backend_result_or_exception: Any, expected_status: int, ): create_storage_rpc_client_mock( + "simcore_service_webserver.storage._rest", start_data_export.__name__, backend_result_or_exception, ) @@ -113,7 +119,7 @@ async def test_data_export( paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] ) response = await client.post( - "/v0/storage/locations/0/export-data", data=_body.model_dump_json() + f"/{API_VERSION}/storage/locations/0/export-data", data=_body.model_dump_json() ) assert response.status == expected_status if response.status == status.HTTP_202_ACCEPTED: @@ -140,14 +146,18 @@ async def test_get_async_jobs_status( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, - create_storage_rpc_client_mock: Callable[[str, Any], None], + create_storage_rpc_client_mock: Callable[[str, str, Any], None], backend_result_or_exception: Any, expected_status: int, ): _job_id = AsyncJobId(_faker.uuid4()) - 
create_storage_rpc_client_mock(get_status.__name__, backend_result_or_exception) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + get_status.__name__, + backend_result_or_exception, + ) - response = await client.get(f"/v0/storage/async-jobs/{_job_id}/status") + response = await client.get(f"/{API_VERSION}/tasks/{_job_id}/status") assert response.status == expected_status if response.status == status.HTTP_200_OK: response_body_data = ( @@ -180,7 +190,7 @@ async def test_abort_async_jobs( _job_id = AsyncJobId(faker.uuid4()) create_storage_rpc_client_mock(abort.__name__, backend_result_or_exception) - response = await client.post(f"/v0/storage/async-jobs/{_job_id}:abort") + response = await client.post(f"/{API_VERSION}/storage/async-jobs/{_job_id}:abort") assert response.status == expected_status @@ -200,15 +210,19 @@ async def test_get_async_job_result( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, - create_storage_rpc_client_mock: Callable[[str, Any], None], + create_storage_rpc_client_mock: Callable[[str, str, Any], None], faker: Faker, result_or_exception: Any, expected_status: int, ): _job_id = AsyncJobId(faker.uuid4()) - create_storage_rpc_client_mock(get_result.__name__, result_or_exception) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + get_result.__name__, + result_or_exception, + ) - response = await client.get(f"/v0/storage/async-jobs/{_job_id}/result") + response = await client.get(f"/{API_VERSION}/tasks/{_job_id}/result") assert response.status == expected_status @@ -237,13 +251,17 @@ async def test_get_user_async_jobs( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, - create_storage_rpc_client_mock: Callable[[str, Any], None], + create_storage_rpc_client_mock: Callable[[str, str, Any], None], backend_result_or_exception: Any, expected_status: int, ): - create_storage_rpc_client_mock(list_jobs.__name__, backend_result_or_exception) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + list_jobs.__name__, + backend_result_or_exception, + ) - response = await client.get("/v0/storage/async-jobs") + response = await client.get(f"/{API_VERSION}/tasks") assert response.status == expected_status if response.status == status.HTTP_200_OK: Envelope[list[TaskGet]].model_validate(await response.json()) @@ -287,7 +305,7 @@ async def test_get_async_job_links( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, - create_storage_rpc_client_mock: Callable[[str, Any], None], + create_storage_rpc_client_mock: Callable[[str, str, Any], None], faker: Faker, http_method: str, href: str, @@ -297,6 +315,7 @@ async def test_get_async_job_links( return_schema: OutputSchema | None, ): create_storage_rpc_client_mock( + "simcore_service_webserver.storage._rest", start_data_export.__name__, AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), ) @@ -305,14 +324,16 @@ async def test_get_async_job_links( paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] ) response = await client.post( - "/v0/storage/locations/0/export-data", data=_body.model_dump_json() + f"/{API_VERSION}/storage/locations/0/export-data", data=_body.model_dump_json() ) assert response.status == status.HTTP_202_ACCEPTED response_body_data = Envelope[TaskGet].model_validate(await response.json()).data assert response_body_data is not None # Call the different links and check the correct model and return status - create_storage_rpc_client_mock(backend_method, backend_object) + 
create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", backend_method, backend_object + ) response = await client.request( http_method, URL(getattr(response_body_data, href)).path ) From 17872668e88c55212ea08e2be38dd8a6c71f2673 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 08:37:43 +0100 Subject: [PATCH 103/131] polish list jobs endpoint --- .../src/simcore_service_webserver/tasks/_rest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py index d56528e502e5..b46c70be7752 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_rest.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -60,7 +60,7 @@ async def get_async_jobs(request: web.Request) -> web.Response: session = get_client_session(request.app) async with session.request( "GET", - request.url.with_path(str(request.app.router["list_tasks"])), + request.url.with_path(str(request.app.router["list_tasks"].url_for())), ssl=False, cookies=request.cookies, ) as resp: @@ -71,7 +71,7 @@ async def get_async_jobs(request: web.Request) -> web.Response: content_type=resp.content_type, ) inprocess_tasks = ( - Envelope[list[TaskGet]].model_validate_json(await resp.json()).data + Envelope[list[TaskGet]].model_validate_json(await resp.text()).data ) assert inprocess_tasks is not None # nosec @@ -92,9 +92,9 @@ async def get_async_jobs(request: web.Request) -> web.Response: TaskGet( task_id=f"{job.job_id}", task_name=f"{job.job_id}", - status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(job_id=str(job.job_id))))}", - abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(job_id=str(job.job_id))))}", - result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(job_id=str(job.job_id))))}", + status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(task_id=str(job.job_id))))}", + abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(task_id=str(job.job_id))))}", + result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(task_id=str(job.job_id))))}", ) for job in user_async_jobs ] From 30ca456b177b4f949d313f7c2ce37f6d55edc16d Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 08:41:28 +0100 Subject: [PATCH 104/131] minor fix --- services/web/server/src/simcore_service_webserver/tasks/_rest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py index b46c70be7752..e42f63288b80 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_rest.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -61,7 +61,6 @@ async def get_async_jobs(request: web.Request) -> web.Response: async with session.request( "GET", request.url.with_path(str(request.app.router["list_tasks"].url_for())), - ssl=False, cookies=request.cookies, ) as resp: if resp.status != status.HTTP_200_OK: From c8c46eeb36c2c0524c89b8150117ab6bd042f491 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 09:13:17 +0100 Subject: [PATCH 105/131] fix tests --- api/specs/web-server/_long_running_tasks.py | 31 +++++++++---- api/specs/web-server/_storage.py | 43 ------------------- 
api/specs/web-server/_tasks.py | 43 ------------------- .../simcore_service_webserver/tasks/_rest.py | 4 +- .../with_dbs/01/storage/test_storage_rpc.py | 12 ++++-- 5 files changed, 32 insertions(+), 101 deletions(-) delete mode 100644 api/specs/web-server/_tasks.py diff --git a/api/specs/web-server/_long_running_tasks.py b/api/specs/web-server/_long_running_tasks.py index 859bd470d294..33b4b4bccb0f 100644 --- a/api/specs/web-server/_long_running_tasks.py +++ b/api/specs/web-server/_long_running_tasks.py @@ -4,13 +4,17 @@ # pylint: disable=too-many-arguments -from typing import Annotated +from typing import Annotated, Any from fastapi import APIRouter, Depends, status from models_library.generics import Envelope +from models_library.rest_error import EnvelopedError from servicelib.aiohttp.long_running_tasks._routes import _PathParam from servicelib.long_running_tasks._models import TaskGet, TaskStatus from simcore_service_webserver._meta import API_VTAG +from simcore_service_webserver.tasks._exception_handlers import ( + _TO_HTTP_ERROR_MAP as data_export_http_error_map, +) router = APIRouter( prefix=f"/{API_VTAG}", @@ -19,37 +23,46 @@ ], ) +_data_export_responses: dict[int | str, dict[str, Any]] = { + i.status_code: {"model": EnvelopedError} + for i in data_export_http_error_map.values() +} + @router.get( "/tasks", response_model=Envelope[list[TaskGet]], + name="get_async_jobs", + responses=_data_export_responses, ) -def list_tasks(): - ... +def list_tasks(): ... @router.get( "/tasks/{task_id}", response_model=Envelope[TaskStatus], + name="get_async_job_status", + responses=_data_export_responses, ) def get_task_status( _path_params: Annotated[_PathParam, Depends()], -): - ... +): ... @router.delete( "/tasks/{task_id}", + name="abort_async_job", + responses=_data_export_responses, status_code=status.HTTP_204_NO_CONTENT, ) def cancel_and_delete_task( _path_params: Annotated[_PathParam, Depends()], -): - ... +): ... @router.get("/tasks/{task_id}/result") def get_task_result( _path_params: Annotated[_PathParam, Depends()], -): - ... + name="get_async_job_result", + responses=_data_export_responses, +): ... diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index 6fc15ac6d121..cdf03ee2cd4f 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -9,10 +9,7 @@ from fastapi import APIRouter, Depends, Query, status from models_library.api_schemas_long_running_tasks.tasks import ( TaskGet, - TaskResult, - TaskStatus, ) -from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobId from models_library.api_schemas_storage.storage_schemas import ( FileLocation, FileMetaDataGet, @@ -226,43 +223,3 @@ async def is_completed_upload_file( ) async def export_data(data_export: DataExportPost, location_id: LocationID): """Trigger data export. 
Returns async job id for getting status and results""" - - -@router.get( - "/storage/async-jobs/{job_id}/status", - response_model=Envelope[TaskStatus], - name="get_async_job_status", - responses=_data_export_responses, -) -async def get_async_job_status(job_id: AsyncJobId): - """Get async job status""" - - -@router.post( - "/storage/async-jobs/{job_id}:abort", - name="abort_async_job", - responses=_data_export_responses, - status_code=status.HTTP_204_NO_CONTENT, -) -async def abort_async_job(job_id: AsyncJobId): - """aborts execution of an async job""" - - -@router.get( - "/storage/async-jobs/{job_id}/result", - response_model=Envelope[TaskResult], - name="get_async_job_result", - responses=_data_export_responses, -) -async def get_async_job_result(job_id: AsyncJobId): - """Get the result of the async job""" - - -@router.get( - "/storage/async-jobs", - response_model=Envelope[list[TaskResult]], - name="get_async_jobs", - responses=_data_export_responses, -) -async def get_async_jobs(): - """Returns the user's async jobs""" diff --git a/api/specs/web-server/_tasks.py b/api/specs/web-server/_tasks.py deleted file mode 100644 index a2c6fbe14022..000000000000 --- a/api/specs/web-server/_tasks.py +++ /dev/null @@ -1,43 +0,0 @@ -from fastapi import APIRouter -from simcore_service_webserver._meta import API_VTAG - -router = APIRouter( - prefix=f"/{API_VTAG}", - tags=[ - "tasks", - ], -) - - -@router.get("/tasks", response_model=List[TasksGetResponse]) -def list_tasks() -> List[TasksGetResponse]: - pass - - -@router.get( - "/tasks/{task_id}", - response_model=TasksTaskIdGetResponse, - responses={"default": {"model": TasksTaskIdGetResponse1}}, -) -def get_task_status( - task_id: str, -) -> Union[TasksTaskIdGetResponse, TasksTaskIdGetResponse1]: - pass - - -@router.delete( - "/tasks/{task_id}", - response_model=None, - responses={"default": {"model": TasksTaskIdDeleteResponse}}, -) -def cancel_and_delete_task(task_id: str) -> Union[None, TasksTaskIdDeleteResponse]: - pass - - -@router.get( - "/tasks/{task_id}/result", - response_model=None, - responses={"default": {"model": TasksTaskIdResultGetResponse}}, -) -def get_task_result(task_id: str) -> Union[None, TasksTaskIdResultGetResponse]: - pass diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py index e42f63288b80..66414a82b725 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_rest.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -139,8 +139,8 @@ async def get_async_job_status(request: web.Request) -> web.Response: ) -@routes.post( - _task_prefix + "/{task_id}:abort", +@routes.delete( + _task_prefix + "/{task_id}", name="abort_async_job", ) @login_required diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index a0d8067214a1..04150bd39184 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -182,15 +182,19 @@ async def test_abort_async_jobs( user_role: UserRole, logged_user: UserInfoDict, client: TestClient, - create_storage_rpc_client_mock: Callable[[str, Any], None], + create_storage_rpc_client_mock: Callable[[str, str, Any], None], faker: Faker, backend_result_or_exception: Any, expected_status: int, ): _job_id = AsyncJobId(faker.uuid4()) - create_storage_rpc_client_mock(abort.__name__, 
backend_result_or_exception) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + abort.__name__, + backend_result_or_exception, + ) - response = await client.post(f"/{API_VERSION}/storage/async-jobs/{_job_id}:abort") + response = await client.delete(f"/{API_VERSION}/tasks/{_job_id}") assert response.status == expected_status @@ -284,7 +288,7 @@ async def test_get_user_async_jobs( TaskStatus, ), ( - "POST", + "DELETE", "abort_href", abort.__name__, AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), From 9e05a7ac525f8cf2ef19bd17731d271d3ace42cb Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 09:28:25 +0100 Subject: [PATCH 106/131] update openapi specs --- api/specs/web-server/_long_running_tasks.py | 18 +- .../api/v0/openapi.yaml | 318 ++++++------------ 2 files changed, 112 insertions(+), 224 deletions(-) diff --git a/api/specs/web-server/_long_running_tasks.py b/api/specs/web-server/_long_running_tasks.py index 33b4b4bccb0f..8b6fdc957095 100644 --- a/api/specs/web-server/_long_running_tasks.py +++ b/api/specs/web-server/_long_running_tasks.py @@ -32,7 +32,8 @@ @router.get( "/tasks", response_model=Envelope[list[TaskGet]], - name="get_async_jobs", + name="list_tasks", + description="Lists all long running tasks", responses=_data_export_responses, ) def list_tasks(): ... @@ -41,7 +42,8 @@ def list_tasks(): ... @router.get( "/tasks/{task_id}", response_model=Envelope[TaskStatus], - name="get_async_job_status", + name="get_task_status", + description="Retrieves the status of a task", responses=_data_export_responses, ) def get_task_status( @@ -51,7 +53,8 @@ def get_task_status( @router.delete( "/tasks/{task_id}", - name="abort_async_job", + name="cancel_and_delete_task", + description="Cancels and deletes a task", responses=_data_export_responses, status_code=status.HTTP_204_NO_CONTENT, ) @@ -60,9 +63,12 @@ def cancel_and_delete_task( ): ... -@router.get("/tasks/{task_id}/result") +@router.get( + "/tasks/{task_id}/result", + name="get_task_result", + description="Retrieves the result of a task", + responses=_data_export_responses, +) def get_task_result( _path_params: Annotated[_PathParam, Depends()], - name="get_async_job_result", - responses=_data_export_responses, ): ... 
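Read together with the handlers in tasks/_rest.py, the spec above fixes the client-facing contract of the unified tasks API: list tasks, poll status, fetch the result, or cancel. The snippet below is a minimal client-side sketch of that polling loop, not part of this patch series; it assumes an already-authenticated aiohttp session, a hypothetical base_url, and the /v0/tasks routes exactly as declared in _long_running_tasks.py above.

    import asyncio

    import aiohttp


    async def poll_task_result(
        session: aiohttp.ClientSession, base_url: str, task_id: str
    ) -> object:
        # poll GET /v0/tasks/{task_id} until the enveloped TaskStatus reports done
        while True:
            async with session.get(f"{base_url}/v0/tasks/{task_id}") as resp:
                status_envelope = await resp.json()
                if status_envelope["data"]["done"]:
                    break
            await asyncio.sleep(1.0)
        # once done, GET /v0/tasks/{task_id}/result yields the enveloped result
        async with session.get(f"{base_url}/v0/tasks/{task_id}/result") as resp:
            result_envelope = await resp.json()
            return result_envelope["data"]

A caller that wants to give up instead issues DELETE /v0/tasks/{task_id}, which the spec above maps to cancel_and_delete_task and a 204 No Content response.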
diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 754f22bf731a..3eda048bed73 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -2981,6 +2981,7 @@ paths: tags: - long-running-tasks summary: List Tasks + description: Lists all long running tasks operationId: list_tasks responses: '200': @@ -2989,11 +2990,36 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_list_TaskGet__' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '410': + description: Gone + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' /v0/tasks/{task_id}: get: tags: - long-running-tasks summary: Get Task Status + description: Retrieves the status of a task operationId: get_task_status parameters: - name: task_id @@ -3009,10 +3035,35 @@ paths: application/json: schema: $ref: '#/components/schemas/Envelope_TaskStatus_' + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error delete: tags: - long-running-tasks summary: Cancel And Delete Task + description: Cancels and deletes a task operationId: cancel_and_delete_task parameters: - name: task_id @@ -3024,11 +3075,36 @@ paths: responses: '204': description: Successful Response + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error /v0/tasks/{task_id}/result: get: tags: - long-running-tasks summary: Get Task Result + description: Retrieves the result of a task operationId: get_task_result parameters: - name: task_id @@ -3043,6 +3119,30 @@ paths: content: application/json: schema: {} + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Not Found + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Forbidden + '410': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Gone + '500': + content: + application/json: + schema: + $ref: '#/components/schemas/EnvelopedError' + description: Internal Server Error /v0/catalog/licensed-items: get: tags: @@ -6464,178 +6564,6 @@ paths: schema: $ref: '#/components/schemas/EnvelopedError' description: Internal Server Error - 
/v0/storage/async-jobs/{job_id}/status: - get: - tags: - - storage - summary: Get Async Job Status - description: Get async job status - operationId: get_async_job_status - parameters: - - name: job_id - in: path - required: true - schema: - type: string - format: uuid - title: Job Id - responses: - '200': - description: Successful Response - content: - application/json: - schema: - $ref: '#/components/schemas/Envelope_TaskStatus_' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - /v0/storage/async-jobs/{job_id}:abort: - post: - tags: - - storage - summary: Abort Async Job - description: aborts execution of an async job - operationId: abort_async_job - parameters: - - name: job_id - in: path - required: true - schema: - type: string - format: uuid - title: Job Id - responses: - '204': - description: Successful Response - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - /v0/storage/async-jobs/{job_id}/result: - get: - tags: - - storage - summary: Get Async Job Result - description: Get the result of the async job - operationId: get_async_job_result - parameters: - - name: job_id - in: path - required: true - schema: - type: string - format: uuid - title: Job Id - responses: - '200': - description: Successful Response - content: - application/json: - schema: - $ref: '#/components/schemas/Envelope_TaskResult_' - '404': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Not Found - '403': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Forbidden - '410': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Gone - '500': - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - description: Internal Server Error - /v0/storage/async-jobs: - get: - tags: - - storage - summary: Get Async Jobs - description: Returns the user's async jobs - operationId: get_async_jobs - responses: - '200': - description: Successful Response - content: - application/json: - schema: - $ref: '#/components/schemas/Envelope_list_TaskResult__' - '404': - description: Not Found - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '403': - description: Forbidden - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '410': - description: Gone - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' - '500': - description: Internal Server Error - content: - application/json: - schema: - $ref: '#/components/schemas/EnvelopedError' /v0/trash:empty: post: tags: @@ -9633,19 +9561,6 
@@ components: title: Error type: object title: Envelope[TaskGet] - Envelope_TaskResult_: - properties: - data: - anyOf: - - $ref: '#/components/schemas/TaskResult' - - type: 'null' - error: - anyOf: - - {} - - type: 'null' - title: Error - type: object - title: Envelope[TaskResult] Envelope_TaskStatus_: properties: data: @@ -10287,22 +10202,6 @@ components: title: Error type: object title: Envelope[list[TaskGet]] - Envelope_list_TaskResult__: - properties: - data: - anyOf: - - items: - $ref: '#/components/schemas/TaskResult' - type: array - - type: 'null' - title: Data - error: - anyOf: - - {} - - type: 'null' - title: Error - type: object - title: Envelope[list[TaskResult]] Envelope_list_UserForAdminGet__: properties: data: @@ -15395,23 +15294,6 @@ components: to be defined as a float bound between 0.0 and 1.0' - TaskResult: - properties: - result: - anyOf: - - {} - - type: 'null' - title: Result - error: - anyOf: - - {} - - type: 'null' - title: Error - type: object - required: - - result - - error - title: TaskResult TaskStatus: properties: task_progress: From bb32ddc1eccca50a545a236d634692db7ce0862c Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 09:42:37 +0100 Subject: [PATCH 107/131] clean up storage rpc tests --- .../storage/tests/unit/test_data_export.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 8f84497c5c58..d2a23f4c91ac 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -15,9 +15,16 @@ from models_library.api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobGet, AsyncJobId, + AsyncJobNameData, AsyncJobResult, AsyncJobStatus, ) +from models_library.api_schemas_rpc_async_jobs.exceptions import ( + JobAbortedError, + JobError, + JobNotDoneError, + JobSchedulerError, +) from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE from models_library.api_schemas_storage.data_export_async_jobs import ( DataExportTaskStartInput, @@ -34,18 +41,10 @@ from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from settings_library.rabbit import RabbitSettings -from simcore_service_storage.api.rpc._async_jobs import ( - AsyncJobNameData, - JobAbortedError, - JobError, - JobNotDoneError, - JobSchedulerError, - TaskStatus, -) from simcore_service_storage.api.rpc._data_export import AccessRightError from simcore_service_storage.core.settings import ApplicationSettings from simcore_service_storage.modules.celery.client import TaskUUID -from simcore_service_storage.modules.celery.models import TaskState +from simcore_service_storage.modules.celery.models import TaskState, TaskStatus from simcore_service_storage.simcore_s3_dsm import SimcoreS3DataManager pytest_plugins = [ @@ -61,12 +60,6 @@ _faker = Faker() -@pytest.fixture -async def mock_rabbit_setup(mocker: MockerFixture): - # fixture to avoid mocking the rabbit - pass - - @dataclass class _MockCeleryClient: send_task_object: UUID | Exception | None = None From 6e837fa9104e7ef91bc40b75c047dfd2d7f77e18 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 10:58:53 +0100 Subject: [PATCH 108/131] add init file to tasks --- .../web/server/src/simcore_service_webserver/tasks/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 
services/web/server/src/simcore_service_webserver/tasks/__init__.py diff --git a/services/web/server/src/simcore_service_webserver/tasks/__init__.py b/services/web/server/src/simcore_service_webserver/tasks/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From fb98b97a6478d0e08d46743feda41d856e48ace0 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 11:27:51 +0100 Subject: [PATCH 109/131] make mypy happy --- .../src/simcore_service_storage/api/rpc/_async_jobs.py | 2 +- .../src/simcore_service_storage/api/rpc/_data_export.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 836fd0524599..ab0d6063b25f 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -2,7 +2,7 @@ import logging -from celery.exceptions import CeleryError +from celery.exceptions import CeleryError # type: ignore[import-untyped] from fastapi import FastAPI from models_library.api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobGet, diff --git a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py index 14abde05c43f..7fe6612e5e39 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_data_export.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_data_export.py @@ -1,16 +1,16 @@ -from celery.exceptions import CeleryError +from celery.exceptions import CeleryError # type: ignore[import-untyped] from fastapi import FastAPI from models_library.api_schemas_rpc_async_jobs.async_jobs import ( AsyncJobGet, AsyncJobNameData, ) +from models_library.api_schemas_rpc_async_jobs.exceptions import JobSchedulerError from models_library.api_schemas_storage.data_export_async_jobs import ( AccessRightError, DataExportTaskStartInput, InvalidFileIdentifierError, ) from servicelib.rabbitmq import RPCRouter -from simcore_service_storage.api.rpc._async_jobs import JobSchedulerError from ...datcore_dsm import DatCoreDataManager from ...dsm import get_dsm_provider From 8881c8500e47f79f0b494737ff3ef71981abbdf9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 11:43:35 +0100 Subject: [PATCH 110/131] fix location_id parametrization of test --- services/storage/tests/unit/test_data_export.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index d2a23f4c91ac..4c5f6d4828cf 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -233,6 +233,12 @@ async def test_start_data_export_success( assert isinstance(result, AsyncJobGet) +@pytest.mark.parametrize( + "location_id", + [SimcoreS3DataManager.get_location_id()], + ids=[SimcoreS3DataManager.get_location_name()], + indirect=True, +) @pytest.mark.parametrize( "project_params", [ From a30503244072c89b084f82ab5f2acb71c6d1acfb Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 12:00:09 +0100 Subject: [PATCH 111/131] make pylint happy --- .../server/tests/unit/with_dbs/01/storage/test_storage_rpc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py 
b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 04150bd39184..34af66401305 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -1,7 +1,7 @@ -from collections.abc import Callable - +# pylint: disable=too-many-arguments # pylint: disable=redefined-outer-name # pylint: disable=unused-argument +from collections.abc import Callable from pathlib import Path from typing import Any, Final From 9736c84f27d247bb24b74ec858405e63c3c0667e Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 16:18:48 +0100 Subject: [PATCH 112/131] update path size computations --- api/specs/web-server/_storage.py | 3 +- .../rabbitmq/rpc_interfaces/storage/paths.py | 6 +-- .../api/v0/openapi.yaml | 45 +------------------ .../storage/_rest.py | 9 +++- .../with_dbs/01/test_long_running_tasks.py | 16 +++++++ 5 files changed, 29 insertions(+), 50 deletions(-) diff --git a/api/specs/web-server/_storage.py b/api/specs/web-server/_storage.py index cdf03ee2cd4f..56a175d75521 100644 --- a/api/specs/web-server/_storage.py +++ b/api/specs/web-server/_storage.py @@ -24,7 +24,6 @@ from models_library.api_schemas_webserver.storage import ( DataExportPost, ListPathsQueryParams, - StorageAsyncJobGet, StorageLocationPathParams, StoragePathComputeSizeParams, ) @@ -74,7 +73,7 @@ async def list_storage_paths( @router.post( "/storage/locations/{location_id}/paths/{path}:size", - response_model=Envelope[StorageAsyncJobGet], + response_model=Envelope[TaskGet], status_code=status.HTTP_202_ACCEPTED, ) async def compute_path_size(_path: Annotated[StoragePathComputeSizeParams, Depends()]): diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py index d924a94fbe7a..fa504f728119 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py @@ -1,10 +1,10 @@ from pathlib import Path from models_library.api_schemas_rpc_async_jobs.async_jobs import ( + AsyncJobGet, AsyncJobNameData, ) from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE -from models_library.api_schemas_webserver.storage import StorageAsyncJobGet from models_library.projects_nodes_io import LocationID from models_library.rabbitmq_basic_types import RPCMethodName from models_library.users import UserID @@ -20,7 +20,7 @@ async def compute_path_size( product_name: str, location_id: LocationID, path: Path, -) -> tuple[StorageAsyncJobGet, AsyncJobNameData]: +) -> tuple[AsyncJobGet, AsyncJobNameData]: job_id_data = AsyncJobNameData(user_id=user_id, product_name=product_name) async_job_rpc_get = await submit_job( rabbitmq_rpc_client=client, @@ -30,4 +30,4 @@ async def compute_path_size( location_id=location_id, path=path, ) - return StorageAsyncJobGet.from_rpc_schema(async_job_rpc_get), job_id_data + return async_job_rpc_get, job_id_data diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 3eda048bed73..8e4d3af1d877 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -6185,7 +6185,7 @@ paths: content: application/json: schema: - $ref: 
'#/components/schemas/Envelope_TaskGet_' /v0/storage/locations/{location_id}/datasets: get: tags: @@ -7844,23 +7844,6 @@ components: - app_name - version title: AppStatusCheck - AsyncJobLinks: - properties: - statusHref: - type: string - title: Statushref - abortHref: - type: string - title: Aborthref - resultHref: - type: string - title: Resulthref - type: object - required: - - statusHref - - abortHref - - resultHref - title: AsyncJobLinks Author: properties: name: @@ -9522,19 +9505,6 @@ components: title: Error type: object title: Envelope[StatusDiagnosticsGet] - Envelope_StorageAsyncJobGet_: - properties: - data: - anyOf: - - $ref: '#/components/schemas/StorageAsyncJobGet' - - type: 'null' - error: - anyOf: - - {} - - type: 'null' - title: Error - type: object - title: Envelope[StorageAsyncJobGet] Envelope_TagGet_: properties: data: @@ -14985,19 +14955,6 @@ components: - loop_tasks - top_tracemalloc title: StatusDiagnosticsGet - StorageAsyncJobGet: - properties: - jobId: - type: string - format: uuid - title: Jobid - links: - $ref: '#/components/schemas/AsyncJobLinks' - type: object - required: - - jobId - - links - title: StorageAsyncJobGet Structure: properties: key: diff --git a/services/web/server/src/simcore_service_webserver/storage/_rest.py b/services/web/server/src/simcore_service_webserver/storage/_rest.py index 172c14baa0fa..fbc419d90153 100644 --- a/services/web/server/src/simcore_service_webserver/storage/_rest.py +++ b/services/web/server/src/simcore_service_webserver/storage/_rest.py @@ -193,8 +193,15 @@ async def compute_path_size(request: web.Request) -> web.Response: path=path_params.path, ) + _job_id = f"{async_job.job_id}" return create_data_response( - async_job, + TaskGet( + task_id=_job_id, + task_name=_job_id, + status_href=f"{request.url.with_path(str(request.app.router['get_async_job_status'].url_for(task_id=_job_id)))}", + abort_href=f"{request.url.with_path(str(request.app.router['abort_async_job'].url_for(task_id=_job_id)))}", + result_href=f"{request.url.with_path(str(request.app.router['get_async_job_result'].url_for(task_id=_job_id)))}", + ), status=status.HTTP_202_ACCEPTED, ) diff --git a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py index 4e3f10a9c4de..79e13196eb87 100644 --- a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py +++ b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py @@ -63,3 +63,19 @@ async def test_listing_tasks_empty( assert not data return assert data == [] + + +@pytest.mark.parametrize(*_tasks_role_responses()) +async def test_propagation_of_legacy_tasks_failure( + client: TestClient, + logged_user, + expected, +): + assert client.app + list_task_url = client.app.router["list_tasks"].url_for() + resp = await client.get(f"{list_task_url}") + data, error = await assert_status(resp, expected.ok) + if error: + assert not data + return + assert data == [] From c4bc27cbcea347c4cd063c10f8eea6c1953f3347 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Tue, 18 Mar 2025 16:20:21 +0100 Subject: [PATCH 113/131] remove wrongly committed function --- .../unit/with_dbs/01/test_long_running_tasks.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py index 79e13196eb87..4e3f10a9c4de 100644 ---
a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py +++ b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py @@ -63,19 +63,3 @@ async def test_listing_tasks_empty( assert not data return assert data == [] - - -@pytest.mark.parametrize(*_tasks_role_responses()) -async def test_propagation_of_legacy_tasks_failure( - client: TestClient, - logged_user, - expected, -): - assert client.app - list_task_url = client.app.router["list_tasks"].url_for() - resp = await client.get(f"{list_task_url}") - data, error = await assert_status(resp, expected.ok) - if error: - assert not data - return - assert data == [] From b175f85d00fbcdbc69958baba2e117726ebc0e2b Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 09:05:23 +0100 Subject: [PATCH 114/131] remove redundant webserver schemas --- .../api_schemas_webserver/storage.py | 56 +------------------ .../unit/with_dbs/01/storage/test_storage.py | 4 +- .../with_dbs/01/storage/test_storage_rpc.py | 9 +-- 3 files changed, 5 insertions(+), 64 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/storage.py b/packages/models-library/src/models_library/api_schemas_webserver/storage.py index 2724ed16d348..3049bf4d0bdc 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/storage.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/storage.py @@ -1,25 +1,18 @@ from pathlib import Path -from typing import Annotated, Any +from typing import Annotated from pydantic import BaseModel, Field -from ..api_schemas_rpc_async_jobs.async_jobs import ( - AsyncJobGet, - AsyncJobId, - AsyncJobResult, - AsyncJobStatus, -) from ..api_schemas_storage.data_export_async_jobs import DataExportTaskStartInput from ..api_schemas_storage.storage_schemas import ( DEFAULT_NUMBER_OF_PATHS_PER_PAGE, MAX_NUMBER_OF_PATHS_PER_PAGE, ) -from ..progress_bar import ProgressReport from ..projects_nodes_io import LocationID, StorageFileID from ..rest_pagination import ( CursorQueryParameters, ) -from ._base import InputSchema, OutputSchema +from ._base import InputSchema class StorageLocationPathParams(BaseModel): @@ -51,48 +44,3 @@ def to_rpc_schema(self, location_id: LocationID) -> DataExportTaskStartInput: file_and_folder_ids=self.paths, location_id=location_id, ) - - -class AsyncJobLinks(OutputSchema): - status_href: str - abort_href: str - result_href: str - - -class StorageAsyncJobGet(OutputSchema): - job_id: AsyncJobId - links: AsyncJobLinks - - @classmethod - def from_rpc_schema( - cls, *, async_job_rpc_get: AsyncJobGet, links: AsyncJobLinks - ) -> "StorageAsyncJobGet": - return StorageAsyncJobGet(job_id=async_job_rpc_get.job_id, links=links) - - -class StorageAsyncJobStatus(OutputSchema): - job_id: AsyncJobId - progress: ProgressReport - done: bool - links: AsyncJobLinks - - @classmethod - def from_rpc_schema( - cls, *, async_job_rpc_status: AsyncJobStatus, links: AsyncJobLinks - ) -> "StorageAsyncJobStatus": - return StorageAsyncJobStatus( - job_id=async_job_rpc_status.job_id, - progress=async_job_rpc_status.progress, - done=async_job_rpc_status.done, - links=links, - ) - - -class StorageAsyncJobResult(OutputSchema): - result: Any | None - - @classmethod - def from_rpc_schema( - cls, async_job_rpc_result: AsyncJobResult - ) -> "StorageAsyncJobResult": - return StorageAsyncJobResult(result=async_job_rpc_result.result) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py 
b/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py index 9eb5d42407f3..01c2bedc92dc 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py @@ -11,6 +11,7 @@ from aiohttp.test_utils import TestClient from faker import Faker from fastapi_pagination.cursor import CursorPage +from models_library.api_schemas_long_running_tasks.tasks import TaskGet from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobGet, AsyncJobId from models_library.api_schemas_storage.storage_schemas import ( DatasetMetaDataGet, @@ -19,7 +20,6 @@ FileUploadSchema, PathMetaDataGet, ) -from models_library.api_schemas_webserver.storage import StorageAsyncJobGet from models_library.projects_nodes_io import LocationID, StorageFileID from pydantic import TypeAdapter from pytest_mock import MockerFixture @@ -146,7 +146,7 @@ async def test_compute_path_size( resp = await client.post(f"{url}") data, error = await assert_status(resp, expected) if not error: - TypeAdapter(StorageAsyncJobGet).validate_python(data) + TypeAdapter(TaskGet).validate_python(data) @pytest.mark.parametrize( diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 34af66401305..4f10a1733224 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -32,9 +32,7 @@ ) from models_library.api_schemas_webserver._base import OutputSchema from models_library.api_schemas_webserver.storage import ( - AsyncJobLinks, DataExportPost, - StorageAsyncJobGet, ) from models_library.generics import Envelope from models_library.progress_bar import ProgressReport @@ -236,13 +234,8 @@ async def test_get_async_job_result( [ ( [ - StorageAsyncJobGet( + AsyncJobGet( job_id=AsyncJobId(_faker.uuid4()), - links=AsyncJobLinks( - status_href=_faker.uri(), - abort_href=_faker.uri(), - result_href=_faker.uri(), - ), ) ], status.HTTP_200_OK, From 1d714b841593f0b77a3b68510e3e15f4314d28b7 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 10:31:57 +0100 Subject: [PATCH 115/131] add test for case when legacy task listing fails --- .../with_dbs/01/test_long_running_tasks.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py index 4e3f10a9c4de..efeb123884fa 100644 --- a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py +++ b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py @@ -7,6 +7,8 @@ import pytest from aiohttp.test_utils import TestClient +from faker import Faker +from pytest_mock import MockerFixture from pytest_simcore.helpers.assert_checks import assert_status from pytest_simcore.helpers.webserver_parametrizations import ( ExpectedResponse, @@ -63,3 +65,23 @@ async def test_listing_tasks_empty( assert not data return assert data == [] + + +@pytest.mark.parametrize("user_role", [UserRole.GUEST, UserRole.TESTER, UserRole.USER]) +async def test_listing_tasks_with_list_inprocess_tasks_error( + client: TestClient, logged_user, faker: Faker, mocker: MockerFixture +): + assert client.app + + class _DummyTaskManager: + def list_tasks(self, *args, **kwargs): + raise Exception() + + mocker.patch( + 
"servicelib.aiohttp.long_running_tasks._routes.get_tasks_manager", + return_value=_DummyTaskManager(), + ) + + _async_jobs_listing_path = client.app.router["get_async_jobs"].url_for() + resp = await client.request("GET", f"{_async_jobs_listing_path}") + assert resp.status == status.HTTP_500_INTERNAL_SERVER_ERROR From 59a607a458799131d8c191ac0aa9dd571df46c09 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 11:30:00 +0100 Subject: [PATCH 116/131] add job existence check for abort method --- .../api_schemas_rpc_async_jobs/exceptions.py | 4 ++ .../api/rpc/_async_jobs.py | 22 ++++++++++- .../storage/tests/unit/test_data_export.py | 38 +++++++++++++------ 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py index c39af000e90b..8403bdd2ff03 100644 --- a/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py +++ b/packages/models-library/src/models_library/api_schemas_rpc_async_jobs/exceptions.py @@ -9,6 +9,10 @@ class JobSchedulerError(BaseAsyncjobRpcError): msg_template: str = "Celery exception: {exc}" +class JobMissingError(BaseAsyncjobRpcError): + msg_template: str = "Job {job_id} does not exist" + + class JobStatusError(BaseAsyncjobRpcError): msg_template: str = "Could not get status of job {job_id}" diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index ab0d6063b25f..8e40ec61c0a8 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -14,24 +14,42 @@ from models_library.api_schemas_rpc_async_jobs.exceptions import ( JobAbortedError, JobError, + JobMissingError, JobNotDoneError, JobSchedulerError, ) from servicelib.logging_utils import log_catch from servicelib.rabbitmq import RPCRouter -from ...modules.celery import get_celery_client +from ...modules.celery import CeleryTaskQueueClient, get_celery_client from ...modules.celery.models import TaskError, TaskState _logger = logging.getLogger(__name__) router = RPCRouter() -@router.expose(reraise_if_error_type=(JobSchedulerError,)) +async def _assert_job_exists( + *, + job_id: AsyncJobId, + job_id_data: AsyncJobNameData, + celery_client: CeleryTaskQueueClient, +) -> None: + """Raises JobMissingError if job doesn't exist""" + job_ids = await celery_client.get_task_uuids( + task_context=job_id_data.model_dump(), + ) + if not job_id in job_ids: + raise JobMissingError(job_id=f"{job_id}") + + +@router.expose(reraise_if_error_type=(JobSchedulerError, JobMissingError)) async def abort(app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData): assert app # nosec assert job_id_data # nosec try: + await _assert_job_exists( + job_id=job_id, job_id_data=job_id_data, celery_client=get_celery_client(app) + ) await get_celery_client(app).abort_task( task_context=job_id_data.model_dump(), task_uuid=job_id, diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 4c5f6d4828cf..0bc78d334e63 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -22,6 +22,7 @@ from models_library.api_schemas_rpc_async_jobs.exceptions import ( JobAbortedError, JobError, + JobMissingError, JobNotDoneError, JobSchedulerError, ) 
@@ -316,39 +317,54 @@ async def test_start_data_export_access_error( @pytest.mark.parametrize( "mock_celery_client", [ - {"abort_task_object": None}, + { + "abort_task_object": None, + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], + }, ], indirect=True, ) -async def test_abort_data_export( +async def test_abort_data_export_success( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, ): - _job_id = AsyncJobId(_faker.uuid4()) + assert mock_celery_client.get_task_uuids_object is not None + assert not isinstance(mock_celery_client.get_task_uuids_object, Exception) await async_jobs.abort( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id_data=AsyncJobNameData( user_id=_faker.pyint(min_value=1, max_value=100), product_name="osparc" ), - job_id=_job_id, + job_id=next(iter(mock_celery_client.get_task_uuids_object)), ) @pytest.mark.parametrize( - "mock_celery_client", + "mock_celery_client, expected_exception_type", [ - {"abort_task_object": CeleryError("error")}, + ({"abort_task_object": None, "get_task_uuids_object": []}, JobMissingError), + ( + { + "abort_task_object": CeleryError("error"), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], + }, + JobSchedulerError, + ), ], - indirect=True, + indirect=["mock_celery_client"], ) -async def test_abort_data_export_scheduler_error( +async def test_abort_data_export_error( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, + expected_exception_type: type[Exception], ): - _job_id = AsyncJobId(_faker.uuid4()) - with pytest.raises(JobSchedulerError): - _ = await async_jobs.abort( + job_ids = mock_celery_client.get_task_uuids_object + assert job_ids is not None + assert not isinstance(job_ids, Exception) + _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) + with pytest.raises(expected_exception_type): + await async_jobs.abort( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id_data=AsyncJobNameData( From e89d3971c13de5c345a0f1e4b8f77005b879e03a Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 12:00:35 +0100 Subject: [PATCH 117/131] check job existence in status --- .../api/rpc/_async_jobs.py | 5 +++- .../storage/tests/unit/test_data_export.py | 26 ++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 8e40ec61c0a8..3cef5d4eabc1 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -58,7 +58,7 @@ async def abort(app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData) raise JobSchedulerError(exc=f"{exc}") from exc -@router.expose(reraise_if_error_type=(JobSchedulerError,)) +@router.expose(reraise_if_error_type=(JobSchedulerError, JobMissingError)) async def get_status( app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData ) -> AsyncJobStatus: @@ -66,6 +66,9 @@ async def get_status( assert job_id_data # nosec try: + await _assert_job_exists( + job_id=job_id, job_id_data=job_id_data, celery_client=get_celery_client(app) + ) task_status = await get_celery_client(app).get_task_status( task_context=job_id_data.model_dump(), task_uuid=job_id, diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 0bc78d334e63..3dda3617b4aa 100644 --- a/services/storage/tests/unit/test_data_export.py +++ 
b/services/storage/tests/unit/test_data_export.py @@ -405,18 +405,32 @@ async def test_get_data_export_status( @pytest.mark.parametrize( - "mock_celery_client", + "mock_celery_client, expected_exception_type", [ - {"get_task_status_object": CeleryError("error")}, + ( + {"get_task_status_object": None, "get_task_uuids_object": []}, + JobMissingError, + ), + ( + { + "get_task_status_object": CeleryError("error"), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], + }, + JobSchedulerError, + ), ], - indirect=True, + indirect=["mock_celery_client"], ) -async def test_get_data_export_status_scheduler_error( +async def test_get_data_export_status_error( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, + expected_exception_type: type[Exception], ): - _job_id = AsyncJobId(_faker.uuid4()) - with pytest.raises(JobSchedulerError): + job_ids = mock_celery_client.get_task_uuids_object + assert job_ids is not None + assert not isinstance(job_ids, Exception) + _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) + with pytest.raises(expected_exception_type): _ = await async_jobs.get_status( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, From d550797500a3b434b27e6ea19c448f5e32c6bae9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 12:25:13 +0100 Subject: [PATCH 118/131] add job existence check to result method --- .../api/rpc/_async_jobs.py | 4 ++++ services/storage/tests/unit/test_data_export.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 3cef5d4eabc1..88ee2d5404f2 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -89,6 +89,7 @@ async def get_status( JobNotDoneError, JobAbortedError, JobSchedulerError, + JobMissingError, ) ) async def get_result( @@ -99,6 +100,9 @@ async def get_result( assert job_id_data # nosec try: + await _assert_job_exists( + job_id=job_id, job_id_data=job_id_data, celery_client=get_celery_client(app) + ) status = await get_celery_client(app).get_task_status( task_context=job_id_data.model_dump(), task_uuid=job_id, diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 3dda3617b4aa..88da40f9da2e 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -485,6 +485,7 @@ async def test_get_data_export_result_success( progress_report=ProgressReport(actual_value=50), ), "get_task_result_object": _faker.text(), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], }, JobNotDoneError, ), @@ -496,6 +497,7 @@ async def test_get_data_export_result_success( progress_report=ProgressReport(actual_value=100), ), "get_task_result_object": _faker.text(), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], }, JobAbortedError, ), @@ -507,6 +509,7 @@ async def test_get_data_export_result_success( progress_report=ProgressReport(actual_value=100), ), "get_task_result_object": _faker.text(), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], }, JobError, ), @@ -514,9 +517,16 @@ async def test_get_data_export_result_success( { "get_task_status_object": CeleryError("error"), "get_task_result_object": _faker.text(), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], }, JobSchedulerError, ), + ( + { + 
"get_task_uuids_object": [], + }, + JobMissingError, + ), ], indirect=["mock_celery_client"], ) @@ -526,8 +536,10 @@ async def test_get_data_export_result_error( mocker: MockerFixture, expected_exception: type[Exception], ): - mocker.patch("simcore_service_storage.api.rpc._async_jobs") - _job_id = AsyncJobId(_faker.uuid4()) + job_ids = mock_celery_client.get_task_uuids_object + assert job_ids is not None + assert not isinstance(job_ids, Exception) + _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) with pytest.raises(expected_exception): _ = await async_jobs.get_result( From 6ce34952ac3207331ec4012baa855dca912ac2e2 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 12:39:02 +0100 Subject: [PATCH 119/131] fix storage tests --- services/storage/tests/unit/test_data_export.py | 17 +++++++++++------ .../tasks/_exception_handlers.py | 15 ++++++++++----- .../with_dbs/01/storage/test_storage_rpc.py | 10 +++++++--- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 88da40f9da2e..d69de825523d 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -382,7 +382,8 @@ async def test_abort_data_export_error( task_uuid=TaskUUID(_faker.uuid4()), task_state=TaskState.RUNNING, progress_report=ProgressReport(actual_value=0), - ) + ), + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], }, ], indirect=True, @@ -391,7 +392,10 @@ async def test_get_data_export_status( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, ): - _job_id = AsyncJobId(_faker.uuid4()) + job_ids = mock_celery_client.get_task_uuids_object + assert job_ids is not None + assert not isinstance(job_ids, Exception) + _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) result = await async_jobs.get_status( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, @@ -451,6 +455,7 @@ async def test_get_data_export_status_error( progress_report=ProgressReport(actual_value=100), ), "get_task_result_object": "result", + "get_task_uuids_object": [AsyncJobId(_faker.uuid4())], }, ], indirect=True, @@ -458,11 +463,11 @@ async def test_get_data_export_status_error( async def test_get_data_export_result_success( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, - mocker: MockerFixture, ): - mocker.patch("simcore_service_storage.api.rpc._async_jobs") - - _job_id = AsyncJobId(_faker.uuid4()) + job_ids = mock_celery_client.get_task_uuids_object + assert job_ids is not None + assert not isinstance(job_ids, Exception) + _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) result = await async_jobs.get_result( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, diff --git a/services/web/server/src/simcore_service_webserver/tasks/_exception_handlers.py b/services/web/server/src/simcore_service_webserver/tasks/_exception_handlers.py index 8e710f883ecc..8e4f467cf474 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_exception_handlers.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_exception_handlers.py @@ -1,6 +1,7 @@ from models_library.api_schemas_rpc_async_jobs.exceptions import ( JobAbortedError, JobError, + JobMissingError, JobNotDoneError, JobSchedulerError, JobStatusError, @@ -29,23 +30,27 @@ ), JobAbortedError: HttpErrorInfo( status.HTTP_410_GONE, - "Job {job_id} is aborted", + "Task {job_id} is aborted", 
), JobError: HttpErrorInfo( status.HTTP_500_INTERNAL_SERVER_ERROR, - "Job {job_id} failed with exception type {exc_type} and message {exc_msg}", + "Task {job_id} failed with exception type {exc_type} and message {exc_msg}", ), JobNotDoneError: HttpErrorInfo( status.HTTP_404_NOT_FOUND, - "Job {job_id} is not done yet", + "Task {job_id} is not done yet", + ), + JobMissingError: HttpErrorInfo( + status.HTTP_404_NOT_FOUND, + "No task with id: {job_id}", ), JobSchedulerError: HttpErrorInfo( status.HTTP_500_INTERNAL_SERVER_ERROR, - "Encountered a an error with the job scheduling system", + "Encountered an error with the task scheduling system", ), JobStatusError: HttpErrorInfo( status.HTTP_500_INTERNAL_SERVER_ERROR, - "Encountered an error while getting the status of job {job_id}", + "Encountered an error while getting the status of task {job_id}", ), } diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 4f10a1733224..a389bf5fd7c4 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -23,6 +23,7 @@ from models_library.api_schemas_rpc_async_jobs.exceptions import ( JobAbortedError, JobError, + JobMissingError, JobNotDoneError, JobSchedulerError, ) @@ -137,6 +138,7 @@ async def test_data_export( status.HTTP_200_OK, ), (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), ], ids=lambda x: type(x).__name__, ) @@ -173,6 +175,7 @@ async def test_get_async_jobs_status( status.HTTP_204_NO_CONTENT, ), (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), ], ids=lambda x: type(x).__name__, ) @@ -198,13 +201,14 @@ async def test_abort_async_jobs( @pytest.mark.parametrize("user_role", _user_roles) @pytest.mark.parametrize( - "result_or_exception, expected_status", + "backend_result_or_exception, expected_status", [ (JobNotDoneError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), (AsyncJobResult(result=None), status.HTTP_200_OK), (JobError(job_id=_faker.uuid4()), status.HTTP_500_INTERNAL_SERVER_ERROR), (JobAbortedError(job_id=_faker.uuid4()), status.HTTP_410_GONE), (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), ], ids=lambda x: type(x).__name__, ) @@ -214,14 +218,14 @@ async def test_get_async_job_result( client: TestClient, create_storage_rpc_client_mock: Callable[[str, str, Any], None], faker: Faker, - result_or_exception: Any, + backend_result_or_exception: Any, expected_status: int, ): _job_id = AsyncJobId(faker.uuid4()) create_storage_rpc_client_mock( "simcore_service_webserver.tasks._rest", get_result.__name__, - result_or_exception, + backend_result_or_exception, ) response = await client.get(f"/{API_VERSION}/tasks/{_job_id}/result") From 5e6f40894d9cf9d95678ef147be1b97e595a783f Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 12:42:11 +0100 Subject: [PATCH 120/131] fix unused mocker argument --- services/storage/tests/unit/test_data_export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index d69de825523d..b0900bd46de7 100644 ---
b/services/storage/tests/unit/test_data_export.py @@ -538,7 +538,6 @@ async def test_get_data_export_result_success( async def test_get_data_export_result_error( rpc_client: RabbitMQRPCClient, mock_celery_client: _MockCeleryClient, - mocker: MockerFixture, expected_exception: type[Exception], ): job_ids = mock_celery_client.get_task_uuids_object From b0544bd7fe9ef69dbffb9f6c81cf33fdfcd91fdd Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 13:36:12 +0100 Subject: [PATCH 121/131] fix pylint errors --- .../server/tests/unit/with_dbs/01/test_long_running_tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py index efeb123884fa..c157bd21641e 100644 --- a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py +++ b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py @@ -2,6 +2,8 @@ # pylint: disable=too-many-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable +# pylint: disable=no-self-use +# pylint: disable=no-self-argument from typing import Any From 686c9f1aff0bcf6d6b2019e7ac9b5f9d0192bdd0 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 13:38:03 +0100 Subject: [PATCH 122/131] fix import --- .../storage/src/simcore_service_storage/api/rpc/_async_jobs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 88ee2d5404f2..4935c9a6c36a 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -21,7 +21,8 @@ from servicelib.logging_utils import log_catch from servicelib.rabbitmq import RPCRouter -from ...modules.celery import CeleryTaskQueueClient, get_celery_client +from ...modules.celery import get_celery_client +from ...modules.celery.client import CeleryTaskQueueClient from ...modules.celery.models import TaskError, TaskState _logger = logging.getLogger(__name__) From 43b018ecac2d3d662416f74f2f6b483f5b88dd82 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 14:49:26 +0100 Subject: [PATCH 123/131] fix openapi test --- api/specs/web-server/_long_running_tasks.py | 8 ++++---- .../src/simcore_service_webserver/api/v0/openapi.yaml | 8 ++++---- .../server/src/simcore_service_webserver/tasks/_rest.py | 2 +- .../tests/unit/with_dbs/01/storage/test_storage_rpc.py | 2 +- .../server/tests/unit/with_dbs/03/test__openapi_specs.py | 9 ++++++++- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/api/specs/web-server/_long_running_tasks.py b/api/specs/web-server/_long_running_tasks.py index 8b6fdc957095..0dacf42a03bc 100644 --- a/api/specs/web-server/_long_running_tasks.py +++ b/api/specs/web-server/_long_running_tasks.py @@ -36,7 +36,7 @@ description="Lists all long running tasks", responses=_data_export_responses, ) -def list_tasks(): ... +def get_async_jobs(): ... @router.get( @@ -46,7 +46,7 @@ def list_tasks(): ... description="Retrieves the status of a task", responses=_data_export_responses, ) -def get_task_status( +def get_async_job_status( _path_params: Annotated[_PathParam, Depends()], ): ... 
@@ -58,7 +58,7 @@ def get_task_status( responses=_data_export_responses, status_code=status.HTTP_204_NO_CONTENT, ) -def cancel_and_delete_task( +def abort_async_job( _path_params: Annotated[_PathParam, Depends()], ): ... @@ -69,6 +69,6 @@ def cancel_and_delete_task( description="Retrieves the result of a task", responses=_data_export_responses, ) -def get_task_result( +def get_async_job_result( _path_params: Annotated[_PathParam, Depends()], ): ... diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index ae9d127b0082..79d94f09c73b 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -2980,7 +2980,7 @@ paths: - long-running-tasks summary: List Tasks description: Lists all long running tasks - operationId: list_tasks + operationId: get_async_jobs responses: '200': description: Successful Response @@ -3018,7 +3018,7 @@ paths: - long-running-tasks summary: Get Task Status description: Retrieves the status of a task - operationId: get_task_status + operationId: get_async_job_status parameters: - name: task_id in: path @@ -3062,7 +3062,7 @@ paths: - long-running-tasks summary: Cancel And Delete Task description: Cancels and deletes a task - operationId: cancel_and_delete_task + operationId: abort_async_job parameters: - name: task_id in: path @@ -3103,7 +3103,7 @@ paths: - long-running-tasks summary: Get Task Result description: Retrieves the result of a task - operationId: get_task_result + operationId: get_async_job_result parameters: - name: task_id in: path diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py index 66414a82b725..98ca6498e5e8 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_rest.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -107,7 +107,7 @@ class _StorageAsyncJobId(BaseModel): @routes.get( - _task_prefix + "/{task_id}/status", + _task_prefix + "/{task_id}", name="get_async_job_status", ) @login_required diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index a389bf5fd7c4..56459ceeb708 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -157,7 +157,7 @@ async def test_get_async_jobs_status( backend_result_or_exception, ) - response = await client.get(f"/{API_VERSION}/tasks/{_job_id}/status") + response = await client.get(f"/{API_VERSION}/tasks/{_job_id}") assert response.status == expected_status if response.status == status.HTTP_200_OK: response_body_data = ( diff --git a/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py b/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py index 886817bbf263..a3ff168c2fda 100644 --- a/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py +++ b/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py @@ -13,6 +13,7 @@ from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict from pytest_simcore.helpers.typing_env import EnvVarsDict from pytest_simcore.openapi_specs import Entrypoint +from simcore_service_webserver._meta import API_VTAG from simcore_service_webserver.application import create_application from 
simcore_service_webserver.application_settings import get_application_settings from simcore_service_webserver.rest._utils import get_openapi_specs_path @@ -75,7 +76,13 @@ def test_app_named_resources_against_openapi_specs( openapi_specs_entrypoints: set[Entrypoint], app_rest_entrypoints: set[Entrypoint], ): - assert app_rest_entrypoints == openapi_specs_entrypoints + # remove task-legacy routes. These should not be exposed. + required_entry_points = { + e + for e in app_rest_entrypoints + if not e.path.startswith(f"/{API_VTAG}/tasks-legacy") + } + assert required_entry_points == openapi_specs_entrypoints # NOTE: missing here is: # - input schemas (path, query and body) From 0ebfb6cce531f31d35f74f970dddab81540e7140 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 14:51:54 +0100 Subject: [PATCH 124/131] @GitHK use TypeAdapter --- .../servicelib/rabbitmq/rpc_interfaces/storage/data_export.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py index 7e5a67f52af3..567fd148bf42 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py @@ -9,8 +9,6 @@ from ... import RabbitMQRPCClient from ..async_jobs.async_jobs import submit_job -_RPC_METHOD_NAME_ADAPTER = TypeAdapter(RPCMethodName) - async def start_data_export( rabbitmq_rpc_client: RabbitMQRPCClient, *, job_id_data: AsyncJobNameData, **kwargs @@ -18,7 +16,7 @@ async def start_data_export( return await submit_job( rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, - method_name=_RPC_METHOD_NAME_ADAPTER.validate_python("start_data_export"), + method_name=TypeAdapter(RPCMethodName).validate_python("start_data_export"), job_id_data=job_id_data, **kwargs, ) From 6be292ecd8140058ee893de9372ad2ca4524fd0a Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 14:53:41 +0100 Subject: [PATCH 125/131] @GitHK _type -> selection_type --- services/storage/tests/unit/test_data_export.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index b0900bd46de7..2c47ed6596b2 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -167,7 +167,7 @@ class UserWithFile(NamedTuple): indirect=True, ) @pytest.mark.parametrize( - "project_params,_type", + "project_params,selection_type", [ ( ProjectWithFilesParams( @@ -203,7 +203,7 @@ async def test_start_data_export_success( dict[NodeID, dict[SimcoreS3FileID, FileIDDict]], ], user_id: UserID, - _type: Literal["file", "folder"], + selection_type: Literal["file", "folder"], ): _, list_of_files = with_random_project_with_files workspace_files = [ @@ -211,9 +211,9 @@ async def test_start_data_export_success( ] assert len(workspace_files) > 0 file_or_folder_id: SimcoreS3FileID - if _type == "file": + if selection_type == "file": file_or_folder_id = workspace_files[0] - elif _type == "folder": + elif selection_type == "folder": parts = Path(workspace_files[0]).parts parts = parts[0 : parts.index("workspace") + 1] assert len(parts) > 0 From 93fb7ca88fc7bf01f36101f36ec52c8802bb14fa Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 14:55:06 +0100 Subject: [PATCH 126/131] 
to_check -> selection_type @GitHK --- services/storage/tests/unit/test_data_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 2c47ed6596b2..22858dcc0732 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -221,7 +221,7 @@ async def test_start_data_export_success( assert folder.name == "workspace" file_or_folder_id = f"{folder}" else: - pytest.fail("invalid parameter: to_check") + pytest.fail(f"invalid parameter: {selection_type=}") result = await start_data_export( rpc_client, From f49ab7ce9fc1873a2790a22eb91bf7ba245fe22f Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 14:56:37 +0100 Subject: [PATCH 127/131] @GitHK remove comment --- .../server/src/simcore_service_webserver/tasks/plugin.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/tasks/plugin.py b/services/web/server/src/simcore_service_webserver/tasks/plugin.py index 541155f785d1..e9bfdeea222e 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/plugin.py +++ b/services/web/server/src/simcore_service_webserver/tasks/plugin.py @@ -1,16 +1,9 @@ -import logging - from aiohttp import web from ..rest.plugin import setup_rest from . import _rest -_logger = logging.getLogger(__name__) - -# @app_module_setup( -# __name__, ModuleCategory.ADDON, logger=_logger -# ) def setup_tasks(app: web.Application): setup_rest(app) app.router.add_routes(_rest.routes) From ef97f04cc151d77ff27f369d461e452e23a28603 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Wed, 19 Mar 2025 15:11:34 +0100 Subject: [PATCH 128/131] make pylint happy --- .../server/tests/unit/with_dbs/01/test_long_running_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py index c157bd21641e..c6f58f29ee1a 100644 --- a/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py +++ b/services/web/server/tests/unit/with_dbs/01/test_long_running_tasks.py @@ -77,7 +77,7 @@ async def test_listing_tasks_with_list_inprocess_tasks_error( class _DummyTaskManager: def list_tasks(self, *args, **kwargs): - raise Exception() + raise Exception() # pylint: disable=broad-exception-raised mocker.patch( "servicelib.aiohttp.long_running_tasks._routes.get_tasks_manager", From 26d92f1b6fee1a8585413f49b54ebb2a0b8a37bb Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Thu, 20 Mar 2025 09:45:04 +0100 Subject: [PATCH 129/131] add comment for future improvement --- .../web/server/tests/unit/with_dbs/03/test__openapi_specs.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py b/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py index a3ff168c2fda..9c6ad78f4a8d 100644 --- a/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py +++ b/services/web/server/tests/unit/with_dbs/03/test__openapi_specs.py @@ -77,6 +77,9 @@ def test_app_named_resources_against_openapi_specs( app_rest_entrypoints: set[Entrypoint], ): # remove task-legacy routes. These should not be exposed. + # this test compares directly against the openapi specs. 
In future it would be + # cleaner to compare against the fastapi app entry points in specs and + # avoid including the endpoints there required_entry_points = { e for e in app_rest_entrypoints if not e.path.startswith(f"/{API_VTAG}/tasks-legacy") } assert required_entry_points == openapi_specs_entrypoints From 97f641ea4782c9a2bae183eadc2f2195dee748b9 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Thu, 20 Mar 2025 10:42:20 +0100 Subject: [PATCH 130/131] @sanderegg renaming rpc methods --- .../rpc_interfaces/async_jobs/async_jobs.py | 36 +++++++++---------- .../rpc_interfaces/storage/data_export.py | 4 +-- .../rabbitmq/rpc_interfaces/storage/paths.py | 4 +-- .../api/rpc/_async_jobs.py | 22 ++++++------ .../storage/tests/unit/test_data_export.py | 14 ++++---- .../simcore_service_webserver/tasks/_rest.py | 15 +++----- .../unit/with_dbs/01/storage/test_storage.py | 4 +-- .../with_dbs/01/storage/test_storage_rpc.py | 25 ++++++------- 8 files changed, 58 insertions(+), 66 deletions(-) diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py index 245b40771ac9..db81b8d9f58d 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/async_jobs/async_jobs.py @@ -17,7 +17,7 @@ _RPC_METHOD_NAME_ADAPTER = TypeAdapter(RPCMethodName) -async def abort( +async def cancel( rabbitmq_rpc_client: RabbitMQRPCClient, *, rpc_namespace: RPCNamespace, @@ -26,47 +26,47 @@ ) -> None: await rabbitmq_rpc_client.request( rpc_namespace, - _RPC_METHOD_NAME_ADAPTER.validate_python("abort"), + _RPC_METHOD_NAME_ADAPTER.validate_python("cancel"), job_id=job_id, job_id_data=job_id_data, timeout_s=_DEFAULT_TIMEOUT_S, ) -async def get_status( +async def status( rabbitmq_rpc_client: RabbitMQRPCClient, *, rpc_namespace: RPCNamespace, job_id: AsyncJobId, job_id_data: AsyncJobNameData, ) -> AsyncJobStatus: - result = await rabbitmq_rpc_client.request( + _result = await rabbitmq_rpc_client.request( rpc_namespace, - _RPC_METHOD_NAME_ADAPTER.validate_python("get_status"), + _RPC_METHOD_NAME_ADAPTER.validate_python("status"), job_id=job_id, job_id_data=job_id_data, timeout_s=_DEFAULT_TIMEOUT_S, ) - assert isinstance(result, AsyncJobStatus) - return result + assert isinstance(_result, AsyncJobStatus) + return _result -async def get_result( +async def result( rabbitmq_rpc_client: RabbitMQRPCClient, *, rpc_namespace: RPCNamespace, job_id: AsyncJobId, job_id_data: AsyncJobNameData, ) -> AsyncJobResult: - result = await rabbitmq_rpc_client.request( + _result = await rabbitmq_rpc_client.request( rpc_namespace, - _RPC_METHOD_NAME_ADAPTER.validate_python("get_result"), + _RPC_METHOD_NAME_ADAPTER.validate_python("result"), job_id=job_id, job_id_data=job_id_data, timeout_s=_DEFAULT_TIMEOUT_S, ) - assert isinstance(result, AsyncJobResult) - return result + assert isinstance(_result, AsyncJobResult) + return _result async def list_jobs( @@ -76,17 +76,17 @@ async def list_jobs( filter_: str, job_id_data: AsyncJobNameData, ) -> list[AsyncJobGet]: - result: list[AsyncJobGet] = await rabbitmq_rpc_client.request( + _result: list[AsyncJobGet] = await rabbitmq_rpc_client.request( rpc_namespace, _RPC_METHOD_NAME_ADAPTER.validate_python("list_jobs"), filter_=filter_, job_id_data=job_id_data, timeout_s=_DEFAULT_TIMEOUT_S, ) - return result + return _result -async def submit_job( +async def submit( rabbitmq_rpc_client: RabbitMQRPCClient, *, rpc_namespace: RPCNamespace, @@ -94,12 +94,12 @@ async def
submit_job( job_id_data: AsyncJobNameData, **kwargs, ) -> AsyncJobGet: - result = await rabbitmq_rpc_client.request( + _result = await rabbitmq_rpc_client.request( rpc_namespace, _RPC_METHOD_NAME_ADAPTER.validate_python(method_name), job_id_data=job_id_data, **kwargs, timeout_s=_DEFAULT_TIMEOUT_S, ) - assert isinstance(result, AsyncJobGet) # nosec - return result + assert isinstance(_result, AsyncJobGet) # nosec + return _result diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py index 567fd148bf42..cd9770cb688d 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/data_export.py @@ -7,13 +7,13 @@ from pydantic import TypeAdapter from ... import RabbitMQRPCClient -from ..async_jobs.async_jobs import submit_job +from ..async_jobs.async_jobs import submit async def start_data_export( rabbitmq_rpc_client: RabbitMQRPCClient, *, job_id_data: AsyncJobNameData, **kwargs ) -> AsyncJobGet: - return await submit_job( + return await submit( rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, method_name=TypeAdapter(RPCMethodName).validate_python("start_data_export"), diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py index fa504f728119..a549b8fcffc2 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/storage/paths.py @@ -10,7 +10,7 @@ from models_library.users import UserID from ..._client_rpc import RabbitMQRPCClient -from ..async_jobs.async_jobs import submit_job +from ..async_jobs.async_jobs import submit async def compute_path_size( @@ -22,7 +22,7 @@ async def compute_path_size( path: Path, ) -> tuple[AsyncJobGet, AsyncJobNameData]: job_id_data = AsyncJobNameData(user_id=user_id, product_name=product_name) - async_job_rpc_get = await submit_job( + async_job_rpc_get = await submit( rabbitmq_rpc_client=client, rpc_namespace=STORAGE_RPC_NAMESPACE, method_name=RPCMethodName("compute_path_size"), diff --git a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py index 4935c9a6c36a..ba7c920f8aa9 100644 --- a/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py +++ b/services/storage/src/simcore_service_storage/api/rpc/_async_jobs.py @@ -39,12 +39,12 @@ async def _assert_job_exists( job_ids = await celery_client.get_task_uuids( task_context=job_id_data.model_dump(), ) - if not job_id in job_ids: + if job_id not in job_ids: raise JobMissingError(job_id=f"{job_id}") @router.expose(reraise_if_error_type=(JobSchedulerError, JobMissingError)) -async def abort(app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData): +async def cancel(app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData): assert app # nosec assert job_id_data # nosec try: @@ -60,7 +60,7 @@ async def abort(app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData) @router.expose(reraise_if_error_type=(JobSchedulerError, JobMissingError)) -async def get_status( +async def status( app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData ) -> AsyncJobStatus: assert app # nosec @@ -93,7 +93,7 @@ async def get_status( 
JobMissingError, ) ) -async def get_result( +async def result( app: FastAPI, job_id: AsyncJobId, job_id_data: AsyncJobNameData ) -> AsyncJobResult: assert app # nosec @@ -104,31 +104,31 @@ async def get_result( await _assert_job_exists( job_id=job_id, job_id_data=job_id_data, celery_client=get_celery_client(app) ) - status = await get_celery_client(app).get_task_status( + _status = await get_celery_client(app).get_task_status( task_context=job_id_data.model_dump(), task_uuid=job_id, ) - if not status.is_done: + if not _status.is_done: raise JobNotDoneError(job_id=job_id) - result = await get_celery_client(app).get_task_result( + _result = await get_celery_client(app).get_task_result( task_context=job_id_data.model_dump(), task_uuid=job_id, ) except CeleryError as exc: raise JobSchedulerError(exc=f"{exc}") from exc - if status.task_state == TaskState.ABORTED: + if _status.task_state == TaskState.ABORTED: raise JobAbortedError(job_id=job_id) - if status.task_state == TaskState.ERROR: + if _status.task_state == TaskState.ERROR: exc_type = "" exc_msg = "" with log_catch(logger=_logger, reraise=False): - task_error = TaskError.model_validate_json(result) + task_error = TaskError.model_validate_json(_result) exc_type = task_error.exc_type exc_msg = task_error.exc_msg raise JobError(job_id=job_id, exc_type=exc_type, exc_msg=exc_msg) - return AsyncJobResult(result=result) + return AsyncJobResult(result=_result) @router.expose(reraise_if_error_type=(JobSchedulerError,)) diff --git a/services/storage/tests/unit/test_data_export.py b/services/storage/tests/unit/test_data_export.py index 22858dcc0732..05c0f99a176a 100644 --- a/services/storage/tests/unit/test_data_export.py +++ b/services/storage/tests/unit/test_data_export.py @@ -300,7 +300,7 @@ async def test_start_data_export_access_error( faker: Faker, ): with pytest.raises(AccessRightError): - _ = await async_jobs.submit_job( + _ = await async_jobs.submit( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, method_name="start_data_export", @@ -330,7 +330,7 @@ async def test_abort_data_export_success( ): assert mock_celery_client.get_task_uuids_object is not None assert not isinstance(mock_celery_client.get_task_uuids_object, Exception) - await async_jobs.abort( + await async_jobs.cancel( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id_data=AsyncJobNameData( @@ -364,7 +364,7 @@ async def test_abort_data_export_error( assert not isinstance(job_ids, Exception) _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) with pytest.raises(expected_exception_type): - await async_jobs.abort( + await async_jobs.cancel( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id_data=AsyncJobNameData( @@ -396,7 +396,7 @@ async def test_get_data_export_status( assert job_ids is not None assert not isinstance(job_ids, Exception) _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) - result = await async_jobs.get_status( + result = await async_jobs.status( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=_job_id, @@ -435,7 +435,7 @@ async def test_get_data_export_status_error( assert not isinstance(job_ids, Exception) _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) with pytest.raises(expected_exception_type): - _ = await async_jobs.get_status( + _ = await async_jobs.status( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=_job_id, @@ -468,7 +468,7 @@ async def test_get_data_export_result_success( assert job_ids is not None assert not isinstance(job_ids, 
Exception) _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) - result = await async_jobs.get_result( + result = await async_jobs.result( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=_job_id, @@ -546,7 +546,7 @@ async def test_get_data_export_result_error( _job_id = next(iter(job_ids)) if len(job_ids) > 0 else AsyncJobId(_faker.uuid4()) with pytest.raises(expected_exception): - _ = await async_jobs.get_result( + _ = await async_jobs.result( rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=_job_id, diff --git a/services/web/server/src/simcore_service_webserver/tasks/_rest.py b/services/web/server/src/simcore_service_webserver/tasks/_rest.py index 98ca6498e5e8..809870e594af 100644 --- a/services/web/server/src/simcore_service_webserver/tasks/_rest.py +++ b/services/web/server/src/simcore_service_webserver/tasks/_rest.py @@ -27,12 +27,7 @@ parse_request_path_parameters_as, ) from servicelib.aiohttp.rest_responses import create_data_response -from servicelib.rabbitmq.rpc_interfaces.async_jobs.async_jobs import ( - abort, - get_result, - get_status, - list_jobs, -) +from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs from .._meta import API_VTAG from ..login.decorators import login_required @@ -78,7 +73,7 @@ async def get_async_jobs(request: web.Request) -> web.Response: rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) - user_async_jobs = await list_jobs( + user_async_jobs = await async_jobs.list_jobs( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id_data=AsyncJobNameData( @@ -118,7 +113,7 @@ async def get_async_job_status(request: web.Request) -> web.Response: rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) - async_job_rpc_status = await get_status( + async_job_rpc_status = await async_jobs.status( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=async_job_get.task_id, @@ -152,7 +147,7 @@ async def abort_async_job(request: web.Request) -> web.Response: rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) async_job_get = parse_request_path_parameters_as(_StorageAsyncJobId, request) - await abort( + await async_jobs.cancel( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=async_job_get.task_id, @@ -178,7 +173,7 @@ class _PathParams(BaseModel): rabbitmq_rpc_client = get_rabbitmq_rpc_client(request.app) async_job_get = parse_request_path_parameters_as(_PathParams, request) - async_job_rpc_result = await get_result( + async_job_rpc_result = await async_jobs.result( rabbitmq_rpc_client=rabbitmq_rpc_client, rpc_namespace=STORAGE_RPC_NAMESPACE, job_id=async_job_get.task_id, diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py index 01c2bedc92dc..d00f72dcff2e 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py @@ -26,7 +26,7 @@ from pytest_simcore.helpers.assert_checks import assert_status from servicelib.aiohttp import status from servicelib.rabbitmq.rpc_interfaces.async_jobs.async_jobs import ( - submit_job, + submit, ) from simcore_postgres_database.models.users import UserRole @@ -133,7 +133,7 @@ async def test_compute_path_size( backend_result_or_exception: Any, ): create_storage_paths_rpc_client_mock( - submit_job.__name__, + 
submit.__name__, backend_result_or_exception, ) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py index 56459ceeb708..0997249f8fb3 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py @@ -40,12 +40,7 @@ from pytest_mock import MockerFixture from pytest_simcore.helpers.webserver_login import UserInfoDict from servicelib.aiohttp import status -from servicelib.rabbitmq.rpc_interfaces.async_jobs.async_jobs import ( - abort, - get_result, - get_status, - list_jobs, -) +from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from simcore_postgres_database.models.users import UserRole from yarl import URL @@ -153,7 +148,7 @@ async def test_get_async_jobs_status( _job_id = AsyncJobId(_faker.uuid4()) create_storage_rpc_client_mock( "simcore_service_webserver.tasks._rest", - get_status.__name__, + f"async_jobs.{async_jobs.status.__name__}", backend_result_or_exception, ) @@ -191,7 +186,7 @@ async def test_abort_async_jobs( _job_id = AsyncJobId(faker.uuid4()) create_storage_rpc_client_mock( "simcore_service_webserver.tasks._rest", - abort.__name__, + f"async_jobs.{async_jobs.cancel.__name__}", backend_result_or_exception, ) @@ -224,7 +219,7 @@ async def test_get_async_job_result( _job_id = AsyncJobId(faker.uuid4()) create_storage_rpc_client_mock( "simcore_service_webserver.tasks._rest", - get_result.__name__, + f"async_jobs.{async_jobs.result.__name__}", backend_result_or_exception, ) @@ -258,7 +253,7 @@ async def test_get_user_async_jobs( ): create_storage_rpc_client_mock( "simcore_service_webserver.tasks._rest", - list_jobs.__name__, + f"async_jobs.{async_jobs.list_jobs.__name__}", backend_result_or_exception, ) @@ -275,7 +270,7 @@ async def test_get_user_async_jobs( ( "GET", "status_href", - get_status.__name__, + async_jobs.status.__name__, AsyncJobStatus( job_id=AsyncJobId(_faker.uuid4()), progress=ProgressReport(actual_value=0.5, total=1.0), @@ -287,7 +282,7 @@ async def test_get_user_async_jobs( ( "DELETE", "abort_href", - abort.__name__, + async_jobs.cancel.__name__, AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), status.HTTP_204_NO_CONTENT, None, @@ -295,7 +290,7 @@ async def test_get_user_async_jobs( ( "GET", "result_href", - get_result.__name__, + async_jobs.result.__name__, AsyncJobResult(result=None), status.HTTP_200_OK, TaskResult, @@ -333,7 +328,9 @@ async def test_get_async_job_links( # Call the different links and check the correct model and return status create_storage_rpc_client_mock( - "simcore_service_webserver.tasks._rest", backend_method, backend_object + "simcore_service_webserver.tasks._rest", + f"async_jobs.{backend_method}", + backend_object, ) response = await client.request( http_method, URL(getattr(response_body_data, href)).path From f581b63b8362bc8bd6c196030ee1b78f57a0690b Mon Sep 17 00:00:00 2001 From: Mads Bisgaard Date: Thu, 20 Mar 2025 11:29:49 +0100 Subject: [PATCH 131/131] consolidate test files @sanderegg --- .../unit/with_dbs/01/storage/test_storage.py | 329 ++++++++++++++++- .../with_dbs/01/storage/test_storage_rpc.py | 340 ------------------ 2 files changed, 326 insertions(+), 343 deletions(-) delete mode 100644 services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py diff --git 
a/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py index d00f72dcff2e..3ea1ec402306 100644 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py +++ b/services/web/server/tests/unit/with_dbs/01/storage/test_storage.py @@ -4,15 +4,37 @@ # pylint: disable=too-many-arguments from collections.abc import Callable -from typing import Any +from pathlib import Path +from typing import Any, Final from urllib.parse import quote import pytest from aiohttp.test_utils import TestClient from faker import Faker from fastapi_pagination.cursor import CursorPage -from models_library.api_schemas_long_running_tasks.tasks import TaskGet -from models_library.api_schemas_rpc_async_jobs.async_jobs import AsyncJobGet, AsyncJobId +from models_library.api_schemas_long_running_tasks.tasks import ( + TaskGet, + TaskResult, + TaskStatus, +) +from models_library.api_schemas_rpc_async_jobs.async_jobs import ( + AsyncJobAbort, + AsyncJobGet, + AsyncJobId, + AsyncJobResult, + AsyncJobStatus, +) +from models_library.api_schemas_rpc_async_jobs.exceptions import ( + JobAbortedError, + JobError, + JobMissingError, + JobNotDoneError, + JobSchedulerError, +) +from models_library.api_schemas_storage.data_export_async_jobs import ( + AccessRightError, + InvalidFileIdentifierError, +) from models_library.api_schemas_storage.storage_schemas import ( DatasetMetaDataGet, FileLocation, @@ -20,21 +42,40 @@ FileUploadSchema, PathMetaDataGet, ) +from models_library.api_schemas_webserver._base import OutputSchema +from models_library.api_schemas_webserver.storage import ( + DataExportPost, +) +from models_library.generics import Envelope +from models_library.progress_bar import ProgressReport from models_library.projects_nodes_io import LocationID, StorageFileID from pydantic import TypeAdapter from pytest_mock import MockerFixture from pytest_simcore.helpers.assert_checks import assert_status +from pytest_simcore.helpers.webserver_login import UserInfoDict from servicelib.aiohttp import status +from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs from servicelib.rabbitmq.rpc_interfaces.async_jobs.async_jobs import ( submit, ) +from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export from simcore_postgres_database.models.users import UserRole +from yarl import URL API_VERSION = "v0" PREFIX = "/" + API_VERSION + "/storage" +_faker = Faker() +_user_roles: Final[list[UserRole]] = [ + UserRole.GUEST, + UserRole.USER, + UserRole.TESTER, + UserRole.PRODUCT_OWNER, + UserRole.ADMIN, +] + @pytest.mark.parametrize( "user_role,expected", @@ -317,3 +358,285 @@ async def test_upload_file( data, error = await assert_status(resp, status.HTTP_204_NO_CONTENT) assert not error assert not data + + +@pytest.fixture +def create_storage_rpc_client_mock( + mocker: MockerFixture, +) -> Callable[[str, str, Any], None]: + def _(module: str, method: str, result_or_exception: Any): + def side_effect(*args, **kwargs): + if isinstance(result_or_exception, Exception): + raise result_or_exception + + return result_or_exception + + for fct in (f"{module}.{method}",): + mocker.patch(fct, side_effect=side_effect) + + return _ + + +@pytest.mark.parametrize("user_role", _user_roles) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + (AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), status.HTTP_202_ACCEPTED), + ( + InvalidFileIdentifierError(file_id=Path("/my/file")), + 
status.HTTP_404_NOT_FOUND, + ), + ( + AccessRightError( + user_id=_faker.pyint(min_value=0), file_id=Path("/my/file") + ), + status.HTTP_403_FORBIDDEN, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + ], + ids=lambda x: type(x).__name__, +) +async def test_data_export( + user_role: UserRole, + logged_user: UserInfoDict, + client: TestClient, + create_storage_rpc_client_mock: Callable[[str, str, Any], None], + faker: Faker, + backend_result_or_exception: Any, + expected_status: int, +): + create_storage_rpc_client_mock( + "simcore_service_webserver.storage._rest", + start_data_export.__name__, + backend_result_or_exception, + ) + + _body = DataExportPost( + paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] + ) + response = await client.post( + f"/{API_VERSION}/storage/locations/0/export-data", data=_body.model_dump_json() + ) + assert response.status == expected_status + if response.status == status.HTTP_202_ACCEPTED: + Envelope[TaskGet].model_validate(await response.json()) + + +@pytest.mark.parametrize("user_role", _user_roles) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + ( + AsyncJobStatus( + job_id=AsyncJobId(f"{_faker.uuid4()}"), + progress=ProgressReport(actual_value=0.5, total=1.0), + done=False, + ), + status.HTTP_200_OK, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), + ], + ids=lambda x: type(x).__name__, +) +async def test_get_async_jobs_status( + user_role: UserRole, + logged_user: UserInfoDict, + client: TestClient, + create_storage_rpc_client_mock: Callable[[str, str, Any], None], + backend_result_or_exception: Any, + expected_status: int, +): + _job_id = AsyncJobId(_faker.uuid4()) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + f"async_jobs.{async_jobs.status.__name__}", + backend_result_or_exception, + ) + + response = await client.get(f"/{API_VERSION}/tasks/{_job_id}") + assert response.status == expected_status + if response.status == status.HTTP_200_OK: + response_body_data = ( + Envelope[TaskStatus].model_validate(await response.json()).data + ) + assert response_body_data is not None + + +@pytest.mark.parametrize("user_role", _user_roles) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + ( + AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), + status.HTTP_204_NO_CONTENT, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), + ], + ids=lambda x: type(x).__name__, +) +async def test_abort_async_jobs( + user_role: UserRole, + logged_user: UserInfoDict, + client: TestClient, + create_storage_rpc_client_mock: Callable[[str, str, Any], None], + faker: Faker, + backend_result_or_exception: Any, + expected_status: int, +): + _job_id = AsyncJobId(faker.uuid4()) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + f"async_jobs.{async_jobs.cancel.__name__}", + backend_result_or_exception, + ) + + response = await client.delete(f"/{API_VERSION}/tasks/{_job_id}") + assert response.status == expected_status + + +@pytest.mark.parametrize("user_role", _user_roles) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + (JobNotDoneError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), + (AsyncJobResult(result=None), status.HTTP_200_OK), + 
(JobError(job_id=_faker.uuid4()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobAbortedError(job_id=_faker.uuid4()), status.HTTP_410_GONE), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), + ], + ids=lambda x: type(x).__name__, +) +async def test_get_async_job_result( + user_role: UserRole, + logged_user: UserInfoDict, + client: TestClient, + create_storage_rpc_client_mock: Callable[[str, str, Any], None], + faker: Faker, + backend_result_or_exception: Any, + expected_status: int, +): + _job_id = AsyncJobId(faker.uuid4()) + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + f"async_jobs.{async_jobs.result.__name__}", + backend_result_or_exception, + ) + + response = await client.get(f"/{API_VERSION}/tasks/{_job_id}/result") + assert response.status == expected_status + + +@pytest.mark.parametrize("user_role", _user_roles) +@pytest.mark.parametrize( + "backend_result_or_exception, expected_status", + [ + ( + [ + AsyncJobGet( + job_id=AsyncJobId(_faker.uuid4()), + ) + ], + status.HTTP_200_OK, + ), + (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), + ], + ids=lambda x: type(x).__name__, +) +async def test_get_user_async_jobs( + user_role: UserRole, + logged_user: UserInfoDict, + client: TestClient, + create_storage_rpc_client_mock: Callable[[str, str, Any], None], + backend_result_or_exception: Any, + expected_status: int, +): + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + f"async_jobs.{async_jobs.list_jobs.__name__}", + backend_result_or_exception, + ) + + response = await client.get(f"/{API_VERSION}/tasks") + assert response.status == expected_status + if response.status == status.HTTP_200_OK: + Envelope[list[TaskGet]].model_validate(await response.json()) + + +@pytest.mark.parametrize("user_role", _user_roles) +@pytest.mark.parametrize( + "http_method, href, backend_method, backend_object, return_status, return_schema", + [ + ( + "GET", + "status_href", + async_jobs.status.__name__, + AsyncJobStatus( + job_id=AsyncJobId(_faker.uuid4()), + progress=ProgressReport(actual_value=0.5, total=1.0), + done=False, + ), + status.HTTP_200_OK, + TaskStatus, + ), + ( + "DELETE", + "abort_href", + async_jobs.cancel.__name__, + AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), + status.HTTP_204_NO_CONTENT, + None, + ), + ( + "GET", + "result_href", + async_jobs.result.__name__, + AsyncJobResult(result=None), + status.HTTP_200_OK, + TaskResult, + ), + ], +) +async def test_get_async_job_links( + user_role: UserRole, + logged_user: UserInfoDict, + client: TestClient, + create_storage_rpc_client_mock: Callable[[str, str, Any], None], + faker: Faker, + http_method: str, + href: str, + backend_method: str, + backend_object: Any, + return_status: int, + return_schema: OutputSchema | None, +): + create_storage_rpc_client_mock( + "simcore_service_webserver.storage._rest", + start_data_export.__name__, + AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), + ) + + _body = DataExportPost( + paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] + ) + response = await client.post( + f"/{API_VERSION}/storage/locations/0/export-data", data=_body.model_dump_json() + ) + assert response.status == status.HTTP_202_ACCEPTED + response_body_data = Envelope[TaskGet].model_validate(await response.json()).data + assert response_body_data is not None + + # Call the different links and check the correct model and return 
status + create_storage_rpc_client_mock( + "simcore_service_webserver.tasks._rest", + f"async_jobs.{backend_method}", + backend_object, + ) + response = await client.request( + http_method, URL(getattr(response_body_data, href)).path + ) + assert response.status == return_status + if return_schema: + Envelope[return_schema].model_validate(await response.json()) diff --git a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py b/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py deleted file mode 100644 index 0997249f8fb3..000000000000 --- a/services/web/server/tests/unit/with_dbs/01/storage/test_storage_rpc.py +++ /dev/null @@ -1,340 +0,0 @@ -# pylint: disable=too-many-arguments -# pylint: disable=redefined-outer-name -# pylint: disable=unused-argument -from collections.abc import Callable -from pathlib import Path -from typing import Any, Final - -import pytest -from aiohttp.test_utils import TestClient -from faker import Faker -from models_library.api_schemas_long_running_tasks.tasks import ( - TaskGet, - TaskResult, - TaskStatus, -) -from models_library.api_schemas_rpc_async_jobs.async_jobs import ( - AsyncJobAbort, - AsyncJobGet, - AsyncJobId, - AsyncJobResult, - AsyncJobStatus, -) -from models_library.api_schemas_rpc_async_jobs.exceptions import ( - JobAbortedError, - JobError, - JobMissingError, - JobNotDoneError, - JobSchedulerError, -) -from models_library.api_schemas_storage.data_export_async_jobs import ( - AccessRightError, - InvalidFileIdentifierError, -) -from models_library.api_schemas_webserver._base import OutputSchema -from models_library.api_schemas_webserver.storage import ( - DataExportPost, -) -from models_library.generics import Envelope -from models_library.progress_bar import ProgressReport -from pytest_mock import MockerFixture -from pytest_simcore.helpers.webserver_login import UserInfoDict -from servicelib.aiohttp import status -from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs -from servicelib.rabbitmq.rpc_interfaces.storage.data_export import start_data_export -from simcore_postgres_database.models.users import UserRole -from yarl import URL - -_faker = Faker() -_user_roles: Final[list[UserRole]] = [ - UserRole.GUEST, - UserRole.USER, - UserRole.TESTER, - UserRole.PRODUCT_OWNER, - UserRole.ADMIN, -] - - -API_VERSION: Final[str] = "v0" - - -@pytest.fixture -def create_storage_rpc_client_mock( - mocker: MockerFixture, -) -> Callable[[str, str, Any], None]: - def _(module: str, method: str, result_or_exception: Any): - def side_effect(*args, **kwargs): - if isinstance(result_or_exception, Exception): - raise result_or_exception - - return result_or_exception - - for fct in (f"{module}.{method}",): - mocker.patch(fct, side_effect=side_effect) - - return _ - - -@pytest.mark.parametrize("user_role", _user_roles) -@pytest.mark.parametrize( - "backend_result_or_exception, expected_status", - [ - (AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), status.HTTP_202_ACCEPTED), - ( - InvalidFileIdentifierError(file_id=Path("/my/file")), - status.HTTP_404_NOT_FOUND, - ), - ( - AccessRightError( - user_id=_faker.pyint(min_value=0), file_id=Path("/my/file") - ), - status.HTTP_403_FORBIDDEN, - ), - (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), - ], - ids=lambda x: type(x).__name__, -) -async def test_data_export( - user_role: UserRole, - logged_user: UserInfoDict, - client: TestClient, - create_storage_rpc_client_mock: Callable[[str, str, Any], None], - faker: Faker, - 
backend_result_or_exception: Any, - expected_status: int, -): - create_storage_rpc_client_mock( - "simcore_service_webserver.storage._rest", - start_data_export.__name__, - backend_result_or_exception, - ) - - _body = DataExportPost( - paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] - ) - response = await client.post( - f"/{API_VERSION}/storage/locations/0/export-data", data=_body.model_dump_json() - ) - assert response.status == expected_status - if response.status == status.HTTP_202_ACCEPTED: - Envelope[TaskGet].model_validate(await response.json()) - - -@pytest.mark.parametrize("user_role", _user_roles) -@pytest.mark.parametrize( - "backend_result_or_exception, expected_status", - [ - ( - AsyncJobStatus( - job_id=AsyncJobId(f"{_faker.uuid4()}"), - progress=ProgressReport(actual_value=0.5, total=1.0), - done=False, - ), - status.HTTP_200_OK, - ), - (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), - (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), - ], - ids=lambda x: type(x).__name__, -) -async def test_get_async_jobs_status( - user_role: UserRole, - logged_user: UserInfoDict, - client: TestClient, - create_storage_rpc_client_mock: Callable[[str, str, Any], None], - backend_result_or_exception: Any, - expected_status: int, -): - _job_id = AsyncJobId(_faker.uuid4()) - create_storage_rpc_client_mock( - "simcore_service_webserver.tasks._rest", - f"async_jobs.{async_jobs.status.__name__}", - backend_result_or_exception, - ) - - response = await client.get(f"/{API_VERSION}/tasks/{_job_id}") - assert response.status == expected_status - if response.status == status.HTTP_200_OK: - response_body_data = ( - Envelope[TaskStatus].model_validate(await response.json()).data - ) - assert response_body_data is not None - - -@pytest.mark.parametrize("user_role", _user_roles) -@pytest.mark.parametrize( - "backend_result_or_exception, expected_status", - [ - ( - AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), - status.HTTP_204_NO_CONTENT, - ), - (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), - (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), - ], - ids=lambda x: type(x).__name__, -) -async def test_abort_async_jobs( - user_role: UserRole, - logged_user: UserInfoDict, - client: TestClient, - create_storage_rpc_client_mock: Callable[[str, str, Any], None], - faker: Faker, - backend_result_or_exception: Any, - expected_status: int, -): - _job_id = AsyncJobId(faker.uuid4()) - create_storage_rpc_client_mock( - "simcore_service_webserver.tasks._rest", - f"async_jobs.{async_jobs.cancel.__name__}", - backend_result_or_exception, - ) - - response = await client.delete(f"/{API_VERSION}/tasks/{_job_id}") - assert response.status == expected_status - - -@pytest.mark.parametrize("user_role", _user_roles) -@pytest.mark.parametrize( - "backend_result_or_exception, expected_status", - [ - (JobNotDoneError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), - (AsyncJobResult(result=None), status.HTTP_200_OK), - (JobError(job_id=_faker.uuid4()), status.HTTP_500_INTERNAL_SERVER_ERROR), - (JobAbortedError(job_id=_faker.uuid4()), status.HTTP_410_GONE), - (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), - (JobMissingError(job_id=_faker.uuid4()), status.HTTP_404_NOT_FOUND), - ], - ids=lambda x: type(x).__name__, -) -async def test_get_async_job_result( - user_role: UserRole, - logged_user: UserInfoDict, - client: TestClient, - create_storage_rpc_client_mock: 
Callable[[str, str, Any], None], - faker: Faker, - backend_result_or_exception: Any, - expected_status: int, -): - _job_id = AsyncJobId(faker.uuid4()) - create_storage_rpc_client_mock( - "simcore_service_webserver.tasks._rest", - f"async_jobs.{async_jobs.result.__name__}", - backend_result_or_exception, - ) - - response = await client.get(f"/{API_VERSION}/tasks/{_job_id}/result") - assert response.status == expected_status - - -@pytest.mark.parametrize("user_role", _user_roles) -@pytest.mark.parametrize( - "backend_result_or_exception, expected_status", - [ - ( - [ - AsyncJobGet( - job_id=AsyncJobId(_faker.uuid4()), - ) - ], - status.HTTP_200_OK, - ), - (JobSchedulerError(exc=_faker.text()), status.HTTP_500_INTERNAL_SERVER_ERROR), - ], - ids=lambda x: type(x).__name__, -) -async def test_get_user_async_jobs( - user_role: UserRole, - logged_user: UserInfoDict, - client: TestClient, - create_storage_rpc_client_mock: Callable[[str, str, Any], None], - backend_result_or_exception: Any, - expected_status: int, -): - create_storage_rpc_client_mock( - "simcore_service_webserver.tasks._rest", - f"async_jobs.{async_jobs.list_jobs.__name__}", - backend_result_or_exception, - ) - - response = await client.get(f"/{API_VERSION}/tasks") - assert response.status == expected_status - if response.status == status.HTTP_200_OK: - Envelope[list[TaskGet]].model_validate(await response.json()) - - -@pytest.mark.parametrize("user_role", _user_roles) -@pytest.mark.parametrize( - "http_method, href, backend_method, backend_object, return_status, return_schema", - [ - ( - "GET", - "status_href", - async_jobs.status.__name__, - AsyncJobStatus( - job_id=AsyncJobId(_faker.uuid4()), - progress=ProgressReport(actual_value=0.5, total=1.0), - done=False, - ), - status.HTTP_200_OK, - TaskStatus, - ), - ( - "DELETE", - "abort_href", - async_jobs.cancel.__name__, - AsyncJobAbort(result=True, job_id=AsyncJobId(_faker.uuid4())), - status.HTTP_204_NO_CONTENT, - None, - ), - ( - "GET", - "result_href", - async_jobs.result.__name__, - AsyncJobResult(result=None), - status.HTTP_200_OK, - TaskResult, - ), - ], -) -async def test_get_async_job_links( - user_role: UserRole, - logged_user: UserInfoDict, - client: TestClient, - create_storage_rpc_client_mock: Callable[[str, str, Any], None], - faker: Faker, - http_method: str, - href: str, - backend_method: str, - backend_object: Any, - return_status: int, - return_schema: OutputSchema | None, -): - create_storage_rpc_client_mock( - "simcore_service_webserver.storage._rest", - start_data_export.__name__, - AsyncJobGet(job_id=AsyncJobId(f"{_faker.uuid4()}")), - ) - - _body = DataExportPost( - paths=[f"{faker.uuid4()}/{faker.uuid4()}/{faker.file_name()}"] - ) - response = await client.post( - f"/{API_VERSION}/storage/locations/0/export-data", data=_body.model_dump_json() - ) - assert response.status == status.HTTP_202_ACCEPTED - response_body_data = Envelope[TaskGet].model_validate(await response.json()).data - assert response_body_data is not None - - # Call the different links and check the correct model and return status - create_storage_rpc_client_mock( - "simcore_service_webserver.tasks._rest", - f"async_jobs.{backend_method}", - backend_object, - ) - response = await client.request( - http_method, URL(getattr(response_body_data, href)).path - ) - assert response.status == return_status - if return_schema: - Envelope[return_schema].model_validate(await response.json())
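
Usage note on the renamed interface (patches 130 and 131): the client-side flow these tests exercise is submit, then status polled until the job reports done, then result, with cancel available to stop a job early. Below is a minimal sketch of a caller driving that flow over RabbitMQ RPC. It is illustrative only and not part of the patch series: the import path assumed for STORAGE_RPC_NAMESPACE, the location_id parameter of compute_path_size (elided in the hunk context above), and the one-second poll interval are this sketch's assumptions.

    import asyncio
    from pathlib import Path

    # assumed import path for the namespace constant used throughout the tests
    from models_library.api_schemas_storage import STORAGE_RPC_NAMESPACE
    from servicelib.rabbitmq import RabbitMQRPCClient
    from servicelib.rabbitmq.rpc_interfaces.async_jobs import async_jobs
    from servicelib.rabbitmq.rpc_interfaces.storage.paths import compute_path_size


    async def wait_for_path_size(
        client: RabbitMQRPCClient,
        *,
        user_id: int,
        product_name: str,
        location_id: int,  # assumed parameter name, not visible in the hunks above
        path: Path,
    ):
        # compute_path_size wraps submit (formerly submit_job) and returns both
        # the job handle and the AsyncJobNameData that scopes it to user/product
        job, job_id_data = await compute_path_size(
            client,
            user_id=user_id,
            product_name=product_name,
            location_id=location_id,
            path=path,
        )
        # status (formerly get_status) is polled until the job reports done
        while True:
            job_status = await async_jobs.status(
                client,
                rpc_namespace=STORAGE_RPC_NAMESPACE,
                job_id=job.job_id,
                job_id_data=job_id_data,
            )
            if job_status.done:
                break
            await asyncio.sleep(1)  # illustrative poll interval
        # result (formerly get_result) returns an AsyncJobResult on success; on
        # failure the storage-side handler raises JobError, JobAbortedError, etc.
        async_job_result = await async_jobs.result(
            client,
            rpc_namespace=STORAGE_RPC_NAMESPACE,
            job_id=job.job_id,
            job_id_data=job_id_data,
        )
        return async_job_result.result

Keeping status and result as separate RPC methods, rather than one blocking call, is what lets the webserver expose each step as its own REST endpoint; the status_href, abort_href and result_href links returned at submission, and exercised by test_get_async_job_links above, point at exactly these steps.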