diff --git a/.env-devel b/.env-devel index 22690e818710..8207e9f8bf6f 100644 --- a/.env-devel +++ b/.env-devel @@ -25,6 +25,8 @@ TRAEFIK_API_SERVER_INFLIGHTREQ_AMOUNT=25 AUTOSCALING_DASK=null AUTOSCALING_DRAIN_NODES_WITH_LABELS=False +AUTOSCALING_DOCKER_JOIN_DRAINED=True +AUTOSCALING_WAIT_FOR_CLOUD_INIT_BEFORE_WARM_BUFFER_ACTIVATION=False AUTOSCALING_EC2_ACCESS=null AUTOSCALING_EC2_INSTANCES=null AUTOSCALING_LOGLEVEL=WARNING @@ -48,6 +50,7 @@ CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG=master-github-latest CLUSTERS_KEEPER_DASK_NTHREADS=0 CLUSTERS_KEEPER_DASK_WORKER_SATURATION=inf CLUSTERS_KEEPER_EC2_ACCESS=null +CLUSTERS_KEEPER_SSM_ACCESS=null CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX="" CLUSTERS_KEEPER_LOGLEVEL=WARNING CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION=5 @@ -101,6 +104,7 @@ DYNAMIC_SIDECAR_IMAGE=${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKE DYNAMIC_SIDECAR_LOG_LEVEL=DEBUG DYNAMIC_SIDECAR_PROMETHEUS_MONITORING_NETWORKS=[] DYNAMIC_SIDECAR_PROMETHEUS_SERVICE_LABELS={} +DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT=3600 # DIRECTOR_V2 ---- DYNAMIC_SCHEDULER_PROFILING=1 @@ -149,6 +153,10 @@ POSTGRES_PASSWORD=adminadmin POSTGRES_PORT=5432 POSTGRES_USER=scu +POSTGRES_READONLY_PASSWORD=readonly +POSTGRES_READONLY_USER=postgres_readonly + + RABBIT_HOST=rabbit RABBIT_PASSWORD=adminadmin RABBIT_PORT=5672 @@ -158,6 +166,8 @@ RABBIT_USER=admin REDIS_HOST=redis REDIS_PORT=6379 REDIS_PASSWORD=adminadmin +REDIS_SECURE=false +REDIS_USER=null REGISTRY_AUTH=True REGISTRY_PW=adminadminadmin @@ -211,6 +221,13 @@ STORAGE_PROFILING=1 SWARM_STACK_NAME=master-simcore +## VENDOR DEVELOPMENT SERVICES --- +VENDOR_DEV_MANUAL_IMAGE=containous/whoami +VENDOR_DEV_MANUAL_REPLICAS=1 +VENDOR_DEV_MANUAL_SUBDOMAIN=manual + +## VENDOR DEVELOPMENT SERVICES --- + WB_API_WEBSERVER_HOST=wb-api-server WB_API_WEBSERVER_PORT=8080 @@ -282,6 +299,7 @@ WB_DB_EL_WALLETS=0 # WEBSERVER ---- AIODEBUG_SLOW_DURATION_SECS=0 +DIAGNOSTICS_HEALTHCHECK_ENABLED=False DIAGNOSTICS_MAX_AVG_LATENCY=10 DIAGNOSTICS_MAX_TASK_DELAY=30 DIAGNOSTICS_SLOW_DURATION_SECS=1 @@ -303,9 +321,11 @@ SESSION_COOKIE_SECURE=False SIMCORE_VCS_RELEASE_TAG=latest STUDIES_ACCESS_ANONYMOUS_ALLOWED=0 STUDIES_DEFAULT_SERVICE_THUMBNAIL=https://via.placeholder.com/170x120.png -TRACING_ENABLED=1 -TRACING_THRIFT_COMPACT_ENDPOINT=http://jaeger:5775 -TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 +TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE=2 +TRACING_OPENTELEMETRY_COLLECTOR_PORT=4318 +TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT=http://opentelemetry-collector +TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE=100 +TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT=http://jaeger:4318 TRAEFIK_SIMCORE_ZONE=internal_simcore_stack TWILIO_ACCOUNT_SID=DUMMY TWILIO_AUTH_TOKEN=DUMMY diff --git a/.github/dependabot.yml b/.github/dependabot.yml index b70d1f17b031..3249e3f2d6db 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,9 +12,6 @@ updates: assignees: - pcrespov ignore: - - dependency-name: aiozipkin - versions: - - ">= 1.a, < 2" - dependency-name: docker-compose versions: - 1.28.2 @@ -41,9 +38,6 @@ updates: assignees: - pcrespov ignore: - - dependency-name: aiozipkin - versions: - - ">= 1.a, < 2" - dependency-name: openapi-core versions: - "> 0.12.0, < 1" diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index adbea2e8e41b..66cd8dce4db1 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -279,6 +279,18 @@ jobs: fail-fast: false 
name: "[build] docker images" steps: + - name: Remove unused software + run: | + echo "Available storage before:" + sudo df -h + echo + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + echo "Available storage after:" + sudo df -h + echo - uses: actions/checkout@v4 - name: setup docker buildx id: buildx @@ -593,7 +605,7 @@ jobs: unit-test-autoscaling: needs: changes if: ${{ needs.changes.outputs.autoscaling == 'true' || github.event_name == 'push' }} - timeout-minutes: 18 # if this timeout gets too small, then split the tests + timeout-minutes: 22 # temporary: mypy takes a huge amount of time to run here, maybe we should cache it name: "[unit] autoscaling" runs-on: ${{ matrix.os }} strategy: @@ -1291,7 +1303,7 @@ jobs: uses: docker/setup-buildx-action@v3 with: driver: docker-container - - uses: actions/setup-node@v4.0.3 + - uses: actions/setup-node@v4.0.4 with: node-version: ${{ matrix.node }} cache: "npm" @@ -2404,7 +2416,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - - uses: actions/setup-node@v4.0.3 + - uses: actions/setup-node@v4.0.4 with: node-version: ${{ matrix.node }} cache: "npm" @@ -2530,7 +2542,6 @@ jobs: name: ${{ github.job }}_tracing path: tests/e2e-playwright/test-results - system-test-environment-setup: timeout-minutes: 30 # if this timeout gets too small, then split the tests name: "[sys] environment setup" diff --git a/.pylintrc b/.pylintrc index 9454261215a1..9f0e88f06ef7 100644 --- a/.pylintrc +++ b/.pylintrc @@ -466,6 +466,8 @@ max-statements=50 # Minimum number of public methods for a class (see R0903). min-public-methods=2 +# Minimum number of public methods for a class (see R0903). +max-positional-arguments=12 [EXCEPTIONS] diff --git a/.vscode/settings.template.json b/.vscode/settings.template.json index 8df39917defa..2de40f80ad62 100644 --- a/.vscode/settings.template.json +++ b/.vscode/settings.template.json @@ -9,11 +9,12 @@ "files.associations": { ".*rc": "ini", ".env*": "ini", + "*.logs*": "log", "**/requirements/*.in": "pip-requirements", "**/requirements/*.txt": "pip-requirements", "*logs.txt": "log", - "*.logs*": "log", "*Makefile": "makefile", + "*sql.*": "sql", "docker-compose*.yml": "dockercompose", "Dockerfile*": "dockerfile" }, diff --git a/Makefile b/Makefile index 824d39628372..e8e0113d9013 100644 --- a/Makefile +++ b/Makefile @@ -269,6 +269,11 @@ CPU_COUNT = $(shell cat /proc/cpuinfo | grep processor | wc -l ) services/docker-compose.local.yml \ > $@ +.stack-vendor-services.yml: .env $(docker-compose-configs) + # Creating config for vendors stack to $@ + @scripts/docker/docker-stack-config.bash -e $< \ + services/docker-compose-dev-vendors.yml \ + > $@ .stack-ops.yml: .env $(docker-compose-configs) # Creating config for ops stack to $@ @@ -288,7 +293,11 @@ endif -.PHONY: up-devel up-prod up-prod-ci up-version up-latest .deploy-ops +.PHONY: up-devel up-prod up-prod-ci up-version up-latest .deploy-ops .deploy-vendors + +.deploy-vendors: .stack-vendor-services.yml + # Deploy stack 'vendors' + docker stack deploy --detach=true --with-registry-auth -c $< vendors .deploy-ops: .stack-ops.yml # Deploy stack 'ops' @@ -310,18 +319,19 @@ TableWidth=140;\ printf "%24s | %90s | %12s | %12s\n" Name Endpoint User Password;\ printf "%.$${TableWidth}s\n" "$$separator";\ printf "$$rows" "oSparc platform" "http://$(get_my_ip).nip.io:9081";\ -printf "$$rows" "oSparc web API doc" "http://$(get_my_ip).nip.io:9081/dev/doc";\ printf "$$rows" "oSparc 
public API doc" "http://$(get_my_ip).nip.io:8006/dev/doc";\ -printf "$$rows" "Postgres DB" "http://$(get_my_ip).nip.io:18080/?pgsql=postgres&username="$${POSTGRES_USER}"&db="$${POSTGRES_DB}"&ns=public" $${POSTGRES_USER} $${POSTGRES_PASSWORD};\ -printf "$$rows" "Portainer" "http://$(get_my_ip).nip.io:9000" admin adminadmin;\ -printf "$$rows" "Redis" "http://$(get_my_ip).nip.io:18081";\ +printf "$$rows" "oSparc web API doc" "http://$(get_my_ip).nip.io:9081/dev/doc";\ printf "$$rows" "Dask Dashboard" "http://$(get_my_ip).nip.io:8787";\ printf "$$rows" "Docker Registry" "http://$${REGISTRY_URL}/v2/_catalog" $${REGISTRY_USER} $${REGISTRY_PW};\ printf "$$rows" "Invitations" "http://$(get_my_ip).nip.io:8008/dev/doc" $${INVITATIONS_USERNAME} $${INVITATIONS_PASSWORD};\ +printf "$$rows" "Jaeger" "http://$(get_my_ip).nip.io:16686";\ printf "$$rows" "Payments" "http://$(get_my_ip).nip.io:8011/dev/doc" $${PAYMENTS_USERNAME} $${PAYMENTS_PASSWORD};\ +printf "$$rows" "Portainer" "http://$(get_my_ip).nip.io:9000" admin adminadmin;\ +printf "$$rows" "Postgres DB" "http://$(get_my_ip).nip.io:18080/?pgsql=postgres&username="$${POSTGRES_USER}"&db="$${POSTGRES_DB}"&ns=public" $${POSTGRES_USER} $${POSTGRES_PASSWORD};\ printf "$$rows" "Rabbit Dashboard" "http://$(get_my_ip).nip.io:15672" admin adminadmin;\ -printf "$$rows" "Traefik Dashboard" "http://$(get_my_ip).nip.io:8080/dashboard/";\ +printf "$$rows" "Redis" "http://$(get_my_ip).nip.io:18081";\ printf "$$rows" "Storage S3 Minio" "http://$(get_my_ip).nip.io:9001" 12345678 12345678;\ +printf "$$rows" "Traefik Dashboard" "http://$(get_my_ip).nip.io:8080/dashboard/";\ printf "\n%s\n" "⚠️ if a DNS is not used (as displayed above), the interactive services started via dynamic-sidecar";\ echo "⚠️ will not be shown. The frontend accesses them via the uuid.services.YOUR_IP.nip.io:9081"; @@ -337,6 +347,7 @@ up-devel: .stack-simcore-development.yml .init-swarm $(CLIENT_WEB_OUTPUT) ## Dep @$(MAKE_C) services/dask-sidecar certificates # Deploy stack $(SWARM_STACK_NAME) [back-end] @docker stack deploy --detach=true --with-registry-auth -c $< $(SWARM_STACK_NAME) + @$(MAKE) .deploy-vendors @$(MAKE) .deploy-ops @$(_show_endpoints) @$(MAKE_C) services/static-webserver/client follow-dev-logs @@ -347,6 +358,7 @@ up-devel-frontend: .stack-simcore-development-frontend.yml .init-swarm ## Every @$(MAKE_C) services/dask-sidecar certificates # Deploy stack $(SWARM_STACK_NAME) [back-end] @docker stack deploy --detach=true --with-registry-auth -c $< $(SWARM_STACK_NAME) + @$(MAKE) .deploy-vendors @$(MAKE) .deploy-ops @$(_show_endpoints) @$(MAKE_C) services/static-webserver/client follow-dev-logs @@ -357,6 +369,7 @@ ifeq ($(target),) @$(MAKE_C) services/dask-sidecar certificates # Deploy stack $(SWARM_STACK_NAME) @docker stack deploy --detach=true --with-registry-auth -c $< $(SWARM_STACK_NAME) + @$(MAKE) .deploy-vendors @$(MAKE) .deploy-ops else # deploys ONLY $(target) service @@ -368,6 +381,7 @@ up-version: .stack-simcore-version.yml .init-swarm ## Deploys versioned stack '$ @$(MAKE_C) services/dask-sidecar certificates # Deploy stack $(SWARM_STACK_NAME) @docker stack deploy --detach=true --with-registry-auth -c $< $(SWARM_STACK_NAME) + @$(MAKE) .deploy-vendors @$(MAKE) .deploy-ops @$(_show_endpoints) diff --git a/api/specs/web-server/_auth.py b/api/specs/web-server/_auth.py index 9214d5f60fdb..4c323869e04e 100644 --- a/api/specs/web-server/_auth.py +++ b/api/specs/web-server/_auth.py @@ -155,6 +155,21 @@ async def logout(_body: LogoutBody): """user logout""" +@router.get( + 
"/auth:check", + operation_id="check_authentication", + status_code=status.HTTP_204_NO_CONTENT, + responses={ + status.HTTP_401_UNAUTHORIZED: { + "model": Envelope[Error], + "description": "unauthorized reset due to invalid token code", + } + }, +) +async def check_auth(): + """checks if user is authenticated in the platform""" + + @router.post( "/auth/reset-password", response_model=Envelope[Log], diff --git a/api/specs/web-server/_projects_crud.py b/api/specs/web-server/_projects_crud.py index 1b6030cc061b..46073f921fc2 100644 --- a/api/specs/web-server/_projects_crud.py +++ b/api/specs/web-server/_projects_crud.py @@ -32,7 +32,10 @@ from simcore_service_webserver._meta import API_VTAG from simcore_service_webserver.projects._common_models import ProjectPathParams from simcore_service_webserver.projects._crud_handlers import ProjectCreateParams -from simcore_service_webserver.projects._crud_handlers_models import ProjectListParams +from simcore_service_webserver.projects._crud_handlers_models import ( + ProjectListFullSearchParams, + ProjectListParams, +) router = APIRouter( prefix=f"/{API_VTAG}", @@ -137,6 +140,23 @@ async def clone_project( ... +@router.get( + "/projects:search", + response_model=Page[ProjectListFullSearchParams], +) +async def list_projects_full_search( + _params: Annotated[ProjectListFullSearchParams, Depends()], + order_by: Annotated[ + Json, + Query( + description="Order by field (type|uuid|name|description|prj_owner|creation_date|last_change_date) and direction (asc|desc). The default sorting order is ascending.", + example='{"field": "last_change_date", "direction": "desc"}', + ), + ] = ('{"field": "last_change_date", "direction": "desc"}',), +): + ... + + @router.get( "/projects/{project_id}/inactivity", response_model=Envelope[GetProjectInactivityResponse], diff --git a/api/specs/web-server/_resource_usage.py b/api/specs/web-server/_resource_usage.py index c60ab4f36b57..549244737462 100644 --- a/api/specs/web-server/_resource_usage.py +++ b/api/specs/web-server/_resource_usage.py @@ -13,6 +13,9 @@ from _common import assert_handler_signature_against_model from fastapi import APIRouter, Query, status +from models_library.api_schemas_resource_usage_tracker.service_runs import ( + OsparcCreditsAggregatedByServiceGet, +) from models_library.api_schemas_webserver.resource_usage import ( ConnectServiceToPricingPlanBodyParams, CreatePricingPlanBodyParams, @@ -93,7 +96,7 @@ async def list_resource_usage_services( @router.get( "/services/-/aggregated-usages", - response_model=Envelope[list[ServiceRunGet]], + response_model=Envelope[list[OsparcCreditsAggregatedByServiceGet]], summary="Used credits based on aggregate by type, currently supported `services`. 
(user and product are taken from context, optionally wallet_id parameter might be provided).", tags=["usage"], ) diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 1f0319393b78..8be5481a3c58 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -1,5 +1,6 @@ -# pylint: disable=unused-argument # pylint: disable=redefined-outer-name +# pylint: disable=too-many-positional-arguments +# pylint: disable=unused-argument # pylint: disable=unused-variable import logging diff --git a/api/tests/requirements.txt b/api/tests/requirements.txt index d596bedcadc2..04cf811ff2af 100644 --- a/api/tests/requirements.txt +++ b/api/tests/requirements.txt @@ -6,14 +6,12 @@ aiohttp==3.10.5 # -r requirements.in aiosignal==1.3.1 # via aiohttp -async-timeout==4.0.3 - # via aiohttp attrs==24.2.0 # via # aiohttp # jsonschema # referencing -certifi==2024.7.4 +certifi==2024.8.30 # via # -c ../../requirements/constraints.txt # requests @@ -23,13 +21,11 @@ coverage==7.6.1 # via # -r requirements.in # pytest-cov -exceptiongroup==1.2.2 - # via pytest frozenlist==1.4.1 # via # aiohttp # aiosignal -idna==3.7 +idna==3.10 # via # requests # yarl @@ -54,13 +50,13 @@ lazy-object-proxy==1.10.0 # via openapi-spec-validator markupsafe==2.1.5 # via werkzeug -more-itertools==10.4.0 +more-itertools==10.5.0 # via openapi-core -multidict==6.0.5 +multidict==6.1.0 # via # aiohttp # yarl -openapi-core==0.19.3 +openapi-core==0.19.4 # via -r requirements.in openapi-schema-validator==0.6.2 # via @@ -78,7 +74,7 @@ pathable==0.4.3 # via jsonschema-path pluggy==1.5.0 # via pytest -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements.in # pytest-asyncio @@ -118,15 +114,11 @@ six==1.16.0 # rfc3339-validator termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # pytest -urllib3==2.2.2 +urllib3==2.2.3 # via # -c ../../requirements/constraints.txt # requests -werkzeug==3.0.3 +werkzeug==3.0.4 # via openapi-core -yarl==1.9.4 +yarl==1.12.1 # via aiohttp diff --git a/docs/coding-conventions.md b/docs/coding-conventions.md index f17272549a28..f3f23fedd71e 100644 --- a/docs/coding-conventions.md +++ b/docs/coding-conventions.md @@ -136,3 +136,7 @@ Have a look at `ESLint`'s configuration files [.eslintrc.json](.eslintrc.json) a [mypy]:https://www.mypy-lang.org/ [pep257]:https://peps.python.org/pep-0257/ [pylint]:https://pylint.readthedocs.io/en/latest/ + +# My first osparc-simcore PR: common pitfalls + +- Make sure to run `make mypy` and `make pylint`, as the associated github-actions are required to pass. If you include new dependencies in `requirements/*.in`, make sure to run `make touch && make reqs "upgrade=NAME_OF_YOUR_NEW_DEPENDENCY"`. It is best to do this inside a reproducible environment, for this purpose a shell inside a docker container can be used: Go to `osparc-simcore/requirements/tools` and run `make shell`. Inside the new shell the osparc-simcore repo is placed in `~`. Run `make reqs` from inside this shell. 
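As a rough illustration of the contributor workflow described in the coding-conventions note above (a sketch only: the dependency name is a placeholder, and the exact directory layout inside the container shell may differ):

```bash
# 1. lint and type-check locally, since the corresponding github-actions must pass
make pylint
make mypy

# 2. after adding a new dependency to requirements/*.in, regenerate the pinned requirements;
#    doing this from a reproducible container shell avoids local-environment differences
cd osparc-simcore/requirements/tools
make shell                      # opens a shell inside a docker container

# inside that shell the repository is available under ~ (per the note above)
cd ~/osparc-simcore
make touch && make reqs "upgrade=my-new-dependency"   # "my-new-dependency" is a placeholder
```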
diff --git a/package.json b/package.json index 8533c589643a..c50867df4cee 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,7 @@ { "scripts": { - "linter": "npx eslint ./services/*/client/source/class/*/" + "linter": "npx eslint ./services/*/client/source/class/*/", + "linter-fix": "npx eslint ./services/*/client/source/class/*/ --fix" }, "devDependencies": { "babel-eslint": "^10.1.0", diff --git a/packages/aws-library/requirements/_base.txt b/packages/aws-library/requirements/_base.txt index 53832fe75c52..d1dd688f484f 100644 --- a/packages/aws-library/requirements/_base.txt +++ b/packages/aws-library/requirements/_base.txt @@ -5,10 +5,12 @@ aioboto3==13.1.1 aiobotocore==2.13.1 # via aioboto3 aiocache==0.12.2 - # via -r requirements/_base.in + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in -aiodocker==0.22.2 +aiodocker==0.23.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiofiles==24.1.0 # via @@ -26,15 +28,15 @@ aiohttp==3.10.5 # -c requirements/../../../requirements/constraints.txt # aiobotocore # aiodocker -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore -aiormq==6.8.0 +aiormq==6.8.1 # via aio-pika aiosignal==1.3.1 # via aiohttp annotated-types==0.7.0 # via pydantic -anyio==4.4.0 +anyio==4.6.0 # via # fast-depends # faststream @@ -56,27 +58,53 @@ botocore==1.34.131 # aiobotocore # boto3 # s3transfer -botocore-stubs==1.35.2 +botocore-stubs==1.35.25 # via types-aiobotocore +certifi==2024.8.30 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via typer +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.2.0 # via pydantic -fast-depends==2.4.8 +fast-depends==2.4.11 # via faststream -faststream==0.5.18 +faststream==0.5.23 # via -r requirements/../../../packages/service-library/requirements/_base.in frozenlist==1.4.1 # via # aiohttp # aiosignal -idna==3.7 +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +grpcio==1.66.1 + # via opentelemetry-exporter-otlp-proto-grpc +idna==3.10 # via # anyio # email-validator + # requests # yarl +importlib-metadata==8.4.0 + # via opentelemetry-api jmespath==1.0.1 # via # boto3 @@ -91,10 +119,49 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -multidict==6.0.5 +multidict==6.1.0 # via # aiohttp # yarl +opentelemetry-api==1.27.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # 
opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.27.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.27.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.27.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.27.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.48b0 + # via opentelemetry-instrumentation-requests +opentelemetry-instrumentation-requests==0.48b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.27.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.27.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.48b0 + # via + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.48b0 + # via opentelemetry-instrumentation-requests orjson==3.10.7 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -107,6 +174,12 @@ orjson==3.10.7 # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in pamqp==3.3.0 # via aiormq +protobuf==4.25.5 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in pydantic==2.9.1 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -138,7 +211,7 @@ pydantic-settings==2.5.2 # -r requirements/../../../packages/settings-library/requirements/_base.in pygments==2.18.0 # via rich -pyinstrument==4.7.2 +pyinstrument==4.7.3 # via -r requirements/../../../packages/service-library/requirements/_base.in python-dateutil==2.9.0.post0 # via @@ -169,7 +242,11 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications -rich==13.7.1 +repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http +rich==13.8.1 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in @@ -180,6 +257,8 @@ rpds-py==0.20.0 # referencing s3transfer==0.10.2 # via boto3 +setuptools==75.1.0 + # via opentelemetry-instrumentation sh==2.0.7 # via -r requirements/_base.in shellingham==1.5.4 @@ -194,27 +273,27 @@ toolz==0.12.1 # via -r requirements/../../../packages/service-library/requirements/_base.in tqdm==4.66.5 # via -r requirements/../../../packages/service-library/requirements/_base.in -typer==0.12.4 +typer==0.12.5 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in - # faststream -types-aiobotocore==2.13.2 
+types-aiobotocore==2.15.1 # via -r requirements/_base.in -types-aiobotocore-ec2==2.13.2 +types-aiobotocore-ec2==2.15.1 # via types-aiobotocore -types-aiobotocore-s3==2.13.2 +types-aiobotocore-s3==2.15.1 # via types-aiobotocore -types-aiobotocore-ssm==2.13.2 +types-aiobotocore-ssm==2.15.1 # via types-aiobotocore -types-awscrt==0.21.2 +types-awscrt==0.21.5 # via botocore-stubs -types-python-dateutil==2.9.0.20240821 +types-python-dateutil==2.9.0.20240906 # via arrow typing-extensions==4.12.2 # via # aiodebug # faststream + # opentelemetry-sdk # pydantic # pydantic-core # typer @@ -222,7 +301,7 @@ typing-extensions==4.12.2 # types-aiobotocore-ec2 # types-aiobotocore-s3 # types-aiobotocore-ssm -urllib3==2.2.2 +urllib3==2.2.3 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -231,10 +310,16 @@ urllib3==2.2.2 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # botocore + # requests wrapt==1.16.0 - # via aiobotocore -yarl==1.9.4 + # via + # aiobotocore + # deprecated + # opentelemetry-instrumentation +yarl==1.12.1 # via # aio-pika # aiohttp # aiormq +zipp==3.20.2 + # via importlib-metadata diff --git a/packages/aws-library/requirements/_test.txt b/packages/aws-library/requirements/_test.txt index 1a9b4a7afa2e..562f30e34869 100644 --- a/packages/aws-library/requirements/_test.txt +++ b/packages/aws-library/requirements/_test.txt @@ -11,7 +11,7 @@ attrs==24.2.0 # -c requirements/_base.txt # jsonschema # referencing -aws-sam-translator==1.89.0 +aws-sam-translator==1.91.0 # via cfn-lint aws-xray-sdk==2.14.0 # via moto @@ -22,7 +22,7 @@ boto3==1.34.131 # -c requirements/_base.txt # aws-sam-translator # moto -boto3-stubs==1.35.2 +boto3-stubs==1.35.25 # via types-boto3 botocore==1.34.131 # via @@ -31,22 +31,25 @@ botocore==1.34.131 # boto3 # moto # s3transfer -botocore-stubs==1.35.2 +botocore-stubs==1.35.25 # via # -c requirements/_base.txt # boto3-stubs # types-aioboto3 # types-aiobotocore -certifi==2024.7.4 +certifi==2024.8.30 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography -cfn-lint==1.10.3 +cfn-lint==1.15.0 # via moto charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -55,30 +58,30 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # joserfc # moto docker==7.1.0 # via moto -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in flask==3.0.3 # via # flask-cors # moto -flask-cors==4.0.1 +flask-cors==5.0.0 # via moto flexcache==0.3 # via pint flexparser==0.3.1 # via pint -graphql-core==3.2.3 +graphql-core==3.2.4 # via moto icdiff==2.0.7 # via pytest-icdiff -idna==3.7 +idna==3.10 # via # -c requirements/_base.txt # requests @@ -98,7 +101,7 @@ jmespath==1.0.1 # botocore joserfc==1.0.0 # via moto -jsondiff==2.2.0 +jsondiff==2.2.1 # via moto jsonpatch==1.33 # via cfn-lint @@ -125,7 +128,7 @@ markupsafe==2.1.5 # via # jinja2 # werkzeug -moto==5.0.13 +moto==5.0.15 # via -r requirements/_test.in mpmath==1.3.0 # via sympy @@ -151,7 +154,7 @@ pprintpp==0.4.0 # via pytest-icdiff 
py-cpuinfo==9.0.0 # via pytest-benchmark -py-partiql-parser==0.5.5 +py-partiql-parser==0.5.6 # via moto pycparser==2.22 # via cffi @@ -164,9 +167,9 @@ pydantic-core==2.23.3 # via # -c requirements/_base.txt # pydantic -pyparsing==3.1.2 +pyparsing==3.1.4 # via moto -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -220,10 +223,11 @@ referencing==0.29.3 # jsonschema # jsonschema-path # jsonschema-specifications -regex==2024.7.24 +regex==2024.9.11 # via cfn-lint requests==2.32.3 # via + # -c requirements/_base.txt # docker # jsonschema-path # moto @@ -241,30 +245,32 @@ s3transfer==0.10.2 # via # -c requirements/_base.txt # boto3 -setuptools==73.0.1 - # via moto +setuptools==75.1.0 + # via + # -c requirements/_base.txt + # moto six==1.16.0 # via # -c requirements/_base.txt # python-dateutil # rfc3339-validator -sympy==1.13.2 +sympy==1.13.3 # via cfn-lint termcolor==2.4.0 # via pytest-sugar types-aioboto3==13.1.1 # via -r requirements/_test.in -types-aiobotocore==2.13.2 +types-aiobotocore==2.15.1 # via # -c requirements/_base.txt # types-aioboto3 -types-awscrt==0.21.2 +types-awscrt==0.21.5 # via # -c requirements/_base.txt # botocore-stubs types-boto3==1.0.2 # via -r requirements/_test.in -types-s3transfer==0.10.1 +types-s3transfer==0.10.2 # via # boto3-stubs # types-aioboto3 @@ -281,7 +287,7 @@ typing-extensions==4.12.2 # pydantic-core # types-aioboto3 # types-aiobotocore -urllib3==2.2.2 +urllib3==2.2.3 # via # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt @@ -289,7 +295,7 @@ urllib3==2.2.2 # docker # requests # responses -werkzeug==3.0.3 +werkzeug==3.0.4 # via # flask # moto diff --git a/packages/aws-library/requirements/_tools.txt b/packages/aws-library/requirements/_tools.txt index 1c9b94efc3ef..861338d5b7f7 100644 --- a/packages/aws-library/requirements/_tools.txt +++ b/packages/aws-library/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -47,14 +47,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -66,10 +66,11 @@ pyyaml==6.0.2 # -c requirements/_base.txt # -c requirements/_test.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==75.1.0 # via + # -c requirements/_base.txt # -c requirements/_test.txt # pip-tools tomlkit==0.13.2 @@ -79,7 +80,7 @@ typing-extensions==4.12.2 # -c requirements/_base.txt # -c requirements/_test.txt # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/packages/aws-library/tests/test_ec2_client.py b/packages/aws-library/tests/test_ec2_client.py index 
2b1d8fca3768..625555e9f5d1 100644 --- a/packages/aws-library/tests/test_ec2_client.py +++ b/packages/aws-library/tests/test_ec2_client.py @@ -130,7 +130,7 @@ async def test_get_ec2_instance_capabilities_empty_list_returns_all_options( instance_types = await simcore_ec2_api.get_ec2_instance_capabilities(set()) assert instance_types # NOTE: this might need adaptation when moto is updated - assert 700 < len(instance_types) < 807 + assert 700 < len(instance_types) < 828 async def test_get_ec2_instance_capabilities_with_invalid_type_raises( diff --git a/packages/common-library/src/common_library/errors_classes.py b/packages/common-library/src/common_library/errors_classes.py index 99ed586e7443..f6f08837f7c2 100644 --- a/packages/common-library/src/common_library/errors_classes.py +++ b/packages/common-library/src/common_library/errors_classes.py @@ -1,4 +1,5 @@ from typing import Any + from pydantic.errors import PydanticErrorMixin @@ -8,7 +9,7 @@ def __missing__(self, key): class OsparcErrorMixin(PydanticErrorMixin): - code: str # type: ignore[assignment] + code: str # type: ignore[assignment] msg_template: str def __new__(cls, *_args, **_kwargs): @@ -18,7 +19,7 @@ def __new__(cls, *_args, **_kwargs): def __init__(self, **ctx: Any) -> None: self.__dict__ = ctx - super().__init__(message=self._build_message(), code=self.code) # type: ignore[arg-type] + super().__init__(message=self._build_message(), code=self.code) # type: ignore[arg-type] def __str__(self) -> str: return self._build_message() @@ -41,3 +42,7 @@ def _get_full_class_name(cls) -> str: ) ] return ".".join(reversed(relevant_classes)) + + def error_context(self): + """Returns context in which error occurred and stored within the exception""" + return dict(**self.__dict__) diff --git a/packages/common-library/src/common_library/pydantic_basic_types.py b/packages/common-library/src/common_library/pydantic_basic_types.py new file mode 100644 index 000000000000..452c118dae95 --- /dev/null +++ b/packages/common-library/src/common_library/pydantic_basic_types.py @@ -0,0 +1,79 @@ +from re import Pattern +from typing import Annotated, Final, TypeAlias + +from pydantic import Field +from pydantic_core import core_schema + +# https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers#Registered_ports +RegisteredPortInt: TypeAlias = Annotated[int, Field(gt=1024, lt=65535)] + +# non-empty bounded string used as identifier +# e.g. "123" or "name_123" or "fa327c73-52d8-462a-9267-84eeaf0f90e3" but NOT "" +_ELLIPSIS_CHAR: Final[str] = "..." 
+ + +class ConstrainedStr(str): # noqa: SLOT000 + pattern: str | Pattern[str] | None = None + min_length: int | None = None + max_length: int | None = None + strip_whitespace: bool = False + curtail_length: int | None = None + + @classmethod + def _validate(cls, __input_value: str) -> str: + if cls.curtail_length and len(__input_value) > cls.curtail_length: + __input_value = __input_value[: cls.curtail_length] + return cls(__input_value) + + @classmethod + def __get_pydantic_core_schema__(cls, _source_type, _handler): + return core_schema.no_info_after_validator_function( + cls._validate, + core_schema.str_schema( + pattern=cls.pattern, + min_length=cls.min_length, + max_length=cls.max_length, + strip_whitespace=cls.strip_whitespace, + ), + ) + + +class IDStr(ConstrainedStr): + strip_whitespace = True + min_length = 1 + max_length = 100 + + @staticmethod + def concatenate(*args: "IDStr", link_char: str = " ") -> "IDStr": + result = link_char.join(args).strip() + assert IDStr.min_length # nosec + assert IDStr.max_length # nosec + if len(result) > IDStr.max_length: + if IDStr.max_length > len(_ELLIPSIS_CHAR): + result = ( + result[: IDStr.max_length - len(_ELLIPSIS_CHAR)].rstrip() + + _ELLIPSIS_CHAR + ) + else: + result = _ELLIPSIS_CHAR[0] * IDStr.max_length + if len(result) < IDStr.min_length: + msg = f"IDStr.concatenate: result is too short: {result}" + raise ValueError(msg) + return IDStr(result) + + +class ShortTruncatedStr(ConstrainedStr): + # NOTE: Use to input e.g. titles or display names + # A truncated string: + # - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). + # - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, i.e. without raising errors. + # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 + strip_whitespace = True + curtail_length = 600 + + +class LongTruncatedStr(ConstrainedStr): + # NOTE: Use to input e.g. 
descriptions or summaries + # Analogous to ShortTruncatedStr + strip_whitespace = True + curtail_length = 65536 # same as github descripton diff --git a/packages/common-library/tests/test_errors_classes.py b/packages/common-library/tests/test_errors_classes.py index 63674fbd3b48..3be2532f1ab5 100644 --- a/packages/common-library/tests/test_errors_classes.py +++ b/packages/common-library/tests/test_errors_classes.py @@ -140,3 +140,24 @@ class MyError(OsparcErrorMixin, ValueError): msg_template = "{value} and {missing}" assert str(MyError(value=42)) == "42 and 'missing=?'" + + +def test_exception_context(): + class MyError(OsparcErrorMixin, ValueError): + msg_template = "{value} and {missing}" + + exc = MyError(value=42, missing="foo", extra="bar") + assert exc.error_context() == { + "code": "ValueError.MyError", + "message": "42 and foo", + "value": 42, + "missing": "foo", + "extra": "bar", + } + + exc = MyError(value=42) + assert exc.error_context() == { + "code": "ValueError.MyError", + "message": "42 and 'missing=?'", + "value": 42, + } diff --git a/packages/dask-task-models-library/requirements/_base.txt b/packages/dask-task-models-library/requirements/_base.txt index 81807f5ffe68..eb111f70fb14 100644 --- a/packages/dask-task-models-library/requirements/_base.txt +++ b/packages/dask-task-models-library/requirements/_base.txt @@ -15,21 +15,21 @@ cloudpickle==3.0.0 # via # dask # distributed -dask==2024.8.1 +dask==2024.9.0 # via # -r requirements/_base.in # distributed -distributed==2024.8.1 +distributed==2024.9.0 # via dask dnspython==2.6.1 # via email-validator email-validator==2.2.0 # via pydantic -fsspec==2024.6.1 +fsspec==2024.9.0 # via dask -idna==3.7 +idna==3.10 # via email-validator -importlib-metadata==8.4.0 +importlib-metadata==8.5.0 # via dask jinja2==3.1.4 # via @@ -51,7 +51,7 @@ markupsafe==2.1.5 # via jinja2 mdurl==0.1.2 # via markdown-it-py -msgpack==1.0.8 +msgpack==1.1.0 # via distributed orjson==3.10.7 # via @@ -102,7 +102,7 @@ referencing==0.35.1 # via # jsonschema # jsonschema-specifications -rich==13.7.1 +rich==13.8.1 # via # -r requirements/../../../packages/settings-library/requirements/_base.in # typer @@ -125,16 +125,16 @@ toolz==0.12.1 # partd tornado==6.4.1 # via distributed -typer==0.12.4 +typer==0.12.5 # via -r requirements/../../../packages/settings-library/requirements/_base.in -types-python-dateutil==2.9.0.20240821 +types-python-dateutil==2.9.0.20240906 # via arrow typing-extensions==4.12.2 # via # pydantic # pydantic-core # typer -urllib3==2.2.2 +urllib3==2.2.3 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt @@ -142,5 +142,5 @@ urllib3==2.2.2 # distributed zict==3.0.0 # via distributed -zipp==3.20.0 +zipp==3.20.2 # via importlib-metadata diff --git a/packages/dask-task-models-library/requirements/_test.txt b/packages/dask-task-models-library/requirements/_test.txt index 521d13265d9b..b05932129396 100644 --- a/packages/dask-task-models-library/requirements/_test.txt +++ b/packages/dask-task-models-library/requirements/_test.txt @@ -4,7 +4,7 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in flexcache==0.3 # via pint @@ -25,7 +25,7 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio diff --git 
a/packages/dask-task-models-library/requirements/_tools.txt b/packages/dask-task-models-library/requirements/_tools.txt index b9ee0a3c96da..779105b3894d 100644 --- a/packages/dask-task-models-library/requirements/_tools.txt +++ b/packages/dask-task-models-library/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -47,14 +47,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -66,9 +66,9 @@ pyyaml==6.0.2 # -c requirements/_base.txt # -c requirements/_test.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==75.1.0 # via pip-tools tomlkit==0.13.2 # via pylint @@ -77,7 +77,7 @@ typing-extensions==4.12.2 # -c requirements/_base.txt # -c requirements/_test.txt # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/packages/models-library/Makefile b/packages/models-library/Makefile index 6795fcd2610b..b41bafd2f2c7 100644 --- a/packages/models-library/Makefile +++ b/packages/models-library/Makefile @@ -110,14 +110,15 @@ erd-ServiceInput.svg: _erdantic DOWNLOADED_TEST_DATA_DIR = "$(CURDIR)/tests/data/.downloaded-ignore" .PHONY: _httpx -_httpx: _check_venv_active +_ensure_httpx: _check_venv_active # ensures requirements installed @python3 -c "import httpx" 2>/dev/null || uv pip install httpx -PHONY: pull_test_data -pull_test_data: $(DOT_ENV_FILE) _httpx ## downloads tests data from registry (this can take some time!) - # downloading all metadata files +PHONY: tests-data +tests-data: $(DOT_ENV_FILE) _ensure_httpx ## downloads tests data from registry defined in .env (time-intensive!) + # Downloading all metadata files ... 
@set -o allexport; \ source $<; \ set +o allexport; \ python3 "$(PACKAGES_DIR)/pytest-simcore/src/pytest_simcore/helpers/docker_registry.py" $(DOWNLOADED_TEST_DATA_DIR) + @echo "Run now 'pytest -vv -m diagnostics tests'" diff --git a/packages/models-library/requirements/_base.txt b/packages/models-library/requirements/_base.txt index f07b0ddd44bf..2825b4c6bc8c 100644 --- a/packages/models-library/requirements/_base.txt +++ b/packages/models-library/requirements/_base.txt @@ -6,23 +6,26 @@ attrs==24.2.0 # via # jsonschema # referencing -dnspython==2.6.1 +dnspython==2.7.0 # via email-validator email-validator==2.2.0 # via pydantic -idna==3.7 +idna==3.10 # via email-validator jsonschema==4.23.0 # via -r requirements/_base.in -jsonschema-specifications==2023.12.1 +jsonschema-specifications==2024.10.1 # via jsonschema orjson==3.10.7 # via + # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/_base.in pydantic==2.9.2 # via + # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/common-library/requirements/_base.in # -r requirements/_base.in # pydantic-extra-types # pydantic-settings @@ -30,7 +33,7 @@ pydantic-core==2.23.4 # via pydantic pydantic-extra-types==2.9.0 # via -r requirements/_base.in -pydantic-settings==2.4.0 +pydantic-settings==2.5.2 # via -r requirements/_base.in python-dateutil==2.9.0.post0 # via arrow @@ -46,7 +49,7 @@ rpds-py==0.20.0 # referencing six==1.16.0 # via python-dateutil -types-python-dateutil==2.9.0.20240821 +types-python-dateutil==2.9.0.20241003 # via arrow typing-extensions==4.12.2 # via diff --git a/packages/models-library/requirements/_test.txt b/packages/models-library/requirements/_test.txt index b44ab4c5a3fe..669ab403c702 100644 --- a/packages/models-library/requirements/_test.txt +++ b/packages/models-library/requirements/_test.txt @@ -4,11 +4,11 @@ attrs==24.2.0 # via # -c requirements/_base.txt # referencing -coverage==7.6.1 +coverage==7.6.3 # via # -r requirements/_test.in # pytest-cov -faker==27.0.0 +faker==30.3.0 # via -r requirements/_test.in flexcache==0.3 # via pint @@ -16,13 +16,13 @@ flexparser==0.3.1 # via pint icdiff==2.0.7 # via pytest-icdiff -idna==3.7 +idna==3.10 # via # -c requirements/_base.txt # yarl iniconfig==2.0.0 # via pytest -multidict==6.0.5 +multidict==6.1.0 # via yarl packaging==24.1 # via @@ -34,9 +34,11 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff +propcache==0.2.0 + # via yarl psutil==6.0.0 # via -r requirements/_test.in -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -85,17 +87,18 @@ six==1.16.0 # via # -c requirements/_base.txt # python-dateutil -termcolor==2.4.0 +termcolor==2.5.0 # via pytest-sugar types-jsonschema==4.23.0.20240813 # via -r requirements/_test.in -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in typing-extensions==4.12.2 # via # -c requirements/_base.txt + # faker # flexcache # flexparser # pint -yarl==1.9.4 +yarl==1.15.2 # via -r requirements/_test.in diff --git a/packages/models-library/requirements/_tools.txt b/packages/models-library/requirements/_tools.txt index 0efdb1139714..f6aae381b845 100644 --- a/packages/models-library/requirements/_tools.txt +++ b/packages/models-library/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.5 # 
via pylint -black==24.8.0 +black==24.10.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2.post1 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -13,13 +13,13 @@ click==8.1.7 # black # pip-tools # typer -dill==0.3.8 +dill==0.3.9 # via pylint -distlib==0.3.8 +distlib==0.3.9 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -31,7 +31,7 @@ mccabe==0.7.0 # via pylint mdurl==0.1.2 # via markdown-it-py -mypy==1.11.1 +mypy==1.12.0 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -50,18 +50,18 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv -pre-commit==3.8.0 +pre-commit==4.0.1 # via -r requirements/../../../requirements/devenv.txt pygments==2.18.0 # via rich -pylint==3.2.6 +pylint==3.3.1 # via -r requirements/../../../requirements/devenv.txt -pyproject-hooks==1.1.0 +pyproject-hooks==1.2.0 # via # build # pip-tools @@ -70,17 +70,17 @@ pyyaml==6.0.2 # -c requirements/../../../requirements/constraints.txt # -c requirements/_test.txt # pre-commit -rich==13.7.1 +rich==13.9.2 # via typer -ruff==0.6.1 +ruff==0.6.9 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==75.1.0 # via pip-tools shellingham==1.5.4 # via typer tomlkit==0.13.2 # via pylint -typer==0.12.4 +typer==0.12.5 # via -r requirements/_tools.in typing-extensions==4.12.2 # via @@ -88,7 +88,7 @@ typing-extensions==4.12.2 # -c requirements/_test.txt # mypy # typer -virtualenv==20.26.3 +virtualenv==20.26.6 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/packages/models-library/src/models_library/api_schemas__common/errors.py b/packages/models-library/src/models_library/api_schemas__common/errors.py index 92ed48088d05..85fef862b7fc 100644 --- a/packages/models-library/src/models_library/api_schemas__common/errors.py +++ b/packages/models-library/src/models_library/api_schemas__common/errors.py @@ -1,9 +1,9 @@ import http from typing import Any +from common_library.pydantic_basic_types import IDStr from pydantic import BaseModel, Field -from ..basic_types import IDStr from ..utils.pydantic_tools_extension import NOT_REQUIRED diff --git a/packages/models-library/src/models_library/api_schemas_directorv2/health.py b/packages/models-library/src/models_library/api_schemas__common/health.py similarity index 100% rename from packages/models-library/src/models_library/api_schemas_directorv2/health.py rename to packages/models-library/src/models_library/api_schemas__common/health.py diff --git a/packages/models-library/src/models_library/api_schemas_directorv2/comp_tasks.py b/packages/models-library/src/models_library/api_schemas_directorv2/comp_tasks.py index e383d45f20ed..e9570dffbc0e 100644 --- a/packages/models-library/src/models_library/api_schemas_directorv2/comp_tasks.py +++ b/packages/models-library/src/models_library/api_schemas_directorv2/comp_tasks.py @@ -1,6 +1,6 @@ from typing import Any, TypeAlias -from models_library.basic_types import IDStr +from common_library.pydantic_basic_types import IDStr from pydantic import AnyHttpUrl, AnyUrl, BaseModel, Field, field_validator from ..clusters import ClusterID diff --git a/packages/models-library/src/models_library/api_schemas_directorv2/services.py 
b/packages/models-library/src/models_library/api_schemas_directorv2/services.py index 3905680b3747..c797c687fd1f 100644 --- a/packages/models-library/src/models_library/api_schemas_directorv2/services.py +++ b/packages/models-library/src/models_library/api_schemas_directorv2/services.py @@ -1,3 +1,5 @@ +from typing import Final + from pydantic import BaseModel, ConfigDict, Field, field_validator from pydantic.types import ByteSize, NonNegativeInt @@ -21,7 +23,7 @@ class NodeRequirements(BaseModel): None, description="defines the required (maximum) GPU for running the services", alias="GPU", - validate_default=True + validate_default=True, ) ram: ByteSize = Field( ..., @@ -32,7 +34,7 @@ class NodeRequirements(BaseModel): default=None, description="defines the required (maximum) amount of VRAM for running the services", alias="VRAM", - validate_default=True + validate_default=True, ) @field_validator("vram", "gpu", mode="before") @@ -98,3 +100,6 @@ class ServiceExtras(BaseModel): ] } ) + + +CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME: Final[NonNegativeInt] = 89 diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py new file mode 100644 index 000000000000..89a493a56cce --- /dev/null +++ b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py @@ -0,0 +1,3 @@ +from typing import Final + +SOCKET_IO_SERVICE_STATUS_EVENT: Final[str] = "serviceStatus" diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/ports.py b/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/ports.py new file mode 100644 index 000000000000..5863b53b2bc6 --- /dev/null +++ b/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/ports.py @@ -0,0 +1,35 @@ +from enum import auto + +from models_library.projects import ProjectID +from models_library.projects_nodes_io import NodeID +from models_library.services_types import ServicePortKey +from models_library.utils.enums import StrAutoEnum +from pydantic import BaseModel + + +class OutputStatus(StrAutoEnum): + UPLOAD_STARTED = auto() + UPLOAD_WAS_ABORTED = auto() + UPLOAD_FINISHED_SUCCESSFULLY = auto() + UPLOAD_FINISHED_WITH_ERRROR = auto() + + +class InputStatus(StrAutoEnum): + DOWNLOAD_STARTED = auto() + DOWNLOAD_WAS_ABORTED = auto() + DOWNLOAD_FINISHED_SUCCESSFULLY = auto() + DOWNLOAD_FINISHED_WITH_ERRROR = auto() + + +class _PortStatusCommon(BaseModel): + project_id: ProjectID + node_id: NodeID + port_key: ServicePortKey + + +class OutputPortStatus(_PortStatusCommon): + status: OutputStatus + + +class InputPortSatus(_PortStatusCommon): + status: InputStatus diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/socketio.py b/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/socketio.py index 054b0834bc4d..93e34a1682bb 100644 --- a/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/socketio.py +++ b/packages/models-library/src/models_library/api_schemas_dynamic_sidecar/socketio.py @@ -1,3 +1,5 @@ from typing import Final SOCKET_IO_SERVICE_DISK_USAGE_EVENT: Final[str] = "serviceDiskUsage" +SOCKET_IO_STATE_OUTPUT_PORTS_EVENT: Final[str] = "stateOutputPorts" +SOCKET_IO_STATE_INPUT_PORTS_EVENT: Final[str] = "stateInputPorts" diff --git a/packages/models-library/src/models_library/api_schemas_resource_usage_tracker/service_runs.py 
b/packages/models-library/src/models_library/api_schemas_resource_usage_tracker/service_runs.py index 0ec5a1dc2ff6..22a56b0da4ef 100644 --- a/packages/models-library/src/models_library/api_schemas_resource_usage_tracker/service_runs.py +++ b/packages/models-library/src/models_library/api_schemas_resource_usage_tracker/service_runs.py @@ -43,6 +43,7 @@ class ServiceRunPage(NamedTuple): class OsparcCreditsAggregatedByServiceGet(BaseModel): osparc_credits: Decimal service_key: ServiceKey + running_time_in_hours: Decimal class OsparcCreditsAggregatedUsagesPage(NamedTuple): diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders.py b/packages/models-library/src/models_library/api_schemas_webserver/folders.py index 48a2ae605e41..6e62a242d79d 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import NamedTuple -from models_library.basic_types import IDStr +from common_library.pydantic_basic_types import IDStr from models_library.folders import FolderID from models_library.projects_access import AccessRights from models_library.users import GroupID diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py index 29fed6baced1..e7376758b8fb 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py @@ -1,8 +1,8 @@ from datetime import datetime from typing import NamedTuple +from common_library.pydantic_basic_types import IDStr from models_library.access_rights import AccessRights -from models_library.basic_types import IDStr from models_library.folders import FolderID from models_library.users import GroupID from models_library.utils.common_validators import null_or_none_str_to_none_validator diff --git a/packages/models-library/src/models_library/api_schemas_webserver/product.py b/packages/models-library/src/models_library/api_schemas_webserver/product.py index f967e15d548e..6d34aee09581 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/product.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/product.py @@ -1,9 +1,10 @@ from datetime import datetime from typing import Annotated, TypeAlias +from common_library.pydantic_basic_types import IDStr from pydantic import ConfigDict, Field, HttpUrl, NonNegativeInt, PositiveInt -from ..basic_types import IDStr, NonNegativeDecimal +from ..basic_types import NonNegativeDecimal from ..emails import LowerCaseEmailStr from ..products import ProductName from ._base import InputSchema, OutputSchema diff --git a/packages/models-library/src/models_library/api_schemas_webserver/projects.py b/packages/models-library/src/models_library/api_schemas_webserver/projects.py index dd1625301fa9..9066d0f6f6d0 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/projects.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/projects.py @@ -7,12 +7,12 @@ from typing import Any, Literal, TypeAlias +from common_library.pydantic_basic_types import LongTruncatedStr, ShortTruncatedStr from models_library.folders import FolderID from models_library.workspaces import WorkspaceID from pydantic import Field, HttpUrl, field_validator from 
..api_schemas_long_running_tasks.tasks import TaskGet -from ..basic_types import LongTruncatedStr, ShortTruncatedStr from ..emails import LowerCaseEmailStr from ..projects import ClassifierID, DateTimeStr, NodesDict, ProjectID from ..projects_access import AccessRights, GroupIDStr @@ -79,7 +79,8 @@ class ProjectGet(OutputSchema): quality: dict[str, Any] = {} dev: dict | None = None permalink: ProjectPermalink = FieldNotRequired() - workspace_id: WorkspaceID | None = None + workspace_id: WorkspaceID | None + folder_id: FolderID | None _empty_description = field_validator("description", mode="before")( none_to_empty_str_pre_validator diff --git a/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py b/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py index 02fabd46f7a7..2e21f9d7c52d 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py @@ -90,21 +90,39 @@ class NodeGet(OutputSchema): description="the service message", ) user_id: str = Field(..., description="the user that started the service") + model_config = ConfigDict( json_schema_extra={ - "example": { - "published_port": 30000, - "entrypoint": "/the/entry/point/is/here", - "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "service_key": "simcore/services/comp/itis/sleeper", - "service_version": "1.2.3", - "service_host": "jupyter_E1O2E-LAH", - "service_port": 8081, - "service_basepath": "/x/E1O2E-LAH", - "service_state": "pending", - "service_message": "no suitable node (insufficient resources on 1 node)", - "user_id": "123", - } + "examples": [ + # computational + { + "published_port": 30000, + "entrypoint": "/the/entry/point/is/here", + "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "service_key": "simcore/services/comp/itis/sleeper", + "service_version": "1.2.3", + "service_host": "jupyter_E1O2E-LAH", + "service_port": 8081, + "service_basepath": "/x/E1O2E-LAH", + "service_state": "pending", + "service_message": "no suitable node (insufficient resources on 1 node)", + "user_id": 123, + }, + # dynamic + { + "published_port": 30000, + "entrypoint": "/the/entry/point/is/here", + "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "service_key": "simcore/services/dynamic/some-dynamic-service", + "service_version": "1.2.3", + "service_host": "jupyter_E1O2E-LAH", + "service_port": 8081, + "service_basepath": "/x/E1O2E-LAH", + "service_state": "pending", + "service_message": "no suitable node (insufficient resources on 1 node)", + "user_id": 123, + }, + ] } ) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/socketio.py b/packages/models-library/src/models_library/api_schemas_webserver/socketio.py index 05bd342a4c32..b9ca848890ea 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/socketio.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/socketio.py @@ -1,4 +1,5 @@ -from ..basic_types import IDStr +from common_library.pydantic_basic_types import IDStr + from ..users import GroupID, UserID diff --git a/packages/models-library/src/models_library/api_schemas_webserver/wallets.py b/packages/models-library/src/models_library/api_schemas_webserver/wallets.py index f9ebbd9fb2d2..963d3940c4f3 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/wallets.py +++ 
b/packages/models-library/src/models_library/api_schemas_webserver/wallets.py @@ -2,9 +2,10 @@ from decimal import Decimal from typing import Literal, TypeAlias +from common_library.pydantic_basic_types import IDStr from pydantic import ConfigDict, Field, HttpUrl, field_validator -from ..basic_types import AmountDecimal, IDStr, NonNegativeDecimal +from ..basic_types import AmountDecimal, NonNegativeDecimal from ..users import GroupID from ..utils.pydantic_tools_extension import FieldNotRequired from ..wallets import WalletID, WalletStatus diff --git a/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py b/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py index 32f17200ee4c..6667e2444e3d 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import NamedTuple -from models_library.basic_types import IDStr +from common_library.pydantic_basic_types import IDStr from models_library.users import GroupID from models_library.workspaces import WorkspaceID from pydantic import ConfigDict, PositiveInt diff --git a/packages/models-library/src/models_library/basic_types.py b/packages/models-library/src/models_library/basic_types.py index 28e3710adeb1..785e2575826e 100644 --- a/packages/models-library/src/models_library/basic_types.py +++ b/packages/models-library/src/models_library/basic_types.py @@ -1,10 +1,8 @@ from decimal import Decimal from enum import StrEnum -from re import Pattern -from typing import Annotated, Final, TypeAlias +from typing import Annotated, TypeAlias from pydantic import Field, HttpUrl, PositiveInt, StringConstraints -from pydantic_core import core_schema from .basic_regex import ( PROPERTY_KEY_RE, @@ -26,6 +24,7 @@ # port number range PortInt: TypeAlias = Annotated[int, Field(gt=0, lt=65535)] + # e.g. 'v5' VersionTag: TypeAlias = Annotated[str, StringConstraints(pattern=r"^v\d$")] @@ -53,78 +52,6 @@ UUIDStr: TypeAlias = Annotated[str, StringConstraints(pattern=UUID_RE)] -# non-empty bounded string used as identifier -# e.g. "123" or "name_123" or "fa327c73-52d8-462a-9267-84eeaf0f90e3" but NOT "" -_ELLIPSIS_CHAR: Final[str] = "..." 
- - -class ConstrainedStr(str): - pattern: str | Pattern[str] | None = None - min_length: int | None = None - max_length: int | None = None - strip_whitespace: bool = False - curtail_length: int | None = None - - @classmethod - def _validate(cls, __input_value: str) -> str: - if cls.curtail_length and len(__input_value) > cls.curtail_length: - __input_value = __input_value[: cls.curtail_length] - return cls(__input_value) - - @classmethod - def __get_pydantic_core_schema__(cls, _source_type, _handler): - return core_schema.no_info_after_validator_function( - cls._validate, - core_schema.str_schema( - pattern=cls.pattern, - min_length=cls.min_length, - max_length=cls.max_length, - strip_whitespace=cls.strip_whitespace, - ), - ) - - -class IDStr(ConstrainedStr): - strip_whitespace = True - min_length = 1 - max_length = 100 - - @staticmethod - def concatenate(*args: "IDStr", link_char: str = " ") -> "IDStr": - result = link_char.join(args).strip() - assert IDStr.min_length # nosec - assert IDStr.max_length # nosec - if len(result) > IDStr.max_length: - if IDStr.max_length > len(_ELLIPSIS_CHAR): - result = ( - result[: IDStr.max_length - len(_ELLIPSIS_CHAR)].rstrip() - + _ELLIPSIS_CHAR - ) - else: - result = _ELLIPSIS_CHAR[0] * IDStr.max_length - if len(result) < IDStr.min_length: - msg = f"IDStr.concatenate: result is too short: {result}" - raise ValueError(msg) - return IDStr(result) - - -class ShortTruncatedStr(ConstrainedStr): - # NOTE: Use to input e.g. titles or display names - # A truncated string: - # - Strips whitespaces and truncate strings that exceed the specified characters limit (curtail_length). - # - Ensures that the **input** data length to the API is controlled and prevents exceeding large inputs silently, i.e. without raising errors. - # SEE https://github.com/ITISFoundation/osparc-simcore/pull/5989#discussion_r1650506583 - strip_whitespace = True - curtail_length = 600 - - -class LongTruncatedStr(ConstrainedStr): - # NOTE: Use to input e.g. 
descriptions or summaries - # Analogous to ShortTruncatedStr - strip_whitespace = True - curtail_length = 65536 # same as github descripton - - # auto-incremented primary-key IDs IdInt: TypeAlias = PositiveInt PrimaryKeyInt: TypeAlias = PositiveInt diff --git a/packages/models-library/src/models_library/clusters.py b/packages/models-library/src/models_library/clusters.py index 1dbcff0bc702..c98ea29757ae 100644 --- a/packages/models-library/src/models_library/clusters.py +++ b/packages/models-library/src/models_library/clusters.py @@ -95,6 +95,8 @@ class JupyterHubTokenAuthentication(BaseAuthentication): class NoAuthentication(BaseAuthentication): type: Literal["none"] = "none" + model_config = ConfigDict(json_schema_extra={"examples": [{"type": "none"}]}) + class TLSAuthentication(BaseAuthentication): type: Literal["tls"] = "tls" @@ -134,7 +136,7 @@ class BaseCluster(BaseModel): default=None, description="url to the image describing this cluster", examples=["https://placeimg.com/171/96/tech/grayscale/?0.jpg"], - validate_default=True + validate_default=True, ) endpoint: AnyUrl authentication: ClusterAuthentication = Field( diff --git a/packages/models-library/src/models_library/docker.py b/packages/models-library/src/models_library/docker.py index a75f21105462..4ee0264795ab 100644 --- a/packages/models-library/src/models_library/docker.py +++ b/packages/models-library/src/models_library/docker.py @@ -2,6 +2,7 @@ import re from typing import Annotated, Any, Final, TypeAlias +from common_library.pydantic_basic_types import ConstrainedStr from pydantic import ( BaseModel, ByteSize, @@ -14,7 +15,6 @@ ) from .basic_regex import DOCKER_GENERIC_TAG_KEY_RE, DOCKER_LABEL_KEY_REGEX -from .basic_types import ConstrainedStr from .generated_models.docker_rest_api import Task from .products import ProductName from .projects import ProjectID diff --git a/packages/models-library/src/models_library/osparc_variable_identifier.py b/packages/models-library/src/models_library/osparc_variable_identifier.py index 80a8e6d0fc07..447560e971dd 100644 --- a/packages/models-library/src/models_library/osparc_variable_identifier.py +++ b/packages/models-library/src/models_library/osparc_variable_identifier.py @@ -2,8 +2,7 @@ from typing import Any, TypeVar from common_library.errors_classes import OsparcErrorMixin -from models_library.basic_types import ConstrainedStr - +from common_library.pydantic_basic_types import ConstrainedStr from pydantic import BaseModel from .utils.string_substitution import OSPARC_IDENTIFIER_PREFIX diff --git a/packages/models-library/src/models_library/progress_bar.py b/packages/models-library/src/models_library/progress_bar.py index da2829b0c94b..504bd13373c7 100644 --- a/packages/models-library/src/models_library/progress_bar.py +++ b/packages/models-library/src/models_library/progress_bar.py @@ -1,9 +1,8 @@ from typing import Literal, TypeAlias +from common_library.pydantic_basic_types import IDStr from pydantic import BaseModel, ConfigDict -from .basic_types import IDStr - # NOTE: keep a list of possible unit, and please use correct official unit names ProgressUnit: TypeAlias = Literal["Byte"] diff --git a/packages/models-library/src/models_library/projects.py b/packages/models-library/src/models_library/projects.py index 120e54d899df..7e41dfef2517 100644 --- a/packages/models-library/src/models_library/projects.py +++ b/packages/models-library/src/models_library/projects.py @@ -6,7 +6,8 @@ from typing import Any, Final, TypeAlias from uuid import UUID -from models_library.basic_types 
import ConstrainedStr +from common_library.pydantic_basic_types import ConstrainedStr +from models_library.folders import FolderID from models_library.workspaces import WorkspaceID from pydantic import BaseModel, ConfigDict, Field, HttpUrl, field_validator @@ -170,6 +171,11 @@ class Project(BaseProjectModel): description="To which workspace project belongs. If None, belongs to private user workspace.", alias="workspaceId", ) + folder_id: FolderID | None = Field( + default=None, + description="To which folder project belongs. If None, belongs to root folder.", + alias="folderId", + ) model_config = ConfigDict( title="osparc-simcore project", diff --git a/packages/models-library/src/models_library/projects_access.py b/packages/models-library/src/models_library/projects_access.py index 29ca6c9f5921..664863c5d894 100644 --- a/packages/models-library/src/models_library/projects_access.py +++ b/packages/models-library/src/models_library/projects_access.py @@ -4,10 +4,10 @@ from enum import Enum +from common_library.pydantic_basic_types import IDStr from pydantic import BaseModel, ConfigDict, Field from pydantic.types import PositiveInt -from .basic_types import IDStr from .users import FirstNameStr, LastNameStr diff --git a/packages/models-library/src/models_library/projects_nodes_io.py b/packages/models-library/src/models_library/projects_nodes_io.py index 3a79b6acf003..0dedfecf3526 100644 --- a/packages/models-library/src/models_library/projects_nodes_io.py +++ b/packages/models-library/src/models_library/projects_nodes_io.py @@ -10,7 +10,8 @@ from typing import Annotated, TypeAlias from uuid import UUID -from models_library.basic_types import ConstrainedStr, KeyIDStr +from common_library.pydantic_basic_types import ConstrainedStr +from models_library.basic_types import KeyIDStr from pydantic import ( AnyUrl, BaseModel, diff --git a/packages/models-library/src/models_library/rabbitmq_basic_types.py b/packages/models-library/src/models_library/rabbitmq_basic_types.py index e8ae694b8be2..728235f41acd 100644 --- a/packages/models-library/src/models_library/rabbitmq_basic_types.py +++ b/packages/models-library/src/models_library/rabbitmq_basic_types.py @@ -1,6 +1,6 @@ from typing import Final -from models_library.basic_types import ConstrainedStr +from common_library.pydantic_basic_types import ConstrainedStr from pydantic import TypeAdapter REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS: Final[str] = r"^[\w\-\.]*$" diff --git a/packages/models-library/src/models_library/rabbitmq_messages.py b/packages/models-library/src/models_library/rabbitmq_messages.py index 11e0ad55796a..1b8c2df34e77 100644 --- a/packages/models-library/src/models_library/rabbitmq_messages.py +++ b/packages/models-library/src/models_library/rabbitmq_messages.py @@ -85,6 +85,7 @@ class ProgressType(StrAutoEnum): SERVICE_OUTPUTS_PULLING = auto() SERVICE_STATE_PULLING = auto() SERVICE_IMAGES_PULLING = auto() + SERVICE_CONTAINERS_STARTING = auto() SERVICE_STATE_PUSHING = auto() SERVICE_OUTPUTS_PUSHING = auto() diff --git a/packages/models-library/src/models_library/rest_ordering.py b/packages/models-library/src/models_library/rest_ordering.py index 7b1b6b39c39e..2d5a4394f4d6 100644 --- a/packages/models-library/src/models_library/rest_ordering.py +++ b/packages/models-library/src/models_library/rest_ordering.py @@ -1,9 +1,8 @@ from enum import Enum +from common_library.pydantic_basic_types import IDStr from pydantic import BaseModel, ConfigDict, Field -from .basic_types import IDStr - class OrderDirection(str, Enum): ASC = "asc" diff --git 
a/packages/models-library/src/models_library/services_enums.py b/packages/models-library/src/models_library/services_enums.py index 50a83313482e..ec5414218e3c 100644 --- a/packages/models-library/src/models_library/services_enums.py +++ b/packages/models-library/src/models_library/services_enums.py @@ -11,14 +11,18 @@ class ServiceBootType(str, Enum): @functools.total_ordering @unique class ServiceState(Enum): + FAILED = "failed" + PENDING = "pending" PULLING = "pulling" STARTING = "starting" RUNNING = "running" - COMPLETE = "complete" - FAILED = "failed" + STOPPING = "stopping" + COMPLETE = "complete" + IDLE = "idle" + def __lt__(self, other): if self.__class__ is other.__class__: comparison_order = ServiceState.comparison_order() @@ -39,6 +43,7 @@ def comparison_order() -> dict["ServiceState", int]: ServiceState.RUNNING: 4, ServiceState.STOPPING: 5, ServiceState.COMPLETE: 6, + ServiceState.IDLE: 7, } diff --git a/packages/models-library/src/models_library/users.py b/packages/models-library/src/models_library/users.py index 8cb7793d2f83..2ec3de1fa6bd 100644 --- a/packages/models-library/src/models_library/users.py +++ b/packages/models-library/src/models_library/users.py @@ -6,9 +6,13 @@ GroupID: TypeAlias = PositiveInt -FirstNameStr: TypeAlias = Annotated[str, StringConstraints(strip_whitespace=True, max_length=255)] +FirstNameStr: TypeAlias = Annotated[ + str, StringConstraints(strip_whitespace=True, max_length=255) +] -LastNameStr: TypeAlias = Annotated[str, StringConstraints(strip_whitespace=True, max_length=255)] +LastNameStr: TypeAlias = Annotated[ + str, StringConstraints(strip_whitespace=True, max_length=255) +] class UserBillingDetails(BaseModel): @@ -18,7 +22,7 @@ class UserBillingDetails(BaseModel): address: str | None city: str | None state: str | None = Field(description="State, province, canton, ...") - country: str + country: str # Required for taxes postal_code: str | None phone: str | None diff --git a/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py b/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py index b48628b9ab95..a168e2e22c42 100644 --- a/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py +++ b/packages/models-library/src/models_library/utils/_original_fastapi_encoders.py @@ -168,7 +168,7 @@ def jsonable_encoder( sqlalchemy_safe=sqlalchemy_safe, ) if dataclasses.is_dataclass(obj): - obj_dict = dataclasses.asdict(obj) # type: ignore[call-overload] + obj_dict = dataclasses.asdict(obj) # type: ignore[arg-type] return jsonable_encoder( obj_dict, include=include, diff --git a/packages/models-library/src/models_library/utils/json_schema.py b/packages/models-library/src/models_library/utils/json_schema.py index 1c5afc4ca55d..7db8133eb2ad 100644 --- a/packages/models-library/src/models_library/utils/json_schema.py +++ b/packages/models-library/src/models_library/utils/json_schema.py @@ -5,6 +5,7 @@ See how is used to validate input/output content-schemas of service models """ + # SEE possible enhancements in https://github.com/ITISFoundation/osparc-simcore/issues/3008 diff --git a/packages/models-library/src/models_library/utils/json_serialization.py b/packages/models-library/src/models_library/utils/json_serialization.py index a2fee1295f1c..5887cadbb613 100644 --- a/packages/models-library/src/models_library/utils/json_serialization.py +++ b/packages/models-library/src/models_library/utils/json_serialization.py @@ -97,7 +97,7 @@ def pydantic_encoder(obj: Any) -> Any: return 
obj.model_dump() if is_dataclass(obj): - return asdict(obj) # type: ignore[call-overload] + return asdict(obj) # type: ignore[arg-type] # Check the class type and its superclasses for a matching encoder for base in obj.__class__.__mro__[:-1]: diff --git a/packages/models-library/tests/conftest.py b/packages/models-library/tests/conftest.py index 9169e570b510..8bf433b901d7 100644 --- a/packages/models-library/tests/conftest.py +++ b/packages/models-library/tests/conftest.py @@ -9,6 +9,7 @@ import pytest pytest_plugins = [ + "pytest_simcore.faker_projects_data", "pytest_simcore.pydantic_models", "pytest_simcore.pytest_global_environs", "pytest_simcore.repository_paths", diff --git a/packages/models-library/tests/test_basic_types.py b/packages/models-library/tests/test_basic_types.py index dbd847246cf9..1592b55d9fe1 100644 --- a/packages/models-library/tests/test_basic_types.py +++ b/packages/models-library/tests/test_basic_types.py @@ -1,15 +1,8 @@ from typing import NamedTuple import pytest -from models_library.basic_types import ( - EnvVarKey, - IDStr, - MD5Str, - SHA1Str, - ShortTruncatedStr, - UUIDStr, - VersionTag, -) +from common_library.pydantic_basic_types import IDStr, ShortTruncatedStr +from models_library.basic_types import EnvVarKey, MD5Str, SHA1Str, UUIDStr, VersionTag from pydantic import TypeAdapter, ValidationError diff --git a/packages/models-library/tests/test_projects.py b/packages/models-library/tests/test_projects.py index 5cbb0e135735..4a089ca48180 100644 --- a/packages/models-library/tests/test_projects.py +++ b/packages/models-library/tests/test_projects.py @@ -6,8 +6,9 @@ from typing import Any import pytest +from common_library.pydantic_basic_types import LongTruncatedStr from faker import Faker -from models_library.api_schemas_webserver.projects import LongTruncatedStr, ProjectPatch +from models_library.api_schemas_webserver.projects import ProjectPatch from models_library.projects import Project diff --git a/packages/models-library/tests/test_utils_nodes.py b/packages/models-library/tests/test_utils_nodes.py index 87831ac88078..a41595ec5680 100644 --- a/packages/models-library/tests/test_utils_nodes.py +++ b/packages/models-library/tests/test_utils_nodes.py @@ -15,12 +15,6 @@ ) from models_library.utils.nodes import compute_node_hash - -@pytest.fixture() -def node_id() -> NodeID: - return uuid4() - - ANOTHER_NODE_ID = uuid4() ANOTHER_NODE_OUTPUT_KEY = "the_output_link" ANOTHER_NODE_PAYLOAD = {"outputs": {ANOTHER_NODE_OUTPUT_KEY: 36}} diff --git a/packages/notifications-library/requirements/_base.txt b/packages/notifications-library/requirements/_base.txt index a4df8b512d1b..08807d95984e 100644 --- a/packages/notifications-library/requirements/_base.txt +++ b/packages/notifications-library/requirements/_base.txt @@ -2,7 +2,7 @@ aiofiles==24.1.0 # via -r requirements/_base.in aiosmtplib==3.0.2 # via -r requirements/_base.in -alembic==1.13.2 +alembic==1.13.3 # via -r requirements/../../../packages/postgres-database/requirements/_base.in annotated-types==0.7.0 # via pydantic @@ -22,9 +22,9 @@ dnspython==2.6.1 # via email-validator email-validator==2.2.0 # via pydantic -greenlet==3.0.3 +greenlet==3.1.1 # via sqlalchemy -idna==3.7 +idna==3.10 # via # email-validator # yarl @@ -54,7 +54,7 @@ markupsafe==2.1.5 # mako mdurl==0.1.2 # via markdown-it-py -multidict==6.0.5 +multidict==6.1.0 # via yarl orjson==3.10.7 # via @@ -94,7 +94,7 @@ referencing==0.35.1 # via # jsonschema # jsonschema-specifications -rich==13.7.1 +rich==13.8.1 # via # -r 
requirements/../../../packages/settings-library/requirements/_base.in # typer @@ -106,7 +106,7 @@ shellingham==1.5.4 # via typer six==1.16.0 # via python-dateutil -sqlalchemy==1.4.53 +sqlalchemy==1.4.54 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt @@ -114,9 +114,9 @@ sqlalchemy==1.4.53 # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/postgres-database/requirements/_base.in # alembic -typer==0.12.4 +typer==0.12.5 # via -r requirements/../../../packages/settings-library/requirements/_base.in -types-python-dateutil==2.9.0.20240821 +types-python-dateutil==2.9.0.20240906 # via arrow typing-extensions==4.12.2 # via @@ -124,5 +124,5 @@ typing-extensions==4.12.2 # pydantic # pydantic-core # typer -yarl==1.9.4 +yarl==1.12.1 # via -r requirements/../../../packages/postgres-database/requirements/_base.in diff --git a/packages/notifications-library/requirements/_test.txt b/packages/notifications-library/requirements/_test.txt index ab645dfb576a..c9f9adda7f68 100644 --- a/packages/notifications-library/requirements/_test.txt +++ b/packages/notifications-library/requirements/_test.txt @@ -1,4 +1,4 @@ -certifi==2024.7.4 +certifi==2024.8.30 # via # -c requirements/../../../requirements/constraints.txt # requests @@ -10,21 +10,21 @@ coverage==7.6.1 # pytest-cov docker==7.1.0 # via -r requirements/_test.in -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in -greenlet==3.0.3 +greenlet==3.1.1 # via # -c requirements/_base.txt # sqlalchemy icdiff==2.0.7 # via pytest-icdiff -idna==3.7 +idna==3.10 # via # -c requirements/_base.txt # requests iniconfig==2.0.0 # via pytest -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -36,7 +36,7 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -79,7 +79,7 @@ six==1.16.0 # via # -c requirements/_base.txt # python-dateutil -sqlalchemy==1.4.53 +sqlalchemy==1.4.54 # via # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt @@ -97,7 +97,7 @@ typing-extensions==4.12.2 # -c requirements/_base.txt # mypy # sqlalchemy2-stubs -urllib3==2.2.2 +urllib3==2.2.3 # via # -c requirements/../../../requirements/constraints.txt # docker diff --git a/packages/notifications-library/requirements/_tools.txt b/packages/notifications-library/requirements/_tools.txt index 8204f34a33c9..fa8bee59633c 100644 --- a/packages/notifications-library/requirements/_tools.txt +++ b/packages/notifications-library/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -49,14 +49,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # 
virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -67,9 +67,9 @@ pyyaml==6.0.2 # -c requirements/../../../requirements/constraints.txt # -c requirements/_test.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==75.1.0 # via pip-tools tomlkit==0.13.2 # via pylint @@ -78,7 +78,7 @@ typing-extensions==4.12.2 # -c requirements/_base.txt # -c requirements/_test.txt # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.html b/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.html index 96230e800008..edf5f1a640cc 100644 --- a/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.html +++ b/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.html @@ -4,7 +4,7 @@

Dear Support team

-    We have received the following request form for an account in {{ product.display_name }} from {{ host }}
+    We have received the following request form for an account in {{ product.display_name }} from {{ host }}

diff --git a/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.txt b/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.txt
index 596ac7d01e5f..0eb9d7d4a641 100644
--- a/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.txt
+++ b/packages/notifications-library/src/notifications_library/templates/on_account_form.email.content.txt
@@ -1,6 +1,6 @@
 Dear Support team,
 
-We have received the following request form for an account in {{ product.display_name }} from {{ host }}:
+We have received the following request form for an account in {{ product.display_name }} from **{{ host }}**:
 
 {{ dumps(request_form) }}
 
diff --git a/packages/notifications-library/tests/with_db/conftest.py b/packages/notifications-library/tests/with_db/conftest.py
index 750f3cc24a49..236472958cd4 100644
--- a/packages/notifications-library/tests/with_db/conftest.py
+++ b/packages/notifications-library/tests/with_db/conftest.py
@@ -10,7 +10,7 @@
 
 import pytest
 import sqlalchemy as sa
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
 from models_library.products import ProductName
 from models_library.users import GroupID, UserID
 from notifications_library._templates import get_default_named_templates
diff --git a/packages/postgres-database/requirements/_base.txt b/packages/postgres-database/requirements/_base.txt
index 5cb99144fd9c..bf02ea152a86 100644
--- a/packages/postgres-database/requirements/_base.txt
+++ b/packages/postgres-database/requirements/_base.txt
@@ -1,4 +1,4 @@
-alembic==1.13.2
+alembic==1.13.3
     # via -r requirements/_base.in
 annotated-types==0.7.0
     # via pydantic
@@ -6,9 +6,9 @@ async-timeout==4.0.3
     # via asyncpg
 asyncpg==0.29.0
     # via sqlalchemy
-greenlet==3.0.3
+greenlet==3.1.1
     # via sqlalchemy
-idna==3.7
+idna==3.10
     # via yarl
 mako==1.3.5
     # via
@@ -16,7 +16,7 @@ mako==1.3.5
     #   alembic
 markupsafe==2.1.5
     # via mako
-multidict==6.0.5
+multidict==6.1.0
     # via yarl
 psycopg2-binary==2.9.9
     # via sqlalchemy
@@ -26,7 +26,7 @@ pydantic==2.9.1
     #   -r requirements/_base.in
 pydantic-core==2.23.3
     # via pydantic
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/_base.in
@@ -35,6 +35,5 @@ typing-extensions==4.12.2
     # via
     #   alembic
     #   pydantic
-    #   pydantic-core
-yarl==1.9.4
+yarl==1.12.1
     # via -r requirements/_base.in
diff --git a/packages/postgres-database/requirements/_migration.txt b/packages/postgres-database/requirements/_migration.txt
index 914d0820310f..a0dd4d6577f2 100644
--- a/packages/postgres-database/requirements/_migration.txt
+++ b/packages/postgres-database/requirements/_migration.txt
@@ -1,8 +1,8 @@
-alembic==1.13.2
+alembic==1.13.3
     # via
     #   -c requirements/_base.txt
     #   -r requirements/_migration.in
-certifi==2024.7.4
+certifi==2024.8.30
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   requests
@@ -12,11 +12,11 @@ click==8.1.7
     # via -r requirements/_migration.in
 docker==7.1.0
     # via -r requirements/_migration.in
-greenlet==3.0.3
+greenlet==3.1.1
     # via
     #   -c requirements/_base.txt
     #   sqlalchemy
-idna==3.7
+idna==3.10
     # via
     #   -c requirements/_base.txt
     #   requests
@@ -31,7 +31,7 @@ markupsafe==2.1.5
     #   mako
 requests==2.32.3
     # via docker
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
@@ -42,7 +42,7 @@ typing-extensions==4.12.2
     # via
     #   -c requirements/_base.txt
     #   alembic
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/_migration.in
diff --git a/packages/postgres-database/requirements/_test.txt b/packages/postgres-database/requirements/_test.txt
index 8bd80b78b95b..91705ca63c53 100644
--- a/packages/postgres-database/requirements/_test.txt
+++ b/packages/postgres-database/requirements/_test.txt
@@ -10,16 +10,16 @@ coverage==7.6.1
     # via
     #   -r requirements/_test.in
     #   pytest-cov
-faker==27.0.0
+faker==29.0.0
     # via -r requirements/_test.in
-greenlet==3.0.3
+greenlet==3.1.1
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_migration.txt
     #   sqlalchemy
 iniconfig==2.0.0
     # via pytest
-mypy==1.11.1
+mypy==1.11.2
     # via sqlalchemy
 mypy-extensions==1.0.0
     # via mypy
@@ -32,7 +32,7 @@ psycopg2-binary==2.9.9
     #   -c requirements/_base.txt
     #   aiopg
     #   sqlalchemy
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -r requirements/_test.in
     #   pytest-asyncio
@@ -59,7 +59,7 @@ pyyaml==6.0.2
     #   -r requirements/_test.in
 six==1.16.0
     # via python-dateutil
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
@@ -68,11 +68,11 @@ sqlalchemy==1.4.53
     #   aiopg
 sqlalchemy2-stubs==0.0.2a38
     # via sqlalchemy
-types-docker==7.1.0.20240821
+types-docker==7.1.0.20240827
     # via -r requirements/_test.in
 types-psycopg2==2.9.21.20240819
     # via -r requirements/_test.in
-types-requests==2.32.0.20240712
+types-requests==2.32.0.20240914
     # via types-docker
 typing-extensions==4.12.2
     # via
@@ -80,7 +80,7 @@ typing-extensions==4.12.2
     #   -c requirements/_migration.txt
     #   mypy
     #   sqlalchemy2-stubs
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_migration.txt
diff --git a/packages/postgres-database/requirements/_tools.txt b/packages/postgres-database/requirements/_tools.txt
index 9247bc4b1a97..6d01f81c8e17 100644
--- a/packages/postgres-database/requirements/_tools.txt
+++ b/packages/postgres-database/requirements/_tools.txt
@@ -1,8 +1,8 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
     # via -r requirements/../../../requirements/devenv.txt
-build==1.2.1
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
@@ -16,9 +16,9 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
@@ -26,7 +26,7 @@ isort==5.13.2
     #   pylint
 mccabe==0.7.0
     # via pylint
-mypy==1.11.1
+mypy==1.11.2
     # via
     #   -c requirements/_test.txt
     #   -r requirements/../../../requirements/devenv.txt
@@ -48,14 +48,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.0
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -66,9 +66,9 @@ pyyaml==6.0.2
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_test.txt
     #   pre-commit
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
-setuptools==73.0.1
+setuptools==75.1.0
     # via pip-tools
 tomlkit==0.13.2
     # via pylint
@@ -77,7 +77,7 @@ typing-extensions==4.12.2
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
 wheel==0.44.0
     # via pip-tools
diff --git a/packages/postgres-database/src/simcore_postgres_database/migration/versions/10729e07000d_improve_foreign_key_dependencies.py b/packages/postgres-database/src/simcore_postgres_database/migration/versions/10729e07000d_improve_foreign_key_dependencies.py
new file mode 100644
index 000000000000..16bfc82acd8c
--- /dev/null
+++ b/packages/postgres-database/src/simcore_postgres_database/migration/versions/10729e07000d_improve_foreign_key_dependencies.py
@@ -0,0 +1,110 @@
+"""improve foreign key dependencies
+
+Revision ID: 10729e07000d
+Revises: 47ca7335e146
+Create Date: 2024-09-24 07:52:20.253076+00:00
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "10729e07000d"
+down_revision = "47ca7335e146"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_foreign_key(
+        "fk_rut_pricing_plan_to_service_key_and_version",
+        "resource_tracker_pricing_plan_to_service",
+        "services_meta_data",
+        ["service_key", "service_version"],
+        ["key", "version"],
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    )
+    op.drop_index(
+        "ix_resource_tracker_pricing_plans_product_name",
+        table_name="resource_tracker_pricing_plans",
+    )
+    op.create_foreign_key(
+        "fk_rut_pricing_plans_product_name",
+        "resource_tracker_pricing_plans",
+        "products",
+        ["product_name"],
+        ["name"],
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    )
+    op.create_foreign_key(
+        "fk_resource_tracker_pricing_units_costs_pricing_plan_id",
+        "resource_tracker_pricing_unit_costs",
+        "resource_tracker_pricing_plans",
+        ["pricing_plan_id"],
+        ["pricing_plan_id"],
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    )
+    op.create_foreign_key(
+        "fk_resource_tracker_pricing_units_costs_pricing_unit_id",
+        "resource_tracker_pricing_unit_costs",
+        "resource_tracker_pricing_units",
+        ["pricing_unit_id"],
+        ["pricing_unit_id"],
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    )
+    op.create_foreign_key(
+        "fk_wallets_product_name",
+        "wallets",
+        "products",
+        ["product_name"],
+        ["name"],
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    )
+    op.create_foreign_key(
+        "fk_workspaces_product_name",
+        "workspaces",
+        "products",
+        ["product_name"],
+        ["name"],
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_constraint("fk_workspaces_product_name", "workspaces", type_="foreignkey")
+    op.drop_constraint("fk_wallets_product_name", "wallets", type_="foreignkey")
+    op.drop_constraint(
+        "fk_resource_tracker_pricing_units_costs_pricing_unit_id",
+        "resource_tracker_pricing_unit_costs",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "fk_resource_tracker_pricing_units_costs_pricing_plan_id",
+        "resource_tracker_pricing_unit_costs",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "fk_rut_pricing_plans_product_name",
+        "resource_tracker_pricing_plans",
+        type_="foreignkey",
+    )
+    op.create_index(
+        "ix_resource_tracker_pricing_plans_product_name",
+        "resource_tracker_pricing_plans",
+        ["product_name"],
+        unique=False,
+    )
+    op.drop_constraint(
+        "fk_rut_pricing_plan_to_service_key_and_version",
+        "resource_tracker_pricing_plan_to_service",
+        type_="foreignkey",
+    )
+    # ### end Alembic commands ###
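
Reviewer note (not part of the PR): a minimal sketch to sanity-check that revision 10729e07000d created its foreign keys after running `alembic upgrade head` locally. The connection URL and database name are placeholder assumptions for a development setup.

import sqlalchemy as sa

# placeholder DSN for a local development database (assumption)
engine = sa.create_engine("postgresql+psycopg2://scu:adminadmin@localhost:5432/simcoredb")
inspector = sa.inspect(engine)

# foreign keys reported for the table touched by this revision
fk_names = {
    fk["name"]
    for fk in inspector.get_foreign_keys("resource_tracker_pricing_plan_to_service")
}
assert "fk_rut_pricing_plan_to_service_key_and_version" in fk_names
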
diff --git a/packages/postgres-database/src/simcore_postgres_database/migration/versions/47ca7335e146_remove_old_folders.py b/packages/postgres-database/src/simcore_postgres_database/migration/versions/47ca7335e146_remove_old_folders.py
new file mode 100644
index 000000000000..63fb1a299231
--- /dev/null
+++ b/packages/postgres-database/src/simcore_postgres_database/migration/versions/47ca7335e146_remove_old_folders.py
@@ -0,0 +1,169 @@
+"""remove old folders
+
+Revision ID: 47ca7335e146
+Revises: 9f381dcb9b95
+Create Date: 2024-09-17 11:54:39.600025+00:00
+
+"""
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "47ca7335e146"
+down_revision = "9f381dcb9b95"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("folders_to_projects")
+    op.drop_table("folders_access_rights")
+    op.drop_table("folders")
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "folders",
+        sa.Column(
+            "id",
+            sa.BIGINT(),
+            server_default=sa.text("nextval('folders_id_seq'::regclass)"),
+            autoincrement=True,
+            nullable=False,
+        ),
+        sa.Column("name", sa.VARCHAR(), autoincrement=False, nullable=False),
+        sa.Column(
+            "description",
+            sa.VARCHAR(),
+            server_default=sa.text("''::character varying"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.Column("created_by", sa.BIGINT(), autoincrement=False, nullable=True),
+        sa.Column(
+            "created",
+            postgresql.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.Column(
+            "modified",
+            postgresql.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.Column("product_name", sa.VARCHAR(), autoincrement=False, nullable=False),
+        sa.ForeignKeyConstraint(
+            ["created_by"],
+            ["groups.gid"],
+            name="fk_folders_to_groups_gid",
+            ondelete="SET NULL",
+        ),
+        sa.ForeignKeyConstraint(
+            ["product_name"],
+            ["products.name"],
+            name="fk_folders_to_products_name",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id", name="folders_pkey"),
+        postgresql_ignore_search_path=False,
+    )
+    op.create_table(
+        "folders_access_rights",
+        sa.Column("folder_id", sa.BIGINT(), autoincrement=False, nullable=False),
+        sa.Column("gid", sa.BIGINT(), autoincrement=False, nullable=False),
+        sa.Column(
+            "traversal_parent_id", sa.BIGINT(), autoincrement=False, nullable=True
+        ),
+        sa.Column(
+            "original_parent_id", sa.BIGINT(), autoincrement=False, nullable=True
+        ),
+        sa.Column("read", sa.BOOLEAN(), autoincrement=False, nullable=False),
+        sa.Column("write", sa.BOOLEAN(), autoincrement=False, nullable=False),
+        sa.Column("delete", sa.BOOLEAN(), autoincrement=False, nullable=False),
+        sa.Column(
+            "created",
+            postgresql.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.Column(
+            "modified",
+            postgresql.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["folder_id"],
+            ["folders.id"],
+            name="fk_folders_access_rights_to_folders_id",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["gid"],
+            ["groups.gid"],
+            name="fk_folders_access_rights_to_groups_gid",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["original_parent_id"],
+            ["folders.id"],
+            name="fk_folders_to_folders_id_via_original_parent_id",
+            ondelete="SET NULL",
+        ),
+        sa.ForeignKeyConstraint(
+            ["traversal_parent_id"],
+            ["folders.id"],
+            name="fk_folders_to_folders_id_via_traversal_parent_id",
+            ondelete="SET NULL",
+        ),
+        sa.PrimaryKeyConstraint("folder_id", "gid", name="folders_access_rights_pk"),
+    )
+    op.create_table(
+        "folders_to_projects",
+        sa.Column("folder_id", sa.BIGINT(), autoincrement=False, nullable=False),
+        sa.Column("project_uuid", sa.VARCHAR(), autoincrement=False, nullable=False),
+        sa.Column(
+            "created",
+            postgresql.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.Column(
+            "modified",
+            postgresql.TIMESTAMP(timezone=True),
+            server_default=sa.text("now()"),
+            autoincrement=False,
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["folder_id"],
+            ["folders.id"],
+            name="fk_folders_to_projects_to_folders_id",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["project_uuid"],
+            ["projects.uuid"],
+            name="fk_folders_to_projects_to_projects_uuid",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint(
+            "folder_id", "project_uuid", name="projects_to_folder_pk"
+        ),
+    )
+    # ### end Alembic commands ###
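
Reviewer note (not part of the PR): revision 47ca7335e146 drops the legacy folder tables, so a quick way to confirm the upgrade took effect is to check that none of them remain. The DSN below is the same placeholder assumption used in the sketch above.

import sqlalchemy as sa

# placeholder DSN (assumption), same development database as above
engine = sa.create_engine("postgresql+psycopg2://scu:adminadmin@localhost:5432/simcoredb")
inspector = sa.inspect(engine)

legacy_tables = {"folders", "folders_access_rights", "folders_to_projects"}
assert legacy_tables.isdisjoint(set(inspector.get_table_names()))
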
diff --git a/packages/postgres-database/src/simcore_postgres_database/migration/versions/8a742f3efdd9_new_tags_priority_column.py b/packages/postgres-database/src/simcore_postgres_database/migration/versions/8a742f3efdd9_new_tags_priority_column.py
new file mode 100644
index 000000000000..abede70a2812
--- /dev/null
+++ b/packages/postgres-database/src/simcore_postgres_database/migration/versions/8a742f3efdd9_new_tags_priority_column.py
@@ -0,0 +1,27 @@
+"""new tags priority column
+
+Revision ID: 8a742f3efdd9
+Revises: 10729e07000d
+Create Date: 2024-10-02 15:23:27.446241+00:00
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "8a742f3efdd9"
+down_revision = "10729e07000d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("tags", sa.Column("priority", sa.Integer(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("tags", "priority")
+    # ### end Alembic commands ###
diff --git a/packages/postgres-database/src/simcore_postgres_database/migration/versions/ea3952fe5a0e_add_enable_efs_to_group_extra_properties.py b/packages/postgres-database/src/simcore_postgres_database/migration/versions/ea3952fe5a0e_add_enable_efs_to_group_extra_properties.py
new file mode 100644
index 000000000000..7f66f3b38302
--- /dev/null
+++ b/packages/postgres-database/src/simcore_postgres_database/migration/versions/ea3952fe5a0e_add_enable_efs_to_group_extra_properties.py
@@ -0,0 +1,32 @@
+"""add `enable_efs` to group extra properties
+
+Revision ID: ea3952fe5a0e
+Revises: 8a742f3efdd9
+Create Date: 2024-10-07 06:24:42.464942+00:00
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "ea3952fe5a0e"
+down_revision = "8a742f3efdd9"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column(
+        "groups_extra_properties",
+        sa.Column(
+            "enable_efs", sa.Boolean(), server_default=sa.text("false"), nullable=False
+        ),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("groups_extra_properties", "enable_efs")
+    # ### end Alembic commands ###
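
The four new revisions introduced in this PR chain linearly (47ca7335e146 -> 10729e07000d -> 8a742f3efdd9 -> ea3952fe5a0e), so a single upgrade applies them all. A minimal programmatic sketch, assuming an alembic.ini is available in the working directory of a local checkout:

from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")  # path is an assumption for a local checkout
command.upgrade(alembic_cfg, "head")  # applies all pending revisions, incl. enable_efs
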
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/folders.py b/packages/postgres-database/src/simcore_postgres_database/models/folders.py
deleted file mode 100644
index 82f3ef1a6c4f..000000000000
--- a/packages/postgres-database/src/simcore_postgres_database/models/folders.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import sqlalchemy as sa
-
-from ._common import (
-    column_created_datetime,
-    column_modified_datetime,
-    register_modified_datetime_auto_update_trigger,
-)
-from .base import metadata
-
-folders = sa.Table(
-    "folders",
-    metadata,
-    sa.Column(
-        "id",
-        sa.BigInteger,
-        nullable=False,
-        autoincrement=True,
-        primary_key=True,
-        doc="Primary key",
-    ),
-    sa.Column(
-        "name",
-        sa.String,
-        nullable=False,
-        doc="name of the folder",
-    ),
-    sa.Column(
-        "description",
-        sa.String,
-        nullable=False,
-        server_default="",
-        doc="user provided description for the folder",
-    ),
-    sa.Column(
-        "product_name",
-        sa.String,
-        sa.ForeignKey(
-            "products.name",
-            onupdate="CASCADE",
-            ondelete="CASCADE",
-            name="fk_folders_to_products_name",
-        ),
-        nullable=False,
-        doc="product identifier",
-    ),
-    sa.Column(
-        "created_by",
-        sa.BigInteger,
-        sa.ForeignKey(
-            "groups.gid",
-            name="fk_folders_to_groups_gid",
-            ondelete="SET NULL",
-        ),
-        nullable=True,
-        doc="traces who created the folder",
-    ),
-    column_created_datetime(timezone=True),
-    column_modified_datetime(timezone=True),
-)
-
-
-register_modified_datetime_auto_update_trigger(folders)
-
-folders_access_rights = sa.Table(
-    "folders_access_rights",
-    metadata,
-    sa.Column(
-        "folder_id",
-        sa.BigInteger,
-        sa.ForeignKey(
-            "folders.id",
-            name="fk_folders_access_rights_to_folders_id",
-            onupdate="CASCADE",
-            ondelete="CASCADE",
-        ),
-    ),
-    sa.Column(
-        "gid",
-        sa.BigInteger,
-        sa.ForeignKey(
-            "groups.gid",
-            name="fk_folders_access_rights_to_groups_gid",
-            onupdate="CASCADE",
-            ondelete="CASCADE",
-        ),
-    ),
-    sa.Column(
-        "traversal_parent_id",
-        sa.BigInteger,
-        sa.ForeignKey(
-            "folders.id",
-            name="fk_folders_to_folders_id_via_traversal_parent_id",
-            ondelete="SET NULL",
-        ),
-        doc=(
-            "used for listing the contes of the folders, "
-            "can be changed by the user by moving the folder"
-        ),
-    ),
-    sa.Column(
-        "original_parent_id",
-        sa.BigInteger,
-        sa.ForeignKey(
-            "folders.id",
-            name="fk_folders_to_folders_id_via_original_parent_id",
-            ondelete="SET NULL",
-        ),
-        doc=(
-            "initially equal the same as `traversal_parent_id`, "
-            "keeps track of the original parent, "
-            "can never be changed once insteted"
-        ),
-    ),
-    sa.Column(
-        "read",
-        sa.Boolean(),
-        nullable=False,
-        doc=(
-            "if True can: "
-            "view folders inside current folder "
-            "view projects inside current folder"
-        ),
-    ),
-    sa.Column(
-        "write",
-        sa.Boolean(),
-        nullable=False,
-        doc=(
-            "if True can: "
-            "create folder inside current folder, "
-            "add project to folder"
-        ),
-    ),
-    sa.Column(
-        "delete",
-        sa.Boolean(),
-        nullable=False,
-        doc=(
-            "if True can: "
-            "share folder, "
-            "rename folder, "
-            "edit folder description, "
-            "delete folder, "
-            "delete project form folder"
-        ),
-    ),
-    column_created_datetime(timezone=True),
-    column_modified_datetime(timezone=True),
-    sa.PrimaryKeyConstraint("folder_id", "gid", name="folders_access_rights_pk"),
-)
-
-register_modified_datetime_auto_update_trigger(folders_access_rights)
-
-
-folders_to_projects = sa.Table(
-    "folders_to_projects",
-    metadata,
-    sa.Column(
-        "folder_id",
-        sa.BigInteger,
-        sa.ForeignKey(
-            "folders.id",
-            name="fk_folders_to_projects_to_folders_id",
-            onupdate="CASCADE",
-            ondelete="CASCADE",
-        ),
-    ),
-    sa.Column(
-        "project_uuid",
-        sa.String,
-        sa.ForeignKey(
-            "projects.uuid",
-            name="fk_folders_to_projects_to_projects_uuid",
-            onupdate="CASCADE",
-            ondelete="CASCADE",
-        ),
-    ),
-    column_created_datetime(timezone=True),
-    column_modified_datetime(timezone=True),
-    sa.PrimaryKeyConstraint("folder_id", "project_uuid", name="projects_to_folder_pk"),
-)
-
-register_modified_datetime_auto_update_trigger(folders_to_projects)
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/groups_extra_properties.py b/packages/postgres-database/src/simcore_postgres_database/models/groups_extra_properties.py
index e0d438d76c99..93ffe8cd7f73 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/groups_extra_properties.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/groups_extra_properties.py
@@ -63,6 +63,13 @@
         server_default=sa.sql.expression.false(),
         doc="If true, will send telemetry for new style dynamic services to frontend",
     ),
+    sa.Column(
+        "enable_efs",
+        sa.Boolean(),
+        nullable=False,
+        server_default=sa.sql.expression.false(),
+        doc="If true, will mount efs distributed file system when dynamic services starts",
+    ),
     sa.UniqueConstraint(
         "group_id", "product_name", name="group_id_product_name_uniqueness"
     ),
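
A hedged sketch of how the new `enable_efs` flag could be read for a given group and product pair. The table variable name and the async connection API (an aiopg.sa-style awaitable scalar()) are assumptions; this diff only shows the column definition.

import sqlalchemy as sa

# assumed import path/variable name, based on the module touched above
from simcore_postgres_database.models.groups_extra_properties import (
    groups_extra_properties,
)


async def is_efs_enabled(connection, group_id: int, product_name: str) -> bool:
    # returns False when no matching row exists (scalar() yields None)
    value = await connection.scalar(
        sa.select(groups_extra_properties.c.enable_efs).where(
            (groups_extra_properties.c.group_id == group_id)
            & (groups_extra_properties.c.product_name == product_name)
        )
    )
    return bool(value)
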
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/products.py b/packages/postgres-database/src/simcore_postgres_database/models/products.py
index 913c929e04c5..03e137528ecd 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/products.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/products.py
@@ -41,8 +41,6 @@ class Vendor(TypedDict, total=False):
     invitation_url: str  # How to request a trial invitation? (if applies)
     invitation_form: bool  # If True, it takes precendence over invitation_url and asks the FE to show the form (if defined)
 
-    has_landing_page: bool  # Is Landing page enabled
-
     release_notes_url_template: str  # a template url where `{vtag}` will be replaced, eg: "http://example.com/{vtag}.md"
 
 
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plan_to_service.py b/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plan_to_service.py
index b0040d93ae66..820ec42fc506 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plan_to_service.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plan_to_service.py
@@ -45,4 +45,11 @@
         doc="Option to mark default pricing plan for the service (ex. when there are more pricing plans for the same service)",
     ),
     # ---------------------------
+    sa.ForeignKeyConstraint(
+        ["service_key", "service_version"],
+        ["services_meta_data.key", "services_meta_data.version"],
+        name="fk_rut_pricing_plan_to_service_key_and_version",
+        onupdate="CASCADE",
+        ondelete="CASCADE",
+    ),
 )
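
To illustrate what the new composite foreign key enforces: inserting a mapping for a (service_key, service_version) pair that has no row in services_meta_data is now rejected by the database. This is a hypothetical sketch; the values are made up, any column other than service_key/service_version is an assumption, and other NOT NULL columns are ignored for brevity.

import sqlalchemy as sa
from sqlalchemy.exc import IntegrityError


def demo_fk_enforcement(engine: sa.engine.Engine) -> None:
    stmt = sa.text(
        "INSERT INTO resource_tracker_pricing_plan_to_service "
        "(pricing_plan_id, service_key, service_version) "
        "VALUES (:plan, :key, :version)"
    )
    with engine.begin() as conn:
        try:
            conn.execute(
                stmt,
                {"plan": 1, "key": "simcore/services/comp/does-not-exist", "version": "1.0.0"},
            )
        except IntegrityError:
            # raised by fk_rut_pricing_plan_to_service_key_and_version
            pass
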
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plans.py b/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plans.py
index 8ec50b0f206a..81d98ebcac10 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plans.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_plans.py
@@ -33,9 +33,14 @@ class PricingPlanClassification(str, enum.Enum):
     sa.Column(
         "product_name",
         sa.String,
+        sa.ForeignKey(
+            "products.name",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+            name="fk_rut_pricing_plans_product_name",
+        ),
         nullable=False,
-        doc="Product name",
-        index=True,
+        doc="Products unique name",
     ),
     sa.Column(
         "display_name",
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_unit_costs.py b/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_unit_costs.py
index defaf49eb4a5..460315323877 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_unit_costs.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/resource_tracker_pricing_unit_costs.py
@@ -22,8 +22,14 @@
     sa.Column(
         "pricing_plan_id",
         sa.BigInteger,
+        sa.ForeignKey(
+            "resource_tracker_pricing_plans.pricing_plan_id",
+            name="fk_resource_tracker_pricing_units_costs_pricing_plan_id",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
         nullable=False,
-        doc="Parent pricing plan",
+        doc="Foreign key to pricing plan",
         index=True,
     ),
     sa.Column(
@@ -35,8 +41,14 @@
     sa.Column(
         "pricing_unit_id",
         sa.BigInteger,
+        sa.ForeignKey(
+            "resource_tracker_pricing_units.pricing_unit_id",
+            name="fk_resource_tracker_pricing_units_costs_pricing_unit_id",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+        ),
         nullable=False,
-        doc="Parent pricing unit",
+        doc="Foreign key to pricing unit",
         index=True,
     ),
     sa.Column(
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/services_tags.py b/packages/postgres-database/src/simcore_postgres_database/models/services_tags.py
index 6a3ea828eea2..c774cdcd3176 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/services_tags.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/services_tags.py
@@ -14,13 +14,13 @@
         "service_key",
         sa.String,
         nullable=False,
-        doc="Service Key Identifier",
+        doc="Key name identifier for the service, without specifiying its versions",
     ),
     sa.Column(
         "service_version",
         sa.String,
         nullable=False,
-        doc="Service version",
+        doc="Version of the service. Combined with 'service_key', it forms a unique identifier for this service.",
     ),
     # Tag
     sa.Column(
@@ -28,6 +28,7 @@
         sa.BigInteger,
         sa.ForeignKey(tags.c.id, onupdate="CASCADE", ondelete="CASCADE"),
         nullable=False,
+        doc="Identifier of the tag assigned to this specific service (service_key, service_version).",
     ),
     # Constraints
     sa.ForeignKeyConstraint(
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/tags.py b/packages/postgres-database/src/simcore_postgres_database/models/tags.py
index ce05e68f1985..da7c788e02dc 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/tags.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/tags.py
@@ -2,11 +2,11 @@
 
 from .base import metadata
 
-#
-# tags: a way to mark any entity (e.g. a project, ...)
-#       this can be used to perform operations as filter, select, compare, etc
-#
 tags = sa.Table(
+    #
+    # A way to mark any entity (e.g. a project, ...)
+    # this can be used to perform operations as filter, select, compare, etc
+    #
     "tags",
     metadata,
     sa.Column(
@@ -14,23 +14,30 @@
         sa.BigInteger(),
         nullable=False,
         primary_key=True,
+        doc="Unique identifier for each tag.",
     ),
-    sa.Column(
-        "name",
-        sa.String(),
-        nullable=False,
-        doc="display name",
-    ),
+    sa.Column("name", sa.String(), nullable=False, doc="The display name of the tag."),
     sa.Column(
         "description",
         sa.String(),
         nullable=True,
-        doc="description displayed",
+        doc="A brief description displayed for the tag.",
     ),
     sa.Column(
         "color",
         sa.String(),
         nullable=False,
-        doc="Hex color (see https://www.color-hex.com/)",
+        doc="Hexadecimal color code representing the tag (e.g., #FF5733).",
+    ),
+    sa.Column(
+        "priority",
+        sa.Integer(),
+        nullable=True,
+        doc=(
+            "Explicit ordering priority when displaying tags. "
+            "Tags with a lower value are displayed first. "
+            "If NULL, tags are considered to have the lowest priority and "
+            "are displayed after non-NULL values, ordered by their ID (reflecting creation order)."
+        ),
     ),
 )
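The ordering described in the `priority` column doc can be expressed directly in SQLAlchemy. A minimal sketch (statement name is hypothetical; it assumes the `tags` table above is importable from its module):

```python
import sqlalchemy as sa

from simcore_postgres_database.models.tags import tags

# Tags with an explicit priority come first (lower value first); NULL priorities
# fall to the end and are then ordered by id, i.e. by creation order.
list_tags_by_priority_stmt = sa.select(tags).order_by(
    tags.c.priority.nulls_last(),
    tags.c.id,
)
```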
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/tags_access_rights.py b/packages/postgres-database/src/simcore_postgres_database/models/tags_access_rights.py
index 9efb4123f0df..9078a9254f18 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/tags_access_rights.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/tags_access_rights.py
@@ -22,7 +22,7 @@
             name="fk_tag_to_group_tag_id",
         ),
         nullable=False,
-        doc="Tag unique ID",
+        doc="References the unique identifier of the tag that these access rights apply to.",
     ),
     sa.Column(
         "group_id",
@@ -34,7 +34,7 @@
             name="fk_tag_to_group_group_id",
         ),
         nullable=False,
-        doc="Group unique ID",
+        doc="References the unique identifier of the group that has access rights to the tag.",
     ),
     # ACCESS RIGHTS ---
     sa.Column(
@@ -42,22 +42,24 @@
         sa.Boolean(),
         nullable=False,
         server_default=sa.sql.expression.true(),
-        doc="If true, group can *read* a tag."
-        "This column can be used to set the tag invisible",
+        doc="Indicates whether the group has permission to view the tag. "
+        "A value of 'True' allows the group to access the tag's details.",
     ),
     sa.Column(
         "write",
         sa.Boolean(),
         nullable=False,
         server_default=sa.sql.expression.false(),
-        doc="If true, group can *create* and *update* a tag",
+        doc="Indicates whether the group has permission to modify the tag. "
+        "A value of 'True' grants write access to the group.",
     ),
     sa.Column(
         "delete",
         sa.Boolean(),
         nullable=False,
         server_default=sa.sql.expression.false(),
-        doc="If true, group can *delete* the tag",
+        doc="Indicates whether the group has permission to delete the tag. "
+        "A value of 'True' allows the group to remove the tag.",
     ),
     # TIME STAMPS ----
     column_created_datetime(timezone=False),
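A hypothetical sketch of how the server defaults above play out: inserting an access-rights row with only the key pair grants read-only visibility to that group (the literal ids `1` and `2` are placeholders):

```python
import sqlalchemy as sa

from simcore_postgres_database.models.tags_access_rights import tags_access_rights

# relies on the server defaults shown above: read=true, write=false, delete=false
share_tag_read_only_stmt = (
    sa.insert(tags_access_rights)
    .values(tag_id=1, group_id=2)
    .returning(
        tags_access_rights.c.read,
        tags_access_rights.c.write,
        tags_access_rights.c.delete,
    )
)
```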
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/wallets.py b/packages/postgres-database/src/simcore_postgres_database/models/wallets.py
index e26545f1f4ac..3c7655299761 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/wallets.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/wallets.py
@@ -50,7 +50,18 @@ class WalletStatus(str, enum.Enum):
     ),
     column_created_datetime(timezone=True),
     column_modified_datetime(timezone=True),
-    sa.Column("product_name", sa.String, nullable=False, doc="Product name"),
+    sa.Column(
+        "product_name",
+        sa.String,
+        sa.ForeignKey(
+            "products.name",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+            name="fk_wallets_product_name",
+        ),
+        nullable=False,
+        doc="Products unique name",
+    ),
 )
 
 # ------------------------ TRIGGERS
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/workspaces.py b/packages/postgres-database/src/simcore_postgres_database/models/workspaces.py
index f4b76812a6c1..998c7676761b 100644
--- a/packages/postgres-database/src/simcore_postgres_database/models/workspaces.py
+++ b/packages/postgres-database/src/simcore_postgres_database/models/workspaces.py
@@ -34,7 +34,18 @@
         nullable=False,
         doc="Identifier of the group that owns this workspace (Should be just PRIMARY GROUP)",
     ),
-    sa.Column("product_name", sa.String, nullable=False, doc="Product name"),
+    sa.Column(
+        "product_name",
+        sa.String,
+        sa.ForeignKey(
+            "products.name",
+            onupdate="CASCADE",
+            ondelete="CASCADE",
+            name="fk_workspaces_product_name",
+        ),
+        nullable=False,
+        doc="Products unique name",
+    ),
     column_created_datetime(timezone=True),
     column_modified_datetime(timezone=True),
 )
diff --git a/packages/postgres-database/src/simcore_postgres_database/utils_folders.py b/packages/postgres-database/src/simcore_postgres_database/utils_folders.py
deleted file mode 100644
index 6dcca321a73b..000000000000
--- a/packages/postgres-database/src/simcore_postgres_database/utils_folders.py
+++ /dev/null
@@ -1,1156 +0,0 @@
-import re
-import uuid
-from collections.abc import Iterable
-from dataclasses import dataclass
-from datetime import datetime
-from enum import Enum
-from functools import reduce
-from typing import Annotated, Any, ClassVar, Final, TypeAlias, cast
-
-import sqlalchemy as sa
-from aiopg.sa.connection import SAConnection
-from aiopg.sa.result import RowProxy
-from common_library.errors_classes import OsparcErrorMixin
-from pydantic import (
-    BaseModel,
-    ConfigDict,
-    Field,
-    NonNegativeInt,
-    PositiveInt,
-    StringConstraints,
-    TypeAdapter,
-    ValidationError,
-)
-from simcore_postgres_database.utils_ordering import OrderByDict
-from sqlalchemy import Column, func
-from sqlalchemy.dialects import postgresql
-from sqlalchemy.dialects.postgresql import BOOLEAN, INTEGER
-from sqlalchemy.sql.elements import ColumnElement, Label
-from sqlalchemy.sql.selectable import CTE
-
-from .models.folders import folders, folders_access_rights, folders_to_projects
-from .models.groups import GroupType, groups
-from .utils_ordering import OrderDirection
-
-_ProductName: TypeAlias = str
-_ProjectID: TypeAlias = uuid.UUID
-_GroupID: TypeAlias = PositiveInt
-_FolderID: TypeAlias = PositiveInt
-
-###
-### ERRORS
-###
-
-
-"""Errors hierarchy
-
-FoldersError
-    * InvalidFolderNameError
-    * FolderAccessError
-        * FolderNotFoundError
-        * FolderNotSharedWithGidError
-        * InsufficientPermissionsError
-        * NoAccessForGroupsFoundError
-    * BaseCreateFolderError
-        * FolderAlreadyExistsError
-        * ParentFolderIsNotWritableError
-        * CouldNotCreateFolderError
-        * GroupIdDoesNotExistError
-        * RootFolderRequiresAtLeastOnePrimaryGroupError
-    * BaseMoveFolderError
-        * CannotMoveFolderSharedViaNonPrimaryGroupError
-    * BaseAddProjectError
-        * ProjectAlreadyExistsInFolderError
-"""
-
-
-class FoldersError(OsparcErrorMixin, RuntimeError):
-    ...
-
-
-class InvalidFolderNameError(FoldersError):
-    msg_template = "Provided folder name='{name}' is invalid: {reason}"
-
-
-class FolderAccessError(FoldersError):
-    pass
-
-
-class FolderNotFoundError(FolderAccessError):
-    msg_template = "no entry found for folder_id={folder_id}, gids={gids} and product_name={product_name}"
-
-
-class FolderNotSharedWithGidError(FolderAccessError):
-    msg_template = "folder_id={folder_id} was not shared with gids={gids}"
-
-
-class InsufficientPermissionsError(FolderAccessError):
-    msg_template = "could not find a parent for folder_id={folder_id} and gids={gids}, with permissions={permissions}"
-
-
-class NoAccessForGroupsFoundError(FolderAccessError):
-    msg_template = "No parent found for folder_id={folder_id} and gids={gids}, with permissions={permissions}"
-
-
-class BaseCreateFolderError(FoldersError):
-    pass
-
-
-class FolderAlreadyExistsError(BaseCreateFolderError):
-    msg_template = "A folder='{folder}' with parent='{parent}' in product_name={product_name} already exists"
-
-
-class ParentFolderIsNotWritableError(BaseCreateFolderError):
-    msg_template = "Cannot create any sub-folders inside folder_id={parent_folder_id} since it is not writable for gid={gid}."
-
-
-class CouldNotCreateFolderError(BaseCreateFolderError):
-    msg_template = "Could not create folder='{folder}' and parent='{parent}'"
-
-
-class NoGroupIDFoundError(BaseCreateFolderError):
-    msg_template = "None of the provided gids='{gids}' was found"
-
-
-class RootFolderRequiresAtLeastOnePrimaryGroupError(BaseCreateFolderError):
-    msg_template = (
-        "No parent={parent} defined and groupIDs={gids} did not contain a PRIMARY group. "
-        "Cannot create a folder isnide the 'root' wihtout using the user's group."
-    )
-
-
-class BaseMoveFolderError(FoldersError):
-    pass
-
-
-class CannotMoveFolderSharedViaNonPrimaryGroupError(BaseMoveFolderError):
-    msg_template = (
-        "deltected group_type={group_type} for gid={gid} which is not allowed"
-    )
-
-
-class BaseAddProjectError(FoldersError):
-    pass
-
-
-class ProjectAlreadyExistsInFolderError(BaseAddProjectError):
-    msg_template = (
-        "project_id={project_uuid} in folder_id={folder_id} is already present"
-    )
-
-
-###
-### UTILS ACCESS LAYER
-###
-
-
-class FolderAccessRole(Enum):
-    """Used by the frontend to indicate a role in a simple manner"""
-
-    NO_ACCESS = 0
-    VIEWER = 1
-    EDITOR = 2
-    OWNER = 3
-
-
-@dataclass(frozen=True)
-class _FolderPermissions:
-    read: bool
-    write: bool
-    delete: bool
-
-    def to_dict(self, *, include_only_true: bool = False) -> dict[str, bool]:
-        data: dict[str, bool] = {
-            "read": self.read,
-            "write": self.write,
-            "delete": self.delete,
-        }
-        if include_only_true:
-            for key_to_remove in [k for k, v in data.items() if not v]:
-                data.pop(key_to_remove)
-
-        return data
-
-
-def _make_permissions(
-    *, r: bool = False, w: bool = False, d: bool = False, description: str = ""
-) -> "_FolderPermissions":
-    _ = description
-    return _FolderPermissions(read=r, write=w, delete=d)
-
-
-def _only_true_permissions(permissions: _FolderPermissions) -> dict:
-    return permissions.to_dict(include_only_true=True)
-
-
-def _or_reduce(x: _FolderPermissions, y: _FolderPermissions) -> _FolderPermissions:
-    return _FolderPermissions(
-        read=x.read or y.read, write=x.write or y.write, delete=x.delete or y.delete
-    )
-
-
-def _or_dicts_list(dicts: Iterable[_FolderPermissions]) -> _FolderPermissions:
-    if not dicts:
-        return _make_permissions()
-    return reduce(_or_reduce, dicts)
-
-
-class _BasePermissions:
-    GET_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(r=True)
-    LIST_FOLDERS: ClassVar[_FolderPermissions] = _make_permissions(r=True)
-
-    CREATE_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(w=True)
-    ADD_PROJECT_TO_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(w=True)
-
-    SHARE_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(d=True)
-    UPDATE_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(d=True)
-    DELETE_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(d=True)
-    REMOVE_PROJECT_FROM_FOLDER: ClassVar[_FolderPermissions] = _make_permissions(d=True)
-
-    _MOVE_PROJECT_FROM_FOLDER_SOURCE: ClassVar[_FolderPermissions] = _make_permissions(
-        d=True,
-        description="apply to folder where the project is",
-    )
-    _MOVE_PROJECT_FROM_FOLDER_DESTINATION: ClassVar[
-        _FolderPermissions
-    ] = _make_permissions(
-        w=True, description="apply on the folder receiving the project"
-    )
-    MOVE_PROJECT_FROM_FOLDER: ClassVar[_FolderPermissions] = _or_dicts_list(
-        [_MOVE_PROJECT_FROM_FOLDER_SOURCE, _MOVE_PROJECT_FROM_FOLDER_DESTINATION]
-    )
-
-    _MOVE_FOLDER_SOURCE: ClassVar[_FolderPermissions] = _make_permissions(
-        d=True,
-        description="apply to folder providing the data",
-    )
-    _MOVE_FOLDER_DESTINATION: ClassVar[_FolderPermissions] = _make_permissions(
-        w=True, description="apply on the folder receiving the data"
-    )
-    MOVE_FOLDER: ClassVar[_FolderPermissions] = _or_dicts_list(
-        [_MOVE_FOLDER_SOURCE, _MOVE_FOLDER_DESTINATION]
-    )
-
-
-NO_ACCESS_PERMISSIONS: _FolderPermissions = _make_permissions()
-
-VIEWER_PERMISSIONS: _FolderPermissions = _or_dicts_list(
-    [
-        _BasePermissions.LIST_FOLDERS,
-    ]
-)
-EDITOR_PERMISSIONS: _FolderPermissions = _or_dicts_list(
-    [
-        VIEWER_PERMISSIONS,
-        _BasePermissions.CREATE_FOLDER,
-        _BasePermissions.ADD_PROJECT_TO_FOLDER,
-    ]
-)
-OWNER_PERMISSIONS: _FolderPermissions = _or_dicts_list(
-    [
-        EDITOR_PERMISSIONS,
-        _BasePermissions.SHARE_FOLDER,
-        _BasePermissions.UPDATE_FOLDER,
-        _BasePermissions.DELETE_FOLDER,
-        _BasePermissions.REMOVE_PROJECT_FROM_FOLDER,
-        _BasePermissions.MOVE_FOLDER,
-    ]
-)
-
-_ROLE_TO_PERMISSIONS: dict[FolderAccessRole, _FolderPermissions] = {
-    FolderAccessRole.NO_ACCESS: NO_ACCESS_PERMISSIONS,
-    FolderAccessRole.VIEWER: VIEWER_PERMISSIONS,
-    FolderAccessRole.EDITOR: EDITOR_PERMISSIONS,
-    FolderAccessRole.OWNER: OWNER_PERMISSIONS,
-}
-
-
-def _get_permissions_from_role(role: FolderAccessRole) -> _FolderPermissions:
-    return _ROLE_TO_PERMISSIONS[role]
-
-
-def _requires(*permissions: _FolderPermissions) -> _FolderPermissions:
-    if len(permissions) == 0:
-        return _make_permissions()
-    return _or_dicts_list(permissions)
-
-
-def _get_filter_for_enabled_permissions(
-    permissions: _FolderPermissions, table: sa.Table | CTE
-) -> ColumnElement | bool:
-    clauses: list[ColumnElement] = []
-
-    if permissions.read:
-        clauses.append(table.c.read.is_(True))
-    if permissions.write:
-        clauses.append(table.c.write.is_(True))
-    if permissions.delete:
-        clauses.append(table.c.delete.is_(True))
-
-    return sa.and_(*clauses) if clauses else True
-
-
-###
-### UTILS
-###
-
-
-FolderName: TypeAlias = Annotated[
-    str,
-    StringConstraints(
-        min_length=1,
-        max_length=255,
-        pattern=re.compile(
-            r'^(?!.*[<>:"/\\|?*\]])(?!.*\b(?:LPT9|COM1|LPT1|COM2|LPT3|LPT4|CON|COM5|COM3|COM4|AUX|PRN|LPT2|LPT5|COM6|LPT7|NUL|COM8|LPT6|COM9|COM7|LPT8)\b).+$',
-            re.IGNORECASE,
-        ),
-    ),
-]
-
-
-class FolderEntry(BaseModel):
-    id: _FolderID
-    parent_folder: _FolderID | None = Field(alias="traversal_parent_id")
-    name: str
-    description: str
-    owner: _GroupID = Field(alias="created_by")
-    created: datetime = Field(alias="access_created")
-    modified: datetime = Field(alias="access_modified")
-    my_access_rights: _FolderPermissions
-    access_rights: dict[_GroupID, _FolderPermissions]
-    model_config = ConfigDict(from_attributes=True)
-
-
-class _ResolvedAccessRights(BaseModel):
-    folder_id: _FolderID
-    gid: _GroupID
-    traversal_parent_id: _FolderID | None
-    original_parent_id: _FolderID | None
-    read: bool
-    write: bool
-    delete: bool
-    level: int
-    model_config = ConfigDict(from_attributes=True)
-
-
-async def _get_resolved_access_rights(
-    connection: SAConnection,
-    folder_id: _FolderID,
-    gid: _GroupID,
-    *,
-    permissions: _FolderPermissions | None,
-) -> _ResolvedAccessRights | None:
-
-    # Define the anchor CTE
-    access_rights_cte = (
-        sa.select(
-            folders_access_rights.c.folder_id,
-            folders_access_rights.c.gid,
-            folders_access_rights.c.traversal_parent_id,
-            folders_access_rights.c.original_parent_id,
-            folders_access_rights.c.read,
-            folders_access_rights.c.write,
-            folders_access_rights.c.delete,
-            sa.literal_column("0").label("level"),
-        )
-        .where(folders_access_rights.c.folder_id == sa.bindparam("start_folder_id"))
-        .cte(name="access_rights_cte", recursive=True)
-    )
-
-    # Define the recursive part of the CTE
-    recursive = sa.select(
-        folders_access_rights.c.folder_id,
-        folders_access_rights.c.gid,
-        folders_access_rights.c.traversal_parent_id,
-        folders_access_rights.c.original_parent_id,
-        folders_access_rights.c.read,
-        folders_access_rights.c.write,
-        folders_access_rights.c.delete,
-        sa.literal_column("access_rights_cte.level + 1").label("level"),
-    ).select_from(
-        folders_access_rights.join(
-            access_rights_cte,
-            folders_access_rights.c.folder_id == access_rights_cte.c.original_parent_id,
-        )
-    )
-
-    # Combine anchor and recursive CTE
-    folder_hierarchy: CTE = access_rights_cte.union_all(recursive)
-
-    # Final query to filter and order results
-    query = (
-        sa.select(
-            folder_hierarchy.c.folder_id,
-            folder_hierarchy.c.gid,
-            folder_hierarchy.c.traversal_parent_id,
-            folder_hierarchy.c.original_parent_id,
-            folder_hierarchy.c.read,
-            folder_hierarchy.c.write,
-            folder_hierarchy.c.delete,
-            folder_hierarchy.c.level,
-        )
-        .where(
-            True
-            if not permissions
-            else _get_filter_for_enabled_permissions(permissions, folder_hierarchy)
-        )
-        .where(folder_hierarchy.c.original_parent_id.is_(None))
-        .where(folder_hierarchy.c.gid == gid)
-        .order_by(folder_hierarchy.c.level.asc())
-    )
-
-    result = await connection.execute(query.params(start_folder_id=folder_id))
-    resolved_access_rights: RowProxy | None = await result.fetchone()
-    return (
-        _ResolvedAccessRights.from_orm(resolved_access_rights)
-        if resolved_access_rights
-        else None
-    )
-
-
-async def _check_and_get_folder_access_by_group(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gid: _GroupID,
-    *,
-    error_reporting_gids: set[_GroupID],
-    permissions: _FolderPermissions,
-) -> _ResolvedAccessRights:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-    """
-    folder_entry: int | None = await connection.scalar(
-        sa.select(folders.c.id)
-        .where(folders.c.id == folder_id)
-        .where(folders.c.product_name == product_name)
-    )
-    if not folder_entry:
-        raise FolderNotFoundError(
-            folder_id=folder_id, gids=error_reporting_gids, product_name=product_name
-        )
-
-    # check if folder was shared
-    resolved_access_rights_without_permissions = await _get_resolved_access_rights(
-        connection,
-        folder_id,
-        gid,
-        permissions=None,
-    )
-    if not resolved_access_rights_without_permissions:
-        raise FolderNotSharedWithGidError(
-            folder_id=folder_id, gids=error_reporting_gids
-        )
-
-    # check if there are permissions
-    resolved_access_rights = await _get_resolved_access_rights(
-        connection,
-        folder_id,
-        gid,
-        permissions=permissions,
-    )
-    if resolved_access_rights is None:
-        raise InsufficientPermissionsError(
-            folder_id=folder_id,
-            gids=error_reporting_gids,
-            permissions=_only_true_permissions(permissions),
-        )
-
-    return resolved_access_rights
-
-
-async def _check_and_get_folder_access(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    permissions: _FolderPermissions,
-) -> _ResolvedAccessRights:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-    """
-    folder_access_error = None
-
-    for gid in gids:
-        try:
-            return await _check_and_get_folder_access_by_group(
-                connection,
-                product_name,
-                folder_id,
-                gid,
-                error_reporting_gids=gids,
-                permissions=permissions,
-            )
-        except FolderAccessError as e:  # noqa: PERF203
-            folder_access_error = e
-
-    if folder_access_error:
-        raise folder_access_error
-
-    raise NoAccessForGroupsFoundError(
-        folder_id=folder_id,
-        gids=gids,
-        permissions=_only_true_permissions(permissions),
-    )
-
-
-###
-### API DB LAYER
-###
-
-
-async def folder_create(
-    connection: SAConnection,
-    product_name: _ProductName,
-    name: str,
-    gids: set[_GroupID],
-    description: str = "",
-    parent: _FolderID | None = None,
-    _required_permissions: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions.CREATE_FOLDER
-    ),
-) -> _FolderID:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-        FolderAlreadyExistsError
-        CouldNotCreateFolderError
-        GroupIdDoesNotExistError
-        RootFolderRequiresAtLeastOnePrimaryGroupError
-    """
-    try:
-        TypeAdapter(FolderName).validate_python(name)
-    except ValidationError as exc:
-        raise InvalidFolderNameError(name=name, reason=f"{exc}") from exc
-
-    async with connection.begin():
-        entry_exists: int | None = await connection.scalar(
-            sa.select(folders.c.id)
-            .select_from(
-                folders.join(
-                    folders_access_rights,
-                    folders.c.id == folders_access_rights.c.folder_id,
-                )
-            )
-            .where(folders.c.name == name)
-            .where(folders.c.product_name == product_name)
-            .where(folders_access_rights.c.original_parent_id == parent)
-        )
-        if entry_exists:
-            raise FolderAlreadyExistsError(
-                product_name=product_name, folder=name, parent=parent
-            )
-
-        # `permissions_gid` is computed as follows:
-        # - `folder has a parent?` taken from the resolved access rights of the parent folder
-        # - `is root folder, a.k.a. no parent?` taken from the user's primary group
-        permissions_gid = None
-        if parent:
-            resolved_access_rights = await _check_and_get_folder_access(
-                connection,
-                product_name,
-                folder_id=parent,
-                gids=gids,
-                permissions=_required_permissions,
-            )
-            permissions_gid = resolved_access_rights.gid
-
-        if permissions_gid is None:
-            groups_results: list[RowProxy] | None = await (
-                await connection.execute(
-                    sa.select(groups.c.gid, groups.c.type).where(groups.c.gid.in_(gids))
-                )
-            ).fetchall()
-
-            if not groups_results:
-                raise NoGroupIDFoundError(gids=gids)
-
-            primary_gid = None
-            for group in groups_results:
-                if group["type"] == GroupType.PRIMARY:
-                    primary_gid = group["gid"]
-            if primary_gid is None:
-                raise RootFolderRequiresAtLeastOnePrimaryGroupError(
-                    parent=parent, gids=gids
-                )
-
-            permissions_gid = primary_gid
-
-        # folder entry can now be inserted
-        folder_id = await connection.scalar(
-            sa.insert(folders)
-            .values(
-                name=name,
-                description=description,
-                created_by=permissions_gid,
-                product_name=product_name,
-            )
-            .returning(folders.c.id)
-        )
-
-        if not folder_id:
-            raise CouldNotCreateFolderError(folder=name, parent=parent)
-
-        await connection.execute(
-            sa.insert(folders_access_rights).values(
-                folder_id=folder_id,
-                gid=permissions_gid,
-                traversal_parent_id=parent,
-                original_parent_id=parent,
-                **OWNER_PERMISSIONS.to_dict(),
-            )
-        )
-
-        return _FolderID(folder_id)
-
-
-async def folder_share_or_update_permissions(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    sharing_gids: set[_GroupID],
-    *,
-    recipient_gid: _GroupID,
-    recipient_role: FolderAccessRole,
-    required_permissions: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions.SHARE_FOLDER
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-    """
-    # NOTE: if the `sharing_gid`` has permissions to share it can share it with any `FolderAccessRole`
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=folder_id,
-            gids=sharing_gids,
-            permissions=required_permissions,
-        )
-
-        # update or create permissions entry
-        sharing_permissions: _FolderPermissions = _get_permissions_from_role(
-            recipient_role
-        )
-        data: dict[str, Any] = {
-            "folder_id": folder_id,
-            "gid": recipient_gid,
-            "original_parent_id": None,
-            "traversal_parent_id": None,
-            **sharing_permissions.to_dict(),
-        }
-        insert_stmt = postgresql.insert(folders_access_rights).values(**data)
-        upsert_stmt = insert_stmt.on_conflict_do_update(
-            index_elements=[
-                folders_access_rights.c.folder_id,
-                folders_access_rights.c.gid,
-            ],
-            set_=data,
-        )
-        await connection.execute(upsert_stmt)
-
-
-async def folder_update(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    name: str | None = None,
-    description: str | None = None,
-    _required_permissions: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions.UPDATE_FOLDER
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-    """
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=folder_id,
-            gids=gids,
-            permissions=_required_permissions,
-        )
-
-        # do not update if nothing changed
-        if name is None and description is None:
-            return
-
-        values: dict[str, str] = {}
-        if name:
-            values["name"] = name
-        if description is not None:  # Can be empty string
-            values["description"] = description
-
-        # update entry
-        await connection.execute(
-            folders.update().where(folders.c.id == folder_id).values(**values)
-        )
-
-
-async def folder_delete(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    _required_permissions: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions.DELETE_FOLDER
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-    """
-    childern_folder_ids: list[_FolderID] = []
-
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=folder_id,
-            gids=gids,
-            permissions=_required_permissions,
-        )
-
-        # list all children then delete
-        results = await connection.execute(
-            folders_access_rights.select().where(
-                folders_access_rights.c.traversal_parent_id == folder_id
-            )
-        )
-        rows = await results.fetchall()
-        if rows:
-            for entry in rows:
-                childern_folder_ids.append(entry.folder_id)  # noqa: PERF401
-
-    # first remove all childeren
-    for child_folder_id in childern_folder_ids:
-        await folder_delete(connection, product_name, child_folder_id, gids)
-
-    # as a last step remove the folder per se
-    async with connection.begin():
-        await connection.execute(folders.delete().where(folders.c.id == folder_id))
-
-
-async def folder_move(
-    connection: SAConnection,
-    product_name: _ProductName,
-    source_folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    destination_folder_id: _FolderID | None,
-    required_permissions_source: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions._MOVE_FOLDER_SOURCE  # pylint:disable=protected-access # noqa: SLF001
-    ),
-    required_permissions_destination: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions._MOVE_FOLDER_DESTINATION  # pylint:disable=protected-access # noqa: SLF001
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-        CannotMoveFolderSharedViaNonPrimaryGroupError:
-    """
-    async with connection.begin():
-        source_access_entry = await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=source_folder_id,
-            gids=gids,
-            permissions=required_permissions_source,
-        )
-
-        source_access_gid = source_access_entry.gid
-        group_type: GroupType | None = await connection.scalar(
-            sa.select(groups.c.type).where(groups.c.gid == source_access_gid)
-        )
-        # Might drop primary check
-        if group_type is None or group_type != GroupType.PRIMARY:
-            raise CannotMoveFolderSharedViaNonPrimaryGroupError(
-                group_type=group_type, gid=source_access_gid
-            )
-        if destination_folder_id:
-            await _check_and_get_folder_access(
-                connection,
-                product_name,
-                folder_id=destination_folder_id,
-                gids=gids,
-                permissions=required_permissions_destination,
-            )
-
-        # set new traversa_parent_id on the source_folder_id which is equal to destination_folder_id
-        await connection.execute(
-            folders_access_rights.update()
-            .where(
-                sa.and_(
-                    folders_access_rights.c.folder_id == source_folder_id,
-                    folders_access_rights.c.gid.in_(gids),
-                )
-            )
-            .values(traversal_parent_id=destination_folder_id)
-        )
-
-
-async def folder_add_project(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    project_uuid: _ProjectID,
-    required_permissions=_requires(  # noqa: B008
-        _BasePermissions.ADD_PROJECT_TO_FOLDER
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-        ProjectAlreadyExistsInFolderError
-    """
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=folder_id,
-            gids=gids,
-            permissions=required_permissions,
-        )
-
-        # check if already added in folder
-        project_in_folder_entry = await (
-            await connection.execute(
-                folders_to_projects.select()
-                .where(folders_to_projects.c.folder_id == folder_id)
-                .where(folders_to_projects.c.project_uuid == f"{project_uuid}")
-            )
-        ).fetchone()
-        if project_in_folder_entry:
-            raise ProjectAlreadyExistsInFolderError(
-                project_uuid=project_uuid, folder_id=folder_id
-            )
-
-        # finally add project to folder
-        await connection.execute(
-            folders_to_projects.insert().values(
-                folder_id=folder_id, project_uuid=f"{project_uuid}"
-            )
-        )
-
-
-async def folder_move_project(
-    connection: SAConnection,
-    product_name: _ProductName,
-    source_folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    project_uuid: _ProjectID,
-    destination_folder_id: _FolderID | None,
-    _required_permissions_source: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions._MOVE_PROJECT_FROM_FOLDER_SOURCE  # pylint:disable=protected-access # noqa: SLF001
-    ),
-    _required_permissions_destination: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions._MOVE_PROJECT_FROM_FOLDER_DESTINATION  # pylint:disable=protected-access # noqa: SLF001
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        CannotMoveFolderSharedViaNonPrimaryGroupError:
-    """
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=source_folder_id,
-            gids=gids,
-            permissions=_required_permissions_source,
-        )
-
-    if destination_folder_id is None:
-        # NOTE: As the project is moved to the root directory we will just remove it from the folders_to_projects table
-        await folder_remove_project(
-            connection,
-            product_name,
-            folder_id=source_folder_id,
-            gids=gids,
-            project_uuid=project_uuid,
-        )
-        return
-
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=destination_folder_id,
-            gids=gids,
-            permissions=_required_permissions_destination,
-        )
-
-        await connection.execute(
-            folders_to_projects.delete()
-            .where(folders_to_projects.c.folder_id == source_folder_id)
-            .where(folders_to_projects.c.project_uuid == f"{project_uuid}")
-        )
-        await connection.execute(
-            folders_to_projects.insert().values(
-                folder_id=destination_folder_id, project_uuid=f"{project_uuid}"
-            )
-        )
-
-
-async def get_project_folder_without_check(
-    connection: SAConnection,
-    *,
-    project_uuid: _ProjectID,
-) -> _FolderID | None:
-    """
-    This is temporary, until we discuss how to proceed. In first version we assume there is only one unique project uuid
-    in the folders_to_projects table.
-
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        CannotMoveFolderSharedViaNonPrimaryGroupError:
-    """
-    async with connection.begin():
-        folder_id = await connection.scalar(
-            sa.select(folders_to_projects.c.folder_id).where(
-                folders_to_projects.c.project_uuid == f"{project_uuid}"
-            )
-        )
-        if folder_id:
-            return _FolderID(folder_id)
-        return None
-
-
-async def folder_remove_project(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    project_uuid: _ProjectID,
-    required_permissions=_requires(  # noqa: B008
-        _BasePermissions.REMOVE_PROJECT_FROM_FOLDER
-    ),
-) -> None:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-    """
-    async with connection.begin():
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=folder_id,
-            gids=gids,
-            permissions=required_permissions,
-        )
-
-        await connection.execute(
-            folders_to_projects.delete()
-            .where(folders_to_projects.c.folder_id == folder_id)
-            .where(folders_to_projects.c.project_uuid == f"{project_uuid}")
-        )
-
-
-_LIST_GROUP_BY_FIELDS: Final[tuple[Column, ...]] = (
-    folders.c.id,
-    folders.c.name,
-    folders.c.description,
-    folders.c.created_by,
-    folders_access_rights.c.traversal_parent_id,
-)
-_LIST_SELECT_FIELDS: Final[tuple[Label | Column, ...]] = (
-    *_LIST_GROUP_BY_FIELDS,
-    # access_rights
-    (
-        sa.select(
-            sa.func.jsonb_object_agg(
-                folders_access_rights.c.gid,
-                sa.func.jsonb_build_object(
-                    "read",
-                    folders_access_rights.c.read,
-                    "write",
-                    folders_access_rights.c.write,
-                    "delete",
-                    folders_access_rights.c.delete,
-                ),
-            ).label("access_rights"),
-        )
-        .where(folders_access_rights.c.folder_id == folders.c.id)
-        .correlate(folders)
-        .scalar_subquery()
-    ).label("access_rights"),
-    # my_access_rights
-    func.json_build_object(
-        "read",
-        func.max(folders_access_rights.c.read.cast(INTEGER)).cast(BOOLEAN),
-        "write",
-        func.max(folders_access_rights.c.write.cast(INTEGER)).cast(BOOLEAN),
-        "delete",
-        func.max(folders_access_rights.c.delete.cast(INTEGER)).cast(BOOLEAN),
-    ).label("my_access_rights"),
-    # access_created
-    func.max(folders_access_rights.c.created).label("access_created"),
-    # access_modified
-    func.max(folders_access_rights.c.modified).label("access_modified"),
-)
-
-
-async def folder_list(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID | None,
-    gids: set[_GroupID],
-    *,
-    offset: NonNegativeInt,
-    limit: NonNegativeInt,
-    order_by: OrderByDict = OrderByDict(  # noqa: B008
-        field="modified", direction=OrderDirection.DESC
-    ),
-    required_permissions: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions.LIST_FOLDERS
-    ),
-) -> tuple[int, list[FolderEntry]]:
-    """
-    Raises:
-        FolderNotFoundError
-        FolderNotSharedWithGidError
-        InsufficientPermissionsError
-        NoAccessForGroupsFoundError
-    """
-    # NOTE: when `folder_id is None` list the root folder of the `gids`
-
-    if folder_id is not None:
-        await _check_and_get_folder_access(
-            connection,
-            product_name,
-            folder_id=folder_id,
-            gids=gids,
-            permissions=required_permissions,
-        )
-
-    results: list[FolderEntry] = []
-
-    base_query = (
-        sa.select(*_LIST_SELECT_FIELDS)
-        .join(folders_access_rights, folders.c.id == folders_access_rights.c.folder_id)
-        .where(folders.c.product_name == product_name)
-        .where(
-            folders_access_rights.c.traversal_parent_id.is_(None)
-            if folder_id is None
-            else folders_access_rights.c.traversal_parent_id == folder_id
-        )
-        .where(folders_access_rights.c.gid.in_(gids))
-        .where(
-            _get_filter_for_enabled_permissions(
-                required_permissions, folders_access_rights
-            )
-        )
-        .group_by(*_LIST_GROUP_BY_FIELDS)
-    )
-
-    # Select total count from base_query
-    subquery = base_query.subquery()
-    count_query = sa.select(sa.func.count()).select_from(subquery)
-    count_result = await connection.execute(count_query)
-    total_count = await count_result.scalar()
-
-    # Ordering and pagination
-    if order_by["direction"] == OrderDirection.ASC:
-        list_query = base_query.order_by(sa.asc(getattr(folders.c, order_by["field"])))
-    else:
-        list_query = base_query.order_by(sa.desc(getattr(folders.c, order_by["field"])))
-    list_query = list_query.offset(offset).limit(limit)
-
-    async for entry in connection.execute(list_query):
-        results.append(FolderEntry.from_orm(entry))  # noqa: PERF401s
-
-    return cast(int, total_count), results
-
-
-async def folder_get(
-    connection: SAConnection,
-    product_name: _ProductName,
-    folder_id: _FolderID,
-    gids: set[_GroupID],
-    *,
-    required_permissions: _FolderPermissions = _requires(  # noqa: B008
-        _BasePermissions.GET_FOLDER
-    ),
-) -> FolderEntry:
-    resolved_access_rights: _ResolvedAccessRights = await _check_and_get_folder_access(
-        connection,
-        product_name,
-        folder_id=folder_id,
-        gids=gids,
-        permissions=required_permissions,
-    )
-    permissions_gid: _GroupID = resolved_access_rights.gid
-
-    query = (
-        sa.select(*_LIST_SELECT_FIELDS)
-        .join(folders_access_rights, folders.c.id == folders_access_rights.c.folder_id)
-        .where(folders_access_rights.c.folder_id == folder_id)
-        .where(folders_access_rights.c.gid == permissions_gid)
-        .where(
-            _get_filter_for_enabled_permissions(
-                required_permissions, folders_access_rights
-            )
-            if folder_id is None
-            else True
-        )
-        .where(folders.c.product_name == product_name)
-        .group_by(*_LIST_GROUP_BY_FIELDS)
-    )
-
-    query_result: RowProxy | None = await (await connection.execute(query)).fetchone()
-
-    if query_result is None:
-        raise FolderNotFoundError(
-            folder_id=folder_id, gids=gids, product_name=product_name
-        )
-
-    return FolderEntry.from_orm(query_result)
-
-
-__all__ = ["OrderByDict"]
diff --git a/packages/postgres-database/src/simcore_postgres_database/utils_groups_extra_properties.py b/packages/postgres-database/src/simcore_postgres_database/utils_groups_extra_properties.py
index e52fbb4791ae..b6c25183a21a 100644
--- a/packages/postgres-database/src/simcore_postgres_database/utils_groups_extra_properties.py
+++ b/packages/postgres-database/src/simcore_postgres_database/utils_groups_extra_properties.py
@@ -32,6 +32,7 @@ class GroupExtraProperties(FromRowMixin):
     enable_telemetry: bool
     created: datetime.datetime
     modified: datetime.datetime
+    enable_efs: bool
 
 
 async def _list_table_entries_ordered_by_group_type(
diff --git a/packages/postgres-database/src/simcore_postgres_database/utils_repos.py b/packages/postgres-database/src/simcore_postgres_database/utils_repos.py
new file mode 100644
index 000000000000..f2c96313ea9e
--- /dev/null
+++ b/packages/postgres-database/src/simcore_postgres_database/utils_repos.py
@@ -0,0 +1,43 @@
+import logging
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+
+from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
+
+_logger = logging.getLogger(__name__)
+
+
+@asynccontextmanager
+async def pass_or_acquire_connection(
+    engine: AsyncEngine, connection: AsyncConnection | None = None
+) -> AsyncIterator[AsyncConnection]:
+    # NOTE: When connection is passed, the engine is actually not needed
+    # NOTE: the caller that creates the connection is responsible for closing it
+    is_connection_created = connection is None
+    if is_connection_created:
+        connection = await engine.connect()
+    try:
+        assert connection  # nosec
+        yield connection
+    finally:
+        assert connection  # nosec
+        assert not connection.closed  # nosec
+        if is_connection_created and connection:
+            await connection.close()
+
+
+@asynccontextmanager
+async def transaction_context(
+    engine: AsyncEngine, connection: AsyncConnection | None = None
+):
+    async with pass_or_acquire_connection(engine, connection) as conn:
+        if conn.in_transaction():
+            async with conn.begin_nested():  # inner transaction (savepoint)
+                yield conn
+        else:
+            try:
+                async with conn.begin():  # outer transaction
+                    yield conn
+            finally:
+                assert not conn.closed  # nosec
+                assert not conn.in_transaction()  # nosec
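A minimal sketch of how these two helpers are meant to be composed by repository code (the function names here are hypothetical; the table import assumes the `tags` model above). A caller may pass its own `AsyncConnection` to join an ongoing transaction, or pass nothing and let the helper acquire and release one:

```python
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine

from simcore_postgres_database.models.tags import tags
from simcore_postgres_database.utils_repos import (
    pass_or_acquire_connection,
    transaction_context,
)


async def get_tag_name(
    engine: AsyncEngine, connection: AsyncConnection | None = None, *, tag_id: int
) -> str | None:
    # read-only: reuses `connection` if given, otherwise acquires and closes one
    async with pass_or_acquire_connection(engine, connection) as conn:
        return await conn.scalar(sa.select(tags.c.name).where(tags.c.id == tag_id))


async def rename_tag(
    engine: AsyncEngine,
    connection: AsyncConnection | None = None,
    *,
    tag_id: int,
    name: str,
) -> None:
    # write: opens a transaction, or a savepoint if `connection` is already in one
    async with transaction_context(engine, connection) as conn:
        await conn.execute(sa.update(tags).where(tags.c.id == tag_id).values(name=name))
```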
diff --git a/packages/postgres-database/src/simcore_postgres_database/utils_tags.py b/packages/postgres-database/src/simcore_postgres_database/utils_tags.py
index 0a8b3e4ac28a..7421f25de0fc 100644
--- a/packages/postgres-database/src/simcore_postgres_database/utils_tags.py
+++ b/packages/postgres-database/src/simcore_postgres_database/utils_tags.py
@@ -1,14 +1,13 @@
 """ Repository pattern, errors and data structures for models.tags
 """
 
-import itertools
-from dataclasses import dataclass
 from typing import TypedDict
 
-from aiopg.sa.connection import SAConnection
+from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
 
+from .utils_repos import pass_or_acquire_connection, transaction_context
 from .utils_tags_sql import (
-    count_users_with_access_rights_stmt,
+    count_groups_with_given_access_rights_stmt,
     create_tag_stmt,
     delete_tag_stmt,
     get_tag_stmt,
@@ -49,15 +48,16 @@ class TagDict(TypedDict, total=True):
     delete: bool
 
 
-@dataclass(frozen=True)
 class TagsRepo:
-    user_id: int  # Determines access-rights
+    def __init__(self, engine: AsyncEngine):
+        self.engine = engine
 
     async def access_count(
         self,
-        conn: SAConnection,
-        tag_id: int,
+        connection: AsyncConnection | None = None,
         *,
+        user_id: int,
+        tag_id: int,
         read: bool | None = None,
         write: bool | None = None,
         delete: bool | None = None,
@@ -66,11 +66,12 @@ async def access_count(
         Returns 0 if tag does not match access
         Returns >0 if it does and represents the number of groups granting this access to the user
         """
-        count_stmt = count_users_with_access_rights_stmt(
-            user_id=self.user_id, tag_id=tag_id, read=read, write=write, delete=delete
-        )
-        permissions_count: int | None = await conn.scalar(count_stmt)
-        return permissions_count if permissions_count else 0
+        async with pass_or_acquire_connection(self.engine, connection) as conn:
+            count_stmt = count_groups_with_given_access_rights_stmt(
+                user_id=user_id, tag_id=tag_id, read=read, write=write, delete=delete
+            )
+            permissions_count: int | None = await conn.scalar(count_stmt)
+            return permissions_count if permissions_count else 0
 
     #
     # CRUD operations
@@ -78,85 +79,187 @@ async def access_count(
 
     async def create(
         self,
-        conn: SAConnection,
+        connection: AsyncConnection | None = None,
         *,
+        user_id: int,
         name: str,
         color: str,
         description: str | None = None,  # =nullable
         read: bool = True,
         write: bool = True,
         delete: bool = True,
+        priority: int | None = None,
     ) -> TagDict:
-        values = {
+        """Creates tag and defaults to full access rights to `user_id`"""
+        values: dict[str, str | int] = {
             "name": name,
             "color": color,
         }
         if description:
             values["description"] = description
+        if priority is not None:
+            values["priority"] = priority
 
-        async with conn.begin():
+        async with transaction_context(self.engine, connection) as conn:
             # insert new tag
             insert_stmt = create_tag_stmt(**values)
             result = await conn.execute(insert_stmt)
-            tag = await result.first()
+            tag = result.first()
             assert tag  # nosec
 
             # take tag ownership
             access_stmt = set_tag_access_rights_stmt(
                 tag_id=tag.id,
-                user_id=self.user_id,
+                user_id=user_id,
                 read=read,
                 write=write,
                 delete=delete,
             )
             result = await conn.execute(access_stmt)
-            access = await result.first()
-            assert access
+            access = result.first()
+            assert access  # nosec
 
-            return TagDict(itertools.chain(tag.items(), access.items()))  # type: ignore
+            return TagDict(
+                id=tag.id,
+                name=tag.name,
+                description=tag.description,
+                color=tag.color,
+                read=access.read,
+                write=access.write,
+                delete=access.delete,
+            )
 
-    async def list_all(self, conn: SAConnection) -> list[TagDict]:
-        stmt_list = list_tags_stmt(user_id=self.user_id)
-        return [TagDict(row.items()) async for row in conn.execute(stmt_list)]  # type: ignore
+    async def list_all(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
+    ) -> list[TagDict]:
+        async with pass_or_acquire_connection(self.engine, connection) as conn:
+            stmt_list = list_tags_stmt(user_id=user_id)
+            result = await conn.stream(stmt_list)
+            return [
+                TagDict(
+                    id=row.id,
+                    name=row.name,
+                    description=row.description,
+                    color=row.color,
+                    read=row.read,
+                    write=row.write,
+                    delete=row.delete,
+                )
+                async for row in result
+            ]
 
-    async def get(self, conn: SAConnection, tag_id: int) -> TagDict:
-        stmt_get = get_tag_stmt(user_id=self.user_id, tag_id=tag_id)
-        result = await conn.execute(stmt_get)
-        row = await result.first()
-        if not row:
-            msg = f"{tag_id=} not found: either no access or does not exists"
-            raise TagNotFoundError(msg)
-        return TagDict(row.items())  # type: ignore
+    async def get(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
+        tag_id: int,
+    ) -> TagDict:
+        stmt_get = get_tag_stmt(user_id=user_id, tag_id=tag_id)
+        async with pass_or_acquire_connection(self.engine, connection) as conn:
+            result = await conn.execute(stmt_get)
+            row = result.first()
+            if not row:
+                msg = f"{tag_id=} not found: either no access or does not exists"
+                raise TagNotFoundError(msg)
+            return TagDict(
+                id=row.id,
+                name=row.name,
+                description=row.description,
+                color=row.color,
+                read=row.read,
+                write=row.write,
+                delete=row.delete,
+            )
 
     async def update(
         self,
-        conn: SAConnection,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
         tag_id: int,
         **fields,
     ) -> TagDict:
-        updates = {
-            name: value
-            for name, value in fields.items()
-            if name in {"name", "color", "description"}
-        }
+        async with transaction_context(self.engine, connection) as conn:
+            updates = {
+                name: value
+                for name, value in fields.items()
+                if name in {"name", "color", "description", "priority"}
+            }
 
-        if not updates:
-            # no updates == get
-            return await self.get(conn, tag_id=tag_id)
+            if not updates:
+                # no updates == get
+                return await self.get(conn, user_id=user_id, tag_id=tag_id)
 
-        update_stmt = update_tag_stmt(user_id=self.user_id, tag_id=tag_id, **updates)
-        result = await conn.execute(update_stmt)
-        row = await result.first()
-        if not row:
-            msg = f"{tag_id=} not updated: either no access or not found"
-            raise TagOperationNotAllowedError(msg)
+            update_stmt = update_tag_stmt(user_id=user_id, tag_id=tag_id, **updates)
+            result = await conn.execute(update_stmt)
+            row = result.first()
+            if not row:
+                msg = f"{tag_id=} not updated: either no access or not found"
+                raise TagOperationNotAllowedError(msg)
 
-        return TagDict(row.items())  # type: ignore
+            return TagDict(
+                id=row.id,
+                name=row.name,
+                description=row.description,
+                color=row.color,
+                read=row.read,
+                write=row.write,
+                delete=row.delete,
+            )
 
-    async def delete(self, conn: SAConnection, tag_id: int) -> None:
-        stmt_delete = delete_tag_stmt(user_id=self.user_id, tag_id=tag_id)
+    async def delete(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
+        tag_id: int,
+    ) -> None:
+        stmt_delete = delete_tag_stmt(user_id=user_id, tag_id=tag_id)
+        async with transaction_context(self.engine, connection) as conn:
+            deleted = await conn.scalar(stmt_delete)
+            if not deleted:
+                msg = f"Could not delete {tag_id=}. Not found or insuficient access."
+                raise TagOperationNotAllowedError(msg)
+
+    #
+    # ACCESS RIGHTS
+    #
+
+    async def create_access_rights(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
+        tag_id: int,
+        group_id: int,
+        read: bool,
+        write: bool,
+        delete: bool,
+    ):
+        raise NotImplementedError
 
-        deleted = await conn.scalar(stmt_delete)
-        if not deleted:
-            msg = f"Could not delete {tag_id=}. Not found or insuficient access."
-            raise TagOperationNotAllowedError(msg)
+    async def update_access_rights(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
+        tag_id: int,
+        group_id: int,
+        read: bool,
+        write: bool,
+        delete: bool,
+    ):
+        raise NotImplementedError
+
+    async def delete_access_rights(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        user_id: int,
+        tag_id: int,
+    ):
+        raise NotImplementedError
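Since `TagsRepo` is now constructed from an `AsyncEngine` and takes `user_id` per call, a typical usage sketch looks like this (DSN handling and the demo function name are hypothetical):

```python
from sqlalchemy.ext.asyncio import create_async_engine

from simcore_postgres_database.utils_tags import TagsRepo


async def tags_demo(dsn: str, user_id: int) -> None:
    engine = create_async_engine(dsn)
    try:
        repo = TagsRepo(engine)

        # each call acquires (and releases) its own connection unless one is passed
        tag = await repo.create(
            user_id=user_id, name="starred", color="#FF5733", priority=0
        )
        my_tags = await repo.list_all(user_id=user_id)  # creator has at least read access
        assert any(t["id"] == tag["id"] for t in my_tags)

        await repo.update(user_id=user_id, tag_id=tag["id"], priority=1)
        await repo.delete(user_id=user_id, tag_id=tag["id"])
    finally:
        await engine.dispose()
```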
diff --git a/packages/postgres-database/src/simcore_postgres_database/utils_tags_sql.py b/packages/postgres-database/src/simcore_postgres_database/utils_tags_sql.py
index 05a1e93ca33e..bd727a0dcc32 100644
--- a/packages/postgres-database/src/simcore_postgres_database/utils_tags_sql.py
+++ b/packages/postgres-database/src/simcore_postgres_database/utils_tags_sql.py
@@ -23,9 +23,6 @@
 ]
 
 
-_COLUMNS = _TAG_COLUMNS + _ACCESS_RIGHTS_COLUMNS
-
-
 def _join_user_groups_tag(*, access_condition, tag_id: int, user_id: int):
     return user_to_groups.join(
         tags_access_rights,
@@ -57,24 +54,42 @@ def get_tag_stmt(
     user_id: int,
     tag_id: int,
 ):
-    return sa.select(*_COLUMNS).select_from(
-        _join_user_to_given_tag(
-            access_condition=tags_access_rights.c.read.is_(True),
-            tag_id=tag_id,
-            user_id=user_id,
+    return (
+        sa.select(
+            *_TAG_COLUMNS,
+            # aggregation ensures MOST PERMISSIVE policy of access-rights
+            sa.func.bool_or(tags_access_rights.c.read).label("read"),
+            sa.func.bool_or(tags_access_rights.c.write).label("write"),
+            sa.func.bool_or(tags_access_rights.c.delete).label("delete")
+        )
+        .select_from(
+            _join_user_to_given_tag(
+                access_condition=tags_access_rights.c.read.is_(True),
+                tag_id=tag_id,
+                user_id=user_id,
+            )
         )
+        .group_by(tags.c.id)
     )
 
 
 def list_tags_stmt(*, user_id: int):
     return (
-        sa.select(*_COLUMNS)
+        sa.select(
+            *_TAG_COLUMNS,
+            # aggregation ensures MOST PERMISSIVE policy of access-rights
+            sa.func.bool_or(tags_access_rights.c.read).label("read"),
+            sa.func.bool_or(tags_access_rights.c.write).label("write"),
+            sa.func.bool_or(tags_access_rights.c.delete).label("delete")
+        )
         .select_from(
             _join_user_to_tags(
                 access_condition=tags_access_rights.c.read.is_(True),
                 user_id=user_id,
             )
         )
+        .group_by(tags.c.id)  # ensures one row per tag.id
+        .order_by(tags.c.priority.nulls_last())
         .order_by(tags.c.id)
     )
 
@@ -83,7 +98,7 @@ def create_tag_stmt(**values):
     return tags.insert().values(**values).returning(*_TAG_COLUMNS)
 
 
-def count_users_with_access_rights_stmt(
+def count_groups_with_given_access_rights_stmt(
     *,
     user_id: int,
     tag_id: int,
@@ -92,7 +107,7 @@ def count_users_with_access_rights_stmt(
     delete: bool | None
 ):
     """
-    How many users are given these access permissions
+    How many of this user's groups are given EXACTLY these access permissions
     """
     access = []
     if read is not None:
@@ -146,7 +161,7 @@ def update_tag_stmt(*, user_id: int, tag_id: int, **updates):
             & (user_to_groups.c.uid == user_id)
         )
         .values(**updates)
-        .returning(*_COLUMNS)
+        .returning(*_TAG_COLUMNS, *_ACCESS_RIGHTS_COLUMNS)
     )
 
 
@@ -166,6 +181,11 @@ def delete_tag_stmt(*, user_id: int, tag_id: int):
     )
 
 
+#
+# PROJECT TAGS
+#
+
+
 def get_tags_for_project_stmt(*, project_index: int):
     return sa.select(projects_tags.c.tag_id).where(
         projects_tags.c.project_id == project_index
@@ -183,6 +203,11 @@ def add_tag_to_project_stmt(*, project_index: int, tag_id: int):
     )
 
 
+#
+# SERVICE TAGS
+#
+
+
 def get_tags_for_services_stmt(*, key: str, version: str):
     return sa.select(services_tags.c.tag_id).where(
         (services_tags.c.service_key == key)
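
A side note on the aggregation introduced in `get_tag_stmt` / `list_tags_stmt` above: grouping by `tags.c.id` and applying `bool_or` over each access flag means a user gets a permission on a tag as soon as any of their groups grants it. The following is a minimal, self-contained illustration of that pattern; the table and column names are illustrative, not the package's actual models:

```python
# Standalone sketch of the "most permissive" aggregation pattern used above.
# The table defined here is illustrative; it is not the package's real model.
import sqlalchemy as sa

metadata = sa.MetaData()
access = sa.Table(
    "tags_access_rights",
    metadata,
    sa.Column("tag_id", sa.Integer),
    sa.Column("group_id", sa.Integer),
    sa.Column("read", sa.Boolean),
    sa.Column("write", sa.Boolean),
    sa.Column("delete", sa.Boolean),
)

stmt = (
    sa.select(
        access.c.tag_id,
        # ANY of the user's groups granting a flag grants it for the user
        sa.func.bool_or(access.c.read).label("read"),
        sa.func.bool_or(access.c.write).label("write"),
        sa.func.bool_or(access.c.delete).label("delete"),
    )
    # one output row per tag, collapsing the per-group rows
    .group_by(access.c.tag_id)
)

print(stmt)  # inspect the generated SQL (bool_or is PostgreSQL-specific)
```
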
diff --git a/packages/postgres-database/tests/conftest.py b/packages/postgres-database/tests/conftest.py
index 0d2224e286ee..5526b668e398 100644
--- a/packages/postgres-database/tests/conftest.py
+++ b/packages/postgres-database/tests/conftest.py
@@ -4,6 +4,7 @@
 # pylint: disable=unused-variable
 
 import uuid
+import warnings
 from collections.abc import AsyncIterator, Awaitable, Callable, Iterator
 from pathlib import Path
 
@@ -37,6 +38,7 @@
     user_to_groups,
     users,
 )
+from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
 
 pytest_plugins = [
     "pytest_simcore.pytest_global_environs",
@@ -81,6 +83,30 @@ def _make(is_async=True) -> Awaitable[Engine] | sa.engine.base.Engine:
     return _make
 
 
+@pytest.fixture
+def make_asyncpg_engine(postgres_service: str) -> Callable[[bool], AsyncEngine]:
+    # NOTE: the caller of this factory is responsible for `await engine.dispose()`
+    dsn = postgres_service.replace("postgresql://", "postgresql+asyncpg://")
+    minsize = 1
+    maxsize = 50
+
+    def _(echo: bool):
+        engine: AsyncEngine = create_async_engine(
+            dsn,
+            pool_size=minsize,
+            max_overflow=maxsize - minsize,
+            connect_args={
+                "server_settings": {"application_name": "postgres_database_tests"}
+            },
+            pool_pre_ping=True,  # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
+            future=True,  # enables the sqlalchemy 2.0 API; can be removed once sqlalchemy 2.0 is the baseline
+            echo=echo,
+        )
+        return engine
+
+    return _
+
+
 def is_postgres_responsive(dsn) -> bool:
     """Check if something responds to ``url``"""
     try:
@@ -107,6 +133,11 @@ def pg_sa_engine(
 ) -> Iterator[sa.engine.Engine]:
     """
     Runs migration to create tables and return a sqlalchemy engine
+
+    NOTE: use this fixture to ensure pg db:
+        - up,
+        - responsive,
+        - init (w/ tables) and/or migrated
     """
     # NOTE: Using migration to upgrade/downgrade is not
     # such a great idea since these tests are used while developing
@@ -142,29 +173,56 @@ def pg_sa_engine(
 
 
 @pytest.fixture
-async def pg_engine(
+async def aiopg_engine(
     pg_sa_engine: sa.engine.Engine, make_engine: Callable
 ) -> AsyncIterator[Engine]:
     """
     Return an aiopg.sa engine connected to a responsive and migrated pg database
     """
-    async_engine = await make_engine(is_async=True)
 
-    yield async_engine
+    aiopg_sa_engine = await make_engine(is_async=True)
+
+    warnings.warn(
+        "The 'aiopg_engine' is deprecated since we are replacing `aiopg` library by `sqlalchemy.ext.asyncio`."
+        "SEE https://github.com/ITISFoundation/osparc-simcore/issues/4529. "
+        "Please use 'asyncpg_engine' instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+
+    yield aiopg_sa_engine
 
     # closes async-engine connections and terminates
-    async_engine.close()
-    await async_engine.wait_closed()
-    async_engine.terminate()
+    aiopg_sa_engine.close()
+    await aiopg_sa_engine.wait_closed()
+    aiopg_sa_engine.terminate()
 
 
 @pytest.fixture
-async def connection(pg_engine: Engine) -> AsyncIterator[SAConnection]:
+async def connection(aiopg_engine: Engine) -> AsyncIterator[SAConnection]:
     """Returns an aiopg.sa connection from an engine to a fully furnished and ready pg database"""
-    async with pg_engine.acquire() as _conn:
+    async with aiopg_engine.acquire() as _conn:
         yield _conn
 
 
+@pytest.fixture
+async def asyncpg_engine(
+    is_pdb_enabled: bool,
+    pg_sa_engine: sa.engine.Engine,
+    make_asyncpg_engine: Callable[[bool], AsyncEngine],
+) -> AsyncIterator[AsyncEngine]:
+
+    assert (
+        pg_sa_engine
+    ), "Ensures pg db up, responsive, init (w/ tables) and/or migrated"
+
+    _apg_engine = make_asyncpg_engine(is_pdb_enabled)
+
+    yield _apg_engine
+
+    await _apg_engine.dispose()
+
+
 #
 # FACTORY FIXTURES
 #
@@ -240,7 +298,7 @@ async def _creator(conn, group: RowProxy | None = None, **overrides) -> RowProxy
 
 @pytest.fixture
 async def create_fake_cluster(
-    pg_engine: Engine, faker: Faker
+    aiopg_engine: Engine, faker: Faker
 ) -> AsyncIterator[Callable[..., Awaitable[int]]]:
     cluster_ids = []
     assert cluster_to_groups is not None
@@ -254,7 +312,7 @@ async def _creator(**overrides) -> int:
             "authentication": faker.pydict(value_types=[str]),
         }
         insert_values.update(overrides)
-        async with pg_engine.acquire() as conn:
+        async with aiopg_engine.acquire() as conn:
             cluster_id = await conn.scalar(
                 clusters.insert().values(**insert_values).returning(clusters.c.id)
             )
@@ -265,13 +323,13 @@ async def _creator(**overrides) -> int:
     yield _creator
 
     # cleanup
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await conn.execute(clusters.delete().where(clusters.c.id.in_(cluster_ids)))
 
 
 @pytest.fixture
 async def create_fake_project(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
 ) -> AsyncIterator[Callable[..., Awaitable[RowProxy]]]:
     created_project_uuids = []
 
@@ -288,7 +346,7 @@ async def _creator(conn, user: RowProxy, **overrides) -> RowProxy:
 
     yield _creator
 
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await conn.execute(
             projects.delete().where(projects.c.uuid.in_(created_project_uuids))
         )
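
To make the intent of the new fixtures concrete, here is a hypothetical test (not part of this diff) that uses `asyncpg_engine`; it assumes an async-capable pytest setup (e.g. pytest-asyncio or anyio), as used by the rest of the test suite:

```python
# Hypothetical usage example -- not included in this PR.
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncEngine


async def test_connects_with_asyncpg(asyncpg_engine: AsyncEngine):
    # asyncpg_engine is backed by the migrated test database (pg_sa_engine)
    # and is disposed by the fixture itself after the test finishes.
    async with asyncpg_engine.connect() as conn:
        assert await conn.scalar(sa.select(sa.literal(42))) == 42
```
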
diff --git a/packages/postgres-database/tests/products/test_models_products.py b/packages/postgres-database/tests/products/test_models_products.py
index 02d9a6076190..c385cd7e7340 100644
--- a/packages/postgres-database/tests/products/test_models_products.py
+++ b/packages/postgres-database/tests/products/test_models_products.py
@@ -26,14 +26,14 @@
 
 
 async def test_load_products(
-    pg_engine: Engine, make_products_table: Callable, products_regex: dict
+    aiopg_engine: Engine, make_products_table: Callable, products_regex: dict
 ):
     exclude = {
         products.c.created,
         products.c.modified,
     }
 
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await make_products_table(conn)
 
         stmt = sa.select(*[c for c in products.columns if c not in exclude])
@@ -49,14 +49,14 @@ async def test_load_products(
 
 
 async def test_jinja2_templates_table(
-    pg_engine: Engine, osparc_simcore_services_dir: Path
+    aiopg_engine: Engine, osparc_simcore_services_dir: Path
 ):
     templates_common_dir = (
         osparc_simcore_services_dir
         / "web/server/src/simcore_service_webserver/templates/common"
     )
 
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         templates = []
         # templates table
         for p in templates_common_dir.glob("*.jinja2"):
@@ -135,7 +135,7 @@ async def test_jinja2_templates_table(
 
 
 async def test_insert_select_product(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
 ):
     osparc_product = {
         "name": "osparc",
@@ -174,7 +174,7 @@ async def test_insert_select_product(
 
     print(json.dumps(osparc_product))
 
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         # writes
         stmt = (
             pg_insert(products)
diff --git a/packages/postgres-database/tests/products/test_utils_products.py b/packages/postgres-database/tests/products/test_utils_products.py
index 3956d74e2262..a1b84fe96dd8 100644
--- a/packages/postgres-database/tests/products/test_utils_products.py
+++ b/packages/postgres-database/tests/products/test_utils_products.py
@@ -19,24 +19,24 @@
 )
 
 
-async def test_default_product(pg_engine: Engine, make_products_table: Callable):
-    async with pg_engine.acquire() as conn:
+async def test_default_product(aiopg_engine: Engine, make_products_table: Callable):
+    async with aiopg_engine.acquire() as conn:
         await make_products_table(conn)
         default_product = await get_default_product_name(conn)
         assert default_product == "s4l"
 
 
 @pytest.mark.parametrize("pg_sa_engine", ["sqlModels"], indirect=True)
-async def test_default_product_undefined(pg_engine: Engine):
-    async with pg_engine.acquire() as conn:
+async def test_default_product_undefined(aiopg_engine: Engine):
+    async with aiopg_engine.acquire() as conn:
         with pytest.raises(ValueError):
             await get_default_product_name(conn)
 
 
 async def test_get_or_create_group_product(
-    pg_engine: Engine, make_products_table: Callable
+    aiopg_engine: Engine, make_products_table: Callable
 ):
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await make_products_table(conn)
 
         async for product_row in await conn.execute(
@@ -105,13 +105,13 @@ async def test_get_or_create_group_product(
     reason="Not relevant. Will review in https://github.com/ITISFoundation/osparc-simcore/issues/3754"
 )
 async def test_get_or_create_group_product_concurrent(
-    pg_engine: Engine, make_products_table: Callable
+    aiopg_engine: Engine, make_products_table: Callable
 ):
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await make_products_table(conn)
 
     async def _auto_create_products_groups():
-        async with pg_engine.acquire() as conn:
+        async with aiopg_engine.acquire() as conn:
             async for product_row in await conn.execute(
                 sa.select(products.c.name, products.c.group_id).order_by(
                     products.c.priority
diff --git a/packages/postgres-database/tests/projects/conftest.py b/packages/postgres-database/tests/projects/conftest.py
index fb507557fbf2..2a1b9c99f245 100644
--- a/packages/postgres-database/tests/projects/conftest.py
+++ b/packages/postgres-database/tests/projects/conftest.py
@@ -16,10 +16,10 @@
 
 
 @pytest.fixture
-async def user(pg_engine: Engine) -> RowProxy:
+async def user(aiopg_engine: Engine) -> RowProxy:
     _USERNAME = f"{__name__}.me"
     # some user
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         result: ResultProxy | None = await conn.execute(
             users.insert().values(**random_user(name=_USERNAME)).returning(users)
         )
@@ -32,10 +32,10 @@ async def user(pg_engine: Engine) -> RowProxy:
 
 
 @pytest.fixture
-async def project(pg_engine: Engine, user: RowProxy) -> RowProxy:
+async def project(aiopg_engine: Engine, user: RowProxy) -> RowProxy:
     _PARENT_PROJECT_NAME = f"{__name__}.parent"
     # a user's project
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         result: ResultProxy | None = await conn.execute(
             projects.insert()
             .values(**random_project(prj_owner=user.id, name=_PARENT_PROJECT_NAME))
@@ -50,6 +50,6 @@ async def project(pg_engine: Engine, user: RowProxy) -> RowProxy:
 
 
 @pytest.fixture
-async def conn(pg_engine: Engine) -> AsyncIterable[SAConnection]:
-    async with pg_engine.acquire() as conn:
+async def conn(aiopg_engine: Engine) -> AsyncIterable[SAConnection]:
+    async with aiopg_engine.acquire() as conn:
         yield conn
diff --git a/packages/postgres-database/tests/test_classifiers.py b/packages/postgres-database/tests/test_classifiers.py
index f53740a124df..8e8e0eba24cb 100644
--- a/packages/postgres-database/tests/test_classifiers.py
+++ b/packages/postgres-database/tests/test_classifiers.py
@@ -38,10 +38,10 @@ def classifiers_bundle(web_client_resource_folder: Path) -> dict:
 
 
 async def test_operations_on_group_classifiers(
-    pg_engine: Engine, classifiers_bundle: dict
+    aiopg_engine: Engine, classifiers_bundle: dict
 ):
     # NOTE: mostly for TDD
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         # creates a group
         stmt = (
             groups.insert()
diff --git a/packages/postgres-database/tests/test_clusters.py b/packages/postgres-database/tests/test_clusters.py
index 6dccc8ef153c..95cd8492965f 100644
--- a/packages/postgres-database/tests/test_clusters.py
+++ b/packages/postgres-database/tests/test_clusters.py
@@ -16,8 +16,8 @@
 
 
 @pytest.fixture
-async def user_id(pg_engine: Engine) -> AsyncIterable[int]:
-    async with pg_engine.acquire() as conn:
+async def user_id(aiopg_engine: Engine) -> AsyncIterable[int]:
+    async with aiopg_engine.acquire() as conn:
         # a 'me' user
         uid = await conn.scalar(
             users.insert().values(**(random_user())).returning(users.c.id)
@@ -25,14 +25,14 @@ async def user_id(pg_engine: Engine) -> AsyncIterable[int]:
     assert uid is not None
     yield uid
     # cleanup
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         # a 'me' user
         uid = await conn.execute(users.delete().where(users.c.id == uid))
 
 
 @pytest.fixture
-async def user_group_id(pg_engine: Engine, user_id: int) -> int:
-    async with pg_engine.acquire() as conn:
+async def user_group_id(aiopg_engine: Engine, user_id: int) -> int:
+    async with aiopg_engine.acquire() as conn:
         primary_gid = await conn.scalar(
             sa.select(users.c.primary_gid).where(users.c.id == user_id)
         )
@@ -64,34 +64,34 @@ async def test_can_create_cluster_with_owner(
 
 
 async def test_cannot_remove_owner_that_owns_cluster(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
     user_id: int,
     user_group_id: int,
     create_fake_cluster: Callable[..., Awaitable[int]],
 ):
     cluster_id = await create_fake_cluster(owner=user_group_id)
     # now try removing the user
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         with pytest.raises(ForeignKeyViolation):
             await conn.execute(users.delete().where(users.c.id == user_id))
 
     # now remove the cluster
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await conn.execute(clusters.delete().where(clusters.c.id == cluster_id))
 
     # removing the user should work now
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         await conn.execute(users.delete().where(users.c.id == user_id))
 
 
 async def test_cluster_owner_has_all_rights(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
     user_group_id: int,
     create_fake_cluster: Callable[..., Awaitable[int]],
 ):
     cluster_id = await create_fake_cluster(owner=user_group_id)
 
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         result: ResultProxy = await conn.execute(
             cluster_to_groups.select().where(
                 cluster_to_groups.c.cluster_id == cluster_id
diff --git a/packages/postgres-database/tests/test_comp_tasks.py b/packages/postgres-database/tests/test_comp_tasks.py
index 46152e30a69f..4c36260f4318 100644
--- a/packages/postgres-database/tests/test_comp_tasks.py
+++ b/packages/postgres-database/tests/test_comp_tasks.py
@@ -19,8 +19,8 @@
 
 
 @pytest.fixture()
-async def db_connection(pg_engine: Engine) -> SAConnection:
-    async with pg_engine.acquire() as conn:
+async def db_connection(aiopg_engine: Engine) -> SAConnection:
+    async with aiopg_engine.acquire() as conn:
         yield conn
 
 
diff --git a/packages/postgres-database/tests/test_delete_projects_and_users.py b/packages/postgres-database/tests/test_delete_projects_and_users.py
index b0e0edacef84..4c3801560664 100644
--- a/packages/postgres-database/tests/test_delete_projects_and_users.py
+++ b/packages/postgres-database/tests/test_delete_projects_and_users.py
@@ -15,8 +15,8 @@
 
 
 @pytest.fixture
-async def engine(pg_engine: Engine):
-    async with pg_engine.acquire() as conn:
+async def engine(aiopg_engine: Engine):
+    async with aiopg_engine.acquire() as conn:
         await conn.execute(users.insert().values(**random_user(name="A")))
         await conn.execute(users.insert().values(**random_user()))
         await conn.execute(users.insert().values(**random_user()))
@@ -27,7 +27,7 @@ async def engine(pg_engine: Engine):
         with pytest.raises(ForeignKeyViolation):
             await conn.execute(projects.insert().values(**random_project(prj_owner=4)))
 
-    return pg_engine
+    return aiopg_engine
 
 
 @pytest.mark.skip(reason="sandbox for dev purposes")
diff --git a/packages/postgres-database/tests/test_services_consume_filetypes.py b/packages/postgres-database/tests/test_services_consume_filetypes.py
index 88c68dadc7c2..f72799299073 100644
--- a/packages/postgres-database/tests/test_services_consume_filetypes.py
+++ b/packages/postgres-database/tests/test_services_consume_filetypes.py
@@ -59,9 +59,9 @@ async def _make(connection: SAConnection):
 
 @pytest.fixture
 async def connection(
-    pg_engine: sa.engine.Engine, connection: SAConnection, make_table: Callable
+    aiopg_engine: sa.engine.Engine, connection: SAConnection, make_table: Callable
 ):
-    assert pg_engine
+    assert aiopg_engine
     # NOTE: do not remove th pg_engine, or the test will fail as pytest
     # cannot set the parameters in the fixture
 
diff --git a/packages/postgres-database/tests/test_utils_aiopg_orm.py b/packages/postgres-database/tests/test_utils_aiopg_orm.py
index d34ef15f95ac..2905a3f3a87b 100644
--- a/packages/postgres-database/tests/test_utils_aiopg_orm.py
+++ b/packages/postgres-database/tests/test_utils_aiopg_orm.py
@@ -16,12 +16,12 @@
 
 
 @pytest.fixture
-async def fake_scicrunch_ids(pg_engine: Engine) -> list[str]:
+async def fake_scicrunch_ids(aiopg_engine: Engine) -> list[str]:
     row1 = {"rrid": "RRID:foo", "name": "foo", "description": "fooing"}
     row2 = {"rrid": "RRID:bar", "name": "bar", "description": "barring"}
 
     row_ids = []
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         for row in (row1, row2):
             row_id = await conn.scalar(
                 scicrunch_resources.insert()
@@ -35,7 +35,7 @@ async def fake_scicrunch_ids(pg_engine: Engine) -> list[str]:
 
 
 @pytest.fixture()
-async def scicrunch_orm(pg_engine: Engine) -> Iterator[BaseOrm[str]]:
+async def scicrunch_orm(aiopg_engine: Engine) -> Iterator[BaseOrm[str]]:
     # This is a table without dependencies and therefore easy to use as fixture
     class ScicrunchOrm(BaseOrm[str]):
         def __init__(self, connection: SAConnection):
@@ -46,7 +46,7 @@ def __init__(self, connection: SAConnection):
                 writeonce={"rrid"},
             )
 
-    async with pg_engine.acquire() as conn:
+    async with aiopg_engine.acquire() as conn:
         orm_obj = ScicrunchOrm(conn)
         yield orm_obj
 
diff --git a/packages/postgres-database/tests/test_utils_folders.py b/packages/postgres-database/tests/test_utils_folders.py
deleted file mode 100644
index 8c49fd9914f5..000000000000
--- a/packages/postgres-database/tests/test_utils_folders.py
+++ /dev/null
@@ -1,2312 +0,0 @@
-# pylint:disable=redefined-outer-name
-# pylint:disable=too-many-statements
-# pylint:disable=unused-variable
-
-import itertools
-from collections.abc import AsyncIterable, Awaitable, Callable
-from copy import deepcopy
-from typing import NamedTuple
-from unittest.mock import Mock
-
-import pytest
-import sqlalchemy as sa
-from aiopg.sa.connection import SAConnection
-from aiopg.sa.result import RowProxy
-from pydantic import BaseModel, Field, NonNegativeInt
-from pytest_simcore.helpers.faker_factories import random_product
-from simcore_postgres_database.models.folders import (
-    folders,
-    folders_access_rights,
-    folders_to_projects,
-)
-from simcore_postgres_database.models.groups import GroupType, groups
-from simcore_postgres_database.utils_folders import (
-    _ROLE_TO_PERMISSIONS,
-    EDITOR_PERMISSIONS,
-    NO_ACCESS_PERMISSIONS,
-    OWNER_PERMISSIONS,
-    VIEWER_PERMISSIONS,
-    CannotMoveFolderSharedViaNonPrimaryGroupError,
-    FolderAccessRole,
-    FolderAlreadyExistsError,
-    FolderEntry,
-    FolderNotFoundError,
-    FolderNotSharedWithGidError,
-    InsufficientPermissionsError,
-    InvalidFolderNameError,
-    NoGroupIDFoundError,
-    RootFolderRequiresAtLeastOnePrimaryGroupError,
-    _FolderID,
-    _FolderPermissions,
-    _get_filter_for_enabled_permissions,
-    _get_permissions_from_role,
-    _get_resolved_access_rights,
-    _GroupID,
-    _ProductName,
-    _ProjectID,
-    _requires,
-    folder_add_project,
-    folder_create,
-    folder_delete,
-    folder_get,
-    folder_list,
-    folder_move,
-    folder_remove_project,
-    folder_share_or_update_permissions,
-    folder_update,
-)
-from simcore_postgres_database.utils_products import products
-from sqlalchemy.sql.elements import ColumnElement
-
-
-def test_permissions_integrity():
-    assert set(FolderAccessRole) == set(_ROLE_TO_PERMISSIONS.keys())
-
-
-@pytest.mark.parametrize(
-    "read, write, delete", list(itertools.product([True, False], repeat=3))
-)
-def test__folder_permissions_to_dict(read: bool, write: bool, delete: bool):
-    folder_permissions = _FolderPermissions(read=read, write=write, delete=delete)
-    assert folder_permissions.to_dict() == {
-        "read": read,
-        "write": write,
-        "delete": delete,
-    }
-    only_true: dict[str, bool] = {}
-    if read:
-        only_true["read"] = True
-    if write:
-        only_true["write"] = True
-    if delete:
-        only_true["delete"] = True
-    assert folder_permissions.to_dict(include_only_true=True) == only_true
-
-
-@pytest.mark.parametrize(
-    "role, expected_permissions",
-    [
-        (
-            FolderAccessRole.VIEWER,
-            _FolderPermissions(read=True, write=False, delete=False),
-        ),
-        (
-            FolderAccessRole.EDITOR,
-            _FolderPermissions(read=True, write=True, delete=False),
-        ),
-        (
-            FolderAccessRole.OWNER,
-            _FolderPermissions(read=True, write=True, delete=True),
-        ),
-    ],
-)
-def test_role_permissions(
-    role: FolderAccessRole, expected_permissions: dict[str, bool]
-):
-    assert _get_permissions_from_role(role) == expected_permissions
-
-
-@pytest.mark.parametrize(
-    "permissions, expected",
-    [
-        ([], _FolderPermissions(read=False, write=False, delete=False)),
-        (
-            [VIEWER_PERMISSIONS],
-            _FolderPermissions(read=True, write=False, delete=False),
-        ),
-        ([EDITOR_PERMISSIONS], _FolderPermissions(read=True, write=True, delete=False)),
-        (
-            [EDITOR_PERMISSIONS, VIEWER_PERMISSIONS],
-            _FolderPermissions(read=True, write=True, delete=False),
-        ),
-        ([OWNER_PERMISSIONS], _FolderPermissions(read=True, write=True, delete=True)),
-        (
-            [OWNER_PERMISSIONS, EDITOR_PERMISSIONS],
-            _FolderPermissions(read=True, write=True, delete=True),
-        ),
-        (
-            [OWNER_PERMISSIONS, EDITOR_PERMISSIONS, VIEWER_PERMISSIONS],
-            _FolderPermissions(read=True, write=True, delete=True),
-        ),
-    ],
-)
-def test__requires_permissions(
-    permissions: list[_FolderPermissions], expected: dict[str, bool]
-):
-    assert _requires(*permissions) == expected
-
-
-@pytest.fixture
-async def create_product(
-    connection: SAConnection,
-) -> AsyncIterable[Callable[[str], Awaitable[_ProductName]]]:
-    created_products: list[_ProductName] = []
-
-    async def _(name: str) -> _ProductName:
-        assert name != "osparc", f"{name} is reserved! please choose a different one"
-        resultlt: _ProductName | None = await connection.scalar(
-            products.insert()
-            .values(random_product(name=name, group_id=None))
-            .returning(products.c.name)
-        )
-        assert resultlt is not None
-        return resultlt
-
-    yield _
-
-    for product in created_products:
-        await connection.execute(products.delete().where(products.c.name == product))
-
-
-@pytest.fixture
-async def default_product_name(
-    create_product: Callable[[str], Awaitable[_ProductName]]
-) -> _ProductName:
-    return await create_product("test_product")
-
-
-@pytest.mark.parametrize(
-    "invalid_name",
-    [
-        None,
-        "",
-        "/",
-        ":",
-        '"',
-        "<",
-        ">",
-        "\\",
-        "|",
-        "?",
-        "My/Folder",
-        "MyFolder<",
-        "My*Folder",
-        "A" * (256),
-        "CON",
-        "PRN",
-        "AUX",
-        "NUL",
-        *[f"COM{i}" for i in range(1, 10)],
-        *[f"LPT{i}" for i in range(1, 10)],
-    ],
-)
-async def test_folder_create_wrong_folder_name(invalid_name: str):
-    with pytest.raises(InvalidFolderNameError):
-        await folder_create(Mock(), "mock_product", invalid_name, Mock())
-
-
-def test__get_where_clause():
-    assert isinstance(
-        _get_filter_for_enabled_permissions(VIEWER_PERMISSIONS, folders_access_rights),
-        ColumnElement,
-    )
-    assert isinstance(
-        _get_filter_for_enabled_permissions(EDITOR_PERMISSIONS, folders_access_rights),
-        ColumnElement,
-    )
-    assert isinstance(
-        _get_filter_for_enabled_permissions(OWNER_PERMISSIONS, folders_access_rights),
-        ColumnElement,
-    )
-    assert isinstance(
-        _get_filter_for_enabled_permissions(
-            _FolderPermissions(read=False, write=False, delete=False),
-            folders_access_rights,
-        ),
-        bool,
-    )
-
-
-async def _assert_folder_entires(
-    connection: SAConnection,
-    *,
-    folder_count: NonNegativeInt,
-    access_rights_count: NonNegativeInt | None = None,
-) -> None:
-    async def _query_table(table_name: sa.Table, count: NonNegativeInt) -> None:
-        result = await connection.execute(table_name.select())
-        rows = await result.fetchall()
-        assert rows is not None
-        assert len(rows) == count
-
-    await _query_table(folders, folder_count)
-    await _query_table(folders_access_rights, access_rights_count or folder_count)
-
-
-async def _assert_folderpermissions_exists(
-    connection: SAConnection, folder_id: _FolderID, gids: set[_GroupID]
-) -> None:
-    result = await connection.execute(
-        folders_access_rights.select()
-        .where(folders_access_rights.c.folder_id == folder_id)
-        .where(folders_access_rights.c.gid.in_(gids))
-    )
-    rows = await result.fetchall()
-    assert rows is not None
-    assert len(rows) == 1
-
-
-async def _assert_folder_permissions(
-    connection: SAConnection,
-    *,
-    folder_id: _FolderID,
-    gid: _GroupID,
-    role: FolderAccessRole,
-) -> None:
-    result = await connection.execute(
-        sa.select(folders_access_rights.c.folder_id)
-        .where(folders_access_rights.c.folder_id == folder_id)
-        .where(folders_access_rights.c.gid == gid)
-        .where(
-            _get_filter_for_enabled_permissions(
-                _get_permissions_from_role(role), folders_access_rights
-            )
-        )
-    )
-    rows = await result.fetchall()
-    assert rows is not None
-    assert len(rows) == 1
-
-
-async def _assert_name_and_description(
-    connection: SAConnection,
-    folder_id: _FolderID,
-    *,
-    name: str,
-    description: str,
-):
-    async with connection.execute(
-        sa.select(folders.c.name, folders.c.description).where(
-            folders.c.id == folder_id
-        )
-    ) as result_proxy:
-        results = await result_proxy.fetchall()
-        assert results
-        assert len(results) == 1
-        result = results[0]
-        assert result["name"] == name
-        assert result["description"] == description
-
-
-@pytest.fixture
-async def setup_users(
-    connection: SAConnection, create_fake_user: Callable[..., Awaitable[RowProxy]]
-) -> list[RowProxy]:
-    users: list[RowProxy] = []
-    for _ in range(10):
-        users.append(await create_fake_user(connection))  # noqa: PERF401
-    return users
-
-
-@pytest.fixture
-async def setup_users_and_groups(setup_users: list[RowProxy]) -> set[_GroupID]:
-    return {u.primary_gid for u in setup_users}
-
-
-@pytest.fixture
-def get_unique_gids(
-    setup_users_and_groups: set[_GroupID],
-) -> Callable[[int], tuple[_GroupID, ...]]:
-    def _(tuple_size: int) -> tuple[_GroupID, ...]:
-        copied_groups = deepcopy(setup_users_and_groups)
-        return tuple(copied_groups.pop() for _ in range(tuple_size))
-
-    return _
-
-
-@pytest.fixture
-async def setup_projects_for_users(
-    connection: SAConnection,
-    setup_users: list[RowProxy],
-    create_fake_project: Callable[..., Awaitable[RowProxy]],
-) -> set[_ProjectID]:
-    projects: set[_ProjectID] = set()
-    for user in setup_users:
-        project = await create_fake_project(connection, user)
-        projects.add(project.uuid)
-    return projects
-
-
-@pytest.fixture
-def get_unique_project_uuids(
-    setup_projects_for_users: set[_ProjectID],
-) -> Callable[[int], tuple[_ProjectID, ...]]:
-    def _(tuple_size: int) -> tuple[_ProjectID, ...]:
-        copied_projects = deepcopy(setup_projects_for_users)
-        return tuple(copied_projects.pop() for _ in range(tuple_size))
-
-    return _
-
-
-class MkFolder(BaseModel):
-    name: str
-    gid: _GroupID
-    description: str = ""
-    parent: _FolderID | None = None
-
-    shared_with: dict[_GroupID, FolderAccessRole] = Field(default_factory=dict)
-    children: set["MkFolder"] = Field(default_factory=set)
-
-    def __hash__(self):
-        return hash(
-            (
-                self.name,
-                self.description,
-                self.gid,
-                tuple(sorted(self.shared_with.items())),
-                frozenset(self.children),
-            )
-        )
-
-    def __eq__(self, other):
-        if not isinstance(other, MkFolder):
-            return False
-        return (
-            self.name == other.name
-            and self.description == other.description
-            and self.gid == other.gid
-            and self.shared_with == other.shared_with
-            and self.children == other.children
-        )
-
-
-@pytest.fixture
-def make_folders(
-    connection: SAConnection, default_product_name: _ProductName
-) -> Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]]:
-    async def _(
-        root_folders: set[MkFolder], *, parent: _FolderID | None = None
-    ) -> dict[str, _FolderID]:
-        folder_names_map: dict[str, _FolderID] = {}
-
-        for root in root_folders:
-            # create folder
-            folder_names_map[root.name] = root_folder_id = await folder_create(
-                connection,
-                default_product_name,
-                root.name,
-                {root.gid},
-                description=root.description,
-                parent=parent,
-            )
-            # share with others
-            for gid, role in root.shared_with.items():
-                await folder_share_or_update_permissions(
-                    connection,
-                    default_product_name,
-                    root_folder_id,
-                    sharing_gids={root.gid},
-                    recipient_gid=gid,
-                    recipient_role=role,
-                )
-            # create subfolders
-            subfolders_names_map = await _(root.children, parent=root_folder_id)
-            root_name = set(folder_names_map.keys())
-            subfolder_names = set(subfolders_names_map.keys())
-            if subfolder_names & root_name != set():
-                msg = f"{root_name=} and {subfolder_names=} are not allowed to have common folder names"
-                raise ValueError(msg)
-            folder_names_map.update(subfolders_names_map)
-
-        return folder_names_map
-
-    return _
-
-
-async def test_folder_create(
-    connection: SAConnection,
-    create_product: Callable[[str], Awaitable[_ProductName]],
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-):
-
-    (owner_gid,) = get_unique_gids(1)
-
-    product_a = await create_product("product_a")
-    product_b = await create_product("product_b")
-
-    expected_folder_count: int = 0
-    for product_name in (
-        product_a,
-        product_b,  # NOTE: a different product also can dfeine the same folder strucutre
-    ):
-
-        # 1. when GID is missing no entries should be present
-        missing_gid = 10202023302
-        await _assert_folder_entires(connection, folder_count=expected_folder_count)
-        with pytest.raises(NoGroupIDFoundError):
-            await folder_create(connection, product_name, "f1", {missing_gid})
-        await _assert_folder_entires(connection, folder_count=expected_folder_count)
-
-        # 2. create a folder and a subfolder of the same name
-        f1_folder_id = await folder_create(connection, product_name, "f1", {owner_gid})
-        expected_folder_count += 1
-        await _assert_folder_entires(connection, folder_count=expected_folder_count)
-        await folder_create(
-            connection, product_name, "f1", {owner_gid}, parent=f1_folder_id
-        )
-        expected_folder_count += 1
-        await _assert_folder_entires(connection, folder_count=expected_folder_count)
-
-        # 3. inserting already existing folder fails
-        with pytest.raises(FolderAlreadyExistsError):
-            await folder_create(connection, product_name, "f1", {owner_gid})
-        await _assert_folder_entires(connection, folder_count=expected_folder_count)
-
-
-async def test_folder_create_shared_via_groups(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-    create_fake_group: Callable[..., Awaitable[RowProxy]],
-):
-    #######
-    # SETUP
-    #######
-    gid_original_owner: _GroupID
-    (gid_original_owner,) = get_unique_gids(1)
-
-    gid_user: _GroupID = (
-        await create_fake_group(connection, type=GroupType.PRIMARY)
-    ).gid
-    gid_everyone: _GroupID | None = await connection.scalar(
-        sa.select(groups.c.gid).where(groups.c.type == GroupType.EVERYONE)
-    )
-    assert gid_everyone
-    gid_z43: _GroupID = (
-        await create_fake_group(connection, type=GroupType.STANDARD)
-    ).gid
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="root",
-                gid=gid_original_owner,
-                shared_with={
-                    gid_z43: FolderAccessRole.OWNER,
-                    gid_everyone: FolderAccessRole.OWNER,
-                },
-            ),
-        }
-    )
-
-    folder_id_root = folder_ids["root"]
-
-    #######
-    # TESTS
-    #######
-
-    # 1. can create when using one gid with permissions
-    folder_id_f1 = await folder_create(
-        connection,
-        default_product_name,
-        "f1",
-        {gid_z43, gid_user},
-        parent=folder_id_root,
-    )
-    await _assert_folderpermissions_exists(connection, folder_id_f1, {gid_z43})
-
-    folder_id_f2 = await folder_create(
-        connection,
-        default_product_name,
-        "f2",
-        {gid_everyone, gid_user},
-        parent=folder_id_root,
-    )
-    await _assert_folderpermissions_exists(connection, folder_id_f2, {gid_everyone})
-
-    # 2. can create new folder when using both gids with permissions
-    folder_id_f3 = await folder_create(
-        connection,
-        default_product_name,
-        "f3",
-        {gid_z43, gid_everyone, gid_user},
-        parent=folder_id_root,
-    )
-    await _assert_folderpermissions_exists(
-        connection, folder_id_f3, {gid_everyone, gid_z43}
-    )
-
-    # 3. cannot create a root folder without a primary group
-    with pytest.raises(RootFolderRequiresAtLeastOnePrimaryGroupError):
-        await folder_create(
-            connection,
-            default_product_name,
-            "folder_in_root",
-            {gid_z43, gid_everyone},
-        )
-
-
-async def test__get_resolved_access_rights(
-    connection: SAConnection,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-    (
-        gid_owner_a,
-        gid_owner_b,
-        gid_owner_c,
-        gid_owner_d,
-        gid_editor_a,
-        gid_editor_b,
-    ) = get_unique_gids(6)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="root",
-                gid=gid_owner_a,
-                shared_with={
-                    gid_owner_b: FolderAccessRole.OWNER,
-                    gid_owner_c: FolderAccessRole.OWNER,
-                    gid_owner_d: FolderAccessRole.OWNER,
-                    gid_editor_a: FolderAccessRole.EDITOR,
-                },
-                children={
-                    MkFolder(name="b", gid=gid_owner_b),
-                    MkFolder(
-                        name="c",
-                        gid=gid_owner_c,
-                        children={
-                            MkFolder(
-                                name="d",
-                                gid=gid_owner_d,
-                                shared_with={gid_editor_b: FolderAccessRole.EDITOR},
-                                children={MkFolder(name="editor_a", gid=gid_editor_a)},
-                            )
-                        },
-                    ),
-                },
-            ),
-        }
-    )
-
-    folder_id_root = folder_ids["root"]
-    folder_id_b = folder_ids["b"]
-    folder_id_c = folder_ids["c"]
-    folder_id_d = folder_ids["d"]
-    folder_id_editor_a = folder_ids["editor_a"]
-
-    # check resolved access rgihts resolution
-    async def _assert_resolves_to(
-        *,
-        target_folder_id: _FolderID,
-        gid: _GroupID,
-        permissions: _FolderPermissions,
-        expected_folder_id: _FolderID,
-        expected_gids: set[_FolderID],
-    ) -> None:
-        resolved_parent = await _get_resolved_access_rights(
-            connection,
-            target_folder_id,
-            gid,
-            permissions=permissions,
-        )
-        assert resolved_parent
-        assert resolved_parent.folder_id == expected_folder_id
-        assert resolved_parent.gid in expected_gids
-
-    #######
-    # TESTS
-    #######
-
-    await _assert_resolves_to(
-        target_folder_id=folder_id_root,
-        gid=gid_owner_a,
-        permissions=OWNER_PERMISSIONS,
-        expected_folder_id=folder_id_root,
-        expected_gids={gid_owner_a},
-    )
-    await _assert_resolves_to(
-        target_folder_id=folder_id_b,
-        gid=gid_owner_b,
-        permissions=OWNER_PERMISSIONS,
-        expected_folder_id=folder_id_root,
-        expected_gids={gid_owner_b},
-    )
-    await _assert_resolves_to(
-        target_folder_id=folder_id_c,
-        gid=gid_owner_c,
-        permissions=OWNER_PERMISSIONS,
-        expected_folder_id=folder_id_root,
-        expected_gids={gid_owner_c},
-    )
-    await _assert_resolves_to(
-        target_folder_id=folder_id_d,
-        gid=gid_owner_d,
-        permissions=OWNER_PERMISSIONS,
-        expected_folder_id=folder_id_root,
-        expected_gids={gid_owner_d},
-    )
-    await _assert_resolves_to(
-        target_folder_id=folder_id_editor_a,
-        gid=gid_editor_a,
-        permissions=EDITOR_PERMISSIONS,
-        expected_folder_id=folder_id_root,
-        expected_gids={gid_editor_a},
-    )
-    await _assert_resolves_to(
-        target_folder_id=folder_id_editor_a,
-        gid=gid_editor_b,
-        permissions=EDITOR_PERMISSIONS,
-        expected_folder_id=folder_id_d,
-        expected_gids={gid_editor_b},
-    )
-
-
-async def test_folder_share_or_update_permissions(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-):
-    (
-        gid_owner,
-        gid_other_owner,
-        gid_editor,
-        gid_viewer,
-        gid_no_access,
-        gid_share_with_error,
-    ) = get_unique_gids(6)
-
-    # 1. folder does not exist
-    folder_id_missing = 12313123232
-    with pytest.raises(FolderNotFoundError):
-        await folder_share_or_update_permissions(
-            connection,
-            default_product_name,
-            folder_id_missing,
-            sharing_gids={gid_owner},
-            recipient_gid=gid_share_with_error,
-            recipient_role=FolderAccessRole.OWNER,
-        )
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 2. share existing folder with all possible roles
-    folder_id = await folder_create(connection, default_product_name, "f1", {gid_owner})
-    await _assert_folder_entires(connection, folder_count=1)
-    await _assert_folder_permissions(
-        connection, folder_id=folder_id, gid=gid_owner, role=FolderAccessRole.OWNER
-    )
-
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={gid_owner},
-        recipient_gid=gid_other_owner,
-        recipient_role=FolderAccessRole.OWNER,
-    )
-    await _assert_folder_entires(connection, folder_count=1, access_rights_count=2)
-    await _assert_folder_permissions(
-        connection,
-        folder_id=folder_id,
-        gid=gid_other_owner,
-        role=FolderAccessRole.OWNER,
-    )
-
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={gid_owner},
-        recipient_gid=gid_editor,
-        recipient_role=FolderAccessRole.EDITOR,
-    )
-    await _assert_folder_entires(connection, folder_count=1, access_rights_count=3)
-    await _assert_folder_permissions(
-        connection, folder_id=folder_id, gid=gid_editor, role=FolderAccessRole.EDITOR
-    )
-
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={gid_owner},
-        recipient_gid=gid_viewer,
-        recipient_role=FolderAccessRole.VIEWER,
-    )
-    await _assert_folder_entires(connection, folder_count=1, access_rights_count=4)
-    await _assert_folder_permissions(
-        connection, folder_id=folder_id, gid=gid_viewer, role=FolderAccessRole.VIEWER
-    )
-
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={gid_owner},
-        recipient_gid=gid_no_access,
-        recipient_role=FolderAccessRole.NO_ACCESS,
-    )
-    await _assert_folder_entires(connection, folder_count=1, access_rights_count=5)
-    await _assert_folder_permissions(
-        connection,
-        folder_id=folder_id,
-        gid=gid_no_access,
-        role=FolderAccessRole.NO_ACCESS,
-    )
-
-    # 3. roles without permissions cannot share with any role
-    for recipient_role in FolderAccessRole:
-        for no_access_gids in (gid_editor, gid_viewer, gid_no_access):
-            with pytest.raises(InsufficientPermissionsError):
-                await folder_share_or_update_permissions(
-                    connection,
-                    default_product_name,
-                    folder_id,
-                    sharing_gids={no_access_gids},
-                    recipient_gid=gid_share_with_error,
-                    recipient_role=recipient_role,
-                )
-            await _assert_folder_entires(
-                connection, folder_count=1, access_rights_count=5
-            )
-
-        with pytest.raises(FolderNotSharedWithGidError):
-            await folder_share_or_update_permissions(
-                connection,
-                default_product_name,
-                folder_id,
-                sharing_gids={gid_share_with_error},
-                recipient_gid=gid_share_with_error,
-                recipient_role=recipient_role,
-            )
-        await _assert_folder_entires(connection, folder_count=1, access_rights_count=5)
-
-    # 4. all users loose permission on the foler including the issuer
-    # NOTE: anoteher_owner dropped owner's permission and his permission to no access!
-    for gid_to_drop_permission in (gid_owner, gid_editor, gid_viewer, gid_other_owner):
-        await folder_share_or_update_permissions(
-            connection,
-            default_product_name,
-            folder_id,
-            sharing_gids={gid_other_owner},
-            recipient_gid=gid_to_drop_permission,
-            recipient_role=FolderAccessRole.NO_ACCESS,
-        )
-        await _assert_folder_entires(connection, folder_count=1, access_rights_count=5)
-        await _assert_folder_permissions(
-            connection,
-            folder_id=folder_id,
-            gid=gid_to_drop_permission,
-            role=FolderAccessRole.NO_ACCESS,
-        )
-
-
-async def test_folder_update(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-):
-    (
-        owner_gid,
-        other_owner_gid,
-        editor_gid,
-        viewer_gid,
-        no_access_gid,
-        share_with_error_gid,
-    ) = get_unique_gids(6)
-
-    # 1. folder is missing
-    missing_folder_id = 1231321332
-    with pytest.raises(FolderNotFoundError):
-        await folder_update(
-            connection, default_product_name, missing_folder_id, {owner_gid}
-        )
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 2. owner updates created fodler
-    folder_id = await folder_create(connection, default_product_name, "f1", {owner_gid})
-    await _assert_folder_entires(connection, folder_count=1)
-    await _assert_name_and_description(connection, folder_id, name="f1", description="")
-
-    # nothing changes
-    await folder_update(connection, default_product_name, folder_id, {owner_gid})
-    await _assert_name_and_description(connection, folder_id, name="f1", description="")
-
-    # both changed
-    await folder_update(
-        connection,
-        default_product_name,
-        folder_id,
-        {owner_gid},
-        name="new_folder",
-        description="new_desc",
-    )
-    await _assert_name_and_description(
-        connection, folder_id, name="new_folder", description="new_desc"
-    )
-
-    # 3. another_owner can also update
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=other_owner_gid,
-        recipient_role=FolderAccessRole.OWNER,
-    )
-    await folder_update(
-        connection,
-        default_product_name,
-        folder_id,
-        {owner_gid},
-        name="another_owner_name",
-        description="another_owner_description",
-    )
-    await _assert_name_and_description(
-        connection,
-        folder_id,
-        name="another_owner_name",
-        description="another_owner_description",
-    )
-
-    # 4. other roles have no permission to update
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=editor_gid,
-        recipient_role=FolderAccessRole.EDITOR,
-    )
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=viewer_gid,
-        recipient_role=FolderAccessRole.VIEWER,
-    )
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=no_access_gid,
-        recipient_role=FolderAccessRole.NO_ACCESS,
-    )
-
-    for target_user_gid in (editor_gid, viewer_gid, no_access_gid):
-        with pytest.raises(InsufficientPermissionsError):
-            await folder_update(
-                connection,
-                default_product_name,
-                folder_id,
-                {target_user_gid},
-                name="error_name",
-                description="error_description",
-            )
-        await _assert_name_and_description(
-            connection,
-            folder_id,
-            name="another_owner_name",
-            description="another_owner_description",
-        )
-
-    with pytest.raises(FolderNotSharedWithGidError):
-        await folder_update(
-            connection,
-            default_product_name,
-            folder_id,
-            {share_with_error_gid},
-            name="error_name",
-            description="error_description",
-        )
-    await _assert_name_and_description(
-        connection,
-        folder_id,
-        name="another_owner_name",
-        description="another_owner_description",
-    )
-
-
-async def test_folder_delete(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-):
-    (
-        owner_gid,
-        other_owner_gid,
-        editor_gid,
-        viewer_gid,
-        no_access_gid,
-        share_with_error_gid,
-    ) = get_unique_gids(6)
-
-    # 1. folder is missing
-    missing_folder_id = 1231321332
-    with pytest.raises(FolderNotFoundError):
-        await folder_delete(
-            connection, default_product_name, missing_folder_id, {owner_gid}
-        )
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 2. owner deletes folder
-    folder_id = await folder_create(connection, default_product_name, "f1", {owner_gid})
-    await _assert_folder_entires(connection, folder_count=1)
-
-    await folder_delete(connection, default_product_name, folder_id, {owner_gid})
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 3. other owners can delete the folder
-    folder_id = await folder_create(connection, default_product_name, "f1", {owner_gid})
-    await _assert_folder_entires(connection, folder_count=1)
-
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=other_owner_gid,
-        recipient_role=FolderAccessRole.OWNER,
-    )
-
-    await folder_delete(connection, default_product_name, folder_id, {other_owner_gid})
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 4. non owner users cannot delete the folder
-    folder_id = await folder_create(connection, default_product_name, "f1", {owner_gid})
-    await _assert_folder_entires(connection, folder_count=1)
-
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=editor_gid,
-        recipient_role=FolderAccessRole.EDITOR,
-    )
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=viewer_gid,
-        recipient_role=FolderAccessRole.VIEWER,
-    )
-    await folder_share_or_update_permissions(
-        connection,
-        default_product_name,
-        folder_id,
-        sharing_gids={owner_gid},
-        recipient_gid=no_access_gid,
-        recipient_role=FolderAccessRole.NO_ACCESS,
-    )
-    await _assert_folder_entires(connection, folder_count=1, access_rights_count=4)
-
-    for non_owner_gid in (editor_gid, viewer_gid, no_access_gid):
-        with pytest.raises(InsufficientPermissionsError):
-            await folder_delete(
-                connection, default_product_name, folder_id, {non_owner_gid}
-            )
-
-    with pytest.raises(FolderNotSharedWithGidError):
-        await folder_delete(
-            connection, default_product_name, folder_id, {share_with_error_gid}
-        )
-
-    await _assert_folder_entires(connection, folder_count=1, access_rights_count=4)
-
-
-async def test_folder_delete_nested_folders(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-    (
-        gid_owner_a,
-        gid_owner_b,
-        gid_editor_a,
-        gid_editor_b,
-        gid_viewer,
-        gid_no_access,
-        gid_not_shared,
-    ) = get_unique_gids(7)
-
-    async def _setup_folders() -> _FolderID:
-        await _assert_folder_entires(connection, folder_count=0)
-        folder_ids = await make_folders(
-            {
-                MkFolder(
-                    name="root_folder",
-                    gid=gid_owner_a,
-                    shared_with={
-                        gid_owner_b: FolderAccessRole.OWNER,
-                        gid_editor_a: FolderAccessRole.EDITOR,
-                        gid_editor_b: FolderAccessRole.EDITOR,
-                        gid_viewer: FolderAccessRole.VIEWER,
-                        gid_no_access: FolderAccessRole.NO_ACCESS,
-                    },
-                )
-            }
-        )
-        folder_id_root_folder = folder_ids["root_folder"]
-        await _assert_folder_entires(connection, folder_count=1, access_rights_count=6)
-
-        GIDS_WITH_CREATE_PERMISSIONS: set[_GroupID] = {
-            gid_owner_a,
-            gid_owner_b,
-            gid_editor_a,
-            gid_editor_b,
-        }
-
-        previous_folder_id = folder_id_root_folder
-        for i in range(100):
-            previous_folder_id = await folder_create(
-                connection,
-                default_product_name,
-                f"f{i}",
-                GIDS_WITH_CREATE_PERMISSIONS,
-                parent=previous_folder_id,
-            )
-        await _assert_folder_entires(
-            connection, folder_count=101, access_rights_count=106
-        )
-        return folder_id_root_folder
-
-    #######
-    # TESTS
-    #######
-
-    # 1. delete via `gid_owner_a`
-    folder_id_root_folder = await _setup_folders()
-    await folder_delete(
-        connection, default_product_name, folder_id_root_folder, {gid_owner_a}
-    )
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 2. delete via shared with `gid_owner_b`
-    folder_id_root_folder = await _setup_folders()
-    await folder_delete(
-        connection, default_product_name, folder_id_root_folder, {gid_owner_b}
-    )
-    await _assert_folder_entires(connection, folder_count=0)
-
-    # 3. delete is not permitted
-    folder_id_root_folder = await _setup_folders()
-    for no_permissions_gid in (gid_editor_a, gid_editor_b, gid_viewer):
-        with pytest.raises(InsufficientPermissionsError):
-            await folder_delete(
-                connection,
-                default_product_name,
-                folder_id_root_folder,
-                {no_permissions_gid},
-            )
-    for no_permissions_gid in (gid_not_shared,):
-        with pytest.raises(FolderNotSharedWithGidError):
-            await folder_delete(
-                connection,
-                default_product_name,
-                folder_id_root_folder,
-                {no_permissions_gid},
-            )
-    await _assert_folder_entires(connection, folder_count=101, access_rights_count=106)
-
-
-async def test_folder_move(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-
-    (gid_sharing, gid_user_a, gid_user_b) = get_unique_gids(3)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="USER_A",
-                gid=gid_user_a,
-                children={MkFolder(name="f_user_a", gid=gid_user_a)},
-            ),
-            MkFolder(
-                name="USER_B",
-                gid=gid_user_b,
-                children={MkFolder(name="f_user_b", gid=gid_user_b)},
-            ),
-            MkFolder(
-                name="SHARED_AS_OWNER",
-                gid=gid_sharing,
-                children={
-                    MkFolder(
-                        name="f_shared_as_owner_user_a",
-                        gid=gid_sharing,
-                        shared_with={gid_user_a: FolderAccessRole.OWNER},
-                    ),
-                    MkFolder(
-                        name="f_shared_as_owner_user_b",
-                        gid=gid_sharing,
-                        shared_with={gid_user_b: FolderAccessRole.OWNER},
-                    ),
-                },
-            ),
-            MkFolder(
-                name="SHARED_AS_EDITOR",
-                gid=gid_sharing,
-                children={
-                    MkFolder(
-                        name="f_shared_as_editor_user_a",
-                        gid=gid_sharing,
-                        shared_with={gid_user_a: FolderAccessRole.EDITOR},
-                    ),
-                    MkFolder(
-                        name="f_shared_as_editor_user_b",
-                        gid=gid_sharing,
-                        shared_with={gid_user_b: FolderAccessRole.EDITOR},
-                    ),
-                },
-            ),
-            MkFolder(
-                name="SHARED_AS_VIEWER",
-                gid=gid_sharing,
-                children={
-                    MkFolder(
-                        name="f_shared_as_viewer_user_a",
-                        gid=gid_sharing,
-                        shared_with={gid_user_a: FolderAccessRole.VIEWER},
-                    ),
-                    MkFolder(
-                        name="f_shared_as_viewer_user_b",
-                        gid=gid_sharing,
-                        shared_with={gid_user_b: FolderAccessRole.VIEWER},
-                    ),
-                },
-            ),
-            MkFolder(
-                name="SHARED_AS_NO_ACCESS",
-                gid=gid_sharing,
-                children={
-                    MkFolder(
-                        name="f_shared_as_no_access_user_a",
-                        gid=gid_sharing,
-                        shared_with={gid_user_a: FolderAccessRole.NO_ACCESS},
-                    ),
-                    MkFolder(
-                        name="f_shared_as_no_access_user_b",
-                        gid=gid_sharing,
-                        shared_with={gid_user_b: FolderAccessRole.NO_ACCESS},
-                    ),
-                },
-            ),
-            MkFolder(name="NOT_SHARED", gid=gid_sharing),
-        }
-    )
-
-    folder_id_user_a = folder_ids["USER_A"]
-    folder_id_f_user_a = folder_ids["f_user_a"]
-    folder_id_user_b = folder_ids["USER_B"]
-    folder_id_f_user_b = folder_ids["f_user_b"]
-    folder_id_f_shared_as_owner_user_a = folder_ids["f_shared_as_owner_user_a"]
-    folder_id_f_shared_as_owner_user_b = folder_ids["f_shared_as_owner_user_b"]
-    folder_id_f_shared_as_editor_user_a = folder_ids["f_shared_as_editor_user_a"]
-    folder_id_f_shared_as_editor_user_b = folder_ids["f_shared_as_editor_user_b"]
-    folder_id_f_shared_as_viewer_user_a = folder_ids["f_shared_as_viewer_user_a"]
-    folder_id_f_shared_as_viewer_user_b = folder_ids["f_shared_as_viewer_user_b"]
-    folder_id_f_shared_as_no_access_user_a = folder_ids["f_shared_as_no_access_user_a"]
-    folder_id_f_shared_as_no_access_user_b = folder_ids["f_shared_as_no_access_user_b"]
-    folder_id_not_shared = folder_ids["NOT_SHARED"]
-
-    async def _move_fails_not_shared_with_error(
-        gid: _GroupID, *, source: _FolderID, destination: _FolderID
-    ) -> None:
-        with pytest.raises(FolderNotSharedWithGidError):
-            await folder_move(
-                connection,
-                default_product_name,
-                source,
-                {gid},
-                destination_folder_id=destination,
-            )
-
-    async def _move_fails_insufficient_permissions_error(
-        gid: _GroupID, *, source: _FolderID, destination: _FolderID
-    ) -> None:
-        with pytest.raises(InsufficientPermissionsError):
-            await folder_move(
-                connection,
-                default_product_name,
-                source,
-                {gid},
-                destination_folder_id=destination,
-            )
-
-    async def _move_back_and_forth(
-        gid: _GroupID,
-        *,
-        source: _FolderID,
-        destination: _FolderID,
-        source_parent: _FolderID,
-    ) -> None:
-        async def _assert_folder_permissions(
-            connection: SAConnection,
-            *,
-            folder_id: _FolderID,
-            gid: _GroupID,
-            parent_folder: _FolderID,
-        ) -> None:
-            result = await connection.execute(
-                sa.select(folders_access_rights.c.folder_id)
-                .where(folders_access_rights.c.folder_id == folder_id)
-                .where(folders_access_rights.c.gid == gid)
-                .where(folders_access_rights.c.traversal_parent_id == parent_folder)
-            )
-            rows = await result.fetchall()
-            assert rows is not None
-            assert len(rows) == 1
-
-        # check parent should be parent_before
-        await _assert_folder_permissions(
-            connection, folder_id=source, gid=gid, parent_folder=source_parent
-        )
-
-        await folder_move(
-            connection,
-            default_product_name,
-            source,
-            {gid},
-            destination_folder_id=destination,
-        )
-
-        # check parent should be destination
-        await _assert_folder_permissions(
-            connection, folder_id=source, gid=gid, parent_folder=destination
-        )
-
-        await folder_move(
-            connection,
-            default_product_name,
-            source,
-            {gid},
-            destination_folder_id=source_parent,
-        )
-
-        # check parent should be parent_before
-        await _assert_folder_permissions(
-            connection, folder_id=source, gid=gid, parent_folder=source_parent
-        )
-
-    #######
-    # TESTS
-    #######
-
-    # 1. not working:
-    # - `USER_A/f_user_a -> USER_B`
-    await _move_fails_not_shared_with_error(
-        gid_user_a, source=folder_id_f_user_a, destination=folder_id_user_b
-    )
-    # - `USER_B/f_user_b -> USER_A`
-    await _move_fails_not_shared_with_error(
-        gid_user_b, source=folder_id_f_user_b, destination=folder_id_user_a
-    )
-    # - `USER_A/f_user_a -> NOT_SHARED`
-    await _move_fails_not_shared_with_error(
-        gid_user_a, source=folder_id_f_user_a, destination=folder_id_not_shared
-    )
-    # - `USER_B/f_user_b -> NOT_SHARED`
-    await _move_fails_not_shared_with_error(
-        gid_user_b, source=folder_id_f_user_b, destination=folder_id_not_shared
-    )
-    # - `USER_A/f_user_a -> f_shared_as_no_access_user_a`
-    await _move_fails_insufficient_permissions_error(
-        gid_user_a,
-        source=folder_id_f_user_a,
-        destination=folder_id_f_shared_as_no_access_user_a,
-    )
-    # - `USER_B/f_user_b -> f_shared_as_no_access_user_b`
-    await _move_fails_insufficient_permissions_error(
-        gid_user_b,
-        source=folder_id_f_user_b,
-        destination=folder_id_f_shared_as_no_access_user_b,
-    )
-    # - `USER_A/f_user_a -> f_shared_as_viewer_user_a`
-    await _move_fails_insufficient_permissions_error(
-        gid_user_a,
-        source=folder_id_f_user_a,
-        destination=folder_id_f_shared_as_viewer_user_a,
-    )
-    # - `USER_B/f_user_b -> f_shared_as_viewer_user_b`
-    await _move_fails_insufficient_permissions_error(
-        gid_user_b,
-        source=folder_id_f_user_b,
-        destination=folder_id_f_shared_as_viewer_user_b,
-    )
-
-    # 2. allowed operations:
-    # - `USER_A/f_user_a -> f_shared_as_editor_user_a` (& reverse)
-    await _move_back_and_forth(
-        gid_user_a,
-        source=folder_id_f_user_a,
-        destination=folder_id_f_shared_as_editor_user_a,
-        source_parent=folder_id_user_a,
-    )
-    # - `USER_B/f_user_b -> f_shared_as_editor_user_b` (& reverse)
-    await _move_back_and_forth(
-        gid_user_b,
-        source=folder_id_f_user_b,
-        destination=folder_id_f_shared_as_editor_user_b,
-        source_parent=folder_id_user_b,
-    )
-    # - `USER_A/f_user_a -> f_shared_as_owner_user_a` (& reverse)
-    await _move_back_and_forth(
-        gid_user_a,
-        source=folder_id_f_user_a,
-        destination=folder_id_f_shared_as_owner_user_a,
-        source_parent=folder_id_user_a,
-    )
-    # - `USER_B/f_user_b -> f_shared_as_owner_user_b` (& reverse)
-    await _move_back_and_forth(
-        gid_user_b,
-        source=folder_id_f_user_b,
-        destination=folder_id_f_shared_as_owner_user_b,
-        source_parent=folder_id_user_b,
-    )
-
-    # 3. allowed to move in `root` folder
-    for to_move_folder_id, to_move_gid in [
-        (folder_id_f_user_a, gid_user_a),
-        (folder_id_f_user_b, gid_user_b),
-        (folder_id_f_shared_as_owner_user_a, gid_user_a),
-        (folder_id_f_shared_as_owner_user_b, gid_user_b),
-    ]:
-        await folder_move(
-            connection,
-            default_product_name,
-            to_move_folder_id,
-            {to_move_gid},
-            destination_folder_id=None,
-        )
-
-    # 4. not allowed to move in `root` folder
-    for to_move_folder_id, to_move_gid in [
-        (folder_id_f_shared_as_editor_user_a, gid_user_a),
-        (folder_id_f_shared_as_editor_user_b, gid_user_b),
-        (folder_id_f_shared_as_viewer_user_a, gid_user_a),
-        (folder_id_f_shared_as_viewer_user_b, gid_user_b),
-        (folder_id_f_shared_as_no_access_user_a, gid_user_a),
-        (folder_id_f_shared_as_no_access_user_b, gid_user_b),
-    ]:
-        with pytest.raises(InsufficientPermissionsError):
-            await folder_move(
-                connection,
-                default_product_name,
-                to_move_folder_id,
-                {to_move_gid},
-                destination_folder_id=None,
-            )
-
-    for to_move_gid in [gid_user_a, gid_user_b]:
-        with pytest.raises(FolderNotSharedWithGidError):
-            await folder_move(
-                connection,
-                default_product_name,
-                folder_id_not_shared,
-                {to_move_gid},
-                destination_folder_id=None,
-            )
-
-
-async def test_move_only_owners_can_move(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-    (
-        gid_owner,
-        gid_editor,
-        gid_viewer,
-        gid_no_access,
-        gid_not_shared,
-    ) = get_unique_gids(5)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="to_move",
-                gid=gid_owner,
-                shared_with={
-                    gid_editor: FolderAccessRole.EDITOR,
-                    gid_viewer: FolderAccessRole.VIEWER,
-                    gid_no_access: FolderAccessRole.NO_ACCESS,
-                },
-            ),
-            MkFolder(name="target_owner", gid=gid_owner),
-            MkFolder(name="target_editor", gid=gid_editor),
-            MkFolder(name="target_viewer", gid=gid_viewer),
-            MkFolder(name="target_no_access", gid=gid_no_access),
-            MkFolder(name="target_not_shared", gid=gid_not_shared),
-        }
-    )
-
-    folder_id_to_move = folder_ids["to_move"]
-    folder_id_target_owner = folder_ids["target_owner"]
-    folder_id_target_editor = folder_ids["target_editor"]
-    folder_id_target_viewer = folder_ids["target_viewer"]
-    folder_id_target_no_access = folder_ids["target_no_access"]
-    folder_id_target_not_shared = folder_ids["target_not_shared"]
-
-    async def _fails_to_move(gid: _GroupID, destination_folder_id: _FolderID) -> None:
-        with pytest.raises(InsufficientPermissionsError):
-            await folder_move(
-                connection,
-                default_product_name,
-                folder_id_to_move,
-                {gid},
-                destination_folder_id=destination_folder_id,
-            )
-
-    #######
-    # TESTS
-    #######
-
-    # 1. no permissions to move
-    await _fails_to_move(gid_editor, folder_id_target_editor)
-    await _fails_to_move(gid_viewer, folder_id_target_viewer)
-    await _fails_to_move(gid_no_access, folder_id_target_no_access)
-
-    # 2. not shared with user
-    with pytest.raises(FolderNotSharedWithGidError):
-        await folder_move(
-            connection,
-            default_product_name,
-            folder_id_to_move,
-            {gid_not_shared},
-            destination_folder_id=folder_id_target_not_shared,
-        )
-
-    # 3. owner is able to move
-    await folder_move(
-        connection,
-        default_product_name,
-        folder_id_to_move,
-        {gid_owner},
-        destination_folder_id=folder_id_target_owner,
-    )
-
-
-async def test_move_group_non_standard_groups_raise_error(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-    create_fake_group: Callable[..., Awaitable[RowProxy]],
-):
-    #######
-    # SETUP
-    #######
-    gid_original_owner: _GroupID
-    (gid_original_owner,) = get_unique_gids(1)
-    gid_primary: _GroupID = (
-        await create_fake_group(connection, type=GroupType.PRIMARY)
-    ).gid
-    gid_everyone: _GroupID | None = await connection.scalar(
-        sa.select(groups.c.gid).where(groups.c.type == GroupType.EVERYONE)
-    )
-    assert gid_everyone
-    gid_standard: _GroupID = (
-        await create_fake_group(connection, type=GroupType.STANDARD)
-    ).gid
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="SHARING_USER",
-                gid=gid_original_owner,
-                shared_with={
-                    gid_primary: FolderAccessRole.EDITOR,
-                    gid_everyone: FolderAccessRole.EDITOR,
-                    gid_standard: FolderAccessRole.EDITOR,
-                },
-            ),
-            MkFolder(
-                name="PRIMARY",
-                gid=gid_original_owner,
-                shared_with={gid_primary: FolderAccessRole.OWNER},
-            ),
-            MkFolder(
-                name="EVERYONE",
-                gid=gid_original_owner,
-                shared_with={gid_everyone: FolderAccessRole.OWNER},
-            ),
-            MkFolder(
-                name="STANDARD",
-                gid=gid_original_owner,
-                shared_with={gid_standard: FolderAccessRole.OWNER},
-            ),
-        }
-    )
-
-    folder_id_sharing_user = folder_ids["SHARING_USER"]
-    folder_id_primary = folder_ids["PRIMARY"]
-    folder_id_everyone = folder_ids["EVERYONE"]
-    folder_id_standard = folder_ids["STANDARD"]
-
-    #######
-    # TESTS
-    #######
-
-    with pytest.raises(CannotMoveFolderSharedViaNonPrimaryGroupError) as exc:
-        await folder_move(
-            connection,
-            default_product_name,
-            folder_id_everyone,
-            {gid_everyone},
-            destination_folder_id=folder_id_sharing_user,
-        )
-    assert "EVERYONE" in f"{exc.value}"
-
-    with pytest.raises(CannotMoveFolderSharedViaNonPrimaryGroupError) as exc:
-        await folder_move(
-            connection,
-            default_product_name,
-            folder_id_standard,
-            {gid_standard},
-            destination_folder_id=folder_id_sharing_user,
-        )
-    assert "STANDARD" in f"{exc.value}"
-
-    # primary group does not raise error
-    await folder_move(
-        connection,
-        default_product_name,
-        folder_id_primary,
-        {gid_primary},
-        destination_folder_id=folder_id_sharing_user,
-    )
-
-
-async def test_add_remove_project_in_folder(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-    get_unique_project_uuids: Callable[[int], tuple[_ProjectID, ...]],
-):
-    #######
-    # SETUP
-    #######
-
-    (gid_owner, gid_editor, gid_viewer, gid_no_access) = get_unique_gids(4)
-    (project_uuid,) = get_unique_project_uuids(1)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="f1",
-                gid=gid_owner,
-                shared_with={
-                    gid_editor: FolderAccessRole.EDITOR,
-                    gid_viewer: FolderAccessRole.VIEWER,
-                    gid_no_access: FolderAccessRole.NO_ACCESS,
-                },
-            )
-        }
-    )
-    folder_id_f1 = folder_ids["f1"]
-
-    async def _is_project_present(
-        connection: SAConnection,
-        folder_id: _FolderID,
-        project_id: _ProjectID,
-    ) -> bool:
-        async with connection.execute(
-            folders_to_projects.select()
-            .where(folders_to_projects.c.folder_id == folder_id)
-            .where(folders_to_projects.c.project_uuid == project_id)
-        ) as result:
-            rows = await result.fetchall()
-            assert rows is not None
-            return len(rows) == 1
-
-    async def _add_folder_as(gid: _GroupID) -> None:
-        await folder_add_project(
-            connection,
-            default_product_name,
-            folder_id_f1,
-            {gid},
-            project_uuid=project_uuid,
-        )
-        assert await _is_project_present(connection, folder_id_f1, project_uuid) is True
-
-    async def _remove_folder_as(gid: _GroupID) -> None:
-        await folder_remove_project(
-            connection,
-            default_product_name,
-            folder_id_f1,
-            {gid},
-            project_uuid=project_uuid,
-        )
-        assert (
-            await _is_project_present(connection, folder_id_f1, project_uuid) is False
-        )
-
-    assert await _is_project_present(connection, folder_id_f1, project_uuid) is False
-
-    #######
-    # TESTS
-    #######
-
-    # 1. owner can add and remove
-    await _add_folder_as(gid_owner)
-    await _remove_folder_as(gid_owner)
-
-    # 2. editor can add and can't remove
-    await _add_folder_as(gid_editor)
-    with pytest.raises(InsufficientPermissionsError):
-        await _remove_folder_as(gid_editor)
-    await _remove_folder_as(gid_owner)  # cleanup
-
-    # 3. viewer can't add and can't remove
-    with pytest.raises(InsufficientPermissionsError):
-        await _add_folder_as(gid_viewer)
-    with pytest.raises(InsufficientPermissionsError):
-        await _remove_folder_as(gid_viewer)
-
-    # 4. no_access can't add and can't remove
-    with pytest.raises(InsufficientPermissionsError):
-        await _add_folder_as(gid_no_access)
-    with pytest.raises(InsufficientPermissionsError):
-        await _remove_folder_as(gid_no_access)
-
-
-class ExpectedValues(NamedTuple):
-    id: _FolderID
-    my_access_rights: _FolderPermissions
-    access_rights: dict[_GroupID, _FolderPermissions]
-
-    def __hash__(self):
-        return hash(
-            (
-                self.id,
-                self.my_access_rights,
-                tuple(sorted(self.access_rights.items())),
-            )
-        )
-
-    def __eq__(self, other):
-        if not isinstance(other, ExpectedValues):
-            return False
-        return (
-            self.id == other.id
-            and self.my_access_rights == other.my_access_rights
-            and self.access_rights == other.access_rights
-        )
-
-
-def _assert_expected_entries(
-    folders: list[FolderEntry], *, expected: set[ExpectedValues]
-) -> None:
-    for folder_entry in folders:
-        expected_values = ExpectedValues(
-            folder_entry.id,
-            folder_entry.my_access_rights,
-            folder_entry.access_rights,
-        )
-        assert expected_values in expected
-
-
-ALL_IN_ONE_PAGE_OFFSET: NonNegativeInt = 0
-ALL_IN_ONE_PAGE_LIMIT: NonNegativeInt = 1000
-
-
-async def _list_folder_as(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    folder_id: _FolderID | None,
-    gids: set[_GroupID],
-    offset: NonNegativeInt = ALL_IN_ONE_PAGE_OFFSET,
-    limit: NonNegativeInt = ALL_IN_ONE_PAGE_LIMIT,
-) -> list[FolderEntry]:
-
-    _, folders_db = await folder_list(
-        connection, default_product_name, folder_id, gids, offset=offset, limit=limit
-    )
-    return folders_db
-
-
-async def test_folder_list(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-    (
-        gid_owner,
-        gid_editor,
-        gid_viewer,
-        gid_no_access,
-        gid_not_shared,
-    ) = get_unique_gids(5)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="owner_folder",
-                gid=gid_owner,
-                shared_with={
-                    gid_editor: FolderAccessRole.EDITOR,
-                    gid_viewer: FolderAccessRole.VIEWER,
-                    gid_no_access: FolderAccessRole.NO_ACCESS,
-                },
-                children={
-                    *{MkFolder(name=f"f{i}", gid=gid_owner) for i in range(1, 10)},
-                    MkFolder(
-                        name="f10",
-                        gid=gid_owner,
-                        children={
-                            MkFolder(name=f"sub_f{i}", gid=gid_owner)
-                            for i in range(1, 11)
-                        },
-                    ),
-                },
-            )
-        }
-    )
-
-    folder_id_owner_folder = folder_ids["owner_folder"]
-    folder_id_f1 = folder_ids["f1"]
-    folder_id_f2 = folder_ids["f2"]
-    folder_id_f3 = folder_ids["f3"]
-    folder_id_f4 = folder_ids["f4"]
-    folder_id_f5 = folder_ids["f5"]
-    folder_id_f6 = folder_ids["f6"]
-    folder_id_f7 = folder_ids["f7"]
-    folder_id_f8 = folder_ids["f8"]
-    folder_id_f9 = folder_ids["f9"]
-    folder_id_f10 = folder_ids["f10"]
-    folder_id_sub_f1 = folder_ids["sub_f1"]
-    folder_id_sub_f2 = folder_ids["sub_f2"]
-    folder_id_sub_f3 = folder_ids["sub_f3"]
-    folder_id_sub_f4 = folder_ids["sub_f4"]
-    folder_id_sub_f5 = folder_ids["sub_f5"]
-    folder_id_sub_f6 = folder_ids["sub_f6"]
-    folder_id_sub_f7 = folder_ids["sub_f7"]
-    folder_id_sub_f8 = folder_ids["sub_f8"]
-    folder_id_sub_f9 = folder_ids["sub_f9"]
-    folder_id_sub_f10 = folder_ids["sub_f10"]
-
-    ALL_FOLDERS_FX = (
-        folder_id_f1,
-        folder_id_f2,
-        folder_id_f3,
-        folder_id_f4,
-        folder_id_f5,
-        folder_id_f6,
-        folder_id_f7,
-        folder_id_f8,
-        folder_id_f9,
-        folder_id_f10,
-    )
-
-    ALL_FOLDERS_SUB_FX = (
-        folder_id_sub_f1,
-        folder_id_sub_f2,
-        folder_id_sub_f3,
-        folder_id_sub_f4,
-        folder_id_sub_f5,
-        folder_id_sub_f6,
-        folder_id_sub_f7,
-        folder_id_sub_f8,
-        folder_id_sub_f9,
-        folder_id_sub_f10,
-    )
-
-    ALL_FOLDERS_AND_SUBFOLDERS = (
-        folder_id_owner_folder,
-        *ALL_FOLDERS_FX,
-        *ALL_FOLDERS_SUB_FX,
-    )
-
-    ACCESS_RIGHTS_BY_GID: dict[_GroupID, _FolderPermissions] = {
-        gid_owner: OWNER_PERMISSIONS,
-        gid_editor: EDITOR_PERMISSIONS,
-        gid_viewer: VIEWER_PERMISSIONS,
-        gid_no_access: NO_ACCESS_PERMISSIONS,
-    }
-
-    #######
-    # TESTS
-    #######
-
-    # 1. list all levels per gid with access
-    for listing_gid in (gid_owner, gid_editor, gid_viewer):
-        # list `root` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, None, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    folder_id_owner_folder,
-                    ACCESS_RIGHTS_BY_GID[listing_gid],
-                    {
-                        gid_owner: OWNER_PERMISSIONS,
-                        gid_editor: EDITOR_PERMISSIONS,
-                        gid_viewer: VIEWER_PERMISSIONS,
-                        gid_no_access: NO_ACCESS_PERMISSIONS,
-                    },
-                ),
-            },
-        )
-        # list `owner_folder` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, folder_id_owner_folder, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    fx,
-                    ACCESS_RIGHTS_BY_GID[listing_gid],
-                    {gid_owner: OWNER_PERMISSIONS},
-                )
-                for fx in ALL_FOLDERS_FX
-            },
-        )
-        # list `f10` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, folder_id_f10, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    sub_fx,
-                    ACCESS_RIGHTS_BY_GID[listing_gid],
-                    {gid_owner: OWNER_PERMISSIONS},
-                )
-                for sub_fx in ALL_FOLDERS_SUB_FX
-            },
-        )
-
-    # 2. list all levels for `gid_no_access`
-    # can always be run but should not list any entry
-    _assert_expected_entries(
-        await _list_folder_as(connection, default_product_name, None, {gid_no_access}),
-        expected=set(),
-    )
-    # there are insufficient permissions
-    for folder_id_to_check in ALL_FOLDERS_AND_SUBFOLDERS:
-        with pytest.raises(InsufficientPermissionsError):
-            await _list_folder_as(
-                connection, default_product_name, folder_id_to_check, {gid_no_access}
-            )
-
-    # 3. list all levels for `gid_not_shared`
-    # can always list the contents of the "root" folder for a gid
-    _assert_expected_entries(
-        await _list_folder_as(connection, default_product_name, None, {gid_not_shared}),
-        expected=set(),
-    )
-    for folder_id_to_check in ALL_FOLDERS_AND_SUBFOLDERS:
-        with pytest.raises(FolderNotSharedWithGidError):
-            await _list_folder_as(
-                connection, default_product_name, folder_id_to_check, {gid_not_shared}
-            )
-
-    # 4. list with pagination
-    for initial_limit in (1, 2, 3, 4, 5):
-        offset = 0
-        limit = initial_limit
-        found_folders: list[FolderEntry] = []
-        while items := await _list_folder_as(
-            connection,
-            default_product_name,
-            folder_id_owner_folder,
-            {gid_owner},
-            offset=offset,
-            limit=limit,
-        ):
-            found_folders.extend(items)
-            offset += limit
-            if len(items) != limit:
-                break
-
-        one_shot_query = await _list_folder_as(
-            connection, default_product_name, folder_id_owner_folder, {gid_owner}
-        )
-
-        assert len(found_folders) == len(one_shot_query)
-        assert found_folders == one_shot_query
-
-
-async def test_folder_list_shared_with_different_permissions(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-
-    (gid_owner_a, gid_owner_b, gid_owner_c, gid_owner_level_2) = get_unique_gids(4)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="f_owner_a",
-                gid=gid_owner_a,
-                shared_with={
-                    gid_owner_b: FolderAccessRole.OWNER,
-                    gid_owner_c: FolderAccessRole.OWNER,
-                },
-                children={
-                    MkFolder(
-                        name="f_owner_b",
-                        gid=gid_owner_b,
-                        children={
-                            MkFolder(
-                                name="f_owner_c",
-                                gid=gid_owner_c,
-                                shared_with={gid_owner_level_2: FolderAccessRole.OWNER},
-                                children={
-                                    MkFolder(name="f_sub_owner_c", gid=gid_owner_c),
-                                    MkFolder(
-                                        name="f_owner_level_2", gid=gid_owner_level_2
-                                    ),
-                                },
-                            )
-                        },
-                    )
-                },
-            )
-        }
-    )
-
-    folder_id_f_owner_a = folder_ids["f_owner_a"]
-    folder_id_f_owner_b = folder_ids["f_owner_b"]
-    folder_id_f_owner_c = folder_ids["f_owner_c"]
-    folder_id_f_sub_owner_c = folder_ids["f_sub_owner_c"]
-    folder_id_f_owner_level_2 = folder_ids["f_owner_level_2"]
-
-    #######
-    # TESTS
-    #######
-
-    # 1. `gid_owner_a`, `gid_owner_b`, `gid_owner_c` have the exact same view
-    for listing_gid in (gid_owner_a, gid_owner_b, gid_owner_c):
-        # list `root` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, None, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    folder_id_f_owner_a,
-                    OWNER_PERMISSIONS,
-                    {
-                        gid_owner_a: OWNER_PERMISSIONS,
-                        gid_owner_b: OWNER_PERMISSIONS,
-                        gid_owner_c: OWNER_PERMISSIONS,
-                    },
-                ),
-            },
-        )
-        # list `f_owner_a` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, folder_id_f_owner_a, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    folder_id_f_owner_b,
-                    OWNER_PERMISSIONS,
-                    {gid_owner_b: OWNER_PERMISSIONS},
-                ),
-            },
-        )
-        # list `f_owner_b` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, folder_id_f_owner_b, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    folder_id_f_owner_c,
-                    OWNER_PERMISSIONS,
-                    {
-                        gid_owner_c: OWNER_PERMISSIONS,
-                        gid_owner_level_2: OWNER_PERMISSIONS,
-                    },
-                ),
-            },
-        )
-        # list `f_owner_c` for gid
-        _assert_expected_entries(
-            await _list_folder_as(
-                connection, default_product_name, folder_id_f_owner_c, {listing_gid}
-            ),
-            expected={
-                ExpectedValues(
-                    folder_id_f_sub_owner_c,
-                    OWNER_PERMISSIONS,
-                    {
-                        gid_owner_c: OWNER_PERMISSIONS,
-                    },
-                ),
-                ExpectedValues(
-                    folder_id_f_owner_level_2,
-                    OWNER_PERMISSIONS,
-                    {
-                        gid_owner_level_2: OWNER_PERMISSIONS,
-                    },
-                ),
-            },
-        )
-
-    # 2. `gid_owner_level_2` can only access from `f_owner_c` downwards
-    # list `root` for `gid_owner_level_2`
-    _assert_expected_entries(
-        await _list_folder_as(
-            connection, default_product_name, None, {gid_owner_level_2}
-        ),
-        expected={
-            ExpectedValues(
-                folder_id_f_owner_c,
-                OWNER_PERMISSIONS,
-                {
-                    gid_owner_c: OWNER_PERMISSIONS,
-                    gid_owner_level_2: OWNER_PERMISSIONS,
-                },
-            ),
-        },
-    )
-    # list `f_owner_c` for `gid_owner_level_2`
-    _assert_expected_entries(
-        await _list_folder_as(
-            connection, default_product_name, folder_id_f_owner_c, {gid_owner_level_2}
-        ),
-        expected={
-            ExpectedValues(
-                folder_id_f_sub_owner_c,
-                OWNER_PERMISSIONS,
-                {
-                    gid_owner_c: OWNER_PERMISSIONS,
-                },
-            ),
-            ExpectedValues(
-                folder_id_f_owner_level_2,
-                OWNER_PERMISSIONS,
-                {
-                    gid_owner_level_2: OWNER_PERMISSIONS,
-                },
-            ),
-        },
-    )
-
-
-async def test_folder_list_in_root_with_different_groups_avoids_duplicate_entries(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-
-    (gid_z43, gid_osparc, gid_user) = get_unique_gids(3)
-
-    await make_folders(
-        {
-            MkFolder(
-                name="f1",
-                gid=gid_user,
-                shared_with={
-                    gid_z43: FolderAccessRole.OWNER,
-                    gid_osparc: FolderAccessRole.OWNER,
-                },
-            ),
-            MkFolder(
-                name="f2",
-                gid=gid_z43,
-                shared_with={
-                    gid_osparc: FolderAccessRole.OWNER,
-                },
-            ),
-            MkFolder(
-                name="f3",
-                gid=gid_osparc,
-                shared_with={
-                    gid_z43: FolderAccessRole.OWNER,
-                },
-            ),
-        }
-    )
-
-    #######
-    # TESTS
-    #######
-
-    # 1. gid_z43 and gid_osparc see all folders
-    for gid_all_folders in (gid_z43, gid_osparc):
-        entries_z43 = await _list_folder_as(
-            connection, default_product_name, None, {gid_all_folders}
-        )
-        assert len(entries_z43) == 3
-
-    # 2. gid_user only sees its own folder
-    entries_user = await _list_folder_as(
-        connection, default_product_name, None, {gid_user}
-    )
-    assert len(entries_user) == 1
-
-    # 3. all gids see all folders
-    entries_all_groups = await _list_folder_as(
-        connection, default_product_name, None, {gid_z43, gid_osparc, gid_user}
-    )
-    assert len(entries_all_groups) == 3
-
-
-async def test_regression_list_folder_parent(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-
-    (gid_user,) = get_unique_gids(1)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="f1",
-                gid=gid_user,
-                children={
-                    MkFolder(
-                        name="f2",
-                        gid=gid_user,
-                        children={
-                            MkFolder(name="f3", gid=gid_user),
-                        },
-                    )
-                },
-            ),
-        }
-    )
-
-    folder_id_f1 = folder_ids["f1"]
-    folder_id_f2 = folder_ids["f2"]
-    folder_id_f3 = folder_ids["f3"]
-
-    #######
-    # TESTS
-    #######
-
-    for folder_id in (None, folder_id_f1, folder_id_f2):
-        folder_content = await _list_folder_as(
-            connection, default_product_name, folder_id, {gid_user}
-        )
-        assert len(folder_content) == 1
-        assert folder_content[0]
-        assert folder_content[0].parent_folder == folder_id
-
-    f3_content = await _list_folder_as(
-        connection, default_product_name, folder_id_f3, {gid_user}
-    )
-    assert len(f3_content) == 0
-
-
-async def test_folder_get(
-    connection: SAConnection,
-    default_product_name: _ProductName,
-    get_unique_gids: Callable[[int], tuple[_GroupID, ...]],
-    make_folders: Callable[[set[MkFolder]], Awaitable[dict[str, _FolderID]]],
-):
-    #######
-    # SETUP
-    #######
-    (
-        gid_owner,
-        gid_other_owner,
-        gid_not_shared,
-    ) = get_unique_gids(3)
-
-    folder_ids = await make_folders(
-        {
-            MkFolder(
-                name="owner_folder",
-                gid=gid_owner,
-                shared_with={
-                    gid_other_owner: FolderAccessRole.OWNER,
-                },
-                children={
-                    *{MkFolder(name=f"f{i}", gid=gid_owner) for i in range(1, 3)},
-                    MkFolder(
-                        name="f10",
-                        gid=gid_owner,
-                        children={
-                            MkFolder(name=f"sub_f{i}", gid=gid_owner)
-                            for i in range(1, 3)
-                        },
-                    ),
-                },
-            )
-        }
-    )
-
-    folder_id_owner_folder = folder_ids["owner_folder"]
-    folder_id_f1 = folder_ids["f1"]
-    folder_id_f2 = folder_ids["f2"]
-    folder_id_sub_f1 = folder_ids["sub_f1"]
-    folder_id_sub_f2 = folder_ids["sub_f2"]
-
-    #######
-    # TESTS
-    #######
-
-    # 1. query existing directories
-    for folder_id_to_list in (
-        None,
-        folder_id_owner_folder,
-        folder_id_f1,
-        folder_id_f2,
-        folder_id_sub_f1,
-        folder_id_sub_f2,
-    ):
-        folder_entries = await _list_folder_as(
-            connection, default_product_name, folder_id_to_list, {gid_owner}
-        )
-        for entry in folder_entries:
-            queried_folder = await folder_get(
-                connection, default_product_name, entry.id, {gid_owner}
-            )
-            assert entry == queried_folder
-
-    # 2. query via gid_not_shared
-    with pytest.raises(FolderNotSharedWithGidError):
-        await folder_get(
-            connection, default_product_name, folder_id_owner_folder, {gid_not_shared}
-        )
-
-    # 3. query with missing folder_id
-    missing_folder_id = 12312313123
-    for gid_to_test in (
-        gid_owner,
-        gid_other_owner,
-        gid_not_shared,
-    ):
-        with pytest.raises(FolderNotFoundError):
-            await folder_get(
-                connection, default_product_name, missing_folder_id, {gid_to_test}
-            )
diff --git a/packages/postgres-database/tests/test_utils_projects_nodes.py b/packages/postgres-database/tests/test_utils_projects_nodes.py
index 50d2af969117..21c130bcc7d9 100644
--- a/packages/postgres-database/tests/test_utils_projects_nodes.py
+++ b/packages/postgres-database/tests/test_utils_projects_nodes.py
@@ -309,7 +309,7 @@ async def test_delete_project_delete_all_nodes(
 
 @pytest.mark.parametrize("num_concurrent_workflows", [1, 250])
 async def test_multiple_creation_deletion_of_nodes(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
     registered_user: RowProxy,
     create_fake_project: Callable[..., Awaitable[RowProxy]],
     create_fake_projects_node: Callable[..., ProjectNodeCreate],
@@ -318,7 +318,7 @@ async def test_multiple_creation_deletion_of_nodes(
     NUM_NODES = 11
 
     async def _workflow() -> None:
-        async with pg_engine.acquire() as connection:
+        async with aiopg_engine.acquire() as connection:
             project = await create_fake_project(connection, registered_user)
             projects_nodes_repo = ProjectNodesRepo(project_uuid=project.uuid)
 
@@ -341,7 +341,7 @@ async def _workflow() -> None:
 
 
 async def test_get_project_id_from_node_id(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
     connection: SAConnection,
     projects_nodes_repo: ProjectNodesRepo,
     registered_user: RowProxy,
@@ -351,7 +351,7 @@ async def test_get_project_id_from_node_id(
     NUM_NODES = 11
 
     async def _workflow() -> dict[uuid.UUID, list[uuid.UUID]]:
-        async with pg_engine.acquire() as connection:
+        async with aiopg_engine.acquire() as connection:
             project = await create_fake_project(connection, registered_user)
             projects_nodes_repo = ProjectNodesRepo(project_uuid=project.uuid)
 
@@ -379,7 +379,7 @@ async def _workflow() -> dict[uuid.UUID, list[uuid.UUID]]:
 
 
 async def test_get_project_id_from_node_id_raises_for_invalid_node_id(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
     connection: SAConnection,
     projects_nodes_repo: ProjectNodesRepo,
     faker: Faker,
@@ -393,7 +393,7 @@ async def test_get_project_id_from_node_id_raises_for_invalid_node_id(
 
 
 async def test_get_project_id_from_node_id_raises_if_multiple_projects_with_same_node_id_exist(
-    pg_engine: Engine,
+    aiopg_engine: Engine,
     connection: SAConnection,
     projects_nodes_repo: ProjectNodesRepo,
     registered_user: RowProxy,
diff --git a/packages/postgres-database/tests/test_utils_repos.py b/packages/postgres-database/tests/test_utils_repos.py
new file mode 100644
index 000000000000..be100df2ef10
--- /dev/null
+++ b/packages/postgres-database/tests/test_utils_repos.py
@@ -0,0 +1,213 @@
+# pylint: disable=redefined-outer-name
+# pylint: disable=unused-argument
+# pylint: disable=unused-variable
+# pylint: disable=too-many-arguments
+
+
+from typing import Any, NamedTuple
+
+import pytest
+import sqlalchemy as sa
+from simcore_postgres_database.models.tags import tags
+from simcore_postgres_database.utils_repos import (
+    pass_or_acquire_connection,
+    transaction_context,
+)
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
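+
+# NOTE: assumed semantics of the helpers under test (a sketch, not authoritative):
+#  - pass_or_acquire_connection(engine, connection): reuses `connection` when given,
+#    otherwise acquires one from `engine`; intended for read-only access
+#  - transaction_context(engine, connection): same, but additionally wraps the work
+#    in a transaction that commits on __aexit__ and rolls back on error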
+
+
+async def test_sa_transactions(asyncpg_engine: AsyncEngine):
+    #
+    # SEE https://docs.sqlalchemy.org/en/20/orm/extensions/asyncio.html#synopsis-core
+    #
+
+    # READ query
+    total_count_query = sa.select(sa.func.count()).select_from(tags)
+
+    # WRITE queries
+    query1 = (
+        tags.insert().values(id=2, name="query1", color="blue").returning(tags.c.id)
+    )
+    query11 = (
+        tags.insert().values(id=3, name="query11", color="blue").returning(tags.c.id)
+    )
+    query12 = (
+        tags.insert().values(id=5, name="query12", color="blue").returning(tags.c.id)
+    )
+    query2 = (
+        tags.insert().values(id=7, name="query2", color="blue").returning(tags.c.id)
+    )
+
+    async with asyncpg_engine.connect() as conn, conn.begin():  # starts the outer transaction
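+        # NOTE: conn.begin() above opens the outer transaction; each begin_nested()
+        # below emits a SAVEPOINT, so a failing nested block only rolls back its own work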
+
+        result = await conn.execute(query1)
+        assert result.scalar() == 2
+
+        total_count = (await conn.execute(total_count_query)).scalar()
+        assert total_count == 1
+
+        rows = (await conn.execute(tags.select().where(tags.c.id == 2))).fetchall()
+        assert rows
+        assert rows[0].id == 2
+
+        async with conn.begin_nested():  # savepoint
+            await conn.execute(query11)
+
+            with pytest.raises(IntegrityError):
+                async with conn.begin_nested():  # savepoint
+                    await conn.execute(query11)
+
+            await conn.execute(query12)
+
+            total_count = (await conn.execute(total_count_query)).scalar()
+            assert total_count == 3  # since query11 (second time) reverted!
+
+        await conn.execute(query2)
+
+        total_count = (await conn.execute(total_count_query)).scalar()
+        assert total_count == 4
+
+
+class _PageTuple(NamedTuple):
+    total_count: int
+    rows: list[dict[str, Any]]
+
+
+class OneResourceRepoDemo:
+    # This is a PROTOTYPE of how one could implement a generic
+    # repo that provides CRUD operations for a given table
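+    #
+    # Every method takes an optional AsyncConnection: when the caller passes one,
+    # the operation runs on the caller's connection/transaction; when omitted, the
+    # transaction_context/pass_or_acquire_connection helpers acquire a connection
+    # (and, for writes, a transaction) internally.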
+    def __init__(self, engine: AsyncEngine, table: sa.Table):
+        if "id" not in table.columns:
+            msg = "id column expected"
+            raise ValueError(msg)
+        self.table = table
+
+        self.engine = engine
+
+    async def create(self, connection: AsyncConnection | None = None, **kwargs) -> int:
+        async with transaction_context(self.engine, connection) as conn:
+            result = await conn.execute(self.table.insert().values(**kwargs))
+            assert result  # nosec
+            return result.inserted_primary_key[0]
+
+    async def get_by_id(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        row_id: int,
+    ) -> dict[str, Any] | None:
+        async with pass_or_acquire_connection(self.engine, connection) as conn:
+            result = await conn.execute(
+                sa.select(self.table).where(self.table.c.id == row_id)
+            )
+            row = result.mappings().fetchone()
+            return dict(row) if row else None
+
+    async def get_page(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        limit: int,
+        offset: int = 0,
+    ) -> _PageTuple:
+        async with pass_or_acquire_connection(self.engine, connection) as conn:
+            # Compute total count
+            total_count_query = sa.select(sa.func.count()).select_from(self.table)
+            total_count_result = await conn.execute(total_count_query)
+            total_count = total_count_result.scalar()
+
+            # Fetch paginated results
+            query = sa.select(self.table).limit(limit).offset(offset)
+            result = await conn.execute(query)
+            rows = [dict(row) for row in result.mappings().fetchall()]
+
+            return _PageTuple(total_count=total_count or 0, rows=rows)
+
+    async def update(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        row_id: int,
+        **values,
+    ) -> bool:
+        async with transaction_context(self.engine, connection) as conn:
+            result = await conn.execute(
+                self.table.update().where(self.table.c.id == row_id).values(**values)
+            )
+            return result.rowcount > 0
+
+    async def delete(
+        self,
+        connection: AsyncConnection | None = None,
+        *,
+        row_id: int,
+    ) -> bool:
+        async with transaction_context(self.engine, connection) as conn:
+            result = await conn.execute(
+                self.table.delete().where(self.table.c.id == row_id)
+            )
+            return result.rowcount > 0
+
+
+async def test_oneresourcerepodemo_prototype(asyncpg_engine: AsyncEngine):
+
+    tags_repo = OneResourceRepoDemo(engine=asyncpg_engine, table=tags)
+
+    # create
+    tag_id = await tags_repo.create(name="cyan tag", color="cyan")
+    assert tag_id > 0
+
+    # get, list
+    tag = await tags_repo.get_by_id(row_id=tag_id)
+    assert tag
+
+    page = await tags_repo.get_page(limit=10)
+    assert page.total_count == 1
+    assert page.rows == [tag]
+
+    # update
+    ok = await tags_repo.update(row_id=tag_id, name="changed name")
+    assert ok
+
+    updated_tag = await tags_repo.get_by_id(row_id=tag_id)
+    assert updated_tag
+    assert updated_tag["name"] != tag["name"]
+
+    # delete
+    ok = await tags_repo.delete(row_id=tag_id)
+    assert ok
+
+    assert not await tags_repo.get_by_id(row_id=tag_id)
+
+
+async def test_transaction_context(asyncpg_engine: AsyncEngine):
+    # (1) using transaction_context when an operation inside it fails
+    fake_error_msg = "some error"
+
+    def _something_raises_here():
+        raise RuntimeError(fake_error_msg)
+
+    tags_repo = OneResourceRepoDemo(engine=asyncpg_engine, table=tags)
+
+    # using external transaction_context: commits upon __aexit__
+    async with transaction_context(asyncpg_engine) as conn:
+        await tags_repo.create(conn, name="cyan tag", color="cyan")
+        await tags_repo.create(conn, name="red tag", color="red")
+        assert (await tags_repo.get_page(conn, limit=10, offset=0)).total_count == 2
+
+    # using internal: auto-commit
+    await tags_repo.create(name="red tag", color="red")
+    assert (await tags_repo.get_page(limit=10, offset=0)).total_count == 3
+
+    # auto-rollback
+    with pytest.raises(RuntimeError, match=fake_error_msg):  # noqa: PT012
+        async with transaction_context(asyncpg_engine) as conn:
+            await tags_repo.create(conn, name="violet tag", color="violet")
+            assert (await tags_repo.get_page(conn, limit=10, offset=0)).total_count == 4
+            _something_raises_here()
+
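+    # the "violet tag" insert above was rolled back together with the failed
+    # transaction_context, so the total is unchanged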
+    assert (await tags_repo.get_page(limit=10, offset=0)).total_count == 3
diff --git a/packages/postgres-database/tests/test_utils_tags.py b/packages/postgres-database/tests/test_utils_tags.py
index 2b99c1939fe6..26f9a301f76d 100644
--- a/packages/postgres-database/tests/test_utils_tags.py
+++ b/packages/postgres-database/tests/test_utils_tags.py
@@ -27,9 +27,11 @@
     get_tag_stmt,
     get_tags_for_project_stmt,
     get_tags_for_services_stmt,
+    list_tags_stmt,
     set_tag_access_rights_stmt,
     update_tag_stmt,
 )
+from sqlalchemy.ext.asyncio import AsyncEngine
 
 
 @pytest.fixture
@@ -75,7 +77,11 @@ async def other_user(
 
 
 async def test_tags_access_with_primary_groups(
-    connection: SAConnection, user: RowProxy, group: RowProxy, other_user: RowProxy
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+    other_user: RowProxy,
 ):
     conn = connection
 
@@ -102,22 +108,29 @@ async def test_tags_access_with_primary_groups(
         ),
     ]
 
-    tags_repo = TagsRepo(user_id=user.id)
+    tags_repo = TagsRepo(asyncpg_engine)
 
     # repo has access
     assert (
-        await tags_repo.access_count(conn, tag_id, read=True, write=True, delete=True)
+        await tags_repo.access_count(
+            user_id=user.id, tag_id=tag_id, read=True, write=True, delete=True
+        )
+        == 1
+    )
+    assert (
+        await tags_repo.access_count(
+            user_id=user.id, tag_id=tag_id, read=True, write=True
+        )
         == 1
     )
-    assert await tags_repo.access_count(conn, tag_id, read=True, write=True) == 1
-    assert await tags_repo.access_count(conn, tag_id, read=True) == 1
-    assert await tags_repo.access_count(conn, tag_id, write=True) == 1
+    assert await tags_repo.access_count(user_id=user.id, tag_id=tag_id, read=True) == 1
+    assert await tags_repo.access_count(user_id=user.id, tag_id=tag_id, write=True) == 1
 
     # changing access conditions
     assert (
         await tags_repo.access_count(
-            conn,
-            tag_id,
+            user_id=user.id,
+            tag_id=tag_id,
             read=True,
             write=True,
             delete=False,  # <---
@@ -128,15 +141,20 @@ async def test_tags_access_with_primary_groups(
     # user will have NO access to other user's tags even matching access rights
     assert (
         await tags_repo.access_count(
-            conn, other_tag_id, read=True, write=True, delete=True
+            user_id=user.id, tag_id=other_tag_id, read=True, write=True, delete=True
         )
         == 0
     )
 
 
 async def test_tags_access_with_multiple_groups(
-    connection: SAConnection, user: RowProxy, group: RowProxy, other_user: RowProxy
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+    other_user: RowProxy,
 ):
+
     conn = connection
 
     (tag_id, other_tag_id, group_tag_id, everyone_tag_id) = [
@@ -182,30 +200,58 @@ async def test_tags_access_with_multiple_groups(
         ),
     ]
 
-    tags_repo = TagsRepo(user_id=user.id)
-    other_repo = TagsRepo(user_id=other_user.id)
+    tags_repo = TagsRepo(asyncpg_engine)
+    other_repo = TagsRepo(asyncpg_engine)
 
     # tag_id
     assert (
-        await tags_repo.access_count(conn, tag_id, read=True, write=True, delete=True)
+        await tags_repo.access_count(
+            user_id=user.id, tag_id=tag_id, read=True, write=True, delete=True
+        )
         == 1
     )
     assert (
-        await other_repo.access_count(conn, tag_id, read=True, write=True, delete=True)
+        await other_repo.access_count(
+            user_id=other_user.id, tag_id=tag_id, read=True, write=True, delete=True
+        )
         == 0
     )
 
     # other_tag_id
-    assert await tags_repo.access_count(conn, other_tag_id, read=True) == 0
-    assert await other_repo.access_count(conn, other_tag_id, read=True) == 1
+    assert (
+        await tags_repo.access_count(user_id=user.id, tag_id=other_tag_id, read=True)
+        == 0
+    )
+    assert (
+        await other_repo.access_count(
+            user_id=other_user.id, tag_id=other_tag_id, read=True
+        )
+        == 1
+    )
 
     # group_tag_id
-    assert await tags_repo.access_count(conn, group_tag_id, read=True) == 1
-    assert await other_repo.access_count(conn, group_tag_id, read=True) == 0
+    assert (
+        await tags_repo.access_count(user_id=user.id, tag_id=group_tag_id, read=True)
+        == 1
+    )
+    assert (
+        await other_repo.access_count(
+            user_id=other_user.id, tag_id=group_tag_id, read=True
+        )
+        == 0
+    )
 
     # everyone_tag_id
-    assert await tags_repo.access_count(conn, everyone_tag_id, read=True) == 1
-    assert await other_repo.access_count(conn, everyone_tag_id, read=True) == 1
+    assert (
+        await tags_repo.access_count(user_id=user.id, tag_id=everyone_tag_id, read=True)
+        == 1
+    )
+    assert (
+        await other_repo.access_count(
+            user_id=other_user.id, tag_id=everyone_tag_id, read=True
+        )
+        == 1
+    )
 
     # now group adds read for all tags
     for t in (tag_id, other_tag_id, everyone_tag_id):
@@ -218,19 +264,29 @@ async def test_tags_access_with_multiple_groups(
             delete=False,
         )
 
-    assert await tags_repo.access_count(conn, tag_id, read=True) == 2
-    assert await tags_repo.access_count(conn, other_tag_id, read=True) == 1
-    assert await tags_repo.access_count(conn, everyone_tag_id, read=True) == 2
+    assert await tags_repo.access_count(user_id=user.id, tag_id=tag_id, read=True) == 2
+    assert (
+        await tags_repo.access_count(user_id=user.id, tag_id=other_tag_id, read=True)
+        == 1
+    )
+    assert (
+        await tags_repo.access_count(user_id=user.id, tag_id=everyone_tag_id, read=True)
+        == 2
+    )
 
 
 async def test_tags_repo_list_and_get(
-    connection: SAConnection, user: RowProxy, group: RowProxy, other_user: RowProxy
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+    other_user: RowProxy,
 ):
     conn = connection
-    tags_repo = TagsRepo(user_id=user.id)
+    tags_repo = TagsRepo(asyncpg_engine)
 
     # (1) no tags
-    listed_tags = await tags_repo.list_all(conn)
+    listed_tags = await tags_repo.list_all(user_id=user.id)
     assert not listed_tags
 
     # (2) one tag
@@ -247,7 +303,7 @@ async def test_tags_repo_list_and_get(
         )
     ]
 
-    listed_tags = await tags_repo.list_all(conn)
+    listed_tags = await tags_repo.list_all(user_id=user.id)
     assert listed_tags
     assert [t["id"] for t in listed_tags] == expected_tags_ids
 
@@ -265,7 +321,7 @@ async def test_tags_repo_list_and_get(
         )
     )
 
-    listed_tags = await tags_repo.list_all(conn)
+    listed_tags = await tags_repo.list_all(user_id=user.id)
     assert {t["id"] for t in listed_tags} == set(expected_tags_ids)
 
     # (4) add another tag from a differnt user
@@ -282,7 +338,7 @@ async def test_tags_repo_list_and_get(
 
     # same as before
     prev_listed_tags = listed_tags
-    listed_tags = await tags_repo.list_all(conn)
+    listed_tags = await tags_repo.list_all(user_id=user.id)
     assert listed_tags == prev_listed_tags
 
     # (5) add a global tag
@@ -297,7 +353,7 @@ async def test_tags_repo_list_and_get(
         delete=False,
     )
 
-    listed_tags = await tags_repo.list_all(conn)
+    listed_tags = await tags_repo.list_all(user_id=user.id)
     assert listed_tags == [
         {
             "id": 1,
@@ -328,8 +384,8 @@ async def test_tags_repo_list_and_get(
         },
     ]
 
-    other_repo = TagsRepo(user_id=other_user.id)
-    assert await other_repo.list_all(conn) == [
+    other_repo = TagsRepo(asyncpg_engine)
+    assert await other_repo.list_all(user_id=other_user.id) == [
         {
             "id": 3,
             "name": "T3",
@@ -351,7 +407,7 @@ async def test_tags_repo_list_and_get(
     ]
 
     # exclusive to user
-    assert await tags_repo.get(conn, tag_id=2) == {
+    assert await tags_repo.get(user_id=user.id, tag_id=2) == {
         "id": 2,
         "name": "T2",
         "description": "tag via std group",
@@ -363,9 +419,9 @@ async def test_tags_repo_list_and_get(
 
     # exclusive ot other user
     with pytest.raises(TagNotFoundError):
-        assert await tags_repo.get(conn, tag_id=3)
+        assert await tags_repo.get(user_id=user.id, tag_id=3)
 
-    assert await other_repo.get(conn, tag_id=3) == {
+    assert await other_repo.get(user_id=other_user.id, tag_id=3) == {
         "id": 3,
         "name": "T3",
         "description": "tag for 2",
@@ -376,14 +432,71 @@ async def test_tags_repo_list_and_get(
     }
 
     # a common tag
-    assert await tags_repo.get(conn, tag_id=4) == await other_repo.get(conn, tag_id=4)
+    assert await tags_repo.get(user_id=user.id, tag_id=4) == await other_repo.get(
+        user_id=user.id, tag_id=4
+    )
+
+
+async def test_tags_repo_uniquely_list_or_get_shared_tags(
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+):
+    conn = connection
+    tags_repo = TagsRepo(asyncpg_engine)
+
+    # (1) create a tag which cannot be written
+    expected_tag_id = await create_tag(
+        conn,
+        name="T1",
+        description=f"tag for {user.id}",
+        color="blue",
+        group_id=user.primary_gid,
+        read=True,
+        write=False,  # <-- cannot write
+        delete=True,
+    )
+
+    got = await tags_repo.get(user_id=user.id, tag_id=expected_tag_id)
+    assert got
+    assert got["id"] == expected_tag_id
+    assert got["read"] is True
+    assert got["write"] is False  # <--
+    assert got["delete"] is True
+
+    # (2) share with standard group
+    await create_tag_access(
+        conn,
+        tag_id=expected_tag_id,
+        group_id=group.gid,
+        read=True,
+        write=True,  # < -- group can write
+        delete=False,
+    )
+
+    # checks that the aggregation is the MOST permissive
+    # checks that user_id now has full access via its primary and its standard group
+    got = await tags_repo.get(user_id=user.id, tag_id=expected_tag_id)
+    assert got
+    assert got["id"] == expected_tag_id
+    assert got["read"] is True
+    assert got["write"] is True  # <--
+    assert got["delete"] is True
+
+    user_tags = await tags_repo.list_all(user_id=user.id)
+    assert user_tags == [got]
 
 
 async def test_tags_repo_update(
-    connection: SAConnection, user: RowProxy, group: RowProxy, other_user: RowProxy
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+    other_user: RowProxy,
 ):
     conn = connection
-    tags_repo = TagsRepo(user_id=user.id)
+    tags_repo = TagsRepo(asyncpg_engine)
 
     # Tags with different access rights
     readonly_tid, readwrite_tid, other_tid = [
@@ -420,10 +533,12 @@ async def test_tags_repo_update(
     ]
 
     with pytest.raises(TagOperationNotAllowedError):
-        await tags_repo.update(conn, tag_id=readonly_tid, description="modified")
+        await tags_repo.update(
+            user_id=user.id, tag_id=readonly_tid, description="modified"
+        )
 
     assert await tags_repo.update(
-        conn, tag_id=readwrite_tid, description="modified"
+        user_id=user.id, tag_id=readwrite_tid, description="modified"
     ) == {
         "id": readwrite_tid,
         "name": "T2",
@@ -435,14 +550,20 @@ async def test_tags_repo_update(
     }
 
     with pytest.raises(TagOperationNotAllowedError):
-        await tags_repo.update(conn, tag_id=other_tid, description="modified")
+        await tags_repo.update(
+            user_id=user.id, tag_id=other_tid, description="modified"
+        )
 
 
 async def test_tags_repo_delete(
-    connection: SAConnection, user: RowProxy, group: RowProxy, other_user: RowProxy
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+    other_user: RowProxy,
 ):
     conn = connection
-    tags_repo = TagsRepo(user_id=user.id)
+    tags_repo = TagsRepo(asyncpg_engine)
 
     # Tags with different access rights
     readonly_tid, delete_tid, other_tid = [
@@ -480,28 +601,32 @@ async def test_tags_repo_delete(
 
     # cannot delete
     with pytest.raises(TagOperationNotAllowedError):
-        await tags_repo.delete(conn, tag_id=readonly_tid)
+        await tags_repo.delete(user_id=user.id, tag_id=readonly_tid)
 
     # can delete
-    await tags_repo.get(conn, tag_id=delete_tid)
-    await tags_repo.delete(conn, tag_id=delete_tid)
+    await tags_repo.get(user_id=user.id, tag_id=delete_tid)
+    await tags_repo.delete(user_id=user.id, tag_id=delete_tid)
 
     with pytest.raises(TagNotFoundError):
-        await tags_repo.get(conn, tag_id=delete_tid)
+        await tags_repo.get(user_id=user.id, tag_id=delete_tid)
 
     # cannot delete
     with pytest.raises(TagOperationNotAllowedError):
-        await tags_repo.delete(conn, tag_id=other_tid)
+        await tags_repo.delete(user_id=user.id, tag_id=other_tid)
 
 
 async def test_tags_repo_create(
-    connection: SAConnection, user: RowProxy, group: RowProxy, other_user: RowProxy
+    asyncpg_engine: AsyncEngine,
+    connection: SAConnection,
+    user: RowProxy,
+    group: RowProxy,
+    other_user: RowProxy,
 ):
     conn = connection
-    tags_repo = TagsRepo(user_id=user.id)
+    tags_repo = TagsRepo(asyncpg_engine)
 
     tag_1 = await tags_repo.create(
-        conn,
+        user_id=user.id,
         name="T1",
         description="my first tag",
         color="pink",
@@ -546,6 +671,11 @@ def _check(func_smt, **kwargs):
     service_key = "simcore/services/comp/isolve"
     service_version = "2.0.85"
 
+    _check(
+        list_tags_stmt,
+        user_id=user_id,
+    )
+
     _check(
         get_tag_stmt,
         user_id=user_id,
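
The hunks above move the tags tests to user-scoped calls; the rule the comments describe is that a user's effective access to a tag is the most-permissive union of the grants of all groups the user belongs to. A minimal pure-Python sketch of that aggregation rule (illustration only, not the repository's SQL implementation; the helper name is made up):

def aggregate_access(rows: list[dict]) -> dict:
    # OR the per-group grants: any group granting a permission is enough
    return {
        "read": any(r["read"] for r in rows),
        "write": any(r["write"] for r in rows),
        "delete": any(r["delete"] for r in rows),
    }

# primary group grants write=False, standard group grants write=True -> effective write=True
assert aggregate_access(
    [
        {"read": True, "write": False, "delete": True},
        {"read": True, "write": True, "delete": False},
    ]
) == {"read": True, "write": True, "delete": True}
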
diff --git a/packages/pytest-simcore/src/pytest_simcore/aws_server.py b/packages/pytest-simcore/src/pytest_simcore/aws_server.py
index 74f007973c5d..25e9bb6e83e0 100644
--- a/packages/pytest-simcore/src/pytest_simcore/aws_server.py
+++ b/packages/pytest-simcore/src/pytest_simcore/aws_server.py
@@ -8,12 +8,12 @@
 import pytest
 import requests
 from aiohttp.test_utils import unused_port
+from common_library.pydantic_basic_types import IDStr
 from faker import Faker
 from models_library.utils.fastapi_encoders import jsonable_encoder
 from moto.server import ThreadedMotoServer
 from pydantic import SecretStr
 from pytest_mock.plugin import MockerFixture
-from settings_library.basic_types import IDStr
 from settings_library.ec2 import EC2Settings
 from settings_library.s3 import S3Settings
 from settings_library.ssm import SSMSettings
diff --git a/packages/pytest-simcore/src/pytest_simcore/dev_vendors_compose.py b/packages/pytest-simcore/src/pytest_simcore/dev_vendors_compose.py
new file mode 100644
index 000000000000..178e125b279b
--- /dev/null
+++ b/packages/pytest-simcore/src/pytest_simcore/dev_vendors_compose.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from .helpers.docker import run_docker_compose_config
+
+
+@pytest.fixture(scope="module")
+def dev_vendors_docker_compose(
+    osparc_simcore_root_dir: Path,
+    osparc_simcore_scripts_dir: Path,
+    env_file_for_testing: Path,
+    temp_folder: Path,
+) -> dict[str, Any]:
+    docker_compose_path = (
+        osparc_simcore_root_dir / "services" / "docker-compose-dev-vendors.yml"
+    )
+    assert docker_compose_path.exists()
+
+    return run_docker_compose_config(
+        project_dir=osparc_simcore_root_dir / "services",
+        scripts_dir=osparc_simcore_scripts_dir,
+        docker_compose_paths=docker_compose_path,
+        env_file_path=env_file_for_testing,
+        destination_path=temp_folder / "ops_docker_compose.yml",
+    )
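
The fixture above only becomes available to a test module once the plugin is declared; a minimal consumer mirrors the test added further down in this same diff (plugin and fixture names taken from there, the test body is just a sketch):

pytest_plugins = [
    "pytest_simcore.dev_vendors_compose",
    "pytest_simcore.docker_compose",
    "pytest_simcore.repository_paths",
]


def test_services_are_defined(dev_vendors_docker_compose: dict):
    # the fixture returns the resolved docker compose configuration as a dict
    assert "services" in dev_vendors_docker_compose
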
diff --git a/packages/pytest-simcore/src/pytest_simcore/faker_payments_data.py b/packages/pytest-simcore/src/pytest_simcore/faker_payments_data.py
index 3f4058b72e97..4f19e15eec40 100644
--- a/packages/pytest-simcore/src/pytest_simcore/faker_payments_data.py
+++ b/packages/pytest-simcore/src/pytest_simcore/faker_payments_data.py
@@ -18,8 +18,8 @@
 from typing import Any
 
 import pytest
+from common_library.pydantic_basic_types import IDStr
 from faker import Faker
-from models_library.basic_types import IDStr
 from models_library.payments import StripeInvoiceID
 from models_library.products import ProductName
 from models_library.users import UserID
diff --git a/packages/pytest-simcore/src/pytest_simcore/faker_users_data.py b/packages/pytest-simcore/src/pytest_simcore/faker_users_data.py
index 4e59b6db93a4..7a928193d5bd 100644
--- a/packages/pytest-simcore/src/pytest_simcore/faker_users_data.py
+++ b/packages/pytest-simcore/src/pytest_simcore/faker_users_data.py
@@ -11,8 +11,8 @@
 from typing import Any
 
 import pytest
+from common_library.pydantic_basic_types import IDStr
 from faker import Faker
-from models_library.basic_types import IDStr
 from models_library.users import UserID
 from pydantic import EmailStr, TypeAdapter
 
diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/faker_factories.py b/packages/pytest-simcore/src/pytest_simcore/helpers/faker_factories.py
index f51a5d8211be..78da55218273 100644
--- a/packages/pytest-simcore/src/pytest_simcore/helpers/faker_factories.py
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/faker_factories.py
@@ -235,7 +235,6 @@ def random_product(
             license_url=fake.url(),
             invitation_url=fake.url(),
             invitation_form=fake.boolean(),
-            has_landing_page=fake.boolean(),
             address=fake.address().replace("\n", ". "),
         ),
         "registration_email_template": registration_email_template,
diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/logging_tools.py b/packages/pytest-simcore/src/pytest_simcore/helpers/logging_tools.py
index 427117749aa8..2bb29562d755 100644
--- a/packages/pytest-simcore/src/pytest_simcore/helpers/logging_tools.py
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/logging_tools.py
@@ -133,14 +133,14 @@ def log_context(
     else:
         ctx_msg = msg
 
-    started_time = datetime.datetime.now(tz=datetime.timezone.utc)
+    started_time = datetime.datetime.now(tz=datetime.UTC)
     try:
         DynamicIndentFormatter.cls_increase_indent()
 
         logger.log(level, ctx_msg.starting, *args, **kwargs)
         with _increased_logger_indent(logger):
             yield SimpleNamespace(logger=logger, messages=ctx_msg)
-        elapsed_time = datetime.datetime.now(tz=datetime.timezone.utc) - started_time
+        elapsed_time = datetime.datetime.now(tz=datetime.UTC) - started_time
         done_message = (
             f"{ctx_msg.done} ({_timedelta_as_minute_second_ms(elapsed_time)})"
         )
@@ -152,7 +152,7 @@ def log_context(
         )
 
     except:
-        elapsed_time = datetime.datetime.now(tz=datetime.timezone.utc) - started_time
+        elapsed_time = datetime.datetime.now(tz=datetime.UTC) - started_time
         error_message = (
             f"{ctx_msg.raised} ({_timedelta_as_minute_second_ms(elapsed_time)})"
         )
diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/playwright.py b/packages/pytest-simcore/src/pytest_simcore/helpers/playwright.py
index bec851c4d33e..38539f0d7fb7 100644
--- a/packages/pytest-simcore/src/pytest_simcore/helpers/playwright.py
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/playwright.py
@@ -3,12 +3,14 @@
 import logging
 import re
 from collections import defaultdict
-from contextlib import ExitStack
+from collections.abc import Generator, Iterator
 from dataclasses import dataclass, field
 from enum import Enum, unique
-from typing import Any, Final, Generator
+from typing import Any, Final
 
-from playwright.sync_api import FrameLocator, Page, Request, WebSocket
+from playwright.sync_api import FrameLocator, Page, Request
+from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
+from playwright.sync_api import WebSocket
 from pytest_simcore.helpers.logging_tools import log_context
 
 SECOND: Final[int] = 1000
@@ -58,6 +60,7 @@ class NodeProgressType(str, Enum):
     SERVICE_OUTPUTS_PULLING = "SERVICE_OUTPUTS_PULLING"
     SERVICE_STATE_PULLING = "SERVICE_STATE_PULLING"
     SERVICE_IMAGES_PULLING = "SERVICE_IMAGES_PULLING"
+    SERVICE_CONTAINERS_STARTING = "SERVICE_CONTAINERS_STARTING"
 
     @classmethod
     def required_types_for_started_service(cls) -> set["NodeProgressType"]:
@@ -67,6 +70,7 @@ def required_types_for_started_service(cls) -> set["NodeProgressType"]:
             NodeProgressType.SERVICE_OUTPUTS_PULLING,
             NodeProgressType.SERVICE_STATE_PULLING,
             NodeProgressType.SERVICE_IMAGES_PULLING,
+            NodeProgressType.SERVICE_CONTAINERS_STARTING,
         }
 
 
@@ -219,21 +223,28 @@ def __call__(self, message: str) -> bool:
                         self._current_progress[
                             node_progress_event.progress_type
                         ] = new_progress
+
                         self.logger.info(
-                            "current startup progress: %s",
+                            "Current startup progress [expected number of node-progress-types=%d]: %s",
+                            len(NodeProgressType.required_types_for_started_service()),
                             f"{json.dumps({k:round(v,1) for k,v in self._current_progress.items()})}",
                         )
 
-                return all(
-                    progress_type in self._current_progress
-                    for progress_type in NodeProgressType.required_types_for_started_service()
-                ) and all(
+                return self.got_expected_node_progress_types() and all(
                     round(progress, 1) == 1.0
                     for progress in self._current_progress.values()
                 )
-
         return False
 
+    def got_expected_node_progress_types(self):
+        return all(
+            progress_type in self._current_progress
+            for progress_type in NodeProgressType.required_types_for_started_service()
+        )
+
+    def get_current_progress(self):
+        return self._current_progress.values()
+
 
 def wait_for_pipeline_state(
     current_state: RunningState,
@@ -261,28 +272,37 @@ def wait_for_pipeline_state(
     return current_state
 
 
-def on_web_socket_default_handler(ws) -> None:
-    """Usage
+@contextlib.contextmanager
+def web_socket_default_log_handler(web_socket: WebSocket) -> Iterator[None]:
 
-    from pytest_simcore.playwright_utils import on_web_socket_default_handler
+    try:
+        with log_context(
+            logging.DEBUG,
+            msg="handle websocket message (set to --log-cli-level=DEBUG level if you wanna see all of them)",
+        ) as ctx:
 
-    page.on("websocket", on_web_socket_default_handler)
+            def on_framesent(payload: str | bytes) -> None:
+                ctx.logger.debug("⬇️ Frame sent: %s", payload)
 
-    """
-    stack = ExitStack()
-    ctx = stack.enter_context(
-        log_context(
-            logging.INFO,
-            (
-                f"WebSocket opened: {ws.url}",
-                "WebSocket closed",
-            ),
-        )
-    )
+            def on_framereceived(payload: str | bytes) -> None:
+                ctx.logger.debug("⬆️ Frame received: %s", payload)
+
+            def on_close(payload: WebSocket) -> None:
+                ctx.logger.warning("⚠️ Websocket closed: %s", payload)
+
+            def on_socketerror(error_msg: str) -> None:
+                ctx.logger.error("❌ Websocket error: %s", error_msg)
 
-    ws.on("framesent", lambda payload: ctx.logger.info("⬇️ %s", payload))
-    ws.on("framereceived", lambda payload: ctx.logger.info("⬆️ %s", payload))
-    ws.on("close", lambda payload: stack.close())  # noqa: ARG005
+            web_socket.on("framesent", on_framesent)
+            web_socket.on("framereceived", on_framereceived)
+            web_socket.on("close", on_close)
+            web_socket.on("socketerror", on_socketerror)
+            yield
+    finally:
+        web_socket.remove_listener("framesent", on_framesent)
+        web_socket.remove_listener("framereceived", on_framereceived)
+        web_socket.remove_listener("close", on_close)
+        web_socket.remove_listener("socketerror", on_socketerror)
 
 
 def _node_started_predicate(request: Request) -> bool:
@@ -316,10 +336,23 @@ def expected_service_running(
     with log_context(logging.INFO, msg="Waiting for node to run") as ctx:
         waiter = SocketIONodeProgressCompleteWaiter(node_id=node_id, logger=ctx.logger)
         service_running = ServiceRunning(iframe_locator=None)
-        with websocket.expect_event("framereceived", waiter, timeout=timeout):
-            if press_start_button:
-                _trigger_service_start(page, node_id)
-            yield service_running
+
+        try:
+
+            with websocket.expect_event("framereceived", waiter, timeout=timeout):
+                if press_start_button:
+                    _trigger_service_start(page, node_id)
+
+                yield service_running
+
+        except PlaywrightTimeoutError:
+            if waiter.got_expected_node_progress_types():
+                ctx.logger.warning(
+                    "⚠️ Progress bar didn't receive 100 percent but all expected node-progress-types are in place: %s ⚠️",  # https://github.com/ITISFoundation/osparc-simcore/issues/6449
+                    waiter.get_current_progress(),
+                )
+            else:
+                raise
 
     service_running.iframe_locator = page.frame_locator(
         f'[osparc-test-id="iframe_{node_id}"]'
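
The old on_web_socket_default_handler is replaced by a context manager, so callers now attach and detach the logging hooks explicitly. A rough standalone usage sketch (the target URL and the logging setup are assumptions, not taken from this repository):

import logging

from playwright.sync_api import sync_playwright
from pytest_simcore.helpers.playwright import web_socket_default_log_handler

logging.basicConfig(level=logging.DEBUG)

with sync_playwright() as playwright:
    page = playwright.chromium.launch().new_page()
    with page.expect_websocket() as ws_info:
        page.goto("http://127.0.0.1:9081")  # assumed locally deployed front-end
    with web_socket_default_log_handler(ws_info.value):
        page.wait_for_timeout(5_000)  # frames sent/received in this window are logged at DEBUG
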
diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/playwright_sim4life.py b/packages/pytest-simcore/src/pytest_simcore/helpers/playwright_sim4life.py
index cb0d4089c3e1..ddbd444c5f61 100644
--- a/packages/pytest-simcore/src/pytest_simcore/helpers/playwright_sim4life.py
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/playwright_sim4life.py
@@ -2,22 +2,24 @@
 import logging
 import re
 from dataclasses import dataclass
-from typing import Dict, Final, Union
+from typing import Final, TypedDict
 
 import arrow
 from playwright.sync_api import FrameLocator, Page, WebSocket, expect
+from pydantic import TypeAdapter  # pylint: disable=no-name-in-module
+from pydantic import ByteSize
 
 from .logging_tools import log_context
 from .playwright import (
-    SECOND,
     MINUTE,
+    SECOND,
     SOCKETIO_MESSAGE_PREFIX,
     SocketIOEvent,
     decode_socketio_42_message,
     wait_for_service_running,
 )
 
-_S4L_STREAMING_ESTABLISHMENT_MAX_TIME: Final[int] = 15 * SECOND
+_S4L_STREAMING_ESTABLISHMENT_MAX_TIME: Final[int] = 30 * SECOND
 _S4L_SOCKETIO_REGEX: Final[re.Pattern] = re.compile(
     r"^(?P[^:]+)://(?P[^\.]+)\.services\.(?P[^\/]+)\/socket\.io\/.+$"
 )
@@ -28,6 +30,7 @@
     _EC2_STARTUP_MAX_WAIT_TIME + _S4L_DOCKER_PULLING_MAX_TIME + _S4L_MAX_STARTUP_TIME
 )
 _S4L_STARTUP_SCREEN_MAX_TIME: Final[int] = 45 * SECOND
+_S4L_COPY_WORKSPACE_TIME: Final[int] = 60 * SECOND
 
 
 @dataclass(kw_only=True)
@@ -62,7 +65,7 @@ def __call__(self, message: str) -> bool:
                     self._initial_bit_rate_time = arrow.utcnow().datetime
                     self.logger.info(
                         "%s",
-                        f"{self._initial_bit_rate=} at {self._initial_bit_rate_time.isoformat()}",
+                        f"{TypeAdapter(ByteSize).validate_python(self._initial_bit_rate).human_readable()}/s at {self._initial_bit_rate_time.isoformat()}",
                     )
                     return False
 
@@ -77,14 +80,26 @@ def __call__(self, message: str) -> bool:
                     bitrate_test = bool(self._initial_bit_rate != current_bitrate)
                     self.logger.info(
                         "%s",
-                        f"{current_bitrate=} after {elapsed_time=}: {'good!' if bitrate_test else 'failed! bitrate did not change! TIP: talk with MaG about underwater cables!'}",
+                        f"{TypeAdapter(ByteSize).validate_python(current_bitrate).human_readable()}/s after {elapsed_time=}: {'good!' if bitrate_test else 'failed! bitrate did not change! TIP: talk with MaG about underwater cables!'}",
                     )
                     return bitrate_test
 
         return False
 
 
-def launch_S4L(page: Page, node_id, log_in_and_out: WebSocket, autoscaled: bool) -> Dict[str, Union[WebSocket, FrameLocator]]:
+class WaitForS4LDict(TypedDict):
+    websocket: WebSocket
+    iframe: FrameLocator
+
+
+def wait_for_launched_s4l(
+    page: Page,
+    node_id,
+    log_in_and_out: WebSocket,
+    *,
+    autoscaled: bool,
+    copy_workspace: bool,
+) -> WaitForS4LDict:
     with log_context(logging.INFO, "launch S4L") as ctx:
         predicate = S4LWaitForWebsocket(logger=ctx.logger)
         with page.expect_websocket(
@@ -95,6 +110,7 @@ def launch_S4L(page: Page, node_id, log_in_and_out: WebSocket, autoscaled: bool)
                 if autoscaled
                 else _S4L_MAX_STARTUP_TIME
             )
+            + (_S4L_COPY_WORKSPACE_TIME if copy_workspace else 0)
             + 10 * SECOND,
         ) as ws_info:
             s4l_iframe = wait_for_service_running(
@@ -105,18 +121,19 @@ def launch_S4L(page: Page, node_id, log_in_and_out: WebSocket, autoscaled: bool)
                     _S4L_AUTOSCALED_MAX_STARTUP_TIME
                     if autoscaled
                     else _S4L_MAX_STARTUP_TIME
-                ),
+                )
+                + (_S4L_COPY_WORKSPACE_TIME if copy_workspace else 0),
                 press_start_button=False,
             )
         s4l_websocket = ws_info.value
         ctx.logger.info("acquired S4L websocket!")
         return {
             "websocket": s4l_websocket,
-            "iframe" : s4l_iframe,
+            "iframe": s4l_iframe,
         }
 
 
-def interact_with_S4L(page: Page, s4l_iframe: FrameLocator) -> None:
+def interact_with_s4l(page: Page, s4l_iframe: FrameLocator) -> None:
     # Wait until grid is shown
     # NOTE: the startup screen should disappear very fast after the websocket was acquired
     with log_context(logging.INFO, "Interact with S4l"):
@@ -124,7 +141,9 @@ def interact_with_S4L(page: Page, s4l_iframe: FrameLocator) -> None:
     page.wait_for_timeout(3000)
 
 
-def check_video_streaming(page: Page, s4l_iframe: FrameLocator, s4l_websocket: WebSocket) -> None:
+def check_video_streaming(
+    page: Page, s4l_iframe: FrameLocator, s4l_websocket: WebSocket
+) -> None:
     with log_context(logging.INFO, "Check videostreaming works") as ctx:
         waiter = _S4LSocketIOCheckBitRateIncreasesMessagePrinter(
             observation_time=datetime.timedelta(
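
The bitrate log messages above now go through pydantic's ByteSize so the numbers read as sizes instead of raw integers; a quick illustration of what that call produces (the sample value is arbitrary):

from pydantic import ByteSize, TypeAdapter

bitrate = 1_500_000  # bytes per second, arbitrary sample value
print(f"{TypeAdapter(ByteSize).validate_python(bitrate).human_readable()}/s")  # -> 1.4MiB/s
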
diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/pydantic_extension.py b/packages/pytest-simcore/src/pytest_simcore/helpers/pydantic_extension.py
new file mode 100644
index 000000000000..c1252ed8bb42
--- /dev/null
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/pydantic_extension.py
@@ -0,0 +1,34 @@
+from pydantic import SecretStr
+
+
+def _mask(value):
+    """
+    Mask the password, showing only the first and last characters,
+    or only '*' characters for very short passwords
+    """
+    if len(value) > 2:
+        masked_value = value[0] + "*" * (len(value) - 2) + value[-1]
+    else:
+        # In case of very short passwords
+        masked_value = "*" * len(value)
+    return masked_value
+
+
+def _hash(value):
+    """Uses hash number to mask the password"""
+    return f"hash:{hash(value)}"
+
+
+class Secret4TestsStr(SecretStr):
+    """Prints a hint of the secret
+    TIP: Can be handy for testing
+    """
+
+    def _display(self) -> str | bytes:
+        # SEE overrides _SecretBase._display
+        value = self.get_secret_value()
+        return _mask(value) if value else ""
+
+
+assert str(Secret4TestsStr("123456890")) == "1*******0"
+assert "1*******0" in repr(Secret4TestsStr("123456890"))
diff --git a/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py b/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
index e2f7654d3d0c..37694b41d598 100644
--- a/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
+++ b/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
@@ -45,16 +45,17 @@ async def storage_service(
 ) -> URL:
     await wait_till_storage_responsive(storage_endpoint)
 
-    def correct_ip(url: AnyUrl):
+    def correct_ip(url: str):
+        any_url = AnyUrl(url)
         assert storage_endpoint.host is not None
         assert storage_endpoint.port is not None
 
         return AnyUrl.build(
-            scheme=url.scheme,
+            scheme=any_url.scheme,
             host=storage_endpoint.host,
-            port=f"{storage_endpoint.port}",
-            path=url.path,
-            query=url.query,
+            port=storage_endpoint.port,
+            path=any_url.path,
+            query=any_url.query,
         )
 
     # NOTE: Mock to ensure container IP agrees with host IP when testing
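
The rewritten correct_ip follows the pydantic-v2 URL API: the incoming string is parsed into an AnyUrl and rebuilt with an integer port. A small sketch of the same pattern (the host and port values are made up):

from pydantic import AnyUrl

any_url = AnyUrl("http://storage:8080/v0/locations?user_id=1")
rebuilt = AnyUrl.build(
    scheme=any_url.scheme,
    host="127.0.0.1",  # assumed host-side address
    port=30023,  # assumed published port, passed as an int (no longer a string)
    path=any_url.path,
    query=any_url.query,
)
print(rebuilt)  # same URL, now pointing at the host-mapped endpoint
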
diff --git a/packages/pytest-simcore/src/pytest_simcore/simcore_webserver_projects_rest_api.py b/packages/pytest-simcore/src/pytest_simcore/simcore_webserver_projects_rest_api.py
index 99452834be15..ac017ac4b557 100644
--- a/packages/pytest-simcore/src/pytest_simcore/simcore_webserver_projects_rest_api.py
+++ b/packages/pytest-simcore/src/pytest_simcore/simcore_webserver_projects_rest_api.py
@@ -73,6 +73,8 @@ def request_desc(self) -> str:
                 "locked": {"value": False, "status": "CLOSED"},
                 "state": {"value": "NOT_STARTED"},
             },
+            "workspaceId": None,
+            "folderId": None,
         },
         "error": None,
     },
@@ -106,6 +108,8 @@ def request_desc(self) -> str:
                 "locked": {"value": False, "status": "CLOSED"},
                 "state": {"value": "NOT_STARTED"},
             },
+            "workspaceId": None,
+            "folderId": None,
         }
     },
 )
@@ -272,6 +276,8 @@ def request_desc(self) -> str:
                 },
                 "state": {"value": "NOT_STARTED"},
             },
+            "workspaceId": None,
+            "folderId": None,
         }
     },
 )
@@ -525,6 +531,8 @@ def request_desc(self) -> str:
                 },
                 "state": {"value": "NOT_STARTED"},
             },
+            "workspaceId": None,
+            "folderId": None,
         }
     },
 )
@@ -698,6 +706,8 @@ def request_desc(self) -> str:
                     "locked": {"value": False, "status": "CLOSED"},
                     "state": {"value": "NOT_STARTED"},
                 },
+                "workspaceId": None,
+                "folderId": None,
             }
         ],
     },
@@ -946,6 +956,8 @@ def request_desc(self) -> str:
                 "locked": {"value": False, "status": "CLOSED"},
                 "state": {"value": "NOT_STARTED"},
             },
+            "workspaceId": None,
+            "folderId": None,
         }
     },
     status_code=HTTPStatus.CREATED,  # 201
diff --git a/packages/pytest-simcore/tests/test_dev_vendors_compose.py b/packages/pytest-simcore/tests/test_dev_vendors_compose.py
new file mode 100644
index 000000000000..2a0d2b17f21e
--- /dev/null
+++ b/packages/pytest-simcore/tests/test_dev_vendors_compose.py
@@ -0,0 +1,40 @@
+import json
+from typing import Final
+
+from settings_library.utils_session import DEFAULT_SESSION_COOKIE_NAME
+
+pytest_plugins = [
+    "pytest_simcore.dev_vendors_compose",
+    "pytest_simcore.docker_compose",
+    "pytest_simcore.repository_paths",
+]
+
+
+_SERVICE_TO_MIDDLEWARE_MAPPING: Final[dict[str, str]] = {
+    "manual": "pytest-simcore_manual-auth"
+}
+
+
+def test_dev_vendors_docker_compose_auth_enabled(
+    dev_vendors_docker_compose: dict[str, str]
+):
+
+    assert isinstance(dev_vendors_docker_compose["services"], dict)
+    for service_name, service_spec in dev_vendors_docker_compose["services"].items():
+        print(
+            f"Checking vendor service '{service_name}'\n{json.dumps(service_spec, indent=2)}"
+        )
+        labels = service_spec["deploy"]["labels"]
+
+        # NOTE: when adding a new service it should also be added to the mapping
+        auth_middleware_name = _SERVICE_TO_MIDDLEWARE_MAPPING[service_name]
+
+        prefix = f"traefik.http.middlewares.{auth_middleware_name}.forwardauth"
+
+        assert labels[f"{prefix}.trustForwardHeader"] == "true"
+        assert "http://webserver:8080/v0/auth:check" in labels[f"{prefix}.address"]
+        assert DEFAULT_SESSION_COOKIE_NAME in labels[f"{prefix}.authResponseHeaders"]
+        assert (
+            auth_middleware_name
+            in labels["traefik.http.routers.pytest-simcore_manual.middlewares"]
+        )
diff --git a/packages/service-integration/requirements/_base.in b/packages/service-integration/requirements/_base.in
index dc7e5dd4a6c6..213a27f4c131 100644
--- a/packages/service-integration/requirements/_base.in
+++ b/packages/service-integration/requirements/_base.in
@@ -14,3 +14,4 @@ jsonschema # pytest-plugin
 pytest # pytest-plugin
 pyyaml
 typer[all]
+yarl
diff --git a/packages/service-integration/requirements/_base.txt b/packages/service-integration/requirements/_base.txt
index bbd971710638..a465920e7ba1 100644
--- a/packages/service-integration/requirements/_base.txt
+++ b/packages/service-integration/requirements/_base.txt
@@ -11,7 +11,7 @@ attrs==24.2.0
     #   referencing
 binaryornot==0.4.4
     # via cookiecutter
-certifi==2024.7.4
+certifi==2024.8.30
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
@@ -33,10 +33,11 @@ docker==7.1.0
     # via -r requirements/_base.in
 email-validator==2.2.0
     # via pydantic
-idna==3.7
+idna==3.10
     # via
     #   email-validator
     #   requests
+    #   yarl
 iniconfig==2.0.0
     # via pytest
 jinja2==3.1.4
@@ -59,6 +60,8 @@ markupsafe==2.1.5
     # via jinja2
 mdurl==0.1.2
     # via markdown-it-py
+multidict==6.1.0
+    # via yarl
 orjson==3.10.7
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -83,7 +86,7 @@ pydantic-settings==2.5.2
     # via -r requirements/../../../packages/models-library/requirements/_base.in
 pygments==2.18.0
     # via rich
-pytest==8.3.2
+pytest==8.3.3
     # via -r requirements/_base.in
 python-dateutil==2.9.0.post0
     # via arrow
@@ -105,7 +108,7 @@ requests==2.32.3
     # via
     #   cookiecutter
     #   docker
-rich==13.7.1
+rich==13.8.1
     # via
     #   cookiecutter
     #   typer
@@ -119,18 +122,20 @@ six==1.16.0
     # via python-dateutil
 text-unidecode==1.3
     # via python-slugify
-typer==0.12.4
+typer==0.12.5
     # via -r requirements/_base.in
-types-python-dateutil==2.9.0.20240821
+types-python-dateutil==2.9.0.20240906
     # via arrow
 typing-extensions==4.12.2
     # via
     #   pydantic
     #   pydantic-core
     #   typer
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   docker
     #   requests
+yarl==1.12.1
+    # via -r requirements/_base.in
diff --git a/packages/service-integration/requirements/_test.txt b/packages/service-integration/requirements/_test.txt
index fa7046980915..dad76c6a0cd4 100644
--- a/packages/service-integration/requirements/_test.txt
+++ b/packages/service-integration/requirements/_test.txt
@@ -19,7 +19,7 @@ pluggy==1.5.0
     # via
     #   -c requirements/_base.txt
     #   pytest
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -c requirements/_base.txt
     #   -r requirements/_test.in
@@ -44,15 +44,15 @@ rpds-py==0.20.0
     #   referencing
 termcolor==2.4.0
     # via pytest-sugar
-types-docker==7.1.0.20240821
+types-docker==7.1.0.20240827
     # via -r requirements/_test.in
 types-jsonschema==4.23.0.20240813
     # via -r requirements/_test.in
-types-pyyaml==6.0.12.20240808
+types-pyyaml==6.0.12.20240917
     # via -r requirements/_test.in
-types-requests==2.32.0.20240712
+types-requests==2.32.0.20240914
     # via types-docker
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
diff --git a/packages/service-integration/requirements/_tools.txt b/packages/service-integration/requirements/_tools.txt
index 354746f70e43..6cfab1a3f280 100644
--- a/packages/service-integration/requirements/_tools.txt
+++ b/packages/service-integration/requirements/_tools.txt
@@ -1,8 +1,8 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
     # via -r requirements/../../../requirements/devenv.txt
-build==1.2.1
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
@@ -17,9 +17,9 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
@@ -27,7 +27,7 @@ isort==5.13.2
     #   pylint
 mccabe==0.7.0
     # via pylint
-mypy==1.11.1
+mypy==1.11.2
     # via -r requirements/../../../requirements/devenv.txt
 mypy-extensions==1.0.0
     # via
@@ -47,14 +47,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.0
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -65,9 +65,9 @@ pyyaml==6.0.2
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
     #   pre-commit
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
-setuptools==73.0.1
+setuptools==75.1.0
     # via pip-tools
 tomlkit==0.13.2
     # via pylint
@@ -75,7 +75,7 @@ typing-extensions==4.12.2
     # via
     #   -c requirements/_base.txt
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
 wheel==0.44.0
     # via pip-tools
diff --git a/packages/service-integration/src/service_integration/cli/_compose_spec.py b/packages/service-integration/src/service_integration/cli/_compose_spec.py
index f6d9b16be9e2..afccc0e268e2 100644
--- a/packages/service-integration/src/service_integration/cli/_compose_spec.py
+++ b/packages/service-integration/src/service_integration/cli/_compose_spec.py
@@ -8,6 +8,7 @@
 import yaml
 from models_library.utils.labels_annotations import to_labels
 from rich.console import Console
+from yarl import URL
 
 from ..compose_spec_model import ComposeSpecification
 from ..errors import UndefinedOciImageSpecError
@@ -34,6 +35,13 @@ def _run_git(*args) -> str:
     ).stdout.strip()
 
 
+def _strip_credentials(url: str) -> str:
+    if (yarl_url := URL(url)) and yarl_url.is_absolute():
+        stripped_url = URL(url).with_user(None).with_password(None)
+        return f"{stripped_url}"
+    return url
+
+
 def _run_git_or_empty_string(*args) -> str:
     try:
         return _run_git(*args)
@@ -118,8 +126,8 @@ def create_docker_compose_image_spec(
     extra_labels[f"{LS_LABEL_PREFIX}.vcs-ref"] = _run_git_or_empty_string(
         "rev-parse", "HEAD"
     )
-    extra_labels[f"{LS_LABEL_PREFIX}.vcs-url"] = _run_git_or_empty_string(
-        "config", "--get", "remote.origin.url"
+    extra_labels[f"{LS_LABEL_PREFIX}.vcs-url"] = _strip_credentials(
+        _run_git_or_empty_string("config", "--get", "remote.origin.url")
     )
 
     return create_image_spec(
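
For context on _strip_credentials above: yarl clears both user and password when the user is set to None, and scp-style remotes are left untouched because they are not absolute URLs. A tiny check mirroring the test cases added right below:

from yarl import URL

url = URL("https://user:password@example.com/some/repo.git")
assert str(url.with_user(None).with_password(None)) == "https://example.com/some/repo.git"

# scp-like remotes have no scheme/host, so is_absolute() is False and they pass through unchanged
assert not URL("git@git.speag.com:some/repo.git").is_absolute()
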
diff --git a/packages/service-integration/tests/test_cli__compose_spec.py b/packages/service-integration/tests/test_cli__compose_spec.py
new file mode 100644
index 000000000000..5fe98689a14e
--- /dev/null
+++ b/packages/service-integration/tests/test_cli__compose_spec.py
@@ -0,0 +1,28 @@
+import pytest
+from service_integration.cli._compose_spec import _strip_credentials
+
+
+@pytest.mark.parametrize(
+    "url, expected_url",
+    [
+        (
+            "schema.veshttps://user:password@example.com/some/repo.git",
+            "schema.veshttps://example.com/some/repo.git",
+        ),
+        (
+            "https://user:password@example.com/some/repo.git",
+            "https://example.com/some/repo.git",
+        ),
+        (
+            "ssh://user:password@example.com/some/repo.git",
+            "ssh://example.com/some/repo.git",
+        ),
+        (
+            "git@git.speag.com:some/repo.git",
+            "git@git.speag.com:some/repo.git",
+        ),
+        ("any_str", "any_str"),
+    ],
+)
+def test__strip_credentials(url: str, expected_url: str):
+    assert _strip_credentials(url) == expected_url
diff --git a/packages/service-library/requirements/_aiohttp.in b/packages/service-library/requirements/_aiohttp.in
index 423432180e49..6b04b964a6c0 100644
--- a/packages/service-library/requirements/_aiohttp.in
+++ b/packages/service-library/requirements/_aiohttp.in
@@ -7,9 +7,11 @@
 
 aiohttp
 aiopg[sa]
-aiozipkin
 attrs
 jsonschema
 openapi-core
+opentelemetry-instrumentation-aiohttp-client
+opentelemetry-instrumentation-aiohttp-server
+opentelemetry-instrumentation-aiopg
 prometheus_client
 werkzeug
diff --git a/packages/service-library/requirements/_aiohttp.txt b/packages/service-library/requirements/_aiohttp.txt
index f97b5b35ee81..d3e27ed894b2 100644
--- a/packages/service-library/requirements/_aiohttp.txt
+++ b/packages/service-library/requirements/_aiohttp.txt
@@ -2,17 +2,17 @@ aiohappyeyeballs==2.4.0
     # via aiohttp
 aiohttp==3.10.5
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/_aiohttp.in
-    #   aiozipkin
 aiopg==1.4.0
     # via -r requirements/_aiohttp.in
 aiosignal==1.3.1
     # via aiohttp
-aiozipkin==1.1.1
-    # via -r requirements/_aiohttp.in
 async-timeout==4.0.3
     # via aiopg
 attrs==24.2.0
@@ -21,22 +21,33 @@ attrs==24.2.0
     #   aiohttp
     #   jsonschema
     #   referencing
-certifi==2024.7.4
+certifi==2024.8.30
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   requests
 charset-normalizer==3.3.2
     # via requests
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-semantic-conventions
 frozenlist==1.4.1
     # via
     #   aiohttp
     #   aiosignal
-idna==3.7
+greenlet==3.1.1
+    # via sqlalchemy
+idna==3.10
     # via
     #   requests
     #   yarl
+importlib-metadata==8.4.0
+    # via opentelemetry-api
 isodate==0.6.1
     # via openapi-core
 jsonschema==4.23.0
@@ -57,13 +68,13 @@ lazy-object-proxy==1.10.0
     # via openapi-spec-validator
 markupsafe==2.1.5
     # via werkzeug
-more-itertools==10.4.0
+more-itertools==10.5.0
     # via openapi-core
-multidict==6.0.5
+multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
-openapi-core==0.19.3
+openapi-core==0.19.4
     # via -r requirements/_aiohttp.in
 openapi-schema-validator==0.6.2
     # via
@@ -71,11 +82,42 @@ openapi-schema-validator==0.6.2
     #   openapi-spec-validator
 openapi-spec-validator==0.7.1
     # via openapi-core
+opentelemetry-api==1.27.0
+    # via
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-aiohttp-client
+    #   opentelemetry-instrumentation-aiohttp-server
+    #   opentelemetry-instrumentation-aiopg
+    #   opentelemetry-instrumentation-dbapi
+    #   opentelemetry-semantic-conventions
+opentelemetry-instrumentation==0.48b0
+    # via
+    #   opentelemetry-instrumentation-aiohttp-client
+    #   opentelemetry-instrumentation-aiohttp-server
+    #   opentelemetry-instrumentation-aiopg
+    #   opentelemetry-instrumentation-dbapi
+opentelemetry-instrumentation-aiohttp-client==0.48b0
+    # via -r requirements/_aiohttp.in
+opentelemetry-instrumentation-aiohttp-server==0.48b0
+    # via -r requirements/_aiohttp.in
+opentelemetry-instrumentation-aiopg==0.48b0
+    # via -r requirements/_aiohttp.in
+opentelemetry-instrumentation-dbapi==0.48b0
+    # via opentelemetry-instrumentation-aiopg
+opentelemetry-semantic-conventions==0.48b0
+    # via
+    #   opentelemetry-instrumentation-aiohttp-client
+    #   opentelemetry-instrumentation-aiohttp-server
+    #   opentelemetry-instrumentation-dbapi
+opentelemetry-util-http==0.48b0
+    # via
+    #   opentelemetry-instrumentation-aiohttp-client
+    #   opentelemetry-instrumentation-aiohttp-server
 parse==1.20.2
     # via openapi-core
 pathable==0.4.3
     # via jsonschema-path
-prometheus-client==0.20.0
+prometheus-client==0.21.0
     # via -r requirements/_aiohttp.in
 psycopg2-binary==2.9.9
     # via
@@ -83,7 +125,10 @@ psycopg2-binary==2.9.9
     #   sqlalchemy
 pyyaml==6.0.2
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   jsonschema-path
@@ -101,25 +146,43 @@ rpds-py==0.20.0
     # via
     #   jsonschema
     #   referencing
+setuptools==75.1.0
+    # via opentelemetry-instrumentation
 six==1.16.0
     # via
     #   isodate
     #   rfc3339-validator
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   aiopg
-urllib3==2.2.2
+urllib3==2.2.3
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   requests
-werkzeug==3.0.3
+werkzeug==3.0.4
     # via
     #   -r requirements/_aiohttp.in
     #   openapi-core
-yarl==1.9.4
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-aiohttp-client
+    #   opentelemetry-instrumentation-aiohttp-server
+    #   opentelemetry-instrumentation-aiopg
+    #   opentelemetry-instrumentation-dbapi
+yarl==1.12.1
     # via aiohttp
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/packages/service-library/requirements/_base.in b/packages/service-library/requirements/_base.in
index e0e928cee52f..018f64574f29 100644
--- a/packages/service-library/requirements/_base.in
+++ b/packages/service-library/requirements/_base.in
@@ -10,15 +10,22 @@
 --requirement ../../../packages/settings-library/requirements/_base.in
 
 aio-pika
+aiocache
 aiodebug
 aiodocker
 aiofiles
 arrow # date/time
 faststream
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-instrumentation-requests
+opentelemetry-sdk
+psutil
 pydantic
 pyinstrument
 pyyaml
 redis
+repro-zipfile
 tenacity
 toolz
 tqdm
diff --git a/packages/service-library/requirements/_base.txt b/packages/service-library/requirements/_base.txt
index 6d0447e7c0e7..556fc10fa728 100644
--- a/packages/service-library/requirements/_base.txt
+++ b/packages/service-library/requirements/_base.txt
@@ -1,8 +1,10 @@
 aio-pika==9.4.3
     # via -r requirements/_base.in
+aiocache==0.12.2
+    # via -r requirements/_base.in
 aiodebug==2.3.0
     # via -r requirements/_base.in
-aiodocker==0.22.2
+aiodocker==0.23.0
     # via -r requirements/_base.in
 aiofiles==24.1.0
     # via -r requirements/_base.in
@@ -10,17 +12,20 @@ aiohappyeyeballs==2.4.0
     # via aiohttp
 aiohttp==3.10.5
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   aiodocker
-aiormq==6.8.0
+aiormq==6.8.1
     # via aio-pika
 aiosignal==1.3.1
     # via aiohttp
 annotated-types==0.7.0
     # via pydantic
-anyio==4.4.0
+anyio==4.6.0
     # via
     #   fast-depends
     #   faststream
@@ -33,25 +38,51 @@ attrs==24.2.0
     #   aiohttp
     #   jsonschema
     #   referencing
+certifi==2024.8.30
+    # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   requests
+charset-normalizer==3.3.2
+    # via requests
 click==8.1.7
     # via typer
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
 dnspython==2.6.1
     # via email-validator
 email-validator==2.2.0
     # via pydantic
-fast-depends==2.4.8
+fast-depends==2.4.11
     # via faststream
-faststream==0.5.18
+faststream==0.5.23
     # via -r requirements/_base.in
 frozenlist==1.4.1
     # via
     #   aiohttp
     #   aiosignal
-idna==3.7
+googleapis-common-protos==1.65.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+grpcio==1.66.1
+    # via opentelemetry-exporter-otlp-proto-grpc
+idna==3.10
     # via
     #   anyio
     #   email-validator
+    #   requests
     #   yarl
+importlib-metadata==8.4.0
+    # via opentelemetry-api
 jsonschema==4.23.0
     # via -r requirements/../../../packages/models-library/requirements/_base.in
 jsonschema-specifications==2023.7.1
@@ -60,24 +91,78 @@ markdown-it-py==3.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py
-multidict==6.0.5
+multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
+opentelemetry-api==1.27.0
+    # via
+    #   -r requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.27.0
+    # via -r requirements/_base.in
+opentelemetry-exporter-otlp-proto-common==1.27.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.27.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.27.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.48b0
+    # via opentelemetry-instrumentation-requests
+opentelemetry-instrumentation-requests==0.48b0
+    # via -r requirements/_base.in
+opentelemetry-proto==1.27.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.27.0
+    # via
+    #   -r requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.48b0
+    # via
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+opentelemetry-util-http==0.48b0
+    # via opentelemetry-instrumentation-requests
 orjson==3.10.7
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/../../../packages/models-library/requirements/_base.in
 pamqp==3.3.0
     # via aiormq
+protobuf==4.25.5
+    # via
+    #   googleapis-common-protos
+    #   opentelemetry-proto
+psutil==6.0.0
+    # via -r requirements/_base.in
 pydantic==2.9.2
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/common-library/requirements/_base.in
+    #   -r requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/_base.in
     #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/_base.in
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
     #   -r requirements/_base.in
     #   fast-depends
@@ -93,7 +178,7 @@ pydantic-settings==2.5.2
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
 pygments==2.18.0
     # via rich
-pyinstrument==4.7.2
+pyinstrument==4.7.3
     # via -r requirements/_base.in
 python-dateutil==2.9.0.post0
     # via arrow
@@ -101,13 +186,19 @@ python-dotenv==1.0.1
     # via pydantic-settings
 pyyaml==6.0.2
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/_base.in
 redis==5.0.8
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/_base.in
@@ -116,7 +207,11 @@ referencing==0.29.3
     #   -c requirements/./constraints.txt
     #   jsonschema
     #   jsonschema-specifications
-rich==13.7.1
+repro-zipfile==0.3.1
+    # via -r requirements/_base.in
+requests==2.32.3
+    # via opentelemetry-exporter-otlp-proto-http
+rich==13.8.1
     # via
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
     #   typer
@@ -124,6 +219,8 @@ rpds-py==0.20.0
     # via
     #   jsonschema
     #   referencing
+setuptools==75.1.0
+    # via opentelemetry-instrumentation
 shellingham==1.5.4
     # via typer
 six==1.16.0
@@ -136,21 +233,35 @@ toolz==0.12.1
     # via -r requirements/_base.in
 tqdm==4.66.5
     # via -r requirements/_base.in
-typer==0.12.4
-    # via
-    #   -r requirements/../../../packages/settings-library/requirements/_base.in
-    #   faststream
-types-python-dateutil==2.9.0.20240821
+typer==0.12.5
+    # via -r requirements/../../../packages/settings-library/requirements/_base.in
+types-python-dateutil==2.9.0.20240906
     # via arrow
 typing-extensions==4.12.2
     # via
     #   aiodebug
     #   faststream
+    #   opentelemetry-sdk
     #   pydantic
     #   pydantic-core
     #   typer
-yarl==1.9.4
+urllib3==2.2.3
+    # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   requests
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+yarl==1.12.1
     # via
     #   aio-pika
     #   aiohttp
     #   aiormq
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/packages/service-library/requirements/_fastapi.in b/packages/service-library/requirements/_fastapi.in
index 178938dde4f6..7b6a6bb2cf2e 100644
--- a/packages/service-library/requirements/_fastapi.in
+++ b/packages/service-library/requirements/_fastapi.in
@@ -8,6 +8,7 @@
 
 fastapi
 httpx
+opentelemetry-instrumentation-fastapi
 prometheus-client
 prometheus-fastapi-instrumentator
 uvicorn
diff --git a/packages/service-library/requirements/_fastapi.txt b/packages/service-library/requirements/_fastapi.txt
index abaaac8e1115..42dd6b7a5509 100644
--- a/packages/service-library/requirements/_fastapi.txt
+++ b/packages/service-library/requirements/_fastapi.txt
@@ -1,18 +1,27 @@
 annotated-types==0.7.0
     # via pydantic
-anyio==4.4.0
+anyio==4.6.0
     # via
     #   httpx
     #   starlette
-certifi==2024.7.4
+asgiref==3.8.1
+    # via opentelemetry-instrumentation-asgi
+certifi==2024.8.30
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   httpcore
     #   httpx
 click==8.1.7
     # via uvicorn
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-semantic-conventions
 fastapi==0.115.0
     # via
     #   -r requirements/_fastapi.in
@@ -23,17 +32,44 @@ h11==0.14.0
     #   uvicorn
 httpcore==1.0.5
     # via httpx
-httpx==0.27.0
+httpx==0.27.2
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/_fastapi.in
-idna==3.7
+idna==3.10
     # via
     #   anyio
     #   httpx
-prometheus-client==0.20.0
+importlib-metadata==8.4.0
+    # via opentelemetry-api
+opentelemetry-api==1.27.0
+    # via
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-semantic-conventions
+opentelemetry-instrumentation==0.48b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+opentelemetry-instrumentation-asgi==0.48b0
+    # via opentelemetry-instrumentation-fastapi
+opentelemetry-instrumentation-fastapi==0.48b0
+    # via -r requirements/_fastapi.in
+opentelemetry-semantic-conventions==0.48b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+opentelemetry-util-http==0.48b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+prometheus-client==0.21.0
     # via
     #   -r requirements/_fastapi.in
     #   prometheus-fastapi-instrumentator
@@ -41,19 +77,27 @@ prometheus-fastapi-instrumentator==6.1.0
     # via -r requirements/_fastapi.in
 pydantic==2.9.2
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   fastapi
 pydantic-core==2.23.4
     # via pydantic
+setuptools==75.1.0
+    # via opentelemetry-instrumentation
 sniffio==1.3.1
     # via
     #   anyio
     #   httpx
 starlette==0.38.5
     # via
+    #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   fastapi
@@ -64,3 +108,9 @@ typing-extensions==4.12.2
     #   pydantic-core
 uvicorn==0.30.6
     # via -r requirements/_fastapi.in
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/packages/service-library/requirements/_test.in b/packages/service-library/requirements/_test.in
index 5f85f1ff4d67..d3936487c758 100644
--- a/packages/service-library/requirements/_test.in
+++ b/packages/service-library/requirements/_test.in
@@ -17,8 +17,9 @@ coverage
 docker
 faker
 flaky
+numpy
 openapi-spec-validator
-psutil
+pillow
 pytest
 pytest-aiohttp
 pytest-asyncio
@@ -35,5 +36,6 @@ python-dotenv
 respx
 sqlalchemy[mypy]
 types_aiofiles
-types-psycopg2
 types_tqdm
+types-psutil
+types-psycopg2
diff --git a/packages/service-library/requirements/_test.txt b/packages/service-library/requirements/_test.txt
index 38282969f740..5b44ec09a3de 100644
--- a/packages/service-library/requirements/_test.txt
+++ b/packages/service-library/requirements/_test.txt
@@ -14,7 +14,7 @@ aiosignal==1.3.1
     #   -c requirements/_aiohttp.txt
     #   -c requirements/_base.txt
     #   aiohttp
-anyio==4.4.0
+anyio==4.6.0
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_fastapi.txt
@@ -29,10 +29,11 @@ attrs==24.2.0
     #   jsonschema
     #   pytest-docker
     #   referencing
-certifi==2024.7.4
+certifi==2024.8.30
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_aiohttp.txt
+    #   -c requirements/_base.txt
     #   -c requirements/_fastapi.txt
     #   httpcore
     #   httpx
@@ -40,6 +41,7 @@ certifi==2024.7.4
 charset-normalizer==3.3.2
     # via
     #   -c requirements/_aiohttp.txt
+    #   -c requirements/_base.txt
     #   requests
 coverage==7.6.1
     # via
@@ -49,7 +51,7 @@ docker==7.1.0
     # via -r requirements/_test.in
 execnet==2.1.1
     # via pytest-xdist
-faker==27.0.0
+faker==29.0.0
     # via -r requirements/_test.in
 flaky==3.8.1
     # via -r requirements/_test.in
@@ -59,6 +61,10 @@ frozenlist==1.4.1
     #   -c requirements/_base.txt
     #   aiohttp
     #   aiosignal
+greenlet==3.1.1
+    # via
+    #   -c requirements/_aiohttp.txt
+    #   sqlalchemy
 h11==0.14.0
     # via
     #   -c requirements/_fastapi.txt
@@ -67,14 +73,14 @@ httpcore==1.0.5
     # via
     #   -c requirements/_fastapi.txt
     #   httpx
-httpx==0.27.0
+httpx==0.27.2
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_fastapi.txt
     #   respx
 icdiff==2.0.7
     # via pytest-icdiff
-idna==3.7
+idna==3.10
     # via
     #   -c requirements/_aiohttp.txt
     #   -c requirements/_base.txt
@@ -105,16 +111,18 @@ lazy-object-proxy==1.10.0
     # via
     #   -c requirements/_aiohttp.txt
     #   openapi-spec-validator
-multidict==6.0.5
+multidict==6.1.0
     # via
     #   -c requirements/_aiohttp.txt
     #   -c requirements/_base.txt
     #   aiohttp
     #   yarl
-mypy==1.11.1
+mypy==1.11.2
     # via sqlalchemy
 mypy-extensions==1.0.0
     # via mypy
+numpy==2.1.1
+    # via -r requirements/_test.in
 openapi-schema-validator==0.6.2
     # via
     #   -c requirements/_aiohttp.txt
@@ -131,15 +139,15 @@ pathable==0.4.3
     # via
     #   -c requirements/_aiohttp.txt
     #   jsonschema-path
+pillow==10.4.0
+    # via -r requirements/_test.in
 pluggy==1.5.0
     # via pytest
 pprintpp==0.4.0
     # via pytest-icdiff
-psutil==6.0.0
-    # via -r requirements/_test.in
 py-cpuinfo==9.0.0
     # via pytest-benchmark
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -r requirements/_test.in
     #   pytest-aiohttp
@@ -201,6 +209,7 @@ referencing==0.29.3
 requests==2.32.3
     # via
     #   -c requirements/_aiohttp.txt
+    #   -c requirements/_base.txt
     #   docker
     #   jsonschema-path
 respx==0.21.1
@@ -228,7 +237,7 @@ sniffio==1.3.1
     #   anyio
     #   asgi-lifespan
     #   httpx
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_aiohttp.txt
@@ -239,6 +248,8 @@ termcolor==2.4.0
     # via pytest-sugar
 types-aiofiles==24.1.0.20240626
     # via -r requirements/_test.in
+types-psutil==6.0.0.20240901
+    # via -r requirements/_test.in
 types-psycopg2==2.9.21.20240819
     # via -r requirements/_test.in
 types-tqdm==4.66.0.20240417
@@ -249,13 +260,14 @@ typing-extensions==4.12.2
     #   -c requirements/_fastapi.txt
     #   mypy
     #   sqlalchemy2-stubs
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_aiohttp.txt
+    #   -c requirements/_base.txt
     #   docker
     #   requests
-yarl==1.9.4
+yarl==1.12.1
     # via
     #   -c requirements/_aiohttp.txt
     #   -c requirements/_base.txt
diff --git a/packages/service-library/requirements/_tools.txt b/packages/service-library/requirements/_tools.txt
index b54db6d8f5c7..dff0e09f6bbc 100644
--- a/packages/service-library/requirements/_tools.txt
+++ b/packages/service-library/requirements/_tools.txt
@@ -1,8 +1,8 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
     # via -r requirements/../../../requirements/devenv.txt
-build==1.2.1
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
@@ -17,9 +17,9 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
@@ -27,7 +27,7 @@ isort==5.13.2
     #   pylint
 mccabe==0.7.0
     # via pylint
-mypy==1.11.1
+mypy==1.11.2
     # via
     #   -c requirements/_test.txt
     #   -r requirements/../../../requirements/devenv.txt
@@ -49,14 +49,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.0
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -68,10 +68,12 @@ pyyaml==6.0.2
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   pre-commit
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
-setuptools==73.0.1
-    # via pip-tools
+setuptools==75.1.0
+    # via
+    #   -c requirements/_base.txt
+    #   pip-tools
 tomlkit==0.13.2
     # via pylint
 typing-extensions==4.12.2
@@ -79,7 +81,7 @@ typing-extensions==4.12.2
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
 wheel==0.44.0
     # via pip-tools
diff --git a/packages/service-library/src/servicelib/aiohttp/application_keys.py b/packages/service-library/src/servicelib/aiohttp/application_keys.py
index f98423bad05c..3958c860cb00 100644
--- a/packages/service-library/src/servicelib/aiohttp/application_keys.py
+++ b/packages/service-library/src/servicelib/aiohttp/application_keys.py
@@ -21,7 +21,7 @@
 APP_CONFIG_KEY: Final[str] = f"{__name__ }.config"
 APP_SETTINGS_KEY: Final[str] = f"{__name__ }.settings"
 
-APP_DB_ENGINE_KEY: Final[str] = f"{__name__ }.db_engine"
+APP_AIOPG_ENGINE_KEY: Final[str] = f"{__name__ }.aiopg_engine"
 
 APP_CLIENT_SESSION_KEY: Final[str] = f"{__name__ }.session"
 
diff --git a/packages/service-library/src/servicelib/aiohttp/db_asyncpg_engine.py b/packages/service-library/src/servicelib/aiohttp/db_asyncpg_engine.py
new file mode 100644
index 000000000000..2ca9d431075b
--- /dev/null
+++ b/packages/service-library/src/servicelib/aiohttp/db_asyncpg_engine.py
@@ -0,0 +1,74 @@
+"""
+Helpers on asyncpg specific for aiohttp
+
+SEE migration aiopg->asyncpg https://github.com/ITISFoundation/osparc-simcore/issues/4529
+"""
+
+
+import logging
+from typing import Final
+
+from aiohttp import web
+from servicelib.logging_utils import log_context
+from settings_library.postgres import PostgresSettings
+from simcore_postgres_database.utils_aiosqlalchemy import (  # type: ignore[import-not-found] # this one is unclear
+    get_pg_engine_stateinfo,
+)
+from sqlalchemy.ext.asyncio import AsyncEngine
+
+from ..db_asyncpg_utils import create_async_engine_and_pg_database_ready
+from ..logging_utils import log_context
+
+APP_DB_ASYNC_ENGINE_KEY: Final[str] = f"{__name__ }.AsyncEngine"
+
+
+_logger = logging.getLogger(__name__)
+
+
+def _set_async_engine_to_app_state(app: web.Application, engine: AsyncEngine):
+    if exists := app.get(APP_DB_ASYNC_ENGINE_KEY, None):
+        msg = f"An instance of {type(exists)} already in app[{APP_DB_ASYNC_ENGINE_KEY}]={exists}"
+        raise ValueError(msg)
+
+    app[APP_DB_ASYNC_ENGINE_KEY] = engine
+    return get_async_engine(app)
+
+
+def get_async_engine(app: web.Application) -> AsyncEngine:
+    engine: AsyncEngine = app[APP_DB_ASYNC_ENGINE_KEY]
+    assert engine  # nosec
+    return engine
+
+
+async def connect_to_db(app: web.Application, settings: PostgresSettings) -> None:
+    """
+    - db services up, data migrated and ready to use
+    - sets an engine in app state (use `get_async_engine(app)` to retrieve)
+    """
+    if settings.POSTGRES_CLIENT_NAME:
+        settings = settings.copy(
+            update={"POSTGRES_CLIENT_NAME": settings.POSTGRES_CLIENT_NAME + "-asyncpg"}
+        )
+
+    with log_context(
+        _logger,
+        logging.INFO,
+        "Connecting app[APP_DB_ASYNC_ENGINE_KEY] to postgres with %s",
+        f"{settings=}",
+    ):
+        engine = await create_async_engine_and_pg_database_ready(settings)
+        _set_async_engine_to_app_state(app, engine)
+
+    _logger.info(
+        "app[APP_DB_ASYNC_ENGINE_KEY] ready : %s",
+        await get_pg_engine_stateinfo(engine),
+    )
+
+
+async def close_db_connection(app: web.Application) -> None:
+    engine = get_async_engine(app)
+    with log_context(
+        _logger, logging.DEBUG, f"app[APP_DB_ASYNC_ENGINE_KEY] disconnect of {engine}"
+    ):
+        if engine:
+            await engine.dispose()
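
A minimal wiring sketch for the new aiohttp asyncpg helpers above; the app factory, the handler and the use of `PostgresSettings.create_from_envs()` are illustrative assumptions, not part of this diff.

```python
# Hypothetical wiring: connect on startup, dispose on shutdown via a cleanup context.
from aiohttp import web
from servicelib.aiohttp.db_asyncpg_engine import (
    close_db_connection,
    connect_to_db,
    get_async_engine,
)
from settings_library.postgres import PostgresSettings


def create_app() -> web.Application:
    app = web.Application()
    settings = PostgresSettings.create_from_envs()  # assumes POSTGRES_* env vars are set

    async def _postgres_cleanup_ctx(app: web.Application):
        await connect_to_db(app, settings)  # waits for DB + migrations, stores the AsyncEngine
        yield
        await close_db_connection(app)

    app.cleanup_ctx.append(_postgres_cleanup_ctx)
    return app


async def healthcheck(request: web.Request) -> web.Response:
    engine = get_async_engine(request.app)  # retrieves app[APP_DB_ASYNC_ENGINE_KEY]
    async with engine.connect():
        ...
    return web.json_response({"db": "ok"})
```
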
diff --git a/packages/service-library/src/servicelib/aiohttp/requests_validation.py b/packages/service-library/src/servicelib/aiohttp/requests_validation.py
index b70f0e821e4f..ce4a97d54a1b 100644
--- a/packages/service-library/src/servicelib/aiohttp/requests_validation.py
+++ b/packages/service-library/src/servicelib/aiohttp/requests_validation.py
@@ -167,7 +167,10 @@ def parse_request_query_parameters_as(
         resource_name=request.rel_url.path,
         use_error_v1=use_enveloped_error_v1,
     ):
+        # NOTE: Currently, this does not take into consideration cases where there are multiple
+        # query parameters with the same key. However, we are not using such cases anywhere at the moment.
         data = dict(request.query)
+
         if hasattr(parameters_schema_cls, "parse_obj"):
             return parameters_schema_cls.model_validate(data)
         model: ModelClass = TypeAdapter(parameters_schema_cls).validate_python(data)
diff --git a/packages/service-library/src/servicelib/aiohttp/rest_middlewares.py b/packages/service-library/src/servicelib/aiohttp/rest_middlewares.py
index d05446e51cf5..035a41de5274 100644
--- a/packages/service-library/src/servicelib/aiohttp/rest_middlewares.py
+++ b/packages/service-library/src/servicelib/aiohttp/rest_middlewares.py
@@ -12,8 +12,11 @@
 from aiohttp import web
 from aiohttp.web_request import Request
 from aiohttp.web_response import StreamResponse
+from common_library.errors_classes import OsparcErrorMixin
 from models_library.utils.json_serialization import json_dumps
+from servicelib.error_codes import create_error_code
 
+from ..logging_utils import create_troubleshotting_log_message, get_log_record_extra
 from ..mimetype_constants import MIMETYPE_APPLICATION_JSON
 from ..utils import is_production_environ
 from .rest_models import ErrorItemType, ErrorType, LogMessageType
@@ -28,6 +31,11 @@
 from .typing_extension import Handler, Middleware
 
 DEFAULT_API_VERSION = "v0"
+_FMSG_INTERNAL_ERROR_USER_FRIENDLY_WITH_OEC = (
+    "We apologize for the inconvenience."
+    " Our team has recorded the issue [{error_code}] and is working to resolve it as quickly as possible."
+    " Thank you for your patience"
+)
 
 
 _logger = logging.getLogger(__name__)
@@ -40,29 +48,43 @@ def is_api_request(request: web.Request, api_version: str) -> bool:
 
 def error_middleware_factory(
     api_version: str,
-    log_exceptions: bool = True,
 ) -> Middleware:
     _is_prod: bool = is_production_environ()
 
     def _process_and_raise_unexpected_error(request: web.BaseRequest, err: Exception):
+
+        error_code = create_error_code(err)
+        error_context: dict[str, Any] = {
+            "request.remote": f"{request.remote}",
+            "request.method": f"{request.method}",
+            "request.path": f"{request.path}",
+        }
+        if isinstance(err, OsparcErrorMixin):
+            error_context.update(err.error_context())
+
+        frontend_msg = _FMSG_INTERNAL_ERROR_USER_FRIENDLY_WITH_OEC.format(
+            error_code=error_code
+        )
+        log_msg = create_troubleshotting_log_message(
+            message_to_user=frontend_msg,
+            error=err,
+            error_code=error_code,
+            error_context=error_context,
+        )
+
         http_error = create_http_error(
             err,
-            "Unexpected Server error",
+            frontend_msg,
             web.HTTPInternalServerError,
             skip_internal_error_details=_is_prod,
         )
-
-        if log_exceptions:
-            _logger.error(
-                'Unexpected server error "%s" from access: %s "%s %s". Responding with status %s',
-                type(err),
-                request.remote,
-                request.method,
-                request.path,
-                http_error.status,
-                exc_info=err,
-                stack_info=True,
-            )
+        _logger.exception(
+            log_msg,
+            extra=get_log_record_extra(
+                error_code=error_code,
+                user_id=error_context.get("user_id"),
+            ),
+        )
         raise http_error
 
     @web.middleware
diff --git a/packages/service-library/src/servicelib/aiohttp/rest_responses.py b/packages/service-library/src/servicelib/aiohttp/rest_responses.py
index 569ab56fbca0..dda73210865e 100644
--- a/packages/service-library/src/servicelib/aiohttp/rest_responses.py
+++ b/packages/service-library/src/servicelib/aiohttp/rest_responses.py
@@ -110,7 +110,7 @@ def create_http_error(
         error = ErrorType(
             errors=items,
             status=http_error_cls.status_code,
-            message=items[0].message if items else default_message,
+            message=default_message,
         )
 
     assert not http_error_cls.empty_body  # nosec
diff --git a/packages/service-library/src/servicelib/aiohttp/tracing.py b/packages/service-library/src/servicelib/aiohttp/tracing.py
index 099d927dbc50..c33e92cc96f4 100644
--- a/packages/service-library/src/servicelib/aiohttp/tracing.py
+++ b/packages/service-library/src/servicelib/aiohttp/tracing.py
@@ -1,76 +1,80 @@
-""" Adds aiohttp middleware for tracing using zipkin server instrumentation.
+""" Adds aiohttp middleware for tracing using opentelemetry instrumentation.
 
 """
+
 import logging
-from typing import Iterable
 
-import aiozipkin as az
 from aiohttp import web
-from aiohttp.web import AbstractRoute
-from aiozipkin.aiohttp_helpers import (
-    APP_AIOZIPKIN_KEY,
-    REQUEST_AIOZIPKIN_KEY,
-    middleware_maker,
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
+    OTLPSpanExporter as OTLPSpanExporterHTTP,
+)
+from opentelemetry.instrumentation.aiohttp_client import (  # pylint:disable=no-name-in-module
+    AioHttpClientInstrumentor,
+)
+from opentelemetry.instrumentation.aiohttp_server import (  # pylint:disable=no-name-in-module
+    AioHttpServerInstrumentor,
+)
+from opentelemetry.instrumentation.aiopg import (  # pylint:disable=no-name-in-module
+    AiopgInstrumentor,
 )
-from yarl import URL
+from opentelemetry.instrumentation.requests import RequestsInstrumentor
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from settings_library.tracing import TracingSettings
 
-log = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)
 
 
 def setup_tracing(
     app: web.Application,
-    *,
+    tracing_settings: TracingSettings,
     service_name: str,
-    host: str,
-    port: int,
-    jaeger_base_url: URL | str,
-    skip_routes: Iterable[AbstractRoute] | None = None,
-) -> bool:
+    instrument_aiopg: bool = False,  # noqa: FBT001, FBT002
+) -> None:
     """
-    Sets up this service for a distributed tracing system
-    using zipkin (https://zipkin.io/) and Jaeger (https://www.jaegertracing.io/)
+    Sets up this service for a distributed tracing system (opentelemetry)
     """
-    zipkin_address = URL(f"{jaeger_base_url}") / "api/v2/spans"
-
-    log.debug(
-        "Setting up tracing for %s at %s:%d -> %s",
-        service_name,
-        host,
-        port,
-        zipkin_address,
+    _ = app
+    opentelemetry_collector_endpoint = (
+        tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT
     )
-
-    endpoint = az.create_endpoint(service_name, ipv4=host, port=port)
-
-    # TODO: move away from aiozipkin to OpenTelemetrySDK
-    # https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/asgi/asgi.html
-    # see issue [#2715](https://github.com/ITISFoundation/osparc-simcore/issues/2715)
-    # creates / closes tracer
-    async def _tracer_cleanup_context(app: web.Application):
-
-        app[APP_AIOZIPKIN_KEY] = await az.create(
-            f"{zipkin_address}", endpoint, sample_rate=1.0
+    opentelemetry_collector_port = tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_PORT
+    if not opentelemetry_collector_endpoint and not opentelemetry_collector_port:
+        _logger.warning("Skipping opentelemetry tracing setup")
+        return
+    if not opentelemetry_collector_endpoint or not opentelemetry_collector_port:
+        msg = (
+            "Variable TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT "
+            f"[{tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT}] "
+            "or TRACING_OPENTELEMETRY_COLLECTOR_PORT "
+            f"[{tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_PORT}] "
+            "is unset. Provide both or remove both."
         )
+        raise RuntimeError(msg)
+    resource = Resource(attributes={"service.name": service_name})
+    trace.set_tracer_provider(TracerProvider(resource=resource))
+    tracer_provider: trace.TracerProvider = trace.get_tracer_provider()
+    tracing_destination: str = (
+        f"{opentelemetry_collector_endpoint}:{opentelemetry_collector_port}/v1/traces"
+    )
 
-        yield
-
-        if APP_AIOZIPKIN_KEY in app:
-            await app[APP_AIOZIPKIN_KEY].close()
-
-    app.cleanup_ctx.append(_tracer_cleanup_context)
-
-    # adds middleware to tag spans (when used, tracer should be ready)
-    m = middleware_maker(
-        skip_routes=skip_routes,
-        tracer_key=APP_AIOZIPKIN_KEY,
-        request_key=REQUEST_AIOZIPKIN_KEY,
+    _logger.info(
+        "Trying to connect service %s to tracing collector at %s.",
+        service_name,
+        tracing_destination,
     )
-    # NOTE: mypy: tracing library uses helpers aiozipkin.aiohttp_helpers that are not
-    # exactly as defined with latest aiohttp.typedefs. They are compatible but mypy fails.
-    app.middlewares.append(m)  # type: ignore[arg-type]
 
-    # # WARNING: adds a middleware that should be the outermost since
-    # # it expects stream responses while we allow data returns from a handler
-    # az.setup(app, tracer, skip_routes=skip_routes)
+    otlp_exporter = OTLPSpanExporterHTTP(
+        endpoint=tracing_destination,
+    )
 
-    return True
+    # Add the span processor to the tracer provider
+    tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter))  # type: ignore[attr-defined] # https://github.com/open-telemetry/opentelemetry-python/issues/3713
+    # Instrument aiohttp server and client
+    AioHttpServerInstrumentor().instrument()
+    AioHttpClientInstrumentor().instrument()
+    if instrument_aiopg:
+        AiopgInstrumentor().instrument()
+    RequestsInstrumentor().instrument()
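
A hedged usage sketch for the rewritten aiohttp `setup_tracing`; the service name and the settings construction are illustrative, and the comments only restate what the function above already does.

```python
# Hypothetical usage: enable opentelemetry tracing on an aiohttp service.
from aiohttp import web
from servicelib.aiohttp.tracing import setup_tracing
from settings_library.tracing import TracingSettings

app = web.Application()
tracing_settings = TracingSettings.create_from_envs()  # reads TRACING_OPENTELEMETRY_COLLECTOR_* vars

# No-op (logs a warning) if both endpoint and port are unset; raises if only one is provided.
setup_tracing(
    app,
    tracing_settings=tracing_settings,
    service_name="example-aiohttp-service",
    instrument_aiopg=True,  # also instrument aiopg-based DB calls
)
```
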
diff --git a/packages/service-library/src/servicelib/archiving_utils.py b/packages/service-library/src/servicelib/archiving_utils.py
index d5c318f8b09f..d48eb7104c67 100644
--- a/packages/service-library/src/servicelib/archiving_utils.py
+++ b/packages/service-library/src/servicelib/archiving_utils.py
@@ -4,13 +4,16 @@
 import logging
 import types
 import zipfile
-from contextlib import AsyncExitStack, contextmanager
+from collections.abc import Awaitable, Callable, Iterator
+from contextlib import AsyncExitStack, contextmanager, suppress
 from functools import partial
 from pathlib import Path
-from typing import Any, Awaitable, Callable, Final, Iterator
+from typing import Any, Final
 
 import tqdm
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
+from pydantic import NonNegativeFloat
+from repro_zipfile import ReproducibleZipFile  # type: ignore[import-untyped]
 from tqdm.contrib.logging import logging_redirect_tqdm, tqdm_logging_redirect
 
 from .file_utils import remove_directory
@@ -21,8 +24,9 @@
 _MIN: Final[int] = 60  # secs
 _MAX_UNARCHIVING_WORKER_COUNT: Final[int] = 2
 _CHUNK_SIZE: Final[int] = 1024 * 8
+_UNIT_MULTIPLIER: Final[NonNegativeFloat] = 1024.0
 
-log = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)
 
 
 class ArchiveError(Exception):
@@ -35,10 +39,10 @@ def _human_readable_size(size, decimal_places=3):
     human_readable_file_size = float(size)
     unit = "B"
     for t_unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
-        if human_readable_file_size < 1024.0:
+        if human_readable_file_size < _UNIT_MULTIPLIER:
             unit = t_unit
             break
-        human_readable_file_size /= 1024.0
+        human_readable_file_size /= _UNIT_MULTIPLIER
 
     return f"{human_readable_file_size:.{decimal_places}f}{unit}"
 
@@ -56,7 +60,9 @@ def _iter_files_to_compress(
     dir_path: Path, exclude_patterns: set[str] | None
 ) -> Iterator[Path]:
     exclude_patterns = exclude_patterns if exclude_patterns else set()
-    for path in dir_path.rglob("*"):
+    # NOTE: make sure to sort paths, otherwise between different runs
+    # the zip will have a different structure and hash
+    for path in sorted(dir_path.rglob("*")):
         if path.is_file() and not any(
             fnmatch.fnmatch(f"{path}", x) for x in exclude_patterns
         ):
@@ -64,11 +70,11 @@ def _iter_files_to_compress(
 
 
 def _strip_directory_from_path(input_path: Path, to_strip: Path) -> Path:
-    _to_strip = f"{str(to_strip)}/"
+    _to_strip = f"{to_strip}/"
     return Path(str(input_path).replace(_to_strip, ""))
 
 
-class _FastZipFileReader(zipfile.ZipFile):
+class _FastZipFileReader(ReproducibleZipFile):
     """
     Used to gain a speed boost of several orders of magnitude.
 
@@ -86,7 +92,7 @@ class _FastZipFileReader(zipfile.ZipFile):
     files contained in the archive.
     """
 
-    def _RealGetContents(self):
+    def _RealGetContents(self):  # noqa: N802
         """method disabled"""
 
 
@@ -107,7 +113,7 @@ def _zipfile_single_file_extract_worker(
     zip_file_path: Path,
     file_in_archive: zipfile.ZipInfo,
     destination_folder: Path,
-    is_dir: bool,
+    is_dir: bool,  # noqa: FBT001
 ) -> Path:
     """Extracts file_in_archive from the archive zip_file_path -> destination_folder/file_in_archive
 
@@ -129,7 +135,7 @@ def _zipfile_single_file_extract_worker(
             desc=desc,
             **(
                 _TQDM_FILE_OPTIONS
-                | dict(miniters=_compute_tqdm_miniters(file_in_archive.file_size))
+                | {"miniters": _compute_tqdm_miniters(file_in_archive.file_size)}
             ),
         ) as pbar:
             while chunk := zip_fp.read(_CHUNK_SIZE):
@@ -139,7 +145,7 @@ def _zipfile_single_file_extract_worker(
 
 
 def _ensure_destination_subdirectories_exist(
-    zip_file_handler: zipfile.ZipFile, destination_folder: Path
+    zip_file_handler: ReproducibleZipFile, destination_folder: Path
 ) -> None:
     # assemble full destination paths
     full_destination_paths = {
@@ -177,7 +183,7 @@ async def unarchive_dir(
         )
     async with AsyncExitStack() as zip_stack:
         zip_file_handler = zip_stack.enter_context(
-            zipfile.ZipFile(  # pylint: disable=consider-using-with
+            ReproducibleZipFile(  # pylint: disable=consider-using-with
                 archive_to_extract,
                 mode="r",
             )
@@ -232,7 +238,7 @@ async def unarchive_dir(
                     extracted_path = await future
                     extracted_file_size = extracted_path.stat().st_size
                     if tqdm_progress.update(extracted_file_size) and log_cb:
-                        with log_catch(log, reraise=False):
+                        with log_catch(_logger, reraise=False):
                             await log_cb(f"{tqdm_progress}")
                     await sub_prog.update(extracted_file_size)
                     extracted_paths.append(extracted_path)
@@ -266,12 +272,15 @@ async def unarchive_dir(
 
 @contextmanager
 def _progress_enabled_zip_write_handler(
-    zip_file_handler: zipfile.ZipFile, progress_bar: tqdm.tqdm
-) -> Iterator[zipfile.ZipFile]:
+    zip_file_handler: ReproducibleZipFile, progress_bar: tqdm.tqdm
+) -> Iterator[ReproducibleZipFile]:
     """This function overrides the default zip write fct to allow to get progress using tqdm library"""
 
     def _write_with_progress(
-        original_write_fct, self, data, pbar  # pylint: disable=unused-argument
+        original_write_fct,
+        self,  # pylint: disable=unused-argument  # noqa: ARG001
+        data,
+        pbar,
     ):
         pbar.update(len(data))
         return original_write_fct(data)
@@ -279,21 +288,21 @@ def _write_with_progress(
     # Replace original write() with a wrapper to track progress
     assert zip_file_handler.fp  # nosec
     old_write_method = zip_file_handler.fp.write
-    zip_file_handler.fp.write = types.MethodType(  # type: ignore[assignment]
+    zip_file_handler.fp.write = types.MethodType(
         partial(_write_with_progress, old_write_method, pbar=progress_bar),
         zip_file_handler.fp,
     )
     try:
         yield zip_file_handler
     finally:
-        zip_file_handler.fp.write = old_write_method  # type: ignore[method-assign]
+        zip_file_handler.fp.write = old_write_method
 
 
 def _add_to_archive(
     dir_to_compress: Path,
     destination: Path,
-    compress: bool,
-    store_relative_path: bool,
+    compress: bool,  # noqa: FBT001
+    store_relative_path: bool,  # noqa: FBT001
     update_progress,
     loop,
     exclude_patterns: set[str] | None = None,
@@ -308,11 +317,10 @@ def _add_to_archive(
         desc=f"{desc}\n",
         total=folder_size_bytes,
         **(
-            _TQDM_FILE_OPTIONS
-            | dict(miniters=_compute_tqdm_miniters(folder_size_bytes))
+            _TQDM_FILE_OPTIONS | {"miniters": _compute_tqdm_miniters(folder_size_bytes)}
         ),
     ) as progress_bar, _progress_enabled_zip_write_handler(
-        zipfile.ZipFile(destination, "w", compression=compression), progress_bar
+        ReproducibleZipFile(destination, "w", compression=compression), progress_bar
     ) as zip_file_handler:
         for file_to_add in _iter_files_to_compress(dir_to_compress, exclude_patterns):
             progress_bar.set_description(f"{desc}/{file_to_add.name}\n")
@@ -393,10 +401,11 @@ async def archive_dir(
             if destination.is_file():
                 destination.unlink(missing_ok=True)
 
-            raise ArchiveError(
+            msg = (
                 f"Failed archiving {dir_to_compress} -> {destination} due to {type(err)}."
                 f"Details: {err}"
-            ) from err
+            )
+            raise ArchiveError(msg) from err
 
         except BaseException:
             if destination.is_file():
@@ -453,11 +462,9 @@ def prune(self, exclude: set[Path]) -> None:
             if path.is_file():
                 path.unlink()
             elif path.is_dir():
-                try:
+                # prevents deleting non-empty folders
+                with suppress(OSError):
                     path.rmdir()
-                except OSError:
-                    # prevents deleting non-empty folders
-                    pass
 
         # second pass to delete empty folders
         # after deleting files, some folders might have been left empty
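
The switch to sorted iteration and `ReproducibleZipFile` is what makes archives of the same tree byte-identical between runs. A small stand-alone sketch of that property (file layout and paths are made up; assumes the `repro-zipfile` package, whose class is a drop-in replacement for `zipfile.ZipFile`):

```python
import hashlib
from pathlib import Path

from repro_zipfile import ReproducibleZipFile  # normalizes timestamps/metadata


def make_archive(src: Path, dst: Path) -> str:
    with ReproducibleZipFile(dst, "w") as zf:
        # sorted() mirrors _iter_files_to_compress: a stable iteration order
        # keeps the entry list (and hence the archive bytes) stable
        for path in sorted(src.rglob("*")):
            if path.is_file():
                zf.write(path, arcname=path.relative_to(src))
    return hashlib.sha256(dst.read_bytes()).hexdigest()


first = make_archive(Path("my_data"), Path("run1.zip"))
second = make_archive(Path("my_data"), Path("run2.zip"))
assert first == second  # same content -> same hash, run after run
```
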
diff --git a/packages/service-library/src/servicelib/db_asyncpg_utils.py b/packages/service-library/src/servicelib/db_asyncpg_utils.py
new file mode 100644
index 000000000000..84430916824a
--- /dev/null
+++ b/packages/service-library/src/servicelib/db_asyncpg_utils.py
@@ -0,0 +1,63 @@
+import logging
+import time
+from datetime import timedelta
+
+from models_library.healthchecks import IsNonResponsive, IsResponsive, LivenessResult
+from settings_library.postgres import PostgresSettings
+from simcore_postgres_database.utils_aiosqlalchemy import (  # type: ignore[import-not-found] # this one is unclear
+    raise_if_migration_not_ready,
+)
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
+from tenacity import retry
+
+from .retry_policies import PostgresRetryPolicyUponInitialization
+
+_logger = logging.getLogger(__name__)
+
+
+@retry(**PostgresRetryPolicyUponInitialization(_logger).kwargs)
+async def create_async_engine_and_pg_database_ready(
+    settings: PostgresSettings,
+) -> AsyncEngine:
+    """
+    - creates asyncio engine
+    - waits until db service is up
+    - waits until db data is migrated (i.e. ready to use)
+    - returns engine
+    """
+    server_settings = None
+    if settings.POSTGRES_CLIENT_NAME:
+        server_settings = {
+            "application_name": settings.POSTGRES_CLIENT_NAME,
+        }
+
+    engine: AsyncEngine = create_async_engine(
+        settings.dsn_with_async_sqlalchemy,
+        pool_size=settings.POSTGRES_MINSIZE,
+        max_overflow=settings.POSTGRES_MAXSIZE - settings.POSTGRES_MINSIZE,
+        connect_args={"server_settings": server_settings},
+        pool_pre_ping=True,  # https://docs.sqlalchemy.org/en/14/core/pooling.html#dealing-with-disconnects
+        future=True,  # this uses sqlalchemy 2.0 API, shall be removed when sqlalchemy 2.0 is released
+    )
+
+    try:
+        await raise_if_migration_not_ready(engine)
+    except Exception:
+        # NOTE: engine must be closed because retry will create a new engine
+        await engine.dispose()
+        raise
+
+    return engine
+
+
+async def check_postgres_liveness(engine: AsyncEngine) -> LivenessResult:
+    try:
+        tic = time.time()
+        # test
+        async with engine.connect():
+            ...
+        elapsed_time = time.time() - tic
+        return IsResponsive(elapsed=timedelta(seconds=elapsed_time))
+    except SQLAlchemyError as err:
+        return IsNonResponsive(reason=f"{err}")
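
A hedged sketch of using the new asyncpg helpers from a stand-alone script; the settings construction is an assumption and the printout is only illustrative.

```python
import asyncio

from servicelib.db_asyncpg_utils import (
    check_postgres_liveness,
    create_async_engine_and_pg_database_ready,
)
from settings_library.postgres import PostgresSettings


async def main() -> None:
    settings = PostgresSettings.create_from_envs()  # assumes POSTGRES_* env vars are set
    # retries until the DB is reachable and migrations are applied, then returns the engine
    engine = await create_async_engine_and_pg_database_ready(settings)
    try:
        liveness = await check_postgres_liveness(engine)
        print(liveness)  # IsResponsive(elapsed=...) or IsNonResponsive(reason=...)
    finally:
        await engine.dispose()


asyncio.run(main())
```
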
diff --git a/packages/service-library/src/servicelib/deferred_tasks/_models.py b/packages/service-library/src/servicelib/deferred_tasks/_models.py
index 22c3ad20c876..25267c0edf20 100644
--- a/packages/service-library/src/servicelib/deferred_tasks/_models.py
+++ b/packages/service-library/src/servicelib/deferred_tasks/_models.py
@@ -1,6 +1,6 @@
 from typing import Any, Literal, TypeAlias
 
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
 from pydantic import BaseModel
 
 TaskUID: TypeAlias = IDStr  # Unique identifier provided by the TaskTracker
diff --git a/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py b/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py
index 382cb6c9f040..3da6f6cbc7f3 100644
--- a/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py
+++ b/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py
@@ -1,7 +1,8 @@
+import pickle
 from typing import Final
 from uuid import uuid4
 
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
 from pydantic import NonNegativeInt
 
 from ..redis import RedisClientSDK
@@ -33,18 +34,14 @@ async def get_new_unique_identifier(self) -> TaskUID:
 
     async def _get_raw(self, redis_key: str) -> TaskScheduleModel | None:
         found_data = await self.redis_client_sdk.redis.get(redis_key)
-        return (
-            None
-            if found_data is None
-            else TaskScheduleModel.model_validate_json(found_data)
-        )
+        return None if found_data is None else pickle.loads(found_data)  # noqa: S301
 
     async def get(self, task_uid: TaskUID) -> TaskScheduleModel | None:
         return await self._get_raw(_get_key(task_uid))
 
     async def save(self, task_uid: TaskUID, task_schedule: TaskScheduleModel) -> None:
         await self.redis_client_sdk.redis.set(
-            _get_key(task_uid), task_schedule.model_dump_json()
+            _get_key(task_uid), pickle.dumps(task_schedule)
         )
 
     async def remove(self, task_uid: TaskUID) -> None:
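
The tracker now persists `TaskScheduleModel` with pickle instead of JSON, which round-trips the exact in-memory object at the cost of only being readable by trusted Python code (hence the `noqa: S301`). A tiny stand-in illustration of that round-trip, not using the real model:

```python
import pickle

from pydantic import BaseModel


class FakeTaskSchedule(BaseModel):  # stand-in for TaskScheduleModel
    attempts: int = 0


original = FakeTaskSchedule(attempts=3)
restored = pickle.loads(pickle.dumps(original))  # noqa: S301 - data comes from our own redis
assert restored == original
```
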
diff --git a/packages/service-library/src/servicelib/docker_utils.py b/packages/service-library/src/servicelib/docker_utils.py
index df976c623afc..2ce1fab2fb51 100644
--- a/packages/service-library/src/servicelib/docker_utils.py
+++ b/packages/service-library/src/servicelib/docker_utils.py
@@ -178,12 +178,19 @@ async def _parse_pull_information(
             layer_id_to_size.setdefault(
                 parsed_progress.id, _PulledStatus(0)
             ).extracted = layer_id_to_size[parsed_progress.id].size
+        case "already exists":
+            assert parsed_progress.id  # nosec
+            layer_id_to_size.setdefault(
+                parsed_progress.id, _PulledStatus(0)
+            ).extracted = layer_id_to_size[parsed_progress.id].size
+            layer_id_to_size.setdefault(
+                parsed_progress.id, _PulledStatus(0)
+            ).downloaded = layer_id_to_size[parsed_progress.id].size
         case progress_status if any(
             msg in progress_status
             for msg in [
                 "status: downloaded newer image for ",
                 "status: image is up to date for ",
-                "already exists",
             ]
         ):
             for layer_pull_status in layer_id_to_size.values():
diff --git a/packages/service-library/src/servicelib/fastapi/app_state.py b/packages/service-library/src/servicelib/fastapi/app_state.py
index b15cbcb261e7..79e2bea4123a 100644
--- a/packages/service-library/src/servicelib/fastapi/app_state.py
+++ b/packages/service-library/src/servicelib/fastapi/app_state.py
@@ -1,8 +1,8 @@
-import logging
+from typing import TypeVar
 
 from fastapi import FastAPI
 
-_logger = logging.getLogger(__name__)
+T = TypeVar("T", bound="SingletonInAppStateMixin")
 
 
 class SingletonInAppStateMixin:
@@ -14,8 +14,8 @@ class SingletonInAppStateMixin:
     frozen: bool = True  # Will raise if set multiple times
 
     @classmethod
-    def get_from_app_state(cls, app: FastAPI):
-        return getattr(app.state, cls.app_state_name)
+    def get_from_app_state(cls: type[T], app: FastAPI) -> T:
+        return getattr(app.state, cls.app_state_name)  # type:ignore[no-any-return]
 
     def set_to_app_state(self, app: FastAPI):
         if (exists := getattr(app.state, self.app_state_name, None)) and self.frozen:
@@ -26,11 +26,11 @@ def set_to_app_state(self, app: FastAPI):
         return self.get_from_app_state(app)
 
     @classmethod
-    def pop_from_app_state(cls, app: FastAPI):
+    def pop_from_app_state(cls: type[T], app: FastAPI) -> T:
         """
         Raises:
             AttributeError: if instance is not in app.state
         """
-        old = getattr(app.state, cls.app_state_name)
+        old = cls.get_from_app_state(app)
         delattr(app.state, cls.app_state_name)
         return old
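
With the `TypeVar` annotations, type checkers can now infer the concrete class returned by the mixin's class methods. An illustrative (made-up) singleton stored on `app.state`:

```python
from dataclasses import dataclass

from fastapi import FastAPI
from servicelib.fastapi.app_state import SingletonInAppStateMixin


@dataclass
class ExampleClient(SingletonInAppStateMixin):
    app_state_name: str = "example_client"  # attribute name used on app.state
    base_url: str = "http://example.invalid"


app = FastAPI()
ExampleClient().set_to_app_state(app)

client = ExampleClient.get_from_app_state(app)  # inferred as ExampleClient, not Any
assert client.base_url == "http://example.invalid"

removed = ExampleClient.pop_from_app_state(app)  # raises AttributeError if missing
```
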
diff --git a/packages/service-library/src/servicelib/fastapi/db_asyncpg_engine.py b/packages/service-library/src/servicelib/fastapi/db_asyncpg_engine.py
new file mode 100644
index 000000000000..a45e5dc2145d
--- /dev/null
+++ b/packages/service-library/src/servicelib/fastapi/db_asyncpg_engine.py
@@ -0,0 +1,33 @@
+import logging
+
+from fastapi import FastAPI
+from settings_library.postgres import PostgresSettings
+from simcore_postgres_database.utils_aiosqlalchemy import (  # type: ignore[import-not-found] # this one is unclear
+    get_pg_engine_stateinfo,
+)
+
+from ..db_asyncpg_utils import create_async_engine_and_pg_database_ready
+from ..logging_utils import log_context
+
+_logger = logging.getLogger(__name__)
+
+
+async def connect_to_db(app: FastAPI, settings: PostgresSettings) -> None:
+    with log_context(
+        _logger,
+        logging.DEBUG,
+        f"Connecting and migrating {settings.dsn_with_async_sqlalchemy}",
+    ):
+        engine = await create_async_engine_and_pg_database_ready(settings)
+
+    app.state.engine = engine
+    _logger.debug(
+        "Setup engine: %s",
+        await get_pg_engine_stateinfo(engine),
+    )
+
+
+async def close_db_connection(app: FastAPI) -> None:
+    with log_context(_logger, logging.DEBUG, f"db disconnect of {app.state.engine}"):
+        if engine := app.state.engine:
+            await engine.dispose()
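
A hedged wiring sketch for the FastAPI variant of the asyncpg helpers; the `setup_postgres` helper name is made up.

```python
from fastapi import FastAPI
from servicelib.fastapi.db_asyncpg_engine import close_db_connection, connect_to_db
from settings_library.postgres import PostgresSettings


def setup_postgres(app: FastAPI, settings: PostgresSettings) -> None:
    async def _on_startup() -> None:
        await connect_to_db(app, settings)  # stores the AsyncEngine in app.state.engine

    async def _on_shutdown() -> None:
        await close_db_connection(app)

    app.add_event_handler("startup", _on_startup)
    app.add_event_handler("shutdown", _on_shutdown)
```
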
diff --git a/packages/service-library/src/servicelib/fastapi/docker_utils.py b/packages/service-library/src/servicelib/fastapi/docker_utils.py
index c7db4c1cf2d7..5b6bae843b33 100644
--- a/packages/service-library/src/servicelib/fastapi/docker_utils.py
+++ b/packages/service-library/src/servicelib/fastapi/docker_utils.py
@@ -3,7 +3,7 @@
 from typing import Final
 
 import httpx
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
 from models_library.docker import DockerGenericTag
 from pydantic import ByteSize, TypeAdapter, ValidationError
 from settings_library.docker_registry import RegistrySettings
diff --git a/packages/service-library/src/servicelib/fastapi/prometheus_instrumentation.py b/packages/service-library/src/servicelib/fastapi/prometheus_instrumentation.py
index 626d5559df72..847585c52fc2 100644
--- a/packages/service-library/src/servicelib/fastapi/prometheus_instrumentation.py
+++ b/packages/service-library/src/servicelib/fastapi/prometheus_instrumentation.py
@@ -2,23 +2,27 @@
 
 
 from fastapi import FastAPI
+from prometheus_client import CollectorRegistry
 from prometheus_fastapi_instrumentator import Instrumentator
 
 
 def setup_prometheus_instrumentation(app: FastAPI) -> Instrumentator:
+    # NOTE: use that registry to prevent having a global one
+    app.state.prometheus_registry = registry = CollectorRegistry(auto_describe=True)
+    instrumentator = Instrumentator(
+        should_instrument_requests_inprogress=False,  # bug in https://github.com/trallnag/prometheus-fastapi-instrumentator/issues/317
+        inprogress_labels=False,
+        registry=registry,
+    ).instrument(app)
 
-    instrumentator = (
-        Instrumentator(
-            should_instrument_requests_inprogress=True, inprogress_labels=False
-        )
-        .instrument(app)
-        .expose(app, include_in_schema=False)
-    )
+    async def _on_startup() -> None:
+        instrumentator.expose(app, include_in_schema=False)
 
-    def _unregister():
-        for collector in list(instrumentator.registry._collector_to_names.keys()):
-            instrumentator.registry.unregister(collector)
+    def _unregister() -> None:
+        # NOTE: avoid registering collectors multiple times when running unittests consecutively (https://stackoverflow.com/a/62489287)
+        for collector in list(registry._collector_to_names.keys()):  # noqa: SLF001
+            registry.unregister(collector)
 
-    # avoid registering collectors multiple times when running unittests consecutively (https://stackoverflow.com/a/62489287)
+    app.add_event_handler("startup", _on_startup)
     app.add_event_handler("shutdown", _unregister)
     return instrumentator
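
The per-app `CollectorRegistry` (together with the `_unregister` shutdown hook) is what keeps repeated app creation, e.g. one app per test, from colliding on prometheus' global registry. A sketch of that isolation (the two-app scenario is illustrative):

```python
from fastapi import FastAPI
from servicelib.fastapi.prometheus_instrumentation import (
    setup_prometheus_instrumentation,
)


def create_app() -> FastAPI:
    app = FastAPI()
    setup_prometheus_instrumentation(app)
    return app


app_one = create_app()
app_two = create_app()  # with a shared global registry this would duplicate timeseries
assert app_one.state.prometheus_registry is not app_two.state.prometheus_registry
```
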
diff --git a/packages/service-library/src/servicelib/fastapi/tracing.py b/packages/service-library/src/servicelib/fastapi/tracing.py
new file mode 100644
index 000000000000..e0f670686f5c
--- /dev/null
+++ b/packages/service-library/src/servicelib/fastapi/tracing.py
@@ -0,0 +1,47 @@
+""" Adds fastapi middleware for tracing using opentelemetry instrumentation.
+
+"""
+
+import logging
+
+from fastapi import FastAPI
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
+    OTLPSpanExporter as OTLPSpanExporterHTTP,
+)
+from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from settings_library.tracing import TracingSettings
+
+log = logging.getLogger(__name__)
+
+
+def setup_tracing(
+    app: FastAPI, tracing_settings: TracingSettings, service_name: str
+) -> None:
+    if (
+        not tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT
+        and not tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_PORT
+    ):
+        log.warning("Skipping opentelemetry tracing setup")
+        return
+
+    # Set up the tracer provider
+    resource = Resource(attributes={"service.name": service_name})
+    trace.set_tracer_provider(TracerProvider(resource=resource))
+    global_tracer_provider = trace.get_tracer_provider()
+    assert isinstance(global_tracer_provider, TracerProvider)  # nosec
+    tracing_destination: str = f"{tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT}:{tracing_settings.TRACING_OPENTELEMETRY_COLLECTOR_PORT}/v1/traces"
+    log.info(
+        "Trying to connect service %s to tracing collector at %s.",
+        service_name,
+        tracing_destination,
+    )
+    # Configure OTLP exporter to send spans to the collector
+    otlp_exporter = OTLPSpanExporterHTTP(endpoint=tracing_destination)
+    span_processor = BatchSpanProcessor(otlp_exporter)
+    global_tracer_provider.add_span_processor(span_processor)
+    # Instrument FastAPI
+    FastAPIInstrumentor().instrument_app(app)
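
A hedged usage sketch for the new FastAPI `setup_tracing`; the settings construction and service name are illustrative.

```python
from fastapi import FastAPI
from servicelib.fastapi.tracing import setup_tracing
from settings_library.tracing import TracingSettings

app = FastAPI()
tracing_settings = TracingSettings.create_from_envs()  # reads TRACING_OPENTELEMETRY_COLLECTOR_* vars

# No-op (with a warning) when the collector endpoint/port are not configured
setup_tracing(app, tracing_settings=tracing_settings, service_name="example-fastapi-service")
```
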
diff --git a/packages/service-library/src/servicelib/instrumentation.py b/packages/service-library/src/servicelib/instrumentation.py
new file mode 100644
index 000000000000..002e19428530
--- /dev/null
+++ b/packages/service-library/src/servicelib/instrumentation.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+
+from prometheus_client import CollectorRegistry
+
+
+@dataclass(slots=True, kw_only=True)
+class MetricsBase:
+    subsystem: str
+    registry: CollectorRegistry
+
+
+def get_metrics_namespace(application_name: str) -> str:
+    return application_name.replace("-", "_")
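
An illustrative subclass showing how `MetricsBase` and `get_metrics_namespace` are meant to be combined; the metric name, subsystem and service name below are made up, not taken from this PR.

```python
from dataclasses import dataclass, field

from prometheus_client import CollectorRegistry, Counter
from servicelib.instrumentation import MetricsBase, get_metrics_namespace


@dataclass(slots=True, kw_only=True)
class ExampleMetrics(MetricsBase):
    tasks_total: Counter = field(init=False)

    def __post_init__(self) -> None:
        self.tasks_total = Counter(
            name="tasks_total",
            documentation="Total number of processed tasks",
            namespace=get_metrics_namespace("example-service"),  # -> "example_service"
            subsystem=self.subsystem,
            registry=self.registry,
        )


metrics = ExampleMetrics(subsystem="worker", registry=CollectorRegistry())
metrics.tasks_total.inc()
```
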
diff --git a/packages/service-library/src/servicelib/logging_utils.py b/packages/service-library/src/servicelib/logging_utils.py
index bc8ba72b4c01..235418fd950e 100644
--- a/packages/service-library/src/servicelib/logging_utils.py
+++ b/packages/service-library/src/servicelib/logging_utils.py
@@ -16,6 +16,9 @@
 from pathlib import Path
 from typing import Any, TypeAlias, TypedDict, TypeVar
 
+from models_library.utils.json_serialization import json_dumps
+
+from .error_codes import ErrorCodeStr
 from .utils_secrets import mask_sensitive_data
 
 _logger = logging.getLogger(__name__)
@@ -320,20 +323,58 @@ def log_catch(logger: logging.Logger, *, reraise: bool = True) -> Iterator[None]
 
 class LogExtra(TypedDict, total=False):
     log_uid: str
+    log_oec: str
 
 
 LogLevelInt: TypeAlias = int
 LogMessageStr: TypeAlias = str
 
 
-def get_log_record_extra(*, user_id: int | str | None = None) -> LogExtra | None:
+def get_log_record_extra(
+    *,
+    user_id: int | str | None = None,
+    error_code: str | None = None,
+) -> LogExtra | None:
     extra: LogExtra = {}
+
     if user_id:
         assert int(user_id) > 0  # nosec
         extra["log_uid"] = f"{user_id}"
+    if error_code:
+        extra["log_oec"] = error_code
+
     return extra or None
 
 
+def create_troubleshotting_log_message(
+    message_to_user: str,
+    error: BaseException,
+    error_code: ErrorCodeStr,
+    error_context: dict[str, Any] | None = None,
+    tip: str | None = None,
+) -> str:
+    """Create a formatted message for _logger.exception(...)
+
+    Arguments:
+        message_to_user -- A user-friendly message to be displayed on the front-end explaining the issue in simple terms.
+        error -- the instance of the handled exception
+        error_code -- A unique error code (e.g., OEC or osparc-specific) to identify the type or source of the error for easier tracking.
+        error_context -- Additional context surrounding the exception, such as environment variables or function-specific data. This can be derived from exc.error_context() (relevant when using the OsparcErrorMixin)
+        tip -- Helpful suggestions or possible solutions explaining why the error may have occurred and how it could potentially be resolved
+    """
+    debug_data = json_dumps(
+        {
+            "exception_details": f"{error}",
+            "error_code": error_code,
+            "context": error_context,
+            "tip": tip,
+        },
+        indent=1,
+    )
+
+    return f"{message_to_user}.\n{debug_data}"
+
+
 def _un_capitalize(s: str) -> str:
     return s[:1].lower() + s[1:] if s else ""
 
@@ -392,3 +433,8 @@ def guess_message_log_level(message: str) -> LogLevelInt:
     ):
         return logging.WARNING
     return logging.INFO
+
+
+def set_parent_module_log_level(current_module: str, desired_log_level: int) -> None:
+    parent_module = ".".join(current_module.split(".")[:-1])
+    logging.getLogger(parent_module).setLevel(desired_log_level)
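
A hedged sketch pairing the new `create_troubleshotting_log_message` with `get_log_record_extra` in an exception handler; the user id, message and tip are made up.

```python
import logging

from servicelib.error_codes import create_error_code
from servicelib.logging_utils import (
    create_troubleshotting_log_message,
    get_log_record_extra,
)

_logger = logging.getLogger(__name__)


def handle_failure(err: Exception, user_id: int) -> None:
    error_code = create_error_code(err)
    _logger.exception(
        create_troubleshotting_log_message(
            message_to_user=f"Something went wrong [{error_code}]",
            error=err,
            error_code=error_code,
            error_context={"user_id": user_id},
            tip="Check the upstream service logs",
        ),
        extra=get_log_record_extra(error_code=error_code, user_id=user_id),
    )


try:
    raise RuntimeError("boom")
except RuntimeError as exc:
    handle_failure(exc, user_id=42)
```
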
diff --git a/packages/service-library/src/servicelib/minio_utils.py b/packages/service-library/src/servicelib/minio_utils.py
index 53aedaa9e98b..31bdce4253c8 100644
--- a/packages/service-library/src/servicelib/minio_utils.py
+++ b/packages/service-library/src/servicelib/minio_utils.py
@@ -14,9 +14,9 @@ class ServiceRetryPolicyUponInitialization:
     def __init__(self, logger: logging.Logger | None = None):
         logger = logger or log
 
-        self.kwargs = dict(
-            wait=wait_fixed(self.WAIT_SECS),
-            stop=stop_after_attempt(self.ATTEMPTS_COUNT),
-            before_sleep=before_sleep_log(logger, logging.INFO),
-            reraise=True,
-        )
+        self.kwargs = {
+            "wait": wait_fixed(self.WAIT_SECS),
+            "stop": stop_after_attempt(self.ATTEMPTS_COUNT),
+            "before_sleep": before_sleep_log(logger, logging.INFO),
+            "reraise": True,
+        }
diff --git a/packages/service-library/src/servicelib/progress_bar.py b/packages/service-library/src/servicelib/progress_bar.py
index bf70c0c3e889..143d4debac0e 100644
--- a/packages/service-library/src/servicelib/progress_bar.py
+++ b/packages/service-library/src/servicelib/progress_bar.py
@@ -4,7 +4,7 @@
 from inspect import isawaitable
 from typing import Final, Optional, Protocol, runtime_checkable
 
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
 from models_library.progress_bar import (
     ProgressReport,
     ProgressStructuredMessage,
diff --git a/packages/service-library/src/servicelib/rabbitmq/_models.py b/packages/service-library/src/servicelib/rabbitmq/_models.py
index e48e4bb13aab..38bf965c15b7 100644
--- a/packages/service-library/src/servicelib/rabbitmq/_models.py
+++ b/packages/service-library/src/servicelib/rabbitmq/_models.py
@@ -1,8 +1,7 @@
-import re
 from collections.abc import Awaitable, Callable
 from typing import Any, Protocol
 
-from models_library.basic_types import ConstrainedStr
+from common_library.pydantic_basic_types import ConstrainedStr
 from models_library.rabbitmq_basic_types import (
     REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS,
     RPCMethodName,
diff --git a/packages/service-library/src/servicelib/rabbitmq/_utils.py b/packages/service-library/src/servicelib/rabbitmq/_utils.py
index 716d8e2adae6..176635e1e88d 100644
--- a/packages/service-library/src/servicelib/rabbitmq/_utils.py
+++ b/packages/service-library/src/servicelib/rabbitmq/_utils.py
@@ -1,8 +1,10 @@
 import logging
+import os
 import socket
 from typing import Any, Final
 
 import aio_pika
+import psutil
 from aiormq.exceptions import ChannelPreconditionFailed
 from pydantic import NonNegativeInt
 from tenacity import retry
@@ -51,7 +53,13 @@ async def wait_till_rabbitmq_responsive(url: str) -> bool:
 
 
 def get_rabbitmq_client_unique_name(base_name: str) -> str:
-    return f"{base_name}_{socket.gethostname()}"
+    # NOTE: The prefix below will change every time the process restarts.
+    # Why is this necessary?
+    # 1. The codebase relies on this behavior; without it, subscribers and consumers will fail.
+    # 2. It allows the web server to be restarted seamlessly during [re]deployments.
+    prefix_create_time = f"{psutil.Process(os.getpid()).create_time()}".strip(".")[-6:]
+
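+    # e.g. for base_name="dv2", hostname="worker-1" and a process create time of 1695293460.123456,
+    # the unique name becomes "dv2_worker-1_123456" (illustrative values)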
+    return f"{base_name}_{socket.gethostname()}_{prefix_create_time}"
 
 
 async def declare_queue(
diff --git a/services/agent/src/simcore_service_agent/modules/__init__.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/__init__.py
similarity index 100%
rename from services/agent/src/simcore_service_agent/modules/__init__.py
rename to packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/__init__.py
diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/errors.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/errors.py
new file mode 100644
index 000000000000..ae21b8f09a70
--- /dev/null
+++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/errors.py
@@ -0,0 +1,12 @@
+from pydantic.errors import PydanticErrorMixin
+
+
+class BaseAgentRPCError(PydanticErrorMixin, Exception):
+    ...
+
+
+class NoServiceVolumesFoundRPCError(BaseAgentRPCError):
+    msg_template: str = (
+        "Could not detect any unused volumes after waiting '{period}' seconds for "
+        "volumes to be released after closing all container for service='{node_id}'"
+    )
diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/volumes.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/volumes.py
new file mode 100644
index 000000000000..d414cd6b979c
--- /dev/null
+++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/volumes.py
@@ -0,0 +1,57 @@
+import logging
+from datetime import timedelta
+from typing import Final
+
+from models_library.projects_nodes_io import NodeID
+from models_library.rabbitmq_basic_types import RPCMethodName, RPCNamespace
+from pydantic import NonNegativeInt, parse_obj_as
+from servicelib.logging_utils import log_decorator
+from servicelib.rabbitmq import RabbitMQRPCClient
+
+_logger = logging.getLogger(__name__)
+
+_REQUEST_TIMEOUT: Final[NonNegativeInt] = int(timedelta(minutes=60).total_seconds())
+
+
+@log_decorator(_logger, level=logging.DEBUG)
+async def remove_volumes_without_backup_for_service(
+    rabbitmq_rpc_client: RabbitMQRPCClient,
+    *,
+    docker_node_id: str,
+    swarm_stack_name: str,
+    node_id: NodeID,
+) -> None:
+    result = await rabbitmq_rpc_client.request(
+        RPCNamespace.from_entries(
+            {
+                "service": "agent",
+                "docker_node_id": docker_node_id,
+                "swarm_stack_name": swarm_stack_name,
+            }
+        ),
+        parse_obj_as(RPCMethodName, "remove_volumes_without_backup_for_service"),
+        node_id=node_id,
+        timeout_s=_REQUEST_TIMEOUT,
+    )
+    assert result is None  # nosec
+
+
+@log_decorator(_logger, level=logging.DEBUG)
+async def backup_and_remove_volumes_for_all_services(
+    rabbitmq_rpc_client: RabbitMQRPCClient,
+    *,
+    docker_node_id: str,
+    swarm_stack_name: str,
+) -> None:
+    result = await rabbitmq_rpc_client.request(
+        RPCNamespace.from_entries(
+            {
+                "service": "agent",
+                "docker_node_id": docker_node_id,
+                "swarm_stack_name": swarm_stack_name,
+            }
+        ),
+        parse_obj_as(RPCMethodName, "backup_and_remove_volumes_for_all_services"),
+        timeout_s=_REQUEST_TIMEOUT,
+    )
+    assert result is None  # nosec
diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/catalog/services.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/catalog/services.py
index 83efa8f1d0e1..1c168a6d1b13 100644
--- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/catalog/services.py
+++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/catalog/services.py
@@ -54,7 +54,7 @@ async def _call(
             user_id=user_id,
             limit=limit,
             offset=offset,
-            timeout_s=20 * RPC_REQUEST_DEFAULT_TIMEOUT_S,
+            timeout_s=40 * RPC_REQUEST_DEFAULT_TIMEOUT_S,
         )
 
     result = await _call(
diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/clusters_keeper/clusters.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/clusters_keeper/clusters.py
index 49cf7d169880..ada0c66d26d9 100644
--- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/clusters_keeper/clusters.py
+++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/clusters_keeper/clusters.py
@@ -1,6 +1,6 @@
 from typing import Final
 
-from aiocache import cached  # type: ignore[import-not-found]
+from aiocache import cached  # type: ignore[import-untyped]
 from models_library.api_schemas_clusters_keeper import CLUSTERS_KEEPER_RPC_NAMESPACE
 from models_library.api_schemas_clusters_keeper.clusters import OnDemandCluster
 from models_library.rabbitmq_basic_types import RPCMethodName
diff --git a/packages/service-library/src/servicelib/services_utils.py b/packages/service-library/src/servicelib/services_utils.py
index 60a9caf92a53..98aace49c6c6 100644
--- a/packages/service-library/src/servicelib/services_utils.py
+++ b/packages/service-library/src/servicelib/services_utils.py
@@ -1,5 +1,11 @@
 import urllib.parse
 
+from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet
+from models_library.api_schemas_webserver.projects_nodes import (
+    NodeGet,
+    NodeGetIdle,
+    NodeGetUnknown,
+)
 from models_library.services import ServiceType
 
 
@@ -9,3 +15,14 @@ def get_service_from_key(service_key: str) -> ServiceType:
     if encoded_service_type == "comp":
         encoded_service_type = "computational"
     return ServiceType(encoded_service_type)
+
+
+def get_status_as_dict(
+    status: NodeGetIdle | NodeGetUnknown | DynamicServiceGet | NodeGet,
+) -> dict:
+    """shared between different backend services to guarantee same result to frontend"""
+    return (
+        status.dict(by_alias=True)
+        if isinstance(status, DynamicServiceGet)
+        else status.dict()
+    )
diff --git a/packages/service-library/tests/aiohttp/long_running_tasks/conftest.py b/packages/service-library/tests/aiohttp/long_running_tasks/conftest.py
index 987a68a40369..8fe29473cfcb 100644
--- a/packages/service-library/tests/aiohttp/long_running_tasks/conftest.py
+++ b/packages/service-library/tests/aiohttp/long_running_tasks/conftest.py
@@ -33,7 +33,8 @@ async def _string_list_task(
         await asyncio.sleep(sleep_time)
         task_progress.update(message="generated item", percent=index / num_strings)
         if fail:
-            raise RuntimeError("We were asked to fail!!")
+            msg = "We were asked to fail!!"
+            raise RuntimeError(msg)
 
     # NOTE: this code is used just for the sake of not returning the default 200
     return web.json_response(
diff --git a/packages/service-library/tests/aiohttp/test_rest_middlewares.py b/packages/service-library/tests/aiohttp/test_rest_middlewares.py
index 64c42e606d57..e70b6963b437 100644
--- a/packages/service-library/tests/aiohttp/test_rest_middlewares.py
+++ b/packages/service-library/tests/aiohttp/test_rest_middlewares.py
@@ -5,6 +5,8 @@
 
 import asyncio
 import json
+import logging
+from collections.abc import Callable
 from dataclasses import dataclass
 from typing import Any
 
@@ -14,10 +16,12 @@
 from models_library.utils.json_serialization import json_dumps
 from servicelib.aiohttp import status
 from servicelib.aiohttp.rest_middlewares import (
+    _FMSG_INTERNAL_ERROR_USER_FRIENDLY_WITH_OEC,
     envelope_middleware_factory,
     error_middleware_factory,
 )
 from servicelib.aiohttp.rest_responses import is_enveloped, unwrap_envelope
+from servicelib.error_codes import parse_error_code
 
 
 @dataclass
@@ -26,9 +30,13 @@ class Data:
     y: str = "foo"
 
 
+class SomeUnexpectedError(Exception):
+    ...
+
+
 class Handlers:
     @staticmethod
-    async def get_health_wrong(request: web.Request):
+    async def get_health_wrong(_request: web.Request):
         return {
             "name": __name__.split(".")[0],
             "version": "1.0",
@@ -37,7 +45,7 @@ async def get_health_wrong(request: web.Request):
         }
 
     @staticmethod
-    async def get_health(request: web.Request):
+    async def get_health(_request: web.Request):
         return {
             "name": __name__.split(".")[0],
             "version": "1.0",
@@ -46,62 +54,126 @@ async def get_health(request: web.Request):
         }
 
     @staticmethod
-    async def get_dict(request: web.Request):
+    async def get_dict(_request: web.Request):
         return {"x": 3, "y": "3"}
 
     @staticmethod
-    async def get_envelope(request: web.Request):
+    async def get_envelope(_request: web.Request):
         data = {"x": 3, "y": "3"}
         return {"error": None, "data": data}
 
     @staticmethod
-    async def get_list(request: web.Request):
+    async def get_list(_request: web.Request):
         return [{"x": 3, "y": "3"}] * 3
 
     @staticmethod
-    async def get_attobj(request: web.Request):
+    async def get_obj(_request: web.Request):
         return Data(3, "3")
 
     @staticmethod
-    async def get_string(request: web.Request):
+    async def get_string(_request: web.Request):
         return "foo"
 
     @staticmethod
-    async def get_number(request: web.Request):
+    async def get_number(_request: web.Request):
         return 3
 
     @staticmethod
-    async def get_mixed(request: web.Request):
+    async def get_mixed(_request: web.Request):
         return [{"x": 3, "y": "3", "z": [Data(3, "3")] * 2}] * 3
 
     @classmethod
-    def get(cls, suffix):
+    def returns_value(cls, suffix):
         handlers = cls()
         coro = getattr(handlers, "get_" + suffix)
         loop = asyncio.get_event_loop()
-        data = loop.run_until_complete(coro(None))
+        returned_value = loop.run_until_complete(coro(None))
+        return json.loads(json_dumps(returned_value))
+
+    EXPECTED_RAISE_UNEXPECTED_REASON = "Unexpected error"
+
+    @classmethod
+    async def raise_exception(cls, request: web.Request):
+        exc_name = request.query.get("exc")
+        match exc_name:
+            case NotImplementedError.__name__:
+                raise NotImplementedError
+            case asyncio.TimeoutError.__name__:
+                raise asyncio.TimeoutError
+            case web.HTTPOk.__name__:
+                raise web.HTTPOk  # 2XX
+            case web.HTTPUnauthorized.__name__:
+                raise web.HTTPUnauthorized  # 4XX
+            case web.HTTPServiceUnavailable.__name__:
+                raise web.HTTPServiceUnavailable  # 5XX
+            case _:  # unexpected
+                raise SomeUnexpectedError(cls.EXPECTED_RAISE_UNEXPECTED_REASON)
+
+    @staticmethod
+    async def raise_error(_request: web.Request):
+        raise web.HTTPNotFound
 
-        return json.loads(json_dumps(data))
+    @staticmethod
+    async def raise_error_with_reason(_request: web.Request):
+        raise web.HTTPNotFound(reason="I did not find it")
+
+    @staticmethod
+    async def raise_success(_request: web.Request):
+        raise web.HTTPOk
+
+    @staticmethod
+    async def raise_success_with_reason(_request: web.Request):
+        raise web.HTTPOk(reason="I'm ok")
+
+    @staticmethod
+    async def raise_success_with_text(_request: web.Request):
+        # NOTE: explicitly NOT enveloped!
+        raise web.HTTPOk(reason="I'm ok", text=json.dumps({"ok": True}))
 
 
 @pytest.fixture
-def client(event_loop, aiohttp_client):
+def client(
+    event_loop: asyncio.AbstractEventLoop,
+    aiohttp_client: Callable,
+    monkeypatch: pytest.MonkeyPatch,
+):
+    monkeypatch.setenv("SC_BUILD_TARGET", "production")
+
     app = web.Application()
 
     # routes
     app.router.add_routes(
         [
-            web.get("/v1/health", Handlers.get_health, name="get_health"),
-            web.get("/v1/dict", Handlers.get_dict, name="get_dict"),
-            web.get("/v1/envelope", Handlers.get_envelope, name="get_envelope"),
-            web.get("/v1/list", Handlers.get_list, name="get_list"),
-            web.get("/v1/attobj", Handlers.get_attobj, name="get_attobj"),
-            web.get("/v1/string", Handlers.get_string, name="get_string"),
-            web.get("/v1/number", Handlers.get_number, name="get_number"),
-            web.get("/v1/mixed", Handlers.get_mixed, name="get_mixed"),
+            web.get(path, handler, name=handler.__name__)
+            for path, handler in [
+                ("/v1/health", Handlers.get_health),
+                ("/v1/dict", Handlers.get_dict),
+                ("/v1/envelope", Handlers.get_envelope),
+                ("/v1/list", Handlers.get_list),
+                ("/v1/obj", Handlers.get_obj),
+                ("/v1/string", Handlers.get_string),
+                ("/v1/number", Handlers.get_number),
+                ("/v1/mixed", Handlers.get_mixed),
+                # custom use cases
+                ("/v1/raise_exception", Handlers.raise_exception),
+                ("/v1/raise_error", Handlers.raise_error),
+                ("/v1/raise_error_with_reason", Handlers.raise_error_with_reason),
+                ("/v1/raise_success", Handlers.raise_success),
+                ("/v1/raise_success_with_reason", Handlers.raise_success_with_reason),
+                ("/v1/raise_success_with_text", Handlers.raise_success_with_text),
+            ]
         ]
     )
 
+    app.router.add_routes(
+        [
+            web.get(
+                "/free/raise_exception",
+                Handlers.raise_exception,
+                name="raise_exception_without_middleware",
+            )
+        ]
+    )
     # middlewares
     app.middlewares.append(error_middleware_factory(api_version="/v1"))
     app.middlewares.append(envelope_middleware_factory(api_version="/v1"))
@@ -112,14 +184,14 @@ def client(event_loop, aiohttp_client):
 @pytest.mark.parametrize(
     "path,expected_data",
     [
-        ("/health", Handlers.get("health")),
-        ("/dict", Handlers.get("dict")),
-        ("/envelope", Handlers.get("envelope")["data"]),
-        ("/list", Handlers.get("list")),
-        ("/attobj", Handlers.get("attobj")),
-        ("/string", Handlers.get("string")),
-        ("/number", Handlers.get("number")),
-        ("/mixed", Handlers.get("mixed")),
+        ("/health", Handlers.returns_value("health")),
+        ("/dict", Handlers.returns_value("dict")),
+        ("/envelope", Handlers.returns_value("envelope")["data"]),
+        ("/list", Handlers.returns_value("list")),
+        ("/obj", Handlers.returns_value("obj")),
+        ("/string", Handlers.returns_value("string")),
+        ("/number", Handlers.returns_value("number")),
+        ("/mixed", Handlers.returns_value("mixed")),
     ],
 )
 async def test_envelope_middleware(path: str, expected_data: Any, client: TestClient):
@@ -133,7 +205,7 @@ async def test_envelope_middleware(path: str, expected_data: Any, client: TestCl
     assert data == expected_data
 
 
-async def test_404_not_found(client: TestClient):
+async def test_404_not_found_when_entrypoint_not_exposed(client: TestClient):
     response = await client.get("/some-invalid-address-outside-api")
     payload = await response.text()
     assert response.status == status.HTTP_404_NOT_FOUND, payload
@@ -147,3 +219,65 @@ async def test_404_not_found(client: TestClient):
     data, error = unwrap_envelope(payload)
     assert error
     assert not data
+
+
+async def test_raised_unhandled_exception(
+    client: TestClient, caplog: pytest.LogCaptureFixture
+):
+    with caplog.at_level(logging.ERROR):
+        response = await client.get("/v1/raise_exception")
+
+        # respond the client with 500
+        assert response.status == status.HTTP_500_INTERNAL_SERVER_ERROR
+
+        # response model
+        data, error = unwrap_envelope(await response.json())
+        assert not data
+        assert error
+
+        # user friendly message with OEC reference
+        assert "OEC" in error["message"]
+        parsed_oec = parse_error_code(error["message"]).pop()
+        assert (
+            _FMSG_INTERNAL_ERROR_USER_FRIENDLY_WITH_OEC.format(error_code=parsed_oec)
+            == error["message"]
+        )
+
+        # avoids details
+        assert not error.get("errors")
+        assert not error.get("logs")
+
+        # - log sufficient information to diagnose the issue
+        #
+        # ERROR    servicelib.aiohttp.rest_middlewares:rest_middlewares.py:75 We apologize ... [OEC:128594540599840].
+        # {
+        # "exception_details": "Unexpected error",
+        # "error_code": "OEC:128594540599840",
+        # "context": {
+        #     "request.remote": "127.0.0.1",
+        #     "request.method": "GET",
+        #     "request.path": "/v1/raise_exception"
+        # },
+        # "tip": null
+        # }
+        # Traceback (most recent call last):
+        # File "/osparc-simcore/packages/service-library/src/servicelib/aiohttp/rest_middlewares.py", line 94, in _middleware_handler
+        #     return await handler(request)
+        #         ^^^^^^^^^^^^^^^^^^^^^^
+        # File "/osparc-simcore/packages/service-library/src/servicelib/aiohttp/rest_middlewares.py", line 186, in _middleware_handler
+        #     resp = await handler(request)
+        #         ^^^^^^^^^^^^^^^^^^^^^^
+        # File "/osparc-simcore/packages/service-library/tests/aiohttp/test_rest_middlewares.py", line 109, in raise_exception
+        #     raise SomeUnexpectedError(cls.EXPECTED_RAISE_UNEXPECTED_REASON)
+        # tests.aiohttp.test_rest_middlewares.SomeUnexpectedError: Unexpected error
+
+        assert response.method in caplog.text
+        assert response.url.path in caplog.text
+        assert "exception_details" in caplog.text
+        assert "request.remote" in caplog.text
+        assert "context" in caplog.text
+        assert SomeUnexpectedError.__name__ in caplog.text
+        assert Handlers.EXPECTED_RAISE_UNEXPECTED_REASON in caplog.text
+
+        # log OEC
+        assert "OEC:" in caplog.text
diff --git a/packages/service-library/tests/aiohttp/test_tracing.py b/packages/service-library/tests/aiohttp/test_tracing.py
index a0f03585670a..2a1cc30eeac0 100644
--- a/packages/service-library/tests/aiohttp/test_tracing.py
+++ b/packages/service-library/tests/aiohttp/test_tracing.py
@@ -2,97 +2,70 @@
 # pylint: disable=unused-argument
 # pylint: disable=unused-variable
 
-from asyncio import AbstractEventLoop
-from typing import Callable
+from collections.abc import Callable
 
 import pytest
 from aiohttp import web
-from aiohttp.client_reqrep import ClientResponse
 from aiohttp.test_utils import TestClient
-from servicelib.aiohttp import status
-from servicelib.aiohttp.rest_responses import _collect_http_exceptions
+from pydantic import ValidationError
 from servicelib.aiohttp.tracing import setup_tracing
+from settings_library.tracing import TracingSettings
 
-DEFAULT_JAEGER_BASE_URL = "http://jaeger:9411"
 
+@pytest.fixture
+def tracing_settings_in(request):
+    return request.param
 
-@pytest.fixture()
-def client(
-    event_loop: AbstractEventLoop,
-    aiohttp_client: Callable,
-    unused_tcp_port_factory: Callable,
-) -> TestClient:
-    ports = [unused_tcp_port_factory() for _ in range(2)]
 
-    async def redirect(request: web.Request) -> web.Response:
-        return web.HTTPFound(location="/return/200")
-
-    async def return_response(request: web.Request) -> web.Response:
-        code = int(request.match_info["code"])
-        return web.Response(status=code)
-
-    async def raise_response(request: web.Request):
-        status_code = int(request.match_info["code"])
-        status_to_http_exception = _collect_http_exceptions()
-        http_exception_cls = status_to_http_exception[status_code]
-        raise http_exception_cls(
-            reason=f"raised from raised_error with code {status_code}"
+@pytest.fixture()
+def set_and_clean_settings_env_vars(
+    monkeypatch: pytest.MonkeyPatch, tracing_settings_in
+):
+    if tracing_settings_in[0]:
+        monkeypatch.setenv(
+            "TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT", f"{tracing_settings_in[0]}"
+        )
+    if tracing_settings_in[1]:
+        monkeypatch.setenv(
+            "TRACING_OPENTELEMETRY_COLLECTOR_PORT", f"{tracing_settings_in[1]}"
         )
 
-    async def skip(request: web.Request):
-        return web.HTTPServiceUnavailable(reason="should not happen")
 
+@pytest.mark.parametrize(
+    "tracing_settings_in",
+    [
+        ("http://opentelemetry-collector", 4318),
+    ],
+    indirect=True,
+)
+async def test_valid_tracing_settings(
+    aiohttp_client: Callable,
+    set_and_clean_settings_env_vars: Callable,
+    tracing_settings_in,
+) -> TestClient:
     app = web.Application()
-    app.add_routes(
-        [
-            web.get("/redirect", redirect),
-            web.get("/return/{code}", return_response),
-            web.get("/raise/{code}", raise_response),
-            web.get("/skip", skip, name="skip"),
-        ]
-    )
-
-    print("Resources:")
-    for resource in app.router.resources():
-        print(resource)
-
-    # UNDER TEST ---
-    # SEE RoutesView to understand how resources can be iterated to get routes
-    resource = app.router["skip"]
-    routes_in_a_resource = list(resource)
-
+    service_name = "simcore_service_webserver"
+    tracing_settings = TracingSettings()
     setup_tracing(
         app,
-        service_name=f"{__name__}.client",
-        host="127.0.0.1",
-        port=ports[0],
-        jaeger_base_url=DEFAULT_JAEGER_BASE_URL,
-        skip_routes=routes_in_a_resource,
-    )
-
-    return event_loop.run_until_complete(
-        aiohttp_client(app, server_kwargs={"port": ports[0]})
+        service_name=service_name,
+        tracing_settings=tracing_settings,
     )
 
 
-async def test_setup_tracing(client: TestClient):
-    res: ClientResponse
-
-    # on error
-    for code in (status.HTTP_200_OK, status.HTTP_400_BAD_REQUEST):
-        res = await client.get(f"/return/{code}")
-
-        assert res.status == code, await res.text()
-        res = await client.get(f"/raise/{code}")
-        assert res.status == code, await res.text()
-
-    res = await client.get("/redirect")
-    # TODO: check it was redirected
-    assert res.status == 200, await res.text()
-
-    res = await client.get("/skip")
-    assert res.status == status.HTTP_503_SERVICE_UNAVAILABLE
-
-    # using POST instead of GET ->  HTTPMethodNotAllowed
-    res = await client.post("/skip")
-    assert res.status == status.HTTP_405_METHOD_NOT_ALLOWED, "GET and not POST"
+@pytest.mark.parametrize(
+    "tracing_settings_in",
+    [
+        ("http://opentelemetry-collector", 80),
+        ("opentelemetry-collector", 4318),
+        ("httsdasp://ot@##el-collector", 4318),
+    ],
+    indirect=True,
+)
+async def test_invalid_tracing_settings(
+    aiohttp_client: Callable,
+    set_and_clean_settings_env_vars: Callable,
+    tracing_settings_in,
+) -> TestClient:
+    with pytest.raises(ValidationError):
+        TracingSettings()
diff --git a/packages/service-library/tests/conftest.py b/packages/service-library/tests/conftest.py
index f069aeedd768..712746ccce97 100644
--- a/packages/service-library/tests/conftest.py
+++ b/packages/service-library/tests/conftest.py
@@ -76,9 +76,11 @@ async def get_redis_client_sdk(
     Callable[[RedisDatabase], AbstractAsyncContextManager[RedisClientSDK]]
 ]:
     @asynccontextmanager
-    async def _(database: RedisDatabase) -> AsyncIterator[RedisClientSDK]:
+    async def _(
+        database: RedisDatabase, decode_response: bool = True  # noqa: FBT002
+    ) -> AsyncIterator[RedisClientSDK]:
         redis_resources_dns = redis_service.build_redis_dsn(database)
-        client = RedisClientSDK(redis_resources_dns)
+        client = RedisClientSDK(redis_resources_dns, decode_responses=decode_response)
         assert client
         assert client.redis_dsn == redis_resources_dns
         await client.setup()
diff --git a/packages/service-library/tests/deferred_tasks/conftest.py b/packages/service-library/tests/deferred_tasks/conftest.py
index 642a67336b6b..00881e614715 100644
--- a/packages/service-library/tests/deferred_tasks/conftest.py
+++ b/packages/service-library/tests/deferred_tasks/conftest.py
@@ -9,8 +9,10 @@
 @pytest.fixture
 async def redis_client_sdk_deferred_tasks(
     get_redis_client_sdk: Callable[
-        [RedisDatabase], AbstractAsyncContextManager[RedisClientSDK]
+        [RedisDatabase, bool], AbstractAsyncContextManager[RedisClientSDK]
     ]
 ) -> AsyncIterator[RedisClientSDK]:
-    async with get_redis_client_sdk(RedisDatabase.DEFERRED_TASKS) as client:
+    async with get_redis_client_sdk(
+        RedisDatabase.DEFERRED_TASKS, decode_response=False
+    ) as client:
         yield client
diff --git a/packages/service-library/tests/deferred_tasks/example_app.py b/packages/service-library/tests/deferred_tasks/example_app.py
index 1962d0b42320..8faea8d76ff2 100644
--- a/packages/service-library/tests/deferred_tasks/example_app.py
+++ b/packages/service-library/tests/deferred_tasks/example_app.py
@@ -8,6 +8,7 @@
 from uuid import uuid4
 
 from pydantic import NonNegativeInt
+from redis.asyncio import Redis
 from servicelib.deferred_tasks import (
     BaseDeferredHandler,
     DeferredContext,
@@ -54,21 +55,22 @@ async def on_result(cls, result: str, context: DeferredContext) -> None:
 
 class InMemoryLists:
     def __init__(self, redis_settings: RedisSettings, port: int) -> None:
-        self.redis_client_sdk = RedisClientSDK(
-            redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS)
-        )
+        # NOTE: RedisClientSDK is not required here but it's used to easily construct
+        # a redis connection
+        self.redis: Redis = RedisClientSDK(
+            redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS),
+            decode_responses=True,
+        ).redis
         self.port = port
 
     def _get_queue_name(self, queue_name: str) -> str:
         return f"in_memory_lists::{queue_name}.{self.port}"
 
     async def append_to(self, queue_name: str, value: Any) -> None:
-        await self.redis_client_sdk.redis.rpush(self._get_queue_name(queue_name), value)  # type: ignore
+        await self.redis.rpush(self._get_queue_name(queue_name), value)  # type: ignore
 
     async def get_all_from(self, queue_name: str) -> list:
-        return await self.redis_client_sdk.redis.lrange(
-            self._get_queue_name(queue_name), 0, -1
-        )  # type: ignore
+        return await self.redis.lrange(self._get_queue_name(queue_name), 0, -1)  # type: ignore
 
 
 class ExampleApp:
@@ -79,18 +81,19 @@ def __init__(
         in_memory_lists: InMemoryLists,
         max_workers: NonNegativeInt,
     ) -> None:
-        self._redis_client_sdk = RedisClientSDK(
-            redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS)
+        self._redis_client = RedisClientSDK(
+            redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS),
+            decode_responses=False,
         )
         self._manager = DeferredManager(
             rabbit_settings,
-            self._redis_client_sdk,
+            self._redis_client,
             globals_context={"in_memory_lists": in_memory_lists},
             max_workers=max_workers,
         )
 
     async def setup(self) -> None:
-        await self._redis_client_sdk.setup()
+        await self._redis_client.setup()
         await self._manager.setup()
 
 
diff --git a/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py b/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py
index 9f3451058bfb..a5b45ed80d95 100644
--- a/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py
+++ b/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py
@@ -52,7 +52,10 @@ class MockKeys(StrAutoEnum):
 async def redis_client_sdk(
     redis_service: RedisSettings,
 ) -> AsyncIterable[RedisClientSDK]:
-    sdk = RedisClientSDK(redis_service.build_redis_dsn(RedisDatabase.DEFERRED_TASKS))
+    sdk = RedisClientSDK(
+        redis_service.build_redis_dsn(RedisDatabase.DEFERRED_TASKS),
+        decode_responses=False,
+    )
     await sdk.setup()
     yield sdk
     await sdk.shutdown()
diff --git a/packages/service-library/tests/deferred_tasks/test_deferred_tasks.py b/packages/service-library/tests/deferred_tasks/test_deferred_tasks.py
index 9ea22f87ed1b..a03b87c41512 100644
--- a/packages/service-library/tests/deferred_tasks/test_deferred_tasks.py
+++ b/packages/service-library/tests/deferred_tasks/test_deferred_tasks.py
@@ -9,7 +9,7 @@
 import random
 import sys
 from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable
-from contextlib import AbstractAsyncContextManager, AsyncExitStack
+from contextlib import AbstractAsyncContextManager, AsyncExitStack, suppress
 from pathlib import Path
 from typing import Any, Protocol
 
@@ -61,10 +61,12 @@ async def stop(self, *, graceful: bool = False):
             assert self.process is not None
             assert self.pid is not None
 
-        parent = psutil.Process(self.pid)
-        children = parent.children(recursive=True)
-        for child_pid in [child.pid for child in children]:
-            psutil.Process(child_pid).kill()
+        with suppress(psutil.NoSuchProcess):
+            parent = psutil.Process(self.pid)
+            children = parent.children(recursive=True)
+            for child_pid in [child.pid for child in children]:
+                with suppress(psutil.NoSuchProcess):
+                    psutil.Process(child_pid).kill()
 
         self.process = None
         self.pid = None
diff --git a/packages/service-library/tests/fastapi/test_openapi.py b/packages/service-library/tests/fastapi/test_openapi.py
index 54f7e0177997..7df0ab63a9f0 100644
--- a/packages/service-library/tests/fastapi/test_openapi.py
+++ b/packages/service-library/tests/fastapi/test_openapi.py
@@ -8,8 +8,12 @@
 import starlette.routing
 from fastapi.applications import FastAPI
 from fastapi.routing import APIRouter
-from openapi_spec_validator.exceptions import OpenAPISpecValidatorError
-from openapi_spec_validator.shortcuts import get_validator_cls
+from openapi_spec_validator.exceptions import (
+    OpenAPISpecValidatorError,  # pylint: disable=no-name-in-module
+)
+from openapi_spec_validator.shortcuts import (
+    get_validator_cls,  # pylint: disable=no-name-in-module
+)
 from servicelib.fastapi.openapi import (
     override_fastapi_openapi_method,
     set_operation_id_as_handler_function_name,
@@ -44,7 +48,7 @@ def test_exclusive_min_openapi_issue(app: FastAPI):
 
 def test_overriding_openapi_method(app: FastAPI):
     assert not hasattr(app, "_original_openapi")
-    #assert app.openapi.__doc__ is None # PC why was this set to check that it is none? it's coming from the base fastapi applicaiton and now they provide some docs
+    # assert app.openapi.__doc__ is None # PC why was this set to check that it is None? It's coming from the base fastapi application and now they provide some docs
 
     override_fastapi_openapi_method(app)
 
diff --git a/packages/service-library/tests/fastapi/test_tracing.py b/packages/service-library/tests/fastapi/test_tracing.py
new file mode 100644
index 000000000000..9364cb75a34a
--- /dev/null
+++ b/packages/service-library/tests/fastapi/test_tracing.py
@@ -0,0 +1,95 @@
+# pylint: disable=all
+
+
+import random
+import string
+from collections.abc import Callable
+from typing import Any
+
+import pytest
+from fastapi import FastAPI
+from pydantic import ValidationError
+from servicelib.fastapi.tracing import setup_tracing
+from settings_library.tracing import TracingSettings
+
+
+@pytest.fixture
+def mocked_app() -> FastAPI:
+    return FastAPI(title="opentelemetry example")
+
+
+@pytest.fixture
+def tracing_settings_in(request: pytest.FixtureRequest) -> dict[str, Any]:
+    return request.param
+
+
+@pytest.fixture()
+def set_and_clean_settings_env_vars(
+    monkeypatch: pytest.MonkeyPatch, tracing_settings_in: Callable[[], dict[str, Any]]
+) -> None:
+    if tracing_settings_in[0]:
+        monkeypatch.setenv(
+            "TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT", f"{tracing_settings_in[0]}"
+        )
+    if tracing_settings_in[1]:
+        monkeypatch.setenv(
+            "TRACING_OPENTELEMETRY_COLLECTOR_PORT", f"{tracing_settings_in[1]}"
+        )
+
+
+@pytest.mark.parametrize(
+    "tracing_settings_in",
+    [
+        ("http://opentelemetry-collector", 4318),
+        ("http://opentelemetry-collector", "4318"),
+    ],
+    indirect=True,
+)
+async def test_valid_tracing_settings(
+    mocked_app: FastAPI,
+    set_and_clean_settings_env_vars: Callable[[], None],
+    tracing_settings_in: Callable[[], dict[str, Any]],
+):
+    tracing_settings = TracingSettings()
+    setup_tracing(
+        mocked_app,
+        tracing_settings=tracing_settings,
+        service_name="Mock-Openetlemetry-Pytest",
+    )
+    # idempotency
+    setup_tracing(
+        mocked_app,
+        tracing_settings=tracing_settings,
+        service_name="Mock-Openetlemetry-Pytest",
+    )
+
+
+@pytest.mark.parametrize(
+    "tracing_settings_in",
+    [
+        ("http://opentelemetry-collector", 80),
+        ("http://opentelemetry-collector", 1238712936),
+        ("opentelemetry-collector", 4318),
+        ("httsdasp://ot@##el-collector", 4318),
+        (" !@#$%^&*()[]{};:,<>?\\|`~+=/'\"", 4318),
+        # The following exceeds max DNS name length
+        (
+            "".join(random.choice(string.ascii_letters) for _ in range(300)),
+            "1238712936",
+        ),  # noqa: S311
+    ],
+    indirect=True,
+)
+async def test_invalid_tracing_settings(
+    mocked_app: FastAPI,
+    set_and_clean_settings_env_vars: Callable[[], None],
+    tracing_settings_in: Callable[[], dict[str, Any]],
+):
+    app = mocked_app
+    with pytest.raises((BaseException, ValidationError, TypeError)):  # noqa: PT012
+        tracing_settings = TracingSettings()
+        setup_tracing(
+            app,
+            tracing_settings=tracing_settings,
+            service_name="Mock-Openetlemetry-Pytest",
+        )
diff --git a/packages/service-library/tests/test_archiving_utils.py b/packages/service-library/tests/test_archiving_utils.py
index bb6f2b486c4e..073111eb22b2 100644
--- a/packages/service-library/tests/test_archiving_utils.py
+++ b/packages/service-library/tests/test_archiving_utils.py
@@ -11,17 +11,20 @@
 import secrets
 import string
 import tempfile
+from collections.abc import AsyncIterable, Callable, Iterable, Iterator
 from concurrent.futures import ProcessPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Callable, Iterable, Iterator
 
+import numpy
 import pytest
 from faker import Faker
+from PIL import Image
 from pydantic import ByteSize, TypeAdapter
 from pytest_benchmark.plugin import BenchmarkFixture
 from servicelib import archiving_utils
 from servicelib.archiving_utils import ArchiveError, archive_dir, unarchive_dir
+from servicelib.file_utils import remove_directory
 
 
 def _print_tree(path: Path, level=0):
@@ -597,3 +600,90 @@ def run_async_test(*args, **kwargs):
         )
 
     benchmark(run_async_test)
+
+
+def _touch_all_files_in_path(path_to_archive: Path) -> None:
+    for path in path_to_archive.rglob("*"):
+        print("touching", path)
+        path.touch()
+
+
+@pytest.fixture
+async def mixed_file_types(tmp_path: Path, faker: Faker) -> AsyncIterable[Path]:
+    base_dir = tmp_path / "mixed_types_dir"
+    base_dir.mkdir()
+
+    # mixed small text files and binary files
+    (base_dir / "empty").mkdir()
+    (base_dir / "d1").mkdir()
+    (base_dir / "d1" / "f1.txt").write_text(faker.text())
+    (base_dir / "d1" / "b2.bin").write_bytes(faker.json_bytes())
+    (base_dir / "d1" / "sd1").mkdir()
+    (base_dir / "d1" / "sd1" / "f1.txt").write_text(faker.text())
+    (base_dir / "d1" / "sd1" / "b2.bin").write_bytes(faker.json_bytes())
+    (base_dir / "images").mkdir()
+
+    # images cause issues with zipping: the content below used to produce different
+    # hashes for the resulting zip files
+    for i in range(2):
+        image_dir = base_dir / f"images{i}"
+        image_dir.mkdir()
+        for n in range(50):
+            a = numpy.random.rand(900, 900, 3) * 255  # noqa: NPY002
+            im_out = Image.fromarray(a.astype("uint8")).convert("RGB")
+            image_path = image_dir / f"out{n}.jpg"
+            im_out.save(image_path)
+
+    print("mixed_types_dir ---")
+    _print_tree(base_dir)
+
+    yield base_dir
+
+    await remove_directory(base_dir)
+    assert not base_dir.exists()
+
+
+@pytest.mark.parametrize(
+    "store_relative_path, compress",
+    [
+        # test that all possible combinations still work
+        pytest.param(False, False, id="no_relative_path_no_compress"),
+        pytest.param(False, True, id="no_relative_path_with_compression"),
+        pytest.param(True, False, id="nodeports_options"),
+        pytest.param(True, True, id="with_relative_path_with_compression"),
+    ],
+)
+async def test_regression_archive_hash_does_not_change(
+    mixed_file_types: Path,
+    tmp_path: Path,
+    store_relative_path: bool,
+    compress: bool,
+):
+    destination_path = tmp_path / "archives_to_compare"
+    destination_path.mkdir(parents=True, exist_ok=True)
+
+    first_archive = destination_path / "first"
+    second_archive = destination_path / "second"
+    assert not first_archive.exists()
+    assert not second_archive.exists()
+    assert first_archive != second_archive
+
+    await archive_dir(
+        mixed_file_types,
+        first_archive,
+        compress=compress,
+        store_relative_path=store_relative_path,
+    )
+
+    _touch_all_files_in_path(mixed_file_types)
+
+    await archive_dir(
+        mixed_file_types,
+        second_archive,
+        compress=compress,
+        store_relative_path=store_relative_path,
+    )
+
+    _, first_hash = _compute_hash(first_archive)
+    _, second_hash = _compute_hash(second_archive)
+    assert first_hash == second_hash
diff --git a/packages/service-library/tests/test_error_codes.py b/packages/service-library/tests/test_error_codes.py
index 0d88fa978ba9..f738ebe1e966 100644
--- a/packages/service-library/tests/test_error_codes.py
+++ b/packages/service-library/tests/test_error_codes.py
@@ -5,50 +5,50 @@
 
 import logging
 
+import pytest
 from servicelib.error_codes import create_error_code, parse_error_code
 
 logger = logging.getLogger(__name__)
 
 
-def test_error_code_use_case(caplog):
+def test_error_code_use_case(caplog: pytest.LogCaptureFixture):
     """use case for error-codes"""
-    try:
+    with pytest.raises(RuntimeError) as exc_info:
         raise RuntimeError("Something unexpected went wrong")
-    except Exception as err:
-        # 1. Unexpected ERROR
 
-        # 2. create error-code
-        error_code = create_error_code(err)
+    # 1. Unexpected ERROR
+    err = exc_info.value
 
-        # 3. log all details in service
-        caplog.clear()
+    # 2. create error-code
+    error_code = create_error_code(err)
 
-        # Can add a formatter that prefix error-codes
-        syslog = logging.StreamHandler()
-        syslog.setFormatter(
-            logging.Formatter("%(asctime)s %(error_code)s : %(message)s")
-        )
-        logger.addHandler(syslog)
+    # 3. log all details in service
+    caplog.clear()
 
-        logger.error("Fake Unexpected error", extra={"error_code": error_code})
+    # Can add a formatter that prefix error-codes
+    syslog = logging.StreamHandler()
+    syslog.setFormatter(logging.Formatter("%(asctime)s %(error_code)s : %(message)s"))
+    logger.addHandler(syslog)
 
-        # logs something like E.g. 2022-07-06 14:31:13,432 OEC:140350117529856 : Fake Unexpected error
-        assert parse_error_code(
-            f"2022-07-06 14:31:13,432 {error_code} : Fake Unexpected error"
-        ) == {
-            error_code,
-        }
+    logger.exception("Fake Unexpected error", extra={"error_code": error_code})
 
-        assert caplog.records[0].error_code == error_code
-        assert caplog.records[0]
+    # logs something like E.g. 2022-07-06 14:31:13,432 OEC:140350117529856 : Fake Unexpected error
+    assert parse_error_code(
+        f"2022-07-06 14:31:13,432 {error_code} : Fake Unexpected error"
+    ) == {
+        error_code,
+    }
 
-        logger.error("Fake without error_code")
+    assert caplog.records[0].error_code == error_code
+    assert caplog.records[0]
 
-        # 4. inform user (e.g. with new error or sending message)
-        user_message = (
-            f"This is a user-friendly message to inform about an error. [{error_code}]"
-        )
+    logger.exception("Fake without error_code")
 
-        assert parse_error_code(user_message) == {
-            error_code,
-        }
+    # 4. inform user (e.g. with new error or sending message)
+    user_message = (
+        f"This is a user-friendly message to inform about an error. [{error_code}]"
+    )
+
+    assert parse_error_code(user_message) == {
+        error_code,
+    }
diff --git a/packages/service-library/tests/test_logging_utils.py b/packages/service-library/tests/test_logging_utils.py
index 024ce9966aac..ca92a2759b4b 100644
--- a/packages/service-library/tests/test_logging_utils.py
+++ b/packages/service-library/tests/test_logging_utils.py
@@ -5,15 +5,20 @@
 from typing import Any
 
 import pytest
+from common_library.errors_classes import OsparcErrorMixin
 from faker import Faker
+from servicelib.error_codes import create_error_code
 from servicelib.logging_utils import (
     LogExtra,
     LogLevelInt,
     LogMessageStr,
+    create_troubleshotting_log_message,
+    get_log_record_extra,
     guess_message_log_level,
     log_context,
     log_decorator,
     log_exceptions,
+    set_parent_module_log_level,
 )
 
 _logger = logging.getLogger(__name__)
@@ -322,3 +327,96 @@ def test_log_exceptions_and_reraise(caplog: pytest.LogCaptureFixture, level: int
 
     assert len(caplog.records) == (1 if level != logging.NOTSET else 0)
     assert all(r.levelno == level for r in caplog.records)
+
+
+def test_set_parent_module_log_level_(caplog: pytest.LogCaptureFixture):
+    caplog.clear()
+    # emulates service logger
+    logging.root.setLevel(logging.WARNING)
+
+    parent = logging.getLogger("parent")
+    child = logging.getLogger("parent.child")
+
+    assert parent.level == logging.NOTSET
+    assert child.level == logging.NOTSET
+
+    parent.debug("parent debug")
+    child.debug("child debug")
+
+    parent.info("parent info")
+    child.info("child info")
+
+    parent.warning("parent warning")
+    child.warning("child warning")
+
+    assert "parent debug" not in caplog.text
+    assert "child debug" not in caplog.text
+
+    assert "parent info" not in caplog.text
+    assert "child info" not in caplog.text
+
+    assert "parent warning" in caplog.text
+    assert "child warning" in caplog.text
+
+    caplog.clear()
+    set_parent_module_log_level("parent.child", logging.INFO)
+
+    assert parent.level == logging.INFO
+    assert child.level == logging.NOTSET
+
+    parent.debug("parent debug")
+    child.debug("child debug")
+
+    parent.info("parent info")
+    child.info("child info")
+
+    parent.warning("parent warning")
+    child.warning("child warning")
+
+    assert "parent debug" not in caplog.text
+    assert "child debug" not in caplog.text
+
+    assert "parent info" in caplog.text
+    assert "child info" in caplog.text
+
+    assert "parent warning" in caplog.text
+    assert "child warning" in caplog.text
+
+
+def test_create_troubleshotting_log_message(caplog: pytest.LogCaptureFixture):
+    class MyError(OsparcErrorMixin, RuntimeError):
+        msg_template = "My error {user_id}"
+
+    with pytest.raises(MyError) as exc_info:
+        raise MyError(user_id=123, product_name="foo")
+
+    exc = exc_info.value
+    error_code = create_error_code(exc)
+    log_msg = create_troubleshotting_log_message(
+        f"Nice message to user [{error_code}]",
+        exc,
+        error_code=error_code,
+        error_context=exc.error_context(),
+        tip="This is a test error",
+    )
+
+    with caplog.at_level(logging.WARNING):
+        root_logger = logging.getLogger()
+        root_logger.exception(
+            log_msg, extra=get_log_record_extra(error_code=error_code)
+        )
+
+        # ERROR    root:test_logging_utils.py:417 Nice message to user [OEC:126055703573984].
+        # {
+        # "exception_details": "My error 123",
+        # "error_code": "OEC:126055703573984",
+        # "context": {
+        #     "user_id": 123,
+        #     "product_name": "foo"
+        # },
+        # "tip": "This is a test error"
+        # }
+
+        assert error_code in caplog.text
+        assert "user_id" in caplog.text
+        assert "product_name" in caplog.text
diff --git a/packages/settings-library/requirements/_base.txt b/packages/settings-library/requirements/_base.txt
index fc4a246dc958..64117c65d55d 100644
--- a/packages/settings-library/requirements/_base.txt
+++ b/packages/settings-library/requirements/_base.txt
@@ -21,13 +21,13 @@ pygments==2.18.0
     # via rich
 python-dotenv==1.0.1
     # via pydantic-settings
-rich==13.7.1
+rich==13.8.1
     # via
     #   -r requirements/_base.in
     #   typer
 shellingham==1.5.4
     # via typer
-typer==0.12.4
+typer==0.12.5
     # via -r requirements/_base.in
 typing-extensions==4.12.2
     # via
diff --git a/packages/settings-library/requirements/_test.txt b/packages/settings-library/requirements/_test.txt
index 56bf15d9c2dc..b9152c956e82 100644
--- a/packages/settings-library/requirements/_test.txt
+++ b/packages/settings-library/requirements/_test.txt
@@ -2,7 +2,7 @@ coverage==7.6.1
     # via
     #   -r requirements/_test.in
     #   pytest-cov
-faker==27.0.0
+faker==29.0.0
     # via -r requirements/_test.in
 iniconfig==2.0.0
     # via pytest
@@ -12,7 +12,7 @@ packaging==24.1
     #   pytest-sugar
 pluggy==1.5.0
     # via pytest
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -r requirements/_test.in
     #   pytest-cov
diff --git a/packages/settings-library/requirements/_tools.txt b/packages/settings-library/requirements/_tools.txt
index d14257822b06..643018f44287 100644
--- a/packages/settings-library/requirements/_tools.txt
+++ b/packages/settings-library/requirements/_tools.txt
@@ -1,8 +1,8 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
     # via -r requirements/../../../requirements/devenv.txt
-build==1.2.1
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
@@ -17,9 +17,9 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
@@ -27,7 +27,7 @@ isort==5.13.2
     #   pylint
 mccabe==0.7.0
     # via pylint
-mypy==1.11.1
+mypy==1.11.2
     # via -r requirements/../../../requirements/devenv.txt
 mypy-extensions==1.0.0
     # via
@@ -46,14 +46,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.0
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -63,9 +63,9 @@ pyyaml==6.0.2
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   pre-commit
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
-setuptools==73.0.1
+setuptools==75.1.0
     # via pip-tools
 tomlkit==0.13.2
     # via pylint
@@ -73,7 +73,7 @@ typing-extensions==4.12.2
     # via
     #   -c requirements/_base.txt
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
 wheel==0.44.0
     # via pip-tools
diff --git a/packages/settings-library/src/settings_library/basic_types.py b/packages/settings-library/src/settings_library/basic_types.py
index d912b5d4e39d..4c275397e1b3 100644
--- a/packages/settings-library/src/settings_library/basic_types.py
+++ b/packages/settings-library/src/settings_library/basic_types.py
@@ -1,5 +1,5 @@
 #
-# NOTE: This files copies some of the types from models_library.basic_types
+# NOTE: This file copies some of the types from models_library.basic_types
 #       This is a minor evil to avoid the maintenance burden that creates
 #       an extra dependency to a larger models_library (intra-repo library)
 
@@ -51,6 +51,3 @@ class BuildTargetEnum(str, Enum):
 
 # non-empty bounded string used as identifier
 # e.g. "123" or "name_123" or "fa327c73-52d8-462a-9267-84eeaf0f90e3" but NOT ""
-IDStr: TypeAlias = Annotated[
-    str, StringConstraints(strip_whitespace=True, min_length=1, max_length=50)
-]
diff --git a/packages/settings-library/src/settings_library/postgres.py b/packages/settings-library/src/settings_library/postgres.py
index ed7377877be7..883d14c3bb4c 100644
--- a/packages/settings-library/src/settings_library/postgres.py
+++ b/packages/settings-library/src/settings_library/postgres.py
@@ -1,5 +1,5 @@
-import urllib.parse
 from functools import cached_property
+from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
 
 from pydantic import (
     AliasChoices,
@@ -86,11 +86,23 @@ def dsn_with_async_sqlalchemy(self) -> str:
     def dsn_with_query(self) -> str:
         """Some clients do not support queries in the dsn"""
         dsn = self.dsn
+        return self._update_query(dsn)
+
+    def _update_query(self, uri: str) -> str:
+        # SEE https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS
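+        # e.g. "postgresql://scu:***@postgres:5432/simcoredb" becomes
+        # "postgresql://scu:***@postgres:5432/simcoredb?application_name=<POSTGRES_CLIENT_NAME>" (illustrative)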
+        new_params: dict[str, str] = {}
         if self.POSTGRES_CLIENT_NAME:
-            dsn += "?" + urllib.parse.urlencode(
-                {"application_name": self.POSTGRES_CLIENT_NAME}
-            )
-        return dsn
+            new_params = {
+                "application_name": self.POSTGRES_CLIENT_NAME,
+            }
+
+        if new_params:
+            parsed_uri = urlparse(uri)
+            query = dict(parse_qsl(parsed_uri.query))
+            query.update(new_params)
+            updated_query = urlencode(query)
+            return urlunparse(parsed_uri._replace(query=updated_query))
+        return uri
 
     model_config = SettingsConfigDict(
         json_schema_extra={
diff --git a/packages/settings-library/src/settings_library/redis.py b/packages/settings-library/src/settings_library/redis.py
index 6f9d7ad9d380..2a05f08b9e2a 100644
--- a/packages/settings-library/src/settings_library/redis.py
+++ b/packages/settings-library/src/settings_library/redis.py
@@ -1,4 +1,4 @@
-from enum import Enum
+from enum import IntEnum
 
 from pydantic import TypeAdapter
 from pydantic.networks import RedisDsn
@@ -8,7 +8,7 @@
 from .basic_types import PortInt
 
 
-class RedisDatabase(int, Enum):
+class RedisDatabase(IntEnum):
     RESOURCES = 0
     LOCKS = 1
     VALIDATION_CODES = 2
@@ -17,10 +17,12 @@ class RedisDatabase(int, Enum):
     ANNOUNCEMENTS = 5
     DISTRIBUTED_IDENTIFIERS = 6
     DEFERRED_TASKS = 7
+    DYNAMIC_SERVICES = 8
 
 
 class RedisSettings(BaseCustomSettings):
     # host
+    REDIS_SECURE: bool = False
     REDIS_HOST: str = "redis"
     REDIS_PORT: PortInt = TypeAdapter(PortInt).validate_python(6789)
 
@@ -29,15 +31,14 @@ class RedisSettings(BaseCustomSettings):
     REDIS_PASSWORD: SecretStr | None = None
 
     def build_redis_dsn(self, db_index: RedisDatabase) -> str:
-        return str(
-            RedisDsn.build(  # pylint: disable=no-member
-                scheme="redis",
-                username=self.REDIS_USER or None,
-                password=(
-                    self.REDIS_PASSWORD.get_secret_value() if self.REDIS_PASSWORD else None
-                ),
-                host=self.REDIS_HOST,
-                port=self.REDIS_PORT,
-                path=f"/{db_index}",
-            )
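+        # e.g. "redis://:***@redis:6789/1", or "rediss://:***@redis:6789/1" when REDIS_SECURE=True (illustrative)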
+        url = RedisDsn.build(
+            scheme="rediss" if self.REDIS_SECURE else "redis",
+            username=self.REDIS_USER or None,
+            password=(
+                self.REDIS_PASSWORD.get_secret_value() if self.REDIS_PASSWORD else None
+            ),
+            host=self.REDIS_HOST,
+            port=self.REDIS_PORT,
+            path=f"/{db_index}",
         )
+        return f"{url}"
diff --git a/packages/settings-library/src/settings_library/s3.py b/packages/settings-library/src/settings_library/s3.py
index 95268b419205..55d343ee9216 100644
--- a/packages/settings-library/src/settings_library/s3.py
+++ b/packages/settings-library/src/settings_library/s3.py
@@ -1,10 +1,10 @@
 from typing import Annotated
 
+from common_library.pydantic_basic_types import IDStr
 from pydantic import AnyHttpUrl, BeforeValidator, Field, TypeAdapter
 from pydantic_settings import SettingsConfigDict
 
 from .base import BaseCustomSettings
-from .basic_types import IDStr
 
 ANY_HTTP_URL_ADAPTER: TypeAdapter = TypeAdapter(AnyHttpUrl)
 
diff --git a/packages/settings-library/src/settings_library/tracing.py b/packages/settings-library/src/settings_library/tracing.py
index 85b7abae5ffd..b9981c1166f5 100644
--- a/packages/settings-library/src/settings_library/tracing.py
+++ b/packages/settings-library/src/settings_library/tracing.py
@@ -1,3 +1,4 @@
+from common_library.pydantic_basic_types import RegisteredPortInt
 from pydantic import AliasChoices, AnyUrl, Field, TypeAdapter
 
 from .base import BaseCustomSettings
@@ -19,3 +20,9 @@ class TracingSettings(BaseCustomSettings):
         description="Name of the application connecting the tracing service",
         validation_alias=AliasChoices("HOST", "HOSTNAME", "TRACING_CLIENT_NAME"),
     )
+    TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: AnyUrl = Field(
+        ..., description="Opentelemetry compatible collector endpoint"
+    )
+    TRACING_OPENTELEMETRY_COLLECTOR_PORT: RegisteredPortInt = Field(
+        ..., description="Opentelemetry compatible collector port"
+    )
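+    # e.g. endpoint "http://opentelemetry-collector" with port 4318 (illustrative values)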
diff --git a/packages/settings-library/tests/test__pydantic_settings.py b/packages/settings-library/tests/test__pydantic_settings.py
index bdc536387fc6..82d952fc3ae8 100644
--- a/packages/settings-library/tests/test__pydantic_settings.py
+++ b/packages/settings-library/tests/test__pydantic_settings.py
@@ -16,6 +16,7 @@
 from pydantic import ValidationInfo, field_validator
 from pydantic.fields import PydanticUndefined
 from pydantic_settings import BaseSettings
+from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict
 
 
 def assert_field_specs(
@@ -138,10 +139,15 @@ def test_construct(monkeypatch):
     print(settings_from_init.model_dump_json(exclude_unset=True, indent=1))
 
     # from env vars
-    monkeypatch.setenv("VALUE", "1")
-    monkeypatch.setenv("VALUE_REQUIRED_AS_WELL", "10")
-    monkeypatch.setenv("VALUE_NULLABLE_REQUIRED", "null")
-    monkeypatch.setenv("VALUE_NULLABLE_REQUIRED_AS_WELL", None)
+    setenvs_from_dict(
+        monkeypatch,
+        {
+            "VALUE_NULLABLE_REQUIRED_AS_WELL": "null",
+            "VALUE_NULLABLE_REQUIRED": "null",
+            "VALUE_REQUIRED_AS_WELL": "10",
+            "VALUE": "1",
+        },
+    )  # WARNING: setting this env to None would not work w/o ``parse_none`` validator! bug???
 
     settings_from_env = Settings()  # type: ignore[call-arg]
     print(settings_from_env.model_dump_json(exclude_unset=True, indent=1))
diff --git a/packages/settings-library/tests/test_postgres.py b/packages/settings-library/tests/test_postgres.py
index c191f0ea37e1..e64d79526d33 100644
--- a/packages/settings-library/tests/test_postgres.py
+++ b/packages/settings-library/tests/test_postgres.py
@@ -3,7 +3,11 @@
 # pylint: disable=unused-variable
 
 
+from urllib.parse import urlparse
+
 import pytest
+from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict
+from pytest_simcore.helpers.typing_env import EnvVarsDict
 from settings_library.postgres import PostgresSettings
 
 
@@ -12,24 +16,30 @@ def env_file():
     return ".env-sample"
 
 
-def test_cached_property_dsn(mock_environment: dict):
+@pytest.fixture
+def mock_environment(mock_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch):
+    return mock_environment | setenvs_from_dict(
+        monkeypatch, {"POSTGRES_CLIENT_NAME": "Some &43 funky name"}
+    )
+
 
-    settings = PostgresSettings()   # type: ignore[call-arg]
+def test_cached_property_dsn(mock_environment: EnvVarsDict):
+
+    settings = PostgresSettings.create_from_envs()
 
     # all are upper-case
     assert all(key == key.upper() for key in settings.model_dump())
-    
+
     assert settings.dsn
 
     # dsn is computed from the other fields
     assert "dsn" not in settings.model_dump()
 
 
-def test_dsn_with_query(mock_environment: dict, monkeypatch):
-
-    settings = PostgresSettings()   # type: ignore[call-arg]
+def test_dsn_with_query(mock_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch):
+    settings = PostgresSettings()
 
-    assert not settings.POSTGRES_CLIENT_NAME
+    assert settings.POSTGRES_CLIENT_NAME
     assert settings.dsn == "postgresql://foo:secret@localhost:5432/foodb"
 
     # now with app
@@ -39,6 +49,24 @@ def test_dsn_with_query(mock_environment: dict, monkeypatch):
 
     assert settings_with_app.POSTGRES_CLIENT_NAME
     assert (
-        settings_with_app.dsn_with_query
+        settings.dsn_with_query
         == "postgresql://foo:secret@localhost:5432/foodb?application_name=Some+%2643+funky+name"
     )
+
+    with monkeypatch.context() as patch:
+        patch.delenv("POSTGRES_CLIENT_NAME")
+        settings = PostgresSettings()
+
+        assert not settings.POSTGRES_CLIENT_NAME
+        assert settings.dsn == settings.dsn_with_query
+
+
+def test_dsn_with_async_sqlalchemy_has_query(
+    mock_environment: EnvVarsDict, monkeypatch
+):
+    settings = PostgresSettings()
+
+    parsed_url = urlparse(settings.dsn_with_async_sqlalchemy)
+    assert parsed_url.scheme.split("+") == ["postgresql", "asyncpg"]
+
+    assert not parsed_url.query
diff --git a/packages/simcore-sdk/requirements/_base.txt b/packages/simcore-sdk/requirements/_base.txt
index 28d31ff4fb2b..3df1342c216f 100644
--- a/packages/simcore-sdk/requirements/_base.txt
+++ b/packages/simcore-sdk/requirements/_base.txt
@@ -1,10 +1,12 @@
 aio-pika==9.4.3
     # via -r requirements/../../../packages/service-library/requirements/_base.in
 aiocache==0.12.2
-    # via -r requirements/_base.in
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/_base.in
 aiodebug==2.3.0
     # via -r requirements/../../../packages/service-library/requirements/_base.in
-aiodocker==0.22.2
+aiodocker==0.23.0
     # via -r requirements/../../../packages/service-library/requirements/_base.in
 aiofiles==24.1.0
     # via
@@ -25,15 +27,15 @@ aiohttp==3.10.5
     #   aiodocker
 aiopg==1.4.0
     # via -r requirements/_base.in
-aiormq==6.8.0
+aiormq==6.8.1
     # via aio-pika
 aiosignal==1.3.1
     # via aiohttp
-alembic==1.13.2
+alembic==1.13.3
     # via -r requirements/../../../packages/postgres-database/requirements/_base.in
 annotated-types==0.7.0
     # via pydantic
-anyio==4.4.0
+anyio==4.6.0
     # via
     #   fast-depends
     #   faststream
@@ -55,15 +57,33 @@ attrs==24.2.0
     #   aiohttp
     #   jsonschema
     #   referencing
+certifi==2024.8.30
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   requests
+charset-normalizer==3.3.2
+    # via requests
 click==8.1.7
     # via typer
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
 dnspython==2.6.1
     # via email-validator
 email-validator==2.2.0
     # via pydantic
-fast-depends==2.4.8
+fast-depends==2.4.11
     # via faststream
-faststream==0.5.18
+faststream==0.5.23
     # via -r requirements/../../../packages/service-library/requirements/_base.in
 flexcache==0.3
     # via pint
@@ -73,13 +93,22 @@ frozenlist==1.4.1
     # via
     #   aiohttp
     #   aiosignal
-greenlet==3.0.3
+googleapis-common-protos==1.65.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+greenlet==3.1.1
     # via sqlalchemy
-idna==3.7
+grpcio==1.66.1
+    # via opentelemetry-exporter-otlp-proto-grpc
+idna==3.10
     # via
     #   anyio
     #   email-validator
+    #   requests
     #   yarl
+importlib-metadata==8.4.0
+    # via opentelemetry-api
 jsonschema==4.23.0
     # via
     #   -r requirements/../../../packages/models-library/requirements/_base.in
@@ -102,10 +131,49 @@ markupsafe==2.1.5
     # via mako
 mdurl==0.1.2
     # via markdown-it-py
-multidict==6.0.5
+multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
+opentelemetry-api==1.27.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.27.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-exporter-otlp-proto-common==1.27.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.27.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.27.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.48b0
+    # via opentelemetry-instrumentation-requests
+opentelemetry-instrumentation-requests==0.48b0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-proto==1.27.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.27.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.48b0
+    # via
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+opentelemetry-util-http==0.48b0
+    # via opentelemetry-instrumentation-requests
 orjson==3.10.7
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -123,6 +191,12 @@ pamqp==3.3.0
     # via aiormq
 pint==0.24.3
     # via -r requirements/_base.in
+protobuf==4.25.5
+    # via
+    #   googleapis-common-protos
+    #   opentelemetry-proto
+psutil==6.0.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
 psycopg2-binary==2.9.9
     # via
     #   aiopg
@@ -160,7 +234,7 @@ pydantic-settings==2.5.2
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
 pygments==2.18.0
     # via rich
-pyinstrument==4.7.2
+pyinstrument==4.7.3
     # via -r requirements/../../../packages/service-library/requirements/_base.in
 python-dateutil==2.9.0.post0
     # via arrow
@@ -191,7 +265,11 @@ referencing==0.29.3
     #   -c requirements/../../../packages/service-library/requirements/./constraints.txt
     #   jsonschema
     #   jsonschema-specifications
-rich==13.7.1
+repro-zipfile==0.3.1
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+requests==2.32.3
+    # via opentelemetry-exporter-otlp-proto-http
+rich==13.8.1
     # via
     #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
@@ -200,13 +278,15 @@ rpds-py==0.20.0
     # via
     #   jsonschema
     #   referencing
+setuptools==75.1.0
+    # via opentelemetry-instrumentation
 shellingham==1.5.4
     # via typer
 six==1.16.0
     # via python-dateutil
 sniffio==1.3.1
     # via anyio
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
@@ -228,12 +308,11 @@ tqdm==4.66.5
     # via
     #   -r requirements/../../../packages/service-library/requirements/_base.in
     #   -r requirements/_base.in
-typer==0.12.4
+typer==0.12.5
     # via
     #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
-    #   faststream
-types-python-dateutil==2.9.0.20240821
+types-python-dateutil==2.9.0.20240906
     # via arrow
 typing-extensions==4.12.2
     # via
@@ -242,13 +321,30 @@ typing-extensions==4.12.2
     #   faststream
     #   flexcache
     #   flexparser
+    #   opentelemetry-sdk
     #   pint
     #   pydantic
     #   pydantic-core
     #   typer
-yarl==1.9.4
+urllib3==2.2.3
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   requests
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+yarl==1.12.1
     # via
     #   -r requirements/../../../packages/postgres-database/requirements/_base.in
     #   aio-pika
     #   aiohttp
     #   aiormq
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/packages/simcore-sdk/requirements/_test.txt b/packages/simcore-sdk/requirements/_test.txt
index fa838e387664..3a80283eb5c5 100644
--- a/packages/simcore-sdk/requirements/_test.txt
+++ b/packages/simcore-sdk/requirements/_test.txt
@@ -16,7 +16,7 @@ aiohttp==3.10.5
     #   -c requirements/_base.txt
     #   aiobotocore
     #   aioresponses
-aioitertools==0.11.0
+aioitertools==0.12.0
     # via aiobotocore
 aioresponses==0.7.6
     # via -r requirements/_test.in
@@ -24,7 +24,7 @@ aiosignal==1.3.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
-alembic==1.13.2
+alembic==1.13.3
     # via
     #   -c requirements/_base.txt
     #   -r requirements/_test.in
@@ -40,7 +40,7 @@ attrs==24.2.0
     #   aiohttp
     #   jsonschema
     #   referencing
-aws-sam-translator==1.89.0
+aws-sam-translator==1.91.0
     # via cfn-lint
 aws-xray-sdk==2.14.0
     # via moto
@@ -58,18 +58,21 @@ botocore==1.34.131
     #   boto3
     #   moto
     #   s3transfer
-botocore-stubs==1.35.2
+botocore-stubs==1.35.25
     # via types-aiobotocore
-certifi==2024.7.4
+certifi==2024.8.30
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   -c requirements/_base.txt
     #   requests
-cffi==1.17.0
+cffi==1.17.1
     # via cryptography
-cfn-lint==1.10.3
+cfn-lint==1.15.0
     # via moto
 charset-normalizer==3.3.2
-    # via requests
+    # via
+    #   -c requirements/_base.txt
+    #   requests
 click==8.1.7
     # via
     #   -c requirements/_base.txt
@@ -79,7 +82,7 @@ coverage==7.6.1
     # via
     #   -r requirements/_test.in
     #   pytest-cov
-cryptography==43.0.0
+cryptography==43.0.1
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   joserfc
@@ -90,7 +93,7 @@ docker==7.1.0
     #   moto
 execnet==2.1.1
     # via pytest-xdist
-faker==27.0.0
+faker==29.0.0
     # via -r requirements/_test.in
 flaky==3.8.1
     # via -r requirements/_test.in
@@ -98,22 +101,22 @@ flask==3.0.3
     # via
     #   flask-cors
     #   moto
-flask-cors==4.0.1
+flask-cors==5.0.0
     # via moto
 frozenlist==1.4.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
     #   aiosignal
-graphql-core==3.2.3
+graphql-core==3.2.4
     # via moto
-greenlet==3.0.3
+greenlet==3.1.1
     # via
     #   -c requirements/_base.txt
     #   sqlalchemy
 icdiff==2.0.7
     # via pytest-icdiff
-idna==3.7
+idna==3.10
     # via
     #   -c requirements/_base.txt
     #   requests
@@ -133,7 +136,7 @@ jmespath==1.0.1
     #   botocore
 joserfc==1.0.0
     # via moto
-jsondiff==2.2.0
+jsondiff==2.2.1
     # via moto
 jsonpatch==1.33
     # via cfn-lint
@@ -167,16 +170,16 @@ markupsafe==2.1.5
     #   jinja2
     #   mako
     #   werkzeug
-moto==5.0.13
+moto==5.0.15
     # via -r requirements/_test.in
 mpmath==1.3.0
     # via sympy
-multidict==6.0.5
+multidict==6.1.0
     # via
     #   -c requirements/_base.txt
     #   aiohttp
     #   yarl
-mypy==1.11.1
+mypy==1.11.2
     # via sqlalchemy
 mypy-extensions==1.0.0
     # via mypy
@@ -199,7 +202,7 @@ ply==3.11
     # via jsonpath-ng
 pprintpp==0.4.0
     # via pytest-icdiff
-py-partiql-parser==0.5.5
+py-partiql-parser==0.5.6
     # via moto
 pycparser==2.22
     # via cffi
@@ -212,9 +215,9 @@ pydantic-core==2.23.4
     # via
     #   -c requirements/_base.txt
     #   pydantic
-pyparsing==3.1.2
+pyparsing==3.1.4
     # via moto
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -r requirements/_test.in
     #   pytest-asyncio
@@ -267,10 +270,11 @@ referencing==0.29.3
     #   jsonschema
     #   jsonschema-path
     #   jsonschema-specifications
-regex==2024.7.24
+regex==2024.9.11
     # via cfn-lint
 requests==2.32.3
     # via
+    #   -c requirements/_base.txt
     #   -r requirements/_test.in
     #   docker
     #   jsonschema-path
@@ -287,14 +291,16 @@ rpds-py==0.20.0
     #   referencing
 s3transfer==0.10.2
     # via boto3
-setuptools==73.0.1
-    # via moto
+setuptools==75.1.0
+    # via
+    #   -c requirements/_base.txt
+    #   moto
 six==1.16.0
     # via
     #   -c requirements/_base.txt
     #   python-dateutil
     #   rfc3339-validator
-sqlalchemy==1.4.53
+sqlalchemy==1.4.54
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
@@ -302,17 +308,17 @@ sqlalchemy==1.4.53
     #   alembic
 sqlalchemy2-stubs==0.0.2a38
     # via sqlalchemy
-sympy==1.13.2
+sympy==1.13.3
     # via cfn-lint
 termcolor==2.4.0
     # via pytest-sugar
-types-aiobotocore==2.13.2
+types-aiobotocore==2.15.1
     # via -r requirements/_test.in
-types-aiobotocore-s3==2.13.2
+types-aiobotocore-s3==2.15.1
     # via types-aiobotocore
 types-aiofiles==24.1.0.20240626
     # via -r requirements/_test.in
-types-awscrt==0.21.2
+types-awscrt==0.21.5
     # via botocore-stubs
 types-tqdm==4.66.0.20240417
     # via -r requirements/_test.in
@@ -328,24 +334,26 @@ typing-extensions==4.12.2
     #   sqlalchemy2-stubs
     #   types-aiobotocore
     #   types-aiobotocore-s3
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   -c requirements/_base.txt
     #   botocore
     #   docker
     #   requests
     #   responses
-werkzeug==3.0.3
+werkzeug==3.0.4
     # via
     #   flask
     #   moto
 wrapt==1.16.0
     # via
+    #   -c requirements/_base.txt
     #   aiobotocore
     #   aws-xray-sdk
 xmltodict==0.13.0
     # via moto
-yarl==1.9.4
+yarl==1.12.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
diff --git a/packages/simcore-sdk/requirements/_tools.txt b/packages/simcore-sdk/requirements/_tools.txt
index 5a573bd48484..d1b323ae5dde 100644
--- a/packages/simcore-sdk/requirements/_tools.txt
+++ b/packages/simcore-sdk/requirements/_tools.txt
@@ -1,8 +1,8 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
     # via -r requirements/../../../requirements/devenv.txt
-build==1.2.1
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
@@ -18,9 +18,9 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
@@ -28,7 +28,7 @@ isort==5.13.2
     #   pylint
 mccabe==0.7.0
     # via pylint
-mypy==1.11.1
+mypy==1.11.2
     # via
     #   -c requirements/_test.txt
     #   -r requirements/../../../requirements/devenv.txt
@@ -51,14 +51,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.0
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -70,10 +70,11 @@ pyyaml==6.0.2
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   pre-commit
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
-setuptools==73.0.1
+setuptools==75.1.0
     # via
+    #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   pip-tools
 tomlkit==0.13.2
@@ -83,7 +84,7 @@ typing-extensions==4.12.2
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
 wheel==0.44.0
     # via pip-tools
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py
index 7579c3eeb0c5..c37d82aced78 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from tempfile import TemporaryDirectory
 
-from models_library.basic_types import IDStr
+from common_library.pydantic_basic_types import IDStr
 from models_library.projects import ProjectID
 from models_library.projects_nodes_io import NodeID, StorageFileID
 from models_library.users import UserID
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/aws_s3_cli.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/aws_s3_cli.py
index 320cfd7e25f3..147b7e4024b4 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/aws_s3_cli.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/aws_s3_cli.py
@@ -6,10 +6,9 @@
 from asyncio.streams import StreamReader
 from pathlib import Path
 
-from common_library.errors_classes import OsparcErrorMixin
-
 from aiocache import cached  # type: ignore[import-untyped]
-from models_library.basic_types import IDStr
+from common_library.errors_classes import OsparcErrorMixin
+from common_library.pydantic_basic_types import IDStr
 from pydantic import AnyUrl, ByteSize
 from servicelib.progress_bar import ProgressBarData
 from servicelib.utils import logged_gather
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/file_io_utils.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/file_io_utils.py
index 5feefab82f82..5aea046d75b6 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/file_io_utils.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/file_io_utils.py
@@ -18,8 +18,9 @@
     RequestInfo,
 )
 from aiohttp.typedefs import LooseHeaders
+from common_library.pydantic_basic_types import IDStr
 from models_library.api_schemas_storage import ETag, FileUploadSchema, UploadedPart
-from models_library.basic_types import IDStr, SHA256Str
+from models_library.basic_types import SHA256Str
 from pydantic import AnyUrl, NonNegativeInt
 from servicelib.aiohttp import status
 from servicelib.logging_utils import log_catch
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py
index f3e2587fab75..23a88fc2aa32 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py
@@ -5,6 +5,7 @@
 
 import aiofiles
 from aiohttp import ClientSession
+from common_library.pydantic_basic_types import IDStr
 from models_library.api_schemas_storage import (
     ETag,
     FileMetaDataGet,
@@ -12,10 +13,10 @@
     LinkType,
     UploadedPart,
 )
-from models_library.basic_types import IDStr, SHA256Str
+from models_library.basic_types import SHA256Str
 from models_library.projects_nodes_io import LocationID, LocationName, StorageFileID
 from models_library.users import UserID
-from pydantic import AnyUrl, ByteSize, TypeAdapter, parse_obj_as
+from pydantic import AnyUrl, ByteSize, TypeAdapter
 from servicelib.file_utils import create_sha256_checksum
 from servicelib.progress_bar import ProgressBarData
 from settings_library.aws_s3_cli import AwsS3CliSettings
@@ -189,7 +190,9 @@ async def download_path_from_s3(
                     aws_s3_cli_settings,
                     progress_bar,
                     local_directory_path=local_path,
-                    download_s3_link=TypeAdapter(AnyUrl).validate_python(f"{download_link}"),
+                    download_s3_link=TypeAdapter(AnyUrl).validate_python(
+                        f"{download_link}"
+                    ),
                 )
             elif r_clone_settings:
                 await r_clone.sync_s3_to_local(
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py
index bbfe14e7f39e..4c9a02470001 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py
@@ -8,11 +8,10 @@
 from pathlib import Path
 from typing import Final
 
-from common_library.errors_classes import OsparcErrorMixin
-
 from aiocache import cached  # type: ignore[import-untyped]
 from aiofiles import tempfile
-from models_library.basic_types import IDStr
+from common_library.errors_classes import OsparcErrorMixin
+from common_library.pydantic_basic_types import IDStr
 from pydantic import AnyUrl, BaseModel, ByteSize
 from servicelib.progress_bar import ProgressBarData
 from servicelib.utils import logged_gather
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone_utils.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone_utils.py
index 75ed54ec686f..7470c3940280 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone_utils.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone_utils.py
@@ -12,8 +12,6 @@
 _logger = logging.getLogger(__name__)
 
 
-
-
 class _RCloneSyncMessageBase(BaseModel):
     level: str = Field(..., description="log level")
     msg: str
@@ -44,6 +42,7 @@ class _RCloneSyncTransferringMessage(_RCloneSyncMessageBase):
     _RCloneSyncTransferCompletedMessage,
     _RCloneSyncUpdatedMessage,
     _RCloneSyncTransferringMessage,
+    _RCloneSyncMessageBase,
 ]
 
 
@@ -77,7 +76,9 @@ def __init__(self, progress_bar: ProgressBarData) -> None:
     async def __call__(self, logs: str) -> None:
         _logger.debug("received logs: %s", logs)
         with log_catch(_logger, reraise=False):
-            rclone_message: _RCloneSyncMessages = TypeAdapter(_RCloneSyncMessages).validate_strings(
+            rclone_message: _RCloneSyncMessages = TypeAdapter(
+                _RCloneSyncMessages
+            ).validate_strings(
                 logs,
             )
 
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py
index d0ef9eb14bf4..5f1c43d40f14 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py
@@ -1,19 +1,19 @@
-from asyncio import Task
-import traceback
 import logging
+import traceback
+from abc import ABC, abstractmethod
+from asyncio import CancelledError, Task
 from collections.abc import Callable, Coroutine
 from pathlib import Path
 from typing import Any
 
-from pydantic_core import InitErrorDetails
-
+from common_library.pydantic_basic_types import IDStr
 from models_library.api_schemas_storage import LinkType
-from models_library.basic_types import IDStr
 from models_library.projects import ProjectIDStr
 from models_library.projects_nodes_io import NodeIDStr
 from models_library.services_types import ServicePortKey
 from models_library.users import UserID
 from pydantic import BaseModel, ConfigDict, Field, ValidationError
+from pydantic_core import InitErrorDetails
 from servicelib.progress_bar import ProgressBarData
 from servicelib.utils import logged_gather
 from settings_library.aws_s3_cli import AwsS3CliSettings
@@ -30,15 +30,38 @@
 log = logging.getLogger(__name__)
 
 
-def _format_error(task:Task)-> str:
+def _format_error(task: Task) -> str:
     # pylint:disable=protected-access
-    assert task._exception #nosec
-    error_list= traceback.format_exception(type(task._exception), task._exception, task._exception.__traceback__)
+    assert task._exception  # nosec
+    error_list = traceback.format_exception(
+        type(task._exception), task._exception, task._exception.__traceback__
+    )
     return "\n".join(error_list)
 
-def _get_error_details(task:Task, port_key:str)->InitErrorDetails:
+
+def _get_error_details(task: Task, port_key: str) -> InitErrorDetails:
     # pylint:disable=protected-access
-    return InitErrorDetails(type="value_error", loc=(f"{port_key}",), input=_format_error(task), ctx={"error":task._exception})
+    return InitErrorDetails(
+        type="value_error",
+        loc=(f"{port_key}",),
+        input=_format_error(task),
+        ctx={"error": task._exception},
+    )
+
+
+class OutputsCallbacks(ABC):
+    @abstractmethod
+    async def aborted(self, key: ServicePortKey) -> None:
+        pass
+
+    @abstractmethod
+    async def finished_succesfully(self, key: ServicePortKey) -> None:
+        pass
+
+    @abstractmethod
+    async def finished_with_error(self, key: ServicePortKey) -> None:
+        pass
+
 
 class Nodeports(BaseModel):
     """
@@ -161,6 +184,7 @@ async def set_multiple(
         ],
         *,
         progress_bar: ProgressBarData,
+        outputs_callbacks: OutputsCallbacks | None,
     ) -> None:
         """
         Sets the provided values to the respective input or output ports
@@ -169,34 +193,54 @@ async def set_multiple(
 
         raises ValidationError
         """
+
+        async def _set_with_notifications(
+            port_key: ServicePortKey,
+            value: ItemConcreteValue | None,
+            set_kwargs: SetKWargs | None,
+            sub_progress: ProgressBarData,
+        ) -> None:
+            try:
+                # pylint: disable=protected-access
+                await self.internal_outputs[port_key]._set(  # noqa: SLF001
+                    value, set_kwargs=set_kwargs, progress_bar=sub_progress
+                )
+                if outputs_callbacks:
+                    await outputs_callbacks.finished_succesfully(port_key)
+            except UnboundPortError:
+                # not available try inputs
+                # if this fails it will raise another exception
+                # pylint: disable=protected-access
+                await self.internal_inputs[port_key]._set(  # noqa: SLF001
+                    value, set_kwargs=set_kwargs, progress_bar=sub_progress
+                )
+            except CancelledError:
+                if outputs_callbacks:
+                    await outputs_callbacks.aborted(port_key)
+                raise
+            except Exception:
+                if outputs_callbacks:
+                    await outputs_callbacks.finished_with_error(port_key)
+                raise
+
         tasks = []
         async with progress_bar.sub_progress(
             steps=len(port_values.items()), description=IDStr("set multiple")
         ) as sub_progress:
             for port_key, (value, set_kwargs) in port_values.items():
-                # pylint: disable=protected-access
-                try:
-                    tasks.append(
-                        self.internal_outputs[port_key]._set(
-                            value, set_kwargs=set_kwargs, progress_bar=sub_progress
-                        )
-                    )
-                except UnboundPortError:
-                    # not available try inputs
-                    # if this fails it will raise another exception
-                    tasks.append(
-                        self.internal_inputs[port_key]._set(
-                            value, set_kwargs=set_kwargs, progress_bar=sub_progress
-                        )
-                    )
+                tasks.append(
+                    _set_with_notifications(port_key, value, set_kwargs, sub_progress)
+                )
 
             results = await logged_gather(*tasks)
             await self.save_to_db_cb(self)
 
         # groups all ValidationErrors pre-pending 'port_key' to loc and raises ValidationError
-        if error_details:= [
+        if error_details := [
             _get_error_details(r, port_key)
-            for port_key, r in zip(port_values.keys(), results)
+            for port_key, r in zip(port_values.keys(), results, strict=True)
             if r is not None
         ]:
-            raise ValidationError.from_exception_data(title="Multiple port_key errors",line_errors=error_details)
+            raise ValidationError.from_exception_data(
+                title="Multiple port_key errors", line_errors=error_details
+            )
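`set_multiple` now threads per-port lifecycle notifications through the new `OutputsCallbacks` ABC defined above. A hedged usage sketch (illustrative only; `ports`, `values_to_set` and `progress_bar` are assumed to exist in the caller):

```python
# Illustrative OutputsCallbacks implementation; the method names match the ABC above
# (including the `finished_succesfully` spelling used throughout this diff).
import logging

from models_library.services_types import ServicePortKey
from simcore_sdk.node_ports_v2.nodeports_v2 import OutputsCallbacks

_logger = logging.getLogger(__name__)


class LoggingOutputsCallbacks(OutputsCallbacks):
    async def aborted(self, key: ServicePortKey) -> None:
        _logger.warning("output port %s: upload aborted", key)

    async def finished_succesfully(self, key: ServicePortKey) -> None:
        _logger.info("output port %s: upload finished", key)

    async def finished_with_error(self, key: ServicePortKey) -> None:
        _logger.error("output port %s: upload failed", key)


# await ports.set_multiple(
#     {key: (value, None) for key, value in values_to_set.items()},
#     progress_bar=progress_bar,
#     outputs_callbacks=LoggingOutputsCallbacks(),
# )
```

As the integration test further down asserts, these callbacks fire only for output ports; setting input ports does not trigger them.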
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py
index 3ddab6a29d3a..3bf22174b643 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py
@@ -6,8 +6,8 @@
 from pprint import pformat
 from typing import Any
 
+from common_library.pydantic_basic_types import IDStr
 from models_library.api_schemas_storage import LinkType
-from models_library.basic_types import IDStr
 from models_library.services_io import BaseServiceIOModel
 from models_library.services_types import ServicePortKey
 from pydantic import (
diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py
index 655c95764085..cd31bdb01b17 100644
--- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py
+++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py
@@ -4,8 +4,9 @@
 from pathlib import Path
 from typing import Any
 
+from common_library.pydantic_basic_types import IDStr
 from models_library.api_schemas_storage import FileUploadSchema, LinkType
-from models_library.basic_types import IDStr, SHA256Str
+from models_library.basic_types import SHA256Str
 from models_library.services_types import FileName, ServicePortKey
 from models_library.users import UserID
 from pydantic import AnyUrl, ByteSize
diff --git a/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py b/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py
index ca7a81e6c177..8db4e908b686 100644
--- a/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py
+++ b/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py
@@ -17,7 +17,7 @@
 from models_library.projects import ProjectID
 from models_library.projects_nodes_io import NodeID, SimcoreS3FileID
 from models_library.users import UserID
-from pydantic import parse_obj_as
+from pydantic import TypeAdapter
 from servicelib.progress_bar import ProgressBarData
 from settings_library.aws_s3_cli import AwsS3CliSettings
 from settings_library.r_clone import RCloneSettings
@@ -263,8 +263,8 @@ async def test_delete_legacy_archive(
             user_id=user_id,
             store_id=SIMCORE_LOCATION,
             store_name=None,
-            s3_object=parse_obj_as(
-                SimcoreS3FileID, f"{project_id}/{node_uuid}/{legacy_archive_name.name}"
+            s3_object=TypeAdapter(SimcoreS3FileID).validate_python(
+                f"{project_id}/{node_uuid}/{legacy_archive_name.name}"
             ),
             path_to_upload=legacy_archive_name,
             io_log_redirect_cb=None,
diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py
index ec24f271394d..d755647b7533 100644
--- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py
+++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py
@@ -1,9 +1,10 @@
 # pylint: disable=pointless-statement
+# pylint: disable=protected-access
 # pylint: disable=redefined-outer-name
 # pylint: disable=too-many-arguments
+# pylint: disable=too-many-positional-arguments
 # pylint: disable=unused-argument
 # pylint: disable=unused-variable
-# pylint: disable=protected-access
 
 
 import filecmp
@@ -13,6 +14,7 @@
 from collections.abc import Awaitable, Callable, Iterable
 from pathlib import Path
 from typing import Any
+from unittest.mock import AsyncMock
 from uuid import uuid4
 
 import np_helpers
@@ -29,13 +31,14 @@
 )
 from models_library.services_types import ServicePortKey
 from pydantic import TypeAdapter
+from pytest_mock import MockerFixture
 from servicelib.progress_bar import ProgressBarData
 from settings_library.r_clone import RCloneSettings
 from simcore_sdk import node_ports_v2
 from simcore_sdk.node_ports_common.exceptions import UnboundPortError
 from simcore_sdk.node_ports_v2 import exceptions
 from simcore_sdk.node_ports_v2.links import ItemConcreteValue, PortLink
-from simcore_sdk.node_ports_v2.nodeports_v2 import Nodeports
+from simcore_sdk.node_ports_v2.nodeports_v2 import Nodeports, OutputsCallbacks
 from simcore_sdk.node_ports_v2.port import Port
 from utils_port_v2 import CONSTANT_UUID
 
@@ -789,6 +792,34 @@ async def _upload_create_task(item_key: str) -> None:
     )
 
 
+class _Callbacks(OutputsCallbacks):
+    async def aborted(self, key: ServicePortKey) -> None:
+        pass
+
+    async def finished_succesfully(self, key: ServicePortKey) -> None:
+        pass
+
+    async def finished_with_error(self, key: ServicePortKey) -> None:
+        pass
+
+
+@pytest.fixture
+async def output_callbacks() -> _Callbacks:
+    return _Callbacks()
+
+
+@pytest.fixture
+async def spy_outputs_callbaks(
+    mocker: MockerFixture, output_callbacks: _Callbacks
+) -> dict[str, AsyncMock]:
+    return {
+        "aborted": mocker.spy(output_callbacks, "aborted"),
+        "finished_succesfully": mocker.spy(output_callbacks, "finished_succesfully"),
+        "finished_with_error": mocker.spy(output_callbacks, "finished_with_error"),
+    }
+
+
+@pytest.mark.parametrize("use_output_callbacks", [True, False])
 async def test_batch_update_inputs_outputs(
     user_id: int,
     project_id: str,
@@ -797,7 +828,12 @@ async def test_batch_update_inputs_outputs(
     port_count: int,
     option_r_clone_settings: RCloneSettings | None,
     faker: Faker,
+    output_callbacks: _Callbacks,
+    spy_outputs_callbaks: dict[str, AsyncMock],
+    use_output_callbacks: bool,
 ) -> None:
+    callbacks = output_callbacks if use_output_callbacks else None
+
     outputs = [(f"value_out_{i}", "integer", None) for i in range(port_count)]
     inputs = [(f"value_in_{i}", "integer", None) for i in range(port_count)]
     config_dict, _, _ = create_special_configuration(inputs=inputs, outputs=outputs)
@@ -811,12 +847,17 @@ async def test_batch_update_inputs_outputs(
     await check_config_valid(PORTS, config_dict)
 
     async with ProgressBarData(num_steps=2, description=faker.pystr()) as progress_bar:
+        port_values = (await PORTS.outputs).values()
         await PORTS.set_multiple(
             {
                 TypeAdapter(ServicePortKey).validate_python(port.key): (k, None)
                 for k, port in enumerate((await PORTS.outputs).values())
             },
             progress_bar=progress_bar,
+            outputs_callbacks=callbacks,
+        )
+        assert len(spy_outputs_callbaks["finished_succesfully"].call_args_list) == (
+            len(port_values) if use_output_callbacks else 0
         )
         # pylint: disable=protected-access
         assert progress_bar._current_steps == pytest.approx(1)  # noqa: SLF001
@@ -826,6 +867,11 @@ async def test_batch_update_inputs_outputs(
                 for k, port in enumerate((await PORTS.inputs).values(), start=1000)
             },
             progress_bar=progress_bar,
+            outputs_callbacks=callbacks,
+        )
+        # inputs do not trigger callbacks
+        assert len(spy_outputs_callbaks["finished_succesfully"].call_args_list) == (
+            len(port_values) if use_output_callbacks else 0
         )
         assert progress_bar._current_steps == pytest.approx(2)  # noqa: SLF001
 
@@ -867,4 +913,11 @@ async def test_batch_update_inputs_outputs(
                     ): (123132, None)
                 },
                 progress_bar=progress_bar,
+                outputs_callbacks=callbacks,
             )
+
+    assert len(spy_outputs_callbaks["finished_succesfully"].call_args_list) == (
+        len(port_values) if use_output_callbacks else 0
+    )
+    assert len(spy_outputs_callbaks["aborted"].call_args_list) == 0
+    assert len(spy_outputs_callbaks["finished_with_error"].call_args_list) == 0
diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_common_file_io_utils.py b/packages/simcore-sdk/tests/unit/test_node_ports_common_file_io_utils.py
index 0540daa58d11..4b68e1e2e51d 100644
--- a/packages/simcore-sdk/tests/unit/test_node_ports_common_file_io_utils.py
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_common_file_io_utils.py
@@ -12,6 +12,7 @@
 from aiobotocore.session import AioBaseClient, get_session
 from aiohttp import ClientResponse, ClientSession, TCPConnector
 from aioresponses import aioresponses
+from common_library.pydantic_basic_types import IDStr
 from faker import Faker
 from models_library.api_schemas_storage import (
     FileUploadLinks,
@@ -272,11 +273,13 @@ async def test_upload_file_to_presigned_links(
     """
     local_file = create_file_of_size(file_size)
     num_links = 2080
-    effective_chunk_size = TypeAdapter(ByteSize).validate_python(local_file.stat().st_size / num_links)
+    effective_chunk_size = TypeAdapter(ByteSize).validate_python(
+        local_file.stat().st_size / num_links
+    )
     assert effective_chunk_size <= used_chunk_size
     upload_links = await create_upload_links(num_links, used_chunk_size)
     assert len(upload_links.urls) == num_links
-    async with ProgressBarData(num_steps=1, description=faker.pystr()) as progress_bar:
+    async with ProgressBarData(num_steps=1, description=IDStr("42")) as progress_bar:
         uploaded_parts: list[UploadedPart] = await upload_file_to_presigned_links(
             session=client_session,
             file_upload_links=upload_links,
diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_common_r_clone_utils.py b/packages/simcore-sdk/tests/unit/test_node_ports_common_r_clone_utils.py
new file mode 100644
index 000000000000..e2d9b890ba5e
--- /dev/null
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_common_r_clone_utils.py
@@ -0,0 +1,75 @@
+import json
+from unittest.mock import AsyncMock
+
+import pytest
+from pydantic import TypeAdapter
+from simcore_sdk.node_ports_common.r_clone_utils import (
+    SyncProgressLogParser,
+    _RCloneSyncMessageBase,
+    _RCloneSyncMessages,
+    _RCloneSyncTransferCompletedMessage,
+    _RCloneSyncTransferringMessage,
+    _RCloneSyncUpdatedMessage,
+)
+
+
+@pytest.mark.parametrize(
+    "log_message,expected",
+    [
+        (
+            '{"level":"info","msg":"There was nothing to transfer","source":"sync/sync.go:954","time":"2024-09-25T10:18:04.904537+00:00"}',
+            _RCloneSyncMessageBase,
+        ),
+        (
+            '{"level":"info","msg":"","object":".hidden_do_not_remove","objectType":"*s3.Object","source":"operations/operations.go:277","time":"2024-09-24T07:11:22.147117+00:00"}',
+            _RCloneSyncUpdatedMessage,
+        ),
+        (
+            '{"level":"info","msg":"Copied (new)","object":"README.ipynb","objectType":"*s3.Object","size":5123,"source":"operations/copy.go:360","time":"2024-04-23T14:05:10.408277+00:00"}',
+            _RCloneSyncTransferCompletedMessage,
+        ),
+        (
+            json.dumps(
+                {
+                    "level": "",
+                    "msg": "",
+                    "source": "",
+                    "time": "2024-09-24T07:11:22.147117+00:00",
+                    "object": "str",
+                }
+            ),
+            _RCloneSyncUpdatedMessage,
+        ),
+        (
+            json.dumps(
+                {
+                    "level": "",
+                    "msg": "",
+                    "source": "",
+                    "time": "2024-09-24T07:11:22.147117+00:00",
+                    "object": "str",
+                    "size": 1,
+                }
+            ),
+            _RCloneSyncTransferCompletedMessage,
+        ),
+        (
+            json.dumps(
+                {
+                    "level": "",
+                    "msg": "",
+                    "source": "",
+                    "time": "2024-09-24T07:11:22.147117+00:00",
+                    "stats": {"bytes": 1, "totalBytes": 1},
+                }
+            ),
+            _RCloneSyncTransferringMessage,
+        ),
+    ],
+)
+async def test_rclone_stbc_message_parsing_regression(log_message: str, expected: type):
+    parsed_log = TypeAdapter(_RCloneSyncMessages).validate_json(log_message)
+    assert isinstance(parsed_log, expected)
+
+    progress_log_parser = SyncProgressLogParser(AsyncMock())
+    await progress_log_parser(log_message)
diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_nodeports_v2.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_nodeports_v2.py
index 856b4b268b11..827949c56511 100644
--- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_nodeports_v2.py
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_nodeports_v2.py
@@ -6,10 +6,12 @@
 import asyncio
 from pathlib import Path
 from typing import Any, Callable
+from unittest.mock import AsyncMock
 
-from pydantic import ValidationError
 import pytest
+from common_library.pydantic_basic_types import IDStr
 from faker import Faker
+from pydantic import BaseModel, ValidationError
 from pytest_mock import MockFixture
 from servicelib.progress_bar import ProgressBarData
 from simcore_sdk.node_ports_common.filemanager import UploadedFile
@@ -144,6 +146,7 @@ async def mock_node_port_creator_cb(*args, **kwargs):
                 + list(original_outputs.values())
             },
             progress_bar=progress_bar,
+            outputs_callbacks=AsyncMock(),
         )
     assert progress_bar._current_steps == pytest.approx(1)  # noqa: SLF001
 
@@ -226,16 +229,24 @@ async def test_node_ports_v2_packages(
 
 
 @pytest.fixture
-def mock_port_set(mocker: MockFixture)->None:
+def mock_port_set(mocker: MockFixture) -> None:
     async def _always_raise_error(*args, **kwargs):
         async def _i_raise_errors():
-            raise ValidationError("invalid")
-        return asyncio.create_task(_i_raise_errors())
+            class User(BaseModel):
+                name: str
+                age: int
+
+            User(**kwargs)
+
+        return await asyncio.create_task(_i_raise_errors())
+
+    mocker.patch(
+        "simcore_sdk.node_ports_v2.port.Port._set", side_effect=_always_raise_error
+    )
 
-    mocker.patch("simcore_sdk.node_ports_v2.port.Port._set", side_effect=_always_raise_error)
 
 async def test_node_ports_v2_set_multiple_catch_multiple_failing_set_ports(
-    mock_port_set:None,
+    mock_port_set: None,
     mock_db_manager: Callable,
     default_configuration: dict[str, Any],
     user_id: int,
@@ -248,8 +259,7 @@ async def test_node_ports_v2_set_multiple_catch_multiple_failing_set_ports(
     original_inputs = create_valid_port_mapping(InputsList, suffix="original")
     original_outputs = create_valid_port_mapping(OutputsList, suffix="original")
 
-
-    async def _mock_callback(*args,**kwargs):
+    async def _mock_callback(*args, **kwargs):
         pass
 
     node_ports = Nodeports(
@@ -264,13 +274,18 @@ async def _mock_callback(*args,**kwargs):
         node_port_creator_cb=_mock_callback,
         auto_update=False,
     )
-    async with ProgressBarData(num_steps=1, description=faker.pystr()) as progress_bar:
+
+    callback = AsyncMock()
+    async with ProgressBarData(
+        num_steps=1, description=IDStr(faker.pystr())
+    ) as progress_bar:
         with pytest.raises(ValidationError):
             await node_ports.set_multiple(
-        {
-            port.key: (port.value, None)
-            for port in list(original_inputs.values())
-            + list(original_outputs.values())
-        },
-        progress_bar=progress_bar,
-    )
+                {
+                    port.key: (port.value, None)
+                    for port in list(original_inputs.values())
+                    + list(original_outputs.values())
+                },
+                progress_bar=progress_bar,
+                outputs_callbacks=callback,
+            )
diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
index f8dbe5d0e8df..063c71f99f47 100644
--- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
@@ -1,9 +1,10 @@
-# pylint:disable=unused-variable
-# pylint:disable=unused-argument
-# pylint:disable=redefined-outer-name
 # pylint:disable=no-member
 # pylint:disable=protected-access
+# pylint:disable=redefined-outer-name
 # pylint:disable=too-many-arguments
+# pylint:disable=too-many-positional-arguments
+# pylint:disable=unused-argument
+# pylint:disable=unused-variable
 
 
 import os
diff --git a/requirements/tools/Makefile b/requirements/tools/Makefile
index 395bb9e6bc41..ee094b80ef94 100644
--- a/requirements/tools/Makefile
+++ b/requirements/tools/Makefile
@@ -77,11 +77,11 @@ reqs: ## updates test & tooling requirements
 	@$(foreach p,${_target-inputs},echo Touching $(p);touch $(p);$(MAKE_C) $(dir $(p)) reqs $(UPGRADE_OPTION);)
 
 
-reqs-all: guard-UPGRADE_OPTION ## updates a give package repository-wise (e.g. make reqs-all upgrade=foo==1.2.3 )
+reqs-all: guard-UPGRADE_OPTION ## updates a given package repository-wise IN ALL `requirements/` folders (e.g. make reqs-all upgrade=foo==1.2.3 )
 	# Upgrading $(upgrade) ALL requirements
 	@$(foreach p,${_all-in},echo Touching $(p);touch $(p);$(MAKE_C) $(dir $(p)) reqs $(UPGRADE_OPTION);)
 
-reqs-services: guard-UPGRADE_OPTION ## updates a give package on a service (e.g. make reqs-services upgrade=foo==1.2.3 )
+reqs-services: guard-UPGRADE_OPTION ## updates a given package on all services [and not packages] (e.g. make reqs-services upgrade=foo==1.2.3 )
 	# Upgrading $(upgrade) in services
 	@$(foreach p,${_services-in},echo Touching $(p);touch $(p);$(MAKE_C) $(dir $(p)) reqs $(UPGRADE_OPTION);)
 
diff --git a/scripts/maintenance/computational-clusters/autoscaled_monitor/dask.py b/scripts/maintenance/computational-clusters/autoscaled_monitor/dask.py
index d6e8859869a3..750ef816bc86 100644
--- a/scripts/maintenance/computational-clusters/autoscaled_monitor/dask.py
+++ b/scripts/maintenance/computational-clusters/autoscaled_monitor/dask.py
@@ -4,7 +4,6 @@
 
 import distributed
 import rich
-import typer
 from mypy_boto3_ec2.service_resource import Instance
 from pydantic import AnyUrl
 
@@ -64,25 +63,6 @@ async def dask_client(
                     f"{url}", security=security, timeout="5", asynchronous=True
                 )
             )
-            versions = await _wrap_dask_async_call(client.get_versions())
-            if versions["client"]["python"] != versions["scheduler"]["python"]:
-                rich.print(
-                    f"[red]python versions do not match! TIP: install the correct version {versions['scheduler']['python']}[/red]"
-                )
-                raise typer.Exit(1)
-            if (
-                versions["client"]["distributed"]
-                != versions["scheduler"]["distributed"]
-            ):
-                rich.print(
-                    f"[red]distributed versions do not match! TIP: install the correct version {versions['scheduler']['distributed']}[/red]"
-                )
-                raise typer.Exit(1)
-            if versions["client"]["dask"] != versions["scheduler"]["dask"]:
-                rich.print(
-                    f"[red]dask versions do not match! TIP: install the correct version {versions['scheduler']['dask']}[/red]"
-                )
-                raise typer.Exit(1)
             yield client
 
     finally:
@@ -132,12 +112,13 @@ def _list_tasks(
 
         return dict(task_state_to_tasks)
 
+    list_of_tasks: dict[TaskState, list[TaskId]] = {}
     try:
-        list_of_tasks: dict[TaskState, list[TaskId]] = await client.run_on_scheduler(
+        list_of_tasks = await client.run_on_scheduler(
             _list_tasks
         )  # type: ignore
     except TypeError:
-        rich.print(f"ERROR while recoverring unrunnable tasks using {dask_client=}")
+        rich.print(f"ERROR while recoverring unrunnable tasks using {dask_client=}. Defaulting to empty list of tasks!!")
     return list_of_tasks
 
 
diff --git a/services/agent/VERSION b/services/agent/VERSION
index 8acdd82b765e..3eefcb9dd5b3 100644
--- a/services/agent/VERSION
+++ b/services/agent/VERSION
@@ -1 +1 @@
-0.0.1
+1.0.0
diff --git a/services/agent/requirements/_base.in b/services/agent/requirements/_base.in
index 5fc14c5cfe27..9ef6c66b437d 100644
--- a/services/agent/requirements/_base.in
+++ b/services/agent/requirements/_base.in
@@ -9,11 +9,12 @@
 --requirement ../../../packages/common-library/requirements/_base.in
 --requirement ../../../packages/models-library/requirements/_base.in
 --requirement ../../../packages/settings-library/requirements/_base.in
+# service-library[fastapi]
+--requirement ../../../packages/service-library/requirements/_base.in
 --requirement ../../../packages/service-library/requirements/_fastapi.in
 
 aiodocker
 fastapi
 packaging
 pydantic
-python-dotenv
 uvicorn
diff --git a/services/agent/requirements/_base.txt b/services/agent/requirements/_base.txt
index 04f6020c8742..10b3e81bd41e 100644
--- a/services/agent/requirements/_base.txt
+++ b/services/agent/requirements/_base.txt
@@ -1,6 +1,18 @@
-aiodocker==0.21.0
-    # via -r requirements/_base.in
-aiohttp==3.8.5
+aio-pika==9.4.3
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+aiocache==0.12.3
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+aiodebug==2.3.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+aiodocker==0.23.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/_base.in
+aiofiles==24.1.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+aiohappyeyeballs==2.4.0
+    # via aiohttp
+aiohttp==3.10.6
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -9,23 +21,31 @@ aiohttp==3.8.5
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   aiodocker
-aiosignal==1.2.0
+aiormq==6.8.1
+    # via aio-pika
+aiosignal==1.3.1
     # via aiohttp
 annotated-types==0.7.0
     # via pydantic
-anyio==3.6.2
+anyio==4.6.0
     # via
+    #   fast-depends
+    #   faststream
     #   httpx
     #   starlette
-arrow==1.2.3
-    # via -r requirements/../../../packages/models-library/requirements/_base.in
-async-timeout==4.0.2
-    # via aiohttp
-attrs==21.4.0
+arrow==1.3.0
+    # via
+    #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+asgiref==3.8.1
+    # via opentelemetry-instrumentation-asgi
+attrs==24.2.0
     # via
     #   aiohttp
     #   jsonschema
-certifi==2023.11.17
+    #   referencing
+certifi==2024.8.30
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -35,32 +55,49 @@ certifi==2023.11.17
     #   -c requirements/../../../requirements/constraints.txt
     #   httpcore
     #   httpx
-charset-normalizer==2.1.1
-    # via aiohttp
-click==8.1.3
+    #   requests
+charset-normalizer==3.3.2
+    # via requests
+click==8.1.7
     # via
     #   typer
     #   uvicorn
-dnspython==2.2.1
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
+dnspython==2.6.1
     # via email-validator
 email-validator==2.2.0
     # via pydantic
+fast-depends==2.4.11
+    # via faststream
 fastapi==0.115.0
     # via
     #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
     #   -r requirements/_base.in
     #   prometheus-fastapi-instrumentator
-frozenlist==1.3.1
+faststream==0.5.25
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+frozenlist==1.4.1
     # via
     #   aiohttp
     #   aiosignal
+googleapis-common-protos==1.65.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+grpcio==1.66.1
+    # via opentelemetry-exporter-otlp-proto-grpc
 h11==0.14.0
     # via
     #   httpcore
     #   uvicorn
-httpcore==1.0.2
+httpcore==1.0.5
     # via httpx
-httpx==0.26.0
+httpx==0.27.2
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -69,23 +106,83 @@ httpx==0.26.0
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
-idna==3.4
+idna==3.10
     # via
     #   anyio
     #   email-validator
     #   httpx
+    #   requests
     #   yarl
-jsonschema==3.2.0
-    # via -r requirements/../../../packages/models-library/requirements/_base.in
+importlib-metadata==8.4.0
+    # via opentelemetry-api
+jsonschema==4.23.0
+    # via
+    #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+jsonschema-specifications==2023.7.1
+    # via jsonschema
 markdown-it-py==3.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py
-multidict==6.0.2
+multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
-orjson==3.10.0
+opentelemetry-api==1.27.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.27.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-exporter-otlp-proto-common==1.27.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.27.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.27.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.48b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+opentelemetry-instrumentation-asgi==0.48b0
+    # via opentelemetry-instrumentation-fastapi
+opentelemetry-instrumentation-fastapi==0.48b0
+    # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
+opentelemetry-instrumentation-requests==0.48b0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-proto==1.27.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.27.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.48b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+opentelemetry-util-http==0.48b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+orjson==3.10.7
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -94,14 +191,23 @@ orjson==3.10.0
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/../../../packages/models-library/requirements/_base.in
-packaging==23.1
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+packaging==24.1
     # via -r requirements/_base.in
-prometheus-client==0.19.0
+pamqp==3.3.0
+    # via aiormq
+prometheus-client==0.21.0
     # via
     #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
     #   prometheus-fastapi-instrumentator
 prometheus-fastapi-instrumentator==6.1.0
     # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
+protobuf==4.25.5
+    # via
+    #   googleapis-common-protos
+    #   opentelemetry-proto
+psutil==6.0.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
 pydantic==2.9.2
     # via
     #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -111,8 +217,12 @@ pydantic==2.9.2
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
     #   -r requirements/_base.in
+    #   fast-depends
     #   fastapi
     #   pydantic-extra-types
     #   pydantic-settings
@@ -124,29 +234,58 @@ pydantic-settings==2.5.2
     # via
     #   -r requirements/../../../packages/models-library/requirements/_base.in
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
-pygments==2.15.1
+pygments==2.18.0
     # via rich
-pyrsistent==0.19.2
-    # via jsonschema
-python-dateutil==2.8.2
+pyinstrument==4.7.3
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+python-dateutil==2.9.0.post0
     # via arrow
 python-dotenv==1.0.0
     # via
     #   -r requirements/_base.in
     #   pydantic-settings
 rich==13.4.2
+pyyaml==6.0.2
     # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+redis==5.0.8
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+referencing==0.29.3
+    # via
+    #   -c requirements/../../../packages/service-library/requirements/./constraints.txt
+    #   jsonschema
+    #   jsonschema-specifications
+requests==2.32.3
+    # via opentelemetry-exporter-otlp-proto-http
+rich==13.8.1
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
     #   -r requirements/../../../packages/settings-library/requirements/_base.in
     #   typer
-setuptools==69.2.0
-    # via jsonschema
+rpds-py==0.20.0
+    # via
+    #   jsonschema
+    #   referencing
+setuptools==75.1.0
+    # via opentelemetry-instrumentation
 shellingham==1.5.4
     # via typer
 six==1.16.0
-    # via
-    #   jsonschema
-    #   python-dateutil
-sniffio==1.3.0
+    # via python-dateutil
+sniffio==1.3.1
     # via
     #   anyio
     #   httpx
@@ -159,18 +298,48 @@ starlette==0.38.6
     #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
     #   -c requirements/../../../requirements/constraints.txt
     #   fastapi
-typer==0.12.3
-    # via -r requirements/../../../packages/settings-library/requirements/_base.in
+tenacity==9.0.0
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+toolz==0.12.1
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+tqdm==4.66.5
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
+typer==0.12.5
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/settings-library/requirements/_base.in
+types-python-dateutil==2.9.0.20240906
+    # via arrow
 typing-extensions==4.12.2
     # via
-    #   aiodocker
+    #   aiodebug
     #   fastapi
+    #   faststream
+    #   opentelemetry-sdk
     #   pydantic
     #   pydantic-core
     #   typer
-uvicorn==0.19.0
+urllib3==2.2.3
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   requests
+uvicorn==0.30.6
     # via
     #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
     #   -r requirements/_base.in
-yarl==1.9.2
-    # via aiohttp
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
+yarl==1.12.1
+    # via
+    #   aio-pika
+    #   aiohttp
+    #   aiormq
+zipp==3.20.2
+    # via importlib-metadata
diff --git a/services/agent/requirements/_test.in b/services/agent/requirements/_test.in
index 04f619082e08..ff76fcd01f53 100644
--- a/services/agent/requirements/_test.in
+++ b/services/agent/requirements/_test.in
@@ -11,6 +11,7 @@
 --constraint _base.txt
 
 aioboto3
+asgi-lifespan
 coverage
 faker
 httpx
@@ -20,3 +21,4 @@ pytest-asyncio
 pytest-cov
 pytest-mock
 pytest-runner
+python-dotenv
diff --git a/services/agent/requirements/_test.txt b/services/agent/requirements/_test.txt
index c3e2c6ee2fcb..01b05ccb4d82 100644
--- a/services/agent/requirements/_test.txt
+++ b/services/agent/requirements/_test.txt
@@ -1,15 +1,23 @@
-aioboto3==12.4.0
+aioboto3==13.1.1
     # via -r requirements/_test.in
-aiobotocore==2.12.3
+aiobotocore==2.13.1
     # via aioboto3
-aiohttp==3.8.5
+aiofiles==24.1.0
+    # via
+    #   -c requirements/_base.txt
+    #   aioboto3
+aiohappyeyeballs==2.4.0
+    # via
+    #   -c requirements/_base.txt
+    #   aiohttp
+aiohttp==3.10.6
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
     #   aiobotocore
-aioitertools==0.11.0
+aioitertools==0.12.0
     # via aiobotocore
-aiosignal==1.2.0
+aiosignal==1.3.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
@@ -17,54 +25,54 @@ annotated-types==0.7.0
     # via
     #   -c requirements/_base.txt
     #   pydantic
-anyio==3.6.2
+antlr4-python3-runtime==4.13.2
+    # via moto
+anyio==4.6.0
     # via
     #   -c requirements/_base.txt
     #   httpx
-async-timeout==4.0.2
-    # via
-    #   -c requirements/_base.txt
-    #   aiohttp
-attrs==21.4.0
+asgi-lifespan==2.1.0
+    # via -r requirements/_test.in
+attrs==24.2.0
     # via
     #   -c requirements/_base.txt
     #   aiohttp
     #   jsonschema
+    #   referencing
 aws-sam-translator==1.91.0
     # via cfn-lint
 aws-xray-sdk==2.14.0
     # via moto
 blinker==1.8.2
     # via flask
-boto3==1.34.69
+boto3==1.34.131
     # via
     #   aiobotocore
     #   aws-sam-translator
     #   moto
-botocore==1.34.69
+botocore==1.34.131
     # via
     #   aiobotocore
     #   aws-xray-sdk
     #   boto3
     #   moto
     #   s3transfer
-certifi==2023.11.17
+certifi==2024.8.30
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
     #   httpcore
     #   httpx
     #   requests
-cffi==1.17.0
+cffi==1.17.1
     # via cryptography
-cfn-lint==1.10.3
+cfn-lint==1.15.1
     # via moto
-charset-normalizer==2.1.1
+charset-normalizer==3.3.2
     # via
     #   -c requirements/_base.txt
-    #   aiohttp
     #   requests
-click==8.1.3
+click==8.1.7
     # via
     #   -c requirements/_base.txt
     #   flask
@@ -72,12 +80,11 @@ coverage==7.6.1
     # via
     #   -r requirements/_test.in
     #   pytest-cov
-cryptography==43.0.0
+cryptography==43.0.1
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   joserfc
     #   moto
-    #   python-jose
-    #   sshpubkeys
 docker==7.1.0
     # via moto
 ecdsa==0.19.0
@@ -85,35 +92,35 @@ ecdsa==0.19.0
     #   moto
     #   python-jose
     #   sshpubkeys
-faker==27.0.0
+faker==30.0.0
     # via -r requirements/_test.in
 flask==3.0.3
     # via
     #   flask-cors
     #   moto
-flask-cors==4.0.1
+flask-cors==5.0.0
     # via moto
-frozenlist==1.3.1
+frozenlist==1.4.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
     #   aiosignal
-graphql-core==3.2.3
+graphql-core==3.2.4
     # via moto
 h11==0.14.0
     # via
     #   -c requirements/_base.txt
     #   httpcore
-httpcore==1.0.2
+httpcore==1.0.5
     # via
     #   -c requirements/_base.txt
     #   httpx
-httpx==0.26.0
+httpx==0.27.2
     # via
     #   -c requirements/../../../requirements/constraints.txt
     #   -c requirements/_base.txt
     #   -r requirements/_test.in
-idna==3.4
+idna==3.10
     # via
     #   -c requirements/_base.txt
     #   anyio
@@ -133,51 +140,62 @@ jmespath==1.0.1
     # via
     #   boto3
     #   botocore
-jsondiff==2.2.0
+joserfc==1.0.0
+    # via moto
+jsondiff==2.2.1
     # via moto
 jsonpatch==1.33
     # via cfn-lint
+jsonpath-ng==1.6.1
+    # via moto
 jsonpointer==3.0.0
     # via jsonpatch
-jsonschema==3.2.0
+jsonschema==4.23.0
     # via
     #   -c requirements/_base.txt
     #   aws-sam-translator
     #   openapi-schema-validator
     #   openapi-spec-validator
+jsonschema-path==0.3.3
+    # via openapi-spec-validator
+jsonschema-specifications==2023.7.1
+    # via
+    #   -c requirements/_base.txt
+    #   jsonschema
+    #   openapi-schema-validator
+lazy-object-proxy==1.10.0
+    # via openapi-spec-validator
 markupsafe==2.1.5
     # via
     #   jinja2
     #   werkzeug
-moto==4.2.6
+moto==5.0.15
     # via -r requirements/_test.in
 mpmath==1.3.0
     # via sympy
-multidict==6.0.2
+multidict==6.1.0
     # via
     #   -c requirements/_base.txt
     #   aiohttp
     #   yarl
 networkx==3.3
     # via cfn-lint
-openapi-schema-validator==0.2.3
+openapi-schema-validator==0.6.2
     # via openapi-spec-validator
-openapi-spec-validator==0.4.0
-    # via
-    #   -c requirements/./constraints.txt
-    #   moto
-packaging==23.1
+openapi-spec-validator==0.7.1
+    # via moto
+packaging==24.1
     # via
     #   -c requirements/_base.txt
     #   pytest
+pathable==0.4.3
+    # via jsonschema-path
 pluggy==1.5.0
     # via pytest
-py-partiql-parser==0.4.0
+ply==3.11
+    # via jsonpath-ng
+py-partiql-parser==0.5.6
     # via moto
-pyasn1==0.6.0
-    # via
-    #   python-jose
-    #   rsa
 pycparser==2.22
     # via cffi
 pydantic==2.9.2
@@ -189,13 +207,9 @@ pydantic-core==2.23.4
     # via
     #   -c requirements/_base.txt
     #   pydantic
-pyparsing==3.1.2
+pyparsing==3.1.4
     # via moto
-pyrsistent==0.19.2
-    # via
-    #   -c requirements/_base.txt
-    #   jsonschema
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -r requirements/_test.in
     #   pytest-asyncio
@@ -211,57 +225,65 @@ pytest-mock==3.14.0
     # via -r requirements/_test.in
 pytest-runner==6.0.1
     # via -r requirements/_test.in
-python-dateutil==2.8.2
+python-dateutil==2.9.0.post0
     # via
     #   -c requirements/_base.txt
     #   botocore
     #   faker
     #   moto
-python-jose==3.3.0
-    # via moto
+python-dotenv==1.0.1
+    # via -r requirements/_test.in
 pyyaml==6.0.2
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   -c requirements/_base.txt
     #   cfn-lint
     #   jsondiff
+    #   jsonschema-path
     #   moto
-    #   openapi-spec-validator
     #   responses
-regex==2024.7.24
+referencing==0.29.3
+    # via
+    #   -c requirements/_base.txt
+    #   jsonschema
+    #   jsonschema-path
+    #   jsonschema-specifications
+regex==2024.9.11
     # via cfn-lint
 requests==2.32.3
     # via
+    #   -c requirements/_base.txt
     #   docker
+    #   jsonschema-path
     #   moto
     #   responses
 responses==0.25.3
     # via moto
-rsa==4.9
+rfc3339-validator==0.1.4
+    # via openapi-schema-validator
+rpds-py==0.20.0
     # via
-    #   -c requirements/../../../requirements/constraints.txt
-    #   python-jose
+    #   -c requirements/_base.txt
+    #   jsonschema
+    #   referencing
 s3transfer==0.10.2
     # via boto3
-setuptools==69.2.0
+setuptools==75.1.0
     # via
     #   -c requirements/_base.txt
-    #   jsonschema
     #   moto
-    #   openapi-spec-validator
 six==1.16.0
     # via
     #   -c requirements/_base.txt
-    #   ecdsa
-    #   jsonschema
     #   python-dateutil
-sniffio==1.3.0
+    #   rfc3339-validator
+sniffio==1.3.1
     # via
     #   -c requirements/_base.txt
     #   anyio
+    #   asgi-lifespan
     #   httpx
-sshpubkeys==3.3.1
-    # via moto
-sympy==1.13.2
+sympy==1.13.3
     # via cfn-lint
 typing-extensions==4.12.2
     # via
@@ -269,25 +291,26 @@ typing-extensions==4.12.2
     #   aws-sam-translator
     #   cfn-lint
     #   pydantic
-    #   pydantic-core
-urllib3==2.2.2
+urllib3==2.2.3
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   -c requirements/_base.txt
     #   botocore
     #   docker
     #   requests
     #   responses
-werkzeug==3.0.3
+werkzeug==3.0.4
     # via
     #   flask
     #   moto
 wrapt==1.16.0
     # via
+    #   -c requirements/_base.txt
     #   aiobotocore
     #   aws-xray-sdk
 xmltodict==0.13.0
     # via moto
-yarl==1.9.2
+yarl==1.12.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
diff --git a/services/agent/requirements/_tools.in b/services/agent/requirements/_tools.in
index 8e7d4eb265ee..1def82c12a30 100644
--- a/services/agent/requirements/_tools.in
+++ b/services/agent/requirements/_tools.in
@@ -3,7 +3,3 @@
 --constraint _test.txt
 
 --requirement ../../../requirements/devenv.txt
-
-black
-isort
-watchdog[watchmedo]
diff --git a/services/agent/requirements/_tools.txt b/services/agent/requirements/_tools.txt
index 8c9416c12a56..360f0628998a 100644
--- a/services/agent/requirements/_tools.txt
+++ b/services/agent/requirements/_tools.txt
@@ -1,16 +1,14 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
-    # via
-    #   -r requirements/../../../requirements/devenv.txt
-    #   -r requirements/_tools.in
-build==1.2.1
+    # via -r requirements/../../../requirements/devenv.txt
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
 cfgv==3.4.0
     # via pre-commit
-click==8.1.3
+click==8.1.7
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
@@ -20,18 +18,17 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
     #   -r requirements/../../../requirements/devenv.txt
-    #   -r requirements/_tools.in
     #   pylint
 mccabe==0.7.0
     # via pylint
-mypy==1.10.1
+mypy==1.11.2
     # via -r requirements/../../../requirements/devenv.txt
 mypy-extensions==1.0.0
     # via
@@ -39,7 +36,7 @@ mypy-extensions==1.0.0
     #   mypy
 nodeenv==1.9.1
     # via pre-commit
-packaging==23.1
+packaging==24.1
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
@@ -51,14 +48,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.1
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -67,12 +64,12 @@ pyproject-hooks==1.1.0
 pyyaml==6.0.2
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   pre-commit
-    #   watchdog
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
-setuptools==69.2.0
+setuptools==75.1.0
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
@@ -84,9 +81,7 @@ typing-extensions==4.12.2
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
-watchdog==4.0.2
-    # via -r requirements/_tools.in
 wheel==0.44.0
     # via pip-tools
diff --git a/services/agent/requirements/ci.txt b/services/agent/requirements/ci.txt
index f7cb0dd72a4f..81006b68c241 100644
--- a/services/agent/requirements/ci.txt
+++ b/services/agent/requirements/ci.txt
@@ -14,7 +14,7 @@
 simcore-common-library @ ../../packages/common-library
 simcore-models-library @ ../../packages/models-library
 pytest-simcore @ ../../packages/pytest-simcore
-simcore-service-library @ ../../packages/service-library
+simcore-service-library[fastapi] @ ../../packages/service-library
 simcore-settings-library @ ../../packages/settings-library
 
 # installs current package
diff --git a/services/agent/requirements/constraints.txt b/services/agent/requirements/constraints.txt
index b52ac1ee492d..e69de29bb2d1 100644
--- a/services/agent/requirements/constraints.txt
+++ b/services/agent/requirements/constraints.txt
@@ -1,11 +0,0 @@
-
-#
-# CONSTRAINTS DUE TO TEST LIBRARIES
-#
-
-# There are incompatible versions in the resolved dependencies:
-#   jsonschema==3.2.0 (from -c requirements/_base.txt (line 159))
-#   jsonschema~=3.2 (from -c requirements/./constraints.txt (line 12))
-#   jsonschema<5,>=3.0 (from cfn-lint==0.64.1->moto[server]==4.0.1->-r requirements/_test.in (line 21))
-#   jsonschema<5.0.0,>=4.0.0 (from openapi-spec-validator==0.5.1->moto[server]==4.0.1->-r requirements/_test.in (line 21))
-openapi-spec-validator<0.5.0
diff --git a/services/agent/requirements/dev.txt b/services/agent/requirements/dev.txt
index 6f5e8cf7e24e..692b48d09460 100644
--- a/services/agent/requirements/dev.txt
+++ b/services/agent/requirements/dev.txt
@@ -15,7 +15,7 @@
 --editable ../../packages/common-library
 --editable ../../packages/models-library
 --editable ../../packages/pytest-simcore
---editable ../../packages/service-library
+--editable ../../packages/service-library[fastapi]
 --editable ../../packages/settings-library
 
 # installs current package
diff --git a/services/agent/requirements/prod.txt b/services/agent/requirements/prod.txt
index 26afb420d402..aad1cc7a2bb2 100644
--- a/services/agent/requirements/prod.txt
+++ b/services/agent/requirements/prod.txt
@@ -11,7 +11,8 @@
 
 # installs this repo's packages
 simcore-models-library @ ../../packages/models-library
-simcore-service-library @ ../../packages/service-library
+simcore-service-library[fastapi] @ ../../packages/service-library
 simcore-settings-library @ ../../packages/settings-library
+
 # installs current package
 simcore-service-agent @ .
diff --git a/services/agent/src/simcore_service_agent/api/__init__.py b/services/agent/src/simcore_service_agent/api/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/services/agent/src/simcore_service_agent/api/rest/__init__.py b/services/agent/src/simcore_service_agent/api/rest/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/services/agent/src/simcore_service_agent/api/rest/_dependencies.py b/services/agent/src/simcore_service_agent/api/rest/_dependencies.py
new file mode 100644
index 000000000000..a02971d996a1
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/api/rest/_dependencies.py
@@ -0,0 +1,27 @@
+""" Free functions to inject dependencies in routes handlers
+"""
+
+from typing import Annotated, cast
+
+from fastapi import Depends, FastAPI, Request
+from servicelib.rabbitmq._client_rpc import RabbitMQRPCClient
+
+from ...core.settings import ApplicationSettings
+
+
+def get_application(request: Request) -> FastAPI:
+    return cast(FastAPI, request.app)
+
+
+def get_settings(
+    app: Annotated[FastAPI, Depends(get_application)]
+) -> ApplicationSettings:
+    assert isinstance(app.state.settings, ApplicationSettings)  # nosec
+    return app.state.settings
+
+
+def get_rabbitmq_client(
+    app: Annotated[FastAPI, Depends(get_application)]
+) -> RabbitMQRPCClient:
+    assert isinstance(app.state.rabbitmq_rpc_server, RabbitMQRPCClient)  # nosec
+    return app.state.rabbitmq_rpc_server
diff --git a/services/agent/src/simcore_service_agent/api/rest/_health.py b/services/agent/src/simcore_service_agent/api/rest/_health.py
new file mode 100644
index 000000000000..600de2467221
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/api/rest/_health.py
@@ -0,0 +1,25 @@
+from typing import Annotated
+
+import arrow
+from fastapi import APIRouter, Depends
+from models_library.api_schemas__common.health import HealthCheckGet
+from models_library.errors import RABBITMQ_CLIENT_UNHEALTHY_MSG
+from servicelib.rabbitmq import RabbitMQClient
+
+from ._dependencies import get_rabbitmq_client
+
+router = APIRouter()
+
+
+class HealthCheckError(RuntimeError):
+    """Failed a health check"""
+
+
+@router.get("/health", response_model=HealthCheckGet)
+async def check_service_health(
+    rabbitmq_client: Annotated[RabbitMQClient, Depends(get_rabbitmq_client)]
+):
+    if not rabbitmq_client.healthy:
+        raise HealthCheckError(RABBITMQ_CLIENT_UNHEALTHY_MSG)
+
+    return HealthCheckGet(timestamp=f"{__name__}@{arrow.utcnow().datetime.isoformat()}")
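For reference, the new /health route behaves like any other FastAPI endpoint. A minimal probe sketch follows; it assumes the agent's REST API is reachable on localhost:8000, which is an assumption and not part of this diff:

    # Minimal sketch: probing the agent's new /health endpoint.
    # ASSUMPTION: the service listens on localhost:8000; adjust to your deployment.
    import asyncio

    import httpx


    async def check_agent_health() -> None:
        async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
            response = await client.get("/health")
            # on success the handler returns a HealthCheckGet payload, e.g.
            # {"timestamp": "simcore_service_agent.api.rest._health@2024-09-26T12:00:00+00:00"}
            print(response.status_code, response.json())


    asyncio.run(check_agent_health())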
diff --git a/services/agent/src/simcore_service_agent/api/rest/routes.py b/services/agent/src/simcore_service_agent/api/rest/routes.py
new file mode 100644
index 000000000000..18688cf2f4d7
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/api/rest/routes.py
@@ -0,0 +1,14 @@
+from fastapi import FastAPI, HTTPException
+from servicelib.fastapi.exceptions_utils import (
+    handle_errors_as_500,
+    http_exception_as_json_response,
+)
+
+from . import _health
+
+
+def setup_rest_api(app: FastAPI):
+    app.include_router(_health.router)
+
+    app.add_exception_handler(Exception, handle_errors_as_500)
+    app.add_exception_handler(HTTPException, http_exception_as_json_response)
diff --git a/services/agent/src/simcore_service_agent/api/rpc/__init__.py b/services/agent/src/simcore_service_agent/api/rpc/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/services/agent/src/simcore_service_agent/api/rpc/_volumes.py b/services/agent/src/simcore_service_agent/api/rpc/_volumes.py
new file mode 100644
index 000000000000..96edb817e622
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/api/rpc/_volumes.py
@@ -0,0 +1,28 @@
+import logging
+
+from fastapi import FastAPI
+from models_library.projects_nodes_io import NodeID
+from servicelib.logging_utils import log_context
+from servicelib.rabbitmq import RPCRouter
+from servicelib.rabbitmq.rpc_interfaces.agent.errors import (
+    NoServiceVolumesFoundRPCError,
+)
+from simcore_service_agent.services.volumes_manager import VolumesManager
+
+_logger = logging.getLogger(__name__)
+
+router = RPCRouter()
+
+
+@router.expose(reraise_if_error_type=(NoServiceVolumesFoundRPCError,))
+async def remove_volumes_without_backup_for_service(
+    app: FastAPI, *, node_id: NodeID
+) -> None:
+    with log_context(_logger, logging.INFO, f"removing volumes for service: {node_id}"):
+        await VolumesManager.get_from_app_state(app).remove_service_volumes(node_id)
+
+
+@router.expose()
+async def backup_and_remove_volumes_for_all_services(app: FastAPI) -> None:
+    with log_context(_logger, logging.INFO, "removing all service volumes from node"):
+        await VolumesManager.get_from_app_state(app).remove_all_volumes()
diff --git a/services/agent/src/simcore_service_agent/api/rpc/routes.py b/services/agent/src/simcore_service_agent/api/rpc/routes.py
new file mode 100644
index 000000000000..7a658ae52806
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/api/rpc/routes.py
@@ -0,0 +1,28 @@
+from fastapi import FastAPI
+from models_library.rabbitmq_basic_types import RPCNamespace
+from servicelib.rabbitmq import RPCRouter
+from simcore_service_agent.core.settings import ApplicationSettings
+
+from ...services.rabbitmq import get_rabbitmq_rpc_server
+from . import _volumes
+
+ROUTERS: list[RPCRouter] = [
+    _volumes.router,
+]
+
+
+def setup_rpc_api_routes(app: FastAPI) -> None:
+    async def startup() -> None:
+        rpc_server = get_rabbitmq_rpc_server(app)
+        settings: ApplicationSettings = app.state.settings
+        rpc_namespace = RPCNamespace.from_entries(
+            {
+                "service": "agent",
+                "docker_node_id": settings.AGENT_DOCKER_NODE_ID,
+                "swarm_stack_name": settings.AGENT_VOLUMES_CLEANUP_TARGET_SWARM_STACK_NAME,
+            }
+        )
+        for router in ROUTERS:
+            await rpc_server.register_router(router, rpc_namespace, app)
+
+    app.add_event_handler("startup", startup)
diff --git a/services/agent/src/simcore_service_agent/core/_dependencies.py b/services/agent/src/simcore_service_agent/core/_dependencies.py
deleted file mode 100644
index d48e806851af..000000000000
--- a/services/agent/src/simcore_service_agent/core/_dependencies.py
+++ /dev/null
@@ -1,23 +0,0 @@
-""" Free functions to inject dependencies in routes handlers
-"""
-
-from typing import cast
-
-from fastapi import Depends, FastAPI, Request
-
-from ..modules.task_monitor import TaskMonitor
-from .settings import ApplicationSettings
-
-
-def get_application(request: Request) -> FastAPI:
-    return cast(FastAPI, request.app)
-
-
-def get_settings(app: FastAPI = Depends(get_application)) -> ApplicationSettings:
-    assert isinstance(app.state.settings, ApplicationSettings)  # nosec
-    return app.state.settings
-
-
-def get_task_monitor(app: FastAPI = Depends(get_application)) -> TaskMonitor:
-    assert isinstance(app.state.task_monitor, TaskMonitor)  # nosec
-    return app.state.task_monitor
diff --git a/services/agent/src/simcore_service_agent/core/_routes.py b/services/agent/src/simcore_service_agent/core/_routes.py
deleted file mode 100644
index 6f3486d06627..000000000000
--- a/services/agent/src/simcore_service_agent/core/_routes.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from fastapi import APIRouter, Depends, HTTPException, status
-
-from ..modules.task_monitor import TaskMonitor
-from ._dependencies import get_task_monitor
-
-router = APIRouter()
-
-
-@router.get("/health")
-def health(task_monitor: TaskMonitor = Depends(get_task_monitor)) -> None:
-    if not task_monitor.was_started or task_monitor.are_tasks_hanging:
-        raise HTTPException(status.HTTP_503_SERVICE_UNAVAILABLE, detail="unhealthy")
diff --git a/services/agent/src/simcore_service_agent/core/application.py b/services/agent/src/simcore_service_agent/core/application.py
index 3ac90922dc16..1a52eabb62ce 100644
--- a/services/agent/src/simcore_service_agent/core/application.py
+++ b/services/agent/src/simcore_service_agent/core/application.py
@@ -5,9 +5,6 @@
     get_common_oas_options,
     override_fastapi_openapi_method,
 )
-from servicelib.fastapi.prometheus_instrumentation import (
-    setup_prometheus_instrumentation,
-)
 from servicelib.logging_utils import config_all_loggers
 
 from .._meta import (
@@ -18,8 +15,11 @@
     SUMMARY,
     VERSION,
 )
-from ..modules import task_monitor
-from ._routes import router
+from ..api.rest.routes import setup_rest_api
+from ..api.rpc.routes import setup_rpc_api_routes
+from ..services.instrumentation import setup_instrumentation
+from ..services.rabbitmq import setup_rabbitmq
+from ..services.volumes_manager import setup_volume_manager
 from .settings import ApplicationSettings
 
 logger = logging.getLogger(__name__)
@@ -35,7 +35,6 @@ def _setup_logger(settings: ApplicationSettings):
 
 
 def create_app() -> FastAPI:
-    # SETTINGS
     settings = ApplicationSettings.create_from_envs()
     _setup_logger(settings)
     logger.debug(settings.model_dump_json(indent=2))
@@ -52,20 +51,18 @@ def create_app() -> FastAPI:
     override_fastapi_openapi_method(app)
     app.state.settings = settings
 
-    if app.state.settings.AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED:
-        setup_prometheus_instrumentation(app)
-
-    # ROUTERS
-    app.include_router(router)
+    setup_instrumentation(app)
 
-    # EVENTS
-    task_monitor.setup(app)
+    setup_rabbitmq(app)
+    setup_volume_manager(app)
+    setup_rest_api(app)
+    setup_rpc_api_routes(app)
 
     async def _on_startup() -> None:
-        print(APP_STARTED_BANNER_MSG, flush=True)
+        print(APP_STARTED_BANNER_MSG, flush=True)  # noqa: T201
 
     async def _on_shutdown() -> None:
-        print(APP_FINISHED_BANNER_MSG, flush=True)
+        print(APP_FINISHED_BANNER_MSG, flush=True)  # noqa: T201
 
     app.add_event_handler("startup", _on_startup)
     app.add_event_handler("shutdown", _on_shutdown)
diff --git a/services/agent/src/simcore_service_agent/core/settings.py b/services/agent/src/simcore_service_agent/core/settings.py
index d57d8d0af5b4..a4e251d3e468 100644
--- a/services/agent/src/simcore_service_agent/core/settings.py
+++ b/services/agent/src/simcore_service_agent/core/settings.py
@@ -1,13 +1,12 @@
-from typing import Final
+from datetime import timedelta
 
 from models_library.basic_types import BootModeEnum, LogLevel
-from pydantic import AliasChoices, AnyHttpUrl, Field, NonNegativeInt, field_validator
+from pydantic import AliasChoices, AnyHttpUrl, Field, field_validator
 from settings_library.base import BaseCustomSettings
 from settings_library.r_clone import S3Provider
+from settings_library.rabbit import RabbitSettings
 from settings_library.utils_logging import MixinLoggingSettings
 
-_MINUTE: Final[NonNegativeInt] = 60
-
 
 class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
     LOGLEVEL: LogLevel = Field(
@@ -23,15 +22,17 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
     AGENT_VOLUMES_LOG_FORMAT_LOCAL_DEV_ENABLED: bool = Field(
         default=False,
         validation_alias=AliasChoices(
-            "AGENT_VOLUMES_LOG_FORMAT_LOCAL_DEV_ENABLED",
-            "LOG_FORMAT_LOCAL_DEV_ENABLED",
+            "AGENT_VOLUMES_LOG_FORMAT_LOCAL_DEV_ENABLED", "LOG_FORMAT_LOCAL_DEV_ENABLED"
+        ),
+        description=(
+            "Enables local development log format. WARNING: make sure it is "
+            "disabled if you want to have structured logs!"
         ),
-        description="Enables local development log format. WARNING: make sure it is disabled if you want to have structured logs!",
     )
     AGENT_VOLUMES_CLEANUP_TARGET_SWARM_STACK_NAME: str = Field(
         ..., description="Exactly the same as director-v2's `SWARM_STACK_NAME` env var"
     )
-    AGENT_VOLUMES_CLEANUP_S3_ENDPOINT: AnyHttpUrl | None
+    AGENT_VOLUMES_CLEANUP_S3_ENDPOINT: AnyHttpUrl
     AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY: str
     AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY: str
     AGENT_VOLUMES_CLEANUP_S3_BUCKET: str
@@ -47,11 +48,33 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
         [".hidden_do_not_remove", "key_values.json"],
         description="Files to ignore when syncing to s3",
     )
-    AGENT_VOLUMES_CLEANUP_INTERVAL_S: NonNegativeInt = Field(
-        60 * _MINUTE, description="interval at which to repeat volumes cleanup"
+    AGENT_VOLUMES_CLEANUP_INTERVAL: timedelta = Field(
+        timedelta(minutes=1), description="interval for running volumes removal"
+    )
+    AGENT_VOLUMES_CLENUP_BOOK_KEEPING_INTERVAL: timedelta = Field(
+        timedelta(minutes=1),
+        description=(
+            "interval at which to scan for unsued volumes and keep track since "
+            "they were detected as being unused"
+        ),
+    )
+    AGENT_VOLUMES_CLENUP_REMOVE_VOLUMES_INACTIVE_FOR: timedelta = Field(
+        timedelta(minutes=65),
+        description=(
+            "if a volume is unused for more than this interval it can be removed. "
+            "The default is set to a health 60+ miunutes since it might take upto "
+            "60 minutes for the dy-sidecar to properly save data form the volumes"
+        ),
     )
+
     AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED: bool = True
 
+    AGENT_DOCKER_NODE_ID: str = Field(..., description="used by the rabbitmq module")
+
+    AGENT_RABBITMQ: RabbitSettings = Field(
+        auto_default_from_env=True, description="settings for service/rabbitmq"
+    )
+
     @field_validator("LOGLEVEL")
     @classmethod
     def valid_log_level(cls, value) -> LogLevel:
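The two new intervals read as a book-keeping loop plus an age threshold: one setting controls how often unused volumes are (re)scanned, the other how long a volume must stay unused before it may be removed. The sketch below only illustrates that interaction; it is not the VolumesManager added in this PR, and all names in it are assumptions.

    # Illustrative only (assumed logic, not the actual VolumesManager):
    # unused volumes are tracked on every book-keeping tick and removed only
    # once they have been unused for at least `remove_after`.
    from datetime import datetime, timedelta, timezone

    _first_seen_unused: dict[str, datetime] = {}


    def volumes_to_remove(
        currently_unused: set[str],
        *,
        remove_after: timedelta,  # AGENT_VOLUMES_CLENUP_REMOVE_VOLUMES_INACTIVE_FOR
    ) -> set[str]:
        now = datetime.now(timezone.utc)
        # forget volumes that are in use again
        for name in set(_first_seen_unused) - currently_unused:
            _first_seen_unused.pop(name, None)
        # start tracking volumes that just became unused
        for name in currently_unused - set(_first_seen_unused):
            _first_seen_unused[name] = now
        return {
            name
            for name, first_seen in _first_seen_unused.items()
            if now - first_seen >= remove_after
        }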
diff --git a/services/agent/src/simcore_service_agent/models/__init__.py b/services/agent/src/simcore_service_agent/models/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/services/agent/src/simcore_service_agent/models/volumes.py b/services/agent/src/simcore_service_agent/models/volumes.py
new file mode 100644
index 000000000000..ceb310486502
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/models/volumes.py
@@ -0,0 +1,28 @@
+from pathlib import Path
+
+from models_library.api_schemas_directorv2.services import (
+    CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME,
+)
+from models_library.projects import ProjectID
+from models_library.projects_nodes_io import NodeID
+from models_library.services_types import RunID
+from models_library.users import UserID
+from pydantic import BaseModel, Field
+
+
+class DynamicServiceVolumeLabels(BaseModel):
+    node_uuid: NodeID
+    run_id: RunID
+    source: str
+    study_id: ProjectID
+    swarm_stack_name: str
+    user_id: UserID
+
+    @property
+    def directory_name(self) -> str:
+        return self.source[CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME:][::-1].strip("_")
+
+
+class VolumeDetails(BaseModel):
+    mountpoint: Path = Field(alias="Mountpoint")
+    labels: DynamicServiceVolumeLabels = Field(alias="Labels")
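For clarity on `directory_name`: the tail of a dynamic-sidecar volume name stores the mounted directory name reversed, so slicing off the fixed-length prefix and reversing the remainder recovers the original name. A worked example follows; the prefix length is computed inline only for illustration, since the real value is the CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME constant from models-library (not shown in this diff):

    # Worked example of the directory_name recovery (illustration only).
    source = (
        "dyv_a0430d06-40d2-4c92-9490-6aca30e00fc7_"
        "898fff63-d402-5566-a99b-091522dd2ae9_stuptuo_krow_nayvoj_emoh_"
    )
    # stands in for CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME
    prefix_length = len(source) - len("stuptuo_krow_nayvoj_emoh_")
    directory_name = source[prefix_length:][::-1].strip("_")
    assert directory_name == "home_jovyan_work_outputs"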
diff --git a/services/agent/src/simcore_service_agent/modules/task_monitor.py b/services/agent/src/simcore_service_agent/modules/task_monitor.py
deleted file mode 100644
index 8f4e3cd2ab3e..000000000000
--- a/services/agent/src/simcore_service_agent/modules/task_monitor.py
+++ /dev/null
@@ -1,169 +0,0 @@
-import asyncio
-import logging
-from collections import deque
-from contextlib import suppress
-from dataclasses import dataclass, field
-from time import time
-from typing import Any, Awaitable, Callable, Final, Optional
-
-from fastapi import FastAPI
-from pydantic import PositiveFloat, PositiveInt
-from servicelib.logging_utils import log_context
-
-from ..core.settings import ApplicationSettings
-from .volumes_cleanup import backup_and_remove_volumes
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_TASK_WAIT_ON_ERROR: Final[PositiveInt] = 10
-
-
-@dataclass
-class _TaskData:
-    target: Callable
-    args: Any
-    repeat_interval_s: Optional[PositiveFloat]
-    _start_time: Optional[PositiveFloat] = None
-
-    @property
-    def name(self) -> str:
-        return self.target.__name__
-
-    async def run(self) -> None:
-        coroutine = self.target(*self.args)
-
-        self._start_time = time()
-
-        try:
-            await coroutine
-        finally:
-            self._start_time = None
-
-    def is_hanging(self) -> bool:
-        # NOTE: tasks with no repeat_interval_s are design to run forever
-        if self.repeat_interval_s is None:
-            return False
-
-        if self._start_time is None:
-            return False
-
-        return (time() - self._start_time) > self.repeat_interval_s
-
-
-async def _task_runner(task_data: _TaskData) -> None:
-    with log_context(logger, logging.INFO, msg=f"'{task_data.name}'"):
-        while True:
-            try:
-                await task_data.run()
-            except Exception:  # pylint: disable=broad-except
-                logger.exception("Had an error while running '%s'", task_data.name)
-
-            if task_data.repeat_interval_s is None:
-                logger.warning(
-                    "Unexpected termination of '%s'; it will be restarted",
-                    task_data.name,
-                )
-
-            logger.info(
-                "Will run '%s' again in %s seconds",
-                task_data.name,
-                task_data.repeat_interval_s,
-            )
-            await asyncio.sleep(
-                DEFAULT_TASK_WAIT_ON_ERROR
-                if task_data.repeat_interval_s is None
-                else task_data.repeat_interval_s
-            )
-
-
-@dataclass
-class TaskMonitor:
-    _was_started: bool = False
-    _tasks: set[asyncio.Task] = field(default_factory=set)
-    _to_start: dict[str, _TaskData] = field(default_factory=dict)
-
-    @property
-    def was_started(self) -> bool:
-        return self._was_started
-
-    @property
-    def are_tasks_hanging(self) -> bool:
-        hanging_tasks_detected = False
-        for name, task_data in self._to_start.items():
-            if task_data.is_hanging():
-                logger.warning("Task '%s' is hanging", name)
-                hanging_tasks_detected = True
-        return hanging_tasks_detected
-
-    def register_job(
-        self,
-        target: Callable,
-        *args: Any,
-        repeat_interval_s: Optional[PositiveFloat] = None,
-    ) -> None:
-        if self._was_started:
-            raise RuntimeError(
-                "Cannot add more tasks, monitor already running with: "
-                f"{[x.get_name() for x in self._tasks]}"
-            )
-
-        task_data = _TaskData(target, args, repeat_interval_s)
-        if task_data.name in self._to_start:
-            raise RuntimeError(f"{target.__name__} is already registered")
-
-        self._to_start[target.__name__] = task_data
-
-    async def start(self) -> None:
-        self._was_started = True
-        for name, task_data in self._to_start.items():
-            logger.info("Starting task '%s'", name)
-            self._tasks.add(
-                asyncio.create_task(_task_runner(task_data), name=f"task_{name}")
-            )
-
-    async def shutdown(self):
-        async def _wait_for_task(task: asyncio.Task) -> None:
-            with suppress(asyncio.CancelledError):
-                await task
-
-        tasks_to_wait: deque[Awaitable] = deque()
-        for task in set(self._tasks):
-            logger.info("Cancel and stop task '%s'", task.get_name())
-
-            task.cancel()
-            tasks_to_wait.append(_wait_for_task(task))
-            self._tasks.remove(task)
-
-        await asyncio.gather(*tasks_to_wait, return_exceptions=True)
-        self._was_started = False
-        self._to_start = {}
-
-
-def setup(app: FastAPI) -> None:
-    async def _on_startup() -> None:
-        task_monitor = app.state.task_monitor = TaskMonitor()
-        settings: ApplicationSettings = app.state.settings
-
-        # setup all relative jobs
-        task_monitor.register_job(
-            backup_and_remove_volumes,
-            settings,
-            repeat_interval_s=settings.AGENT_VOLUMES_CLEANUP_INTERVAL_S,
-        )
-
-        await task_monitor.start()
-        logger.info("Started 🔍 task_monitor")
-
-    async def _on_shutdown() -> None:
-        task_monitor: TaskMonitor = app.state.task_monitor
-        await task_monitor.shutdown()
-        logger.info("Stopped 🔍 task_monitor")
-
-    app.add_event_handler("startup", _on_startup)
-    app.add_event_handler("shutdown", _on_shutdown)
-
-
-__all__: tuple[str, ...] = (
-    "setup",
-    "TaskMonitor",
-)
diff --git a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/__init__.py b/services/agent/src/simcore_service_agent/modules/volumes_cleanup/__init__.py
deleted file mode 100644
index 00002f118b7d..000000000000
--- a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from ._core import backup_and_remove_volumes
-
-__all__: tuple[str, ...] = ("backup_and_remove_volumes",)
diff --git a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_core.py b/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_core.py
deleted file mode 100644
index 6a4e63e3ce93..000000000000
--- a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_core.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import logging
-
-from ...core.settings import ApplicationSettings
-from ._docker import delete_volume, docker_client, get_dyv_volumes, is_volume_used
-from ._s3 import store_to_s3
-
-logger = logging.getLogger(__name__)
-
-
-async def backup_and_remove_volumes(settings: ApplicationSettings) -> None:
-    async with docker_client() as client:
-        dyv_volumes: list[dict] = await get_dyv_volumes(
-            client, settings.AGENT_VOLUMES_CLEANUP_TARGET_SWARM_STACK_NAME
-        )
-
-        if len(dyv_volumes) == 0:
-            return
-
-        cleaned_up_volumes_count = 0
-        logger.info("Beginning cleanup.")
-        for dyv_volume in dyv_volumes:
-            volume_name = dyv_volume["Name"]
-
-            if await is_volume_used(client, volume_name):
-                logger.debug("Skipped in use docker volume: '%s'", volume_name)
-                continue
-
-            try:
-                await store_to_s3(
-                    volume_name=volume_name,
-                    dyv_volume=dyv_volume,
-                    s3_endpoint=settings.AGENT_VOLUMES_CLEANUP_S3_ENDPOINT,
-                    s3_access_key=settings.AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY,
-                    s3_secret_key=settings.AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY,
-                    s3_bucket=settings.AGENT_VOLUMES_CLEANUP_S3_BUCKET,
-                    s3_region=settings.AGENT_VOLUMES_CLEANUP_S3_REGION,
-                    s3_provider=settings.AGENT_VOLUMES_CLEANUP_S3_PROVIDER,
-                    s3_retries=settings.AGENT_VOLUMES_CLEANUP_RETRIES,
-                    s3_parallelism=settings.AGENT_VOLUMES_CLEANUP_PARALLELISM,
-                    exclude_files=settings.AGENT_VOLUMES_CLEANUP_EXCLUDE_FILES,
-                )
-            except Exception as e:  # pylint:disable=broad-except
-                logger.error("%s", e)
-                continue
-
-            logger.info("Successfully cleaned up docker volume: '%s'", volume_name)
-
-            await delete_volume(client, volume_name)
-            logger.info("Removed docker volume: '%s'", volume_name)
-            cleaned_up_volumes_count += 1
-
-        if cleaned_up_volumes_count > 0:
-            logger.info(
-                (
-                    "The dy-sidecar volume cleanup detected %s "
-                    "zombie volumes on the current machine."
-                ),
-                cleaned_up_volumes_count,
-            )
-        else:
-            logger.info("Found no zombie dy-sidecar volumes to cleanup.")
diff --git a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_docker.py b/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_docker.py
deleted file mode 100644
index 26d1475fdc19..000000000000
--- a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_docker.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from collections import deque
-from contextlib import asynccontextmanager
-from typing import Any, AsyncIterator
-
-from aiodocker import Docker
-from aiodocker.utils import clean_filters
-from aiodocker.volumes import DockerVolume
-from servicelib.docker_constants import PREFIX_DYNAMIC_SIDECAR_VOLUMES
-
-
-@asynccontextmanager
-async def docker_client() -> AsyncIterator[Docker]:
-    async with Docker() as docker:
-        yield docker
-
-
-async def get_dyv_volumes(docker: Docker, target_swarm_stack_name: str) -> list[dict]:
-    dyv_volumes: deque[dict] = deque()
-    volumes = await docker.volumes.list()
-    for volume in volumes["Volumes"]:
-        volume_labels: dict[str, Any] = volume.get("Labels") or {}
-        if (
-            volume["Name"].startswith(f"{PREFIX_DYNAMIC_SIDECAR_VOLUMES}_")
-            and volume_labels.get("swarm_stack_name") == target_swarm_stack_name
-        ):
-            dyv_volumes.append(volume)
-    return list(dyv_volumes)
-
-
-async def delete_volume(docker: Docker, volume_name: str) -> None:
-    await DockerVolume(docker, volume_name).delete()
-
-
-async def is_volume_used(docker: Docker, volume_name: str) -> bool:
-    filters = clean_filters({"volume": volume_name})
-    containers = await docker.containers.list(all=True, filters=filters)
-    return len(containers) > 0
diff --git a/services/agent/src/simcore_service_agent/services/__init__.py b/services/agent/src/simcore_service_agent/services/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_s3.py b/services/agent/src/simcore_service_agent/services/backup.py
similarity index 56%
rename from services/agent/src/simcore_service_agent/modules/volumes_cleanup/_s3.py
rename to services/agent/src/simcore_service_agent/services/backup.py
index 0494f614a1d7..fb6de148eef3 100644
--- a/services/agent/src/simcore_service_agent/modules/volumes_cleanup/_s3.py
+++ b/services/agent/src/simcore_service_agent/services/backup.py
@@ -1,17 +1,24 @@
 import asyncio
 import logging
+import tempfile
 from asyncio.streams import StreamReader
 from pathlib import Path
 from textwrap import dedent
 from typing import Final
+from uuid import uuid4
 
-from pydantic import AnyHttpUrl
-from settings_library.r_clone import S3Provider
+from fastapi import FastAPI
 from settings_library.utils_r_clone import resolve_provider
 
-logger = logging.getLogger(__name__)
+from ..core.settings import ApplicationSettings
+from ..models.volumes import DynamicServiceVolumeLabels, VolumeDetails
 
-R_CLONE_CONFIG = """
+_logger = logging.getLogger(__name__)
+
+
+_R_CLONE_CONFIG: Final[
+    str
+] = """
 [dst]
 type = s3
 provider = {destination_provider}
@@ -21,46 +28,32 @@
 region = {destination_region}
 acl = private
 """
-VOLUME_NAME_FIXED_PORTION: Final[int] = 78
-
-
-def get_config_file_path(
-    s3_endpoint: AnyHttpUrl | None,
-    s3_access_key: str,
-    s3_secret_key: str,
-    s3_region: str,
-    s3_provider: S3Provider,
-) -> Path:
-    config_content = R_CLONE_CONFIG.format(
-        destination_provider=resolve_provider(s3_provider),
-        destination_access_key=s3_access_key,
-        destination_secret_key=s3_secret_key,
-        destination_endpoint=s3_endpoint,
-        destination_region=s3_region,
-    )
-    conf_path = Path("/tmp/rclone_config.ini")  # NOSONAR
-    conf_path.write_text(config_content)  # pylint:disable=unspecified-encoding
-    return conf_path
 
 
-def _get_dir_name(volume_name: str) -> str:
-    # from: "dyv_a0430d06-40d2-4c92-9490-6aca30e00fc7_898fff63-d402-5566-a99b-091522dd2ae9_stuptuo_krow_nayvoj_emoh_"
-    # gets: "home_jovyan_work_outputs"
-    return volume_name[VOLUME_NAME_FIXED_PORTION:][::-1].strip("_")
+def _get_config_file_path(settings: ApplicationSettings) -> Path:
+    config_content = _R_CLONE_CONFIG.format(
+        destination_provider=resolve_provider(
+            settings.AGENT_VOLUMES_CLEANUP_S3_PROVIDER
+        ),
+        destination_access_key=settings.AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY,
+        destination_secret_key=settings.AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY,
+        destination_endpoint=settings.AGENT_VOLUMES_CLEANUP_S3_ENDPOINT,
+        destination_region=settings.AGENT_VOLUMES_CLEANUP_S3_REGION,
+    )
+    conf_path = Path(tempfile.gettempdir()) / f"rclone_config_{uuid4()}.ini"
+    conf_path.write_text(config_content)
+    return conf_path
 
 
-def _get_s3_path(s3_bucket: str, labels: dict[str, str], volume_name: str) -> Path:
-    joint_key = "/".join(
-        (
-            s3_bucket,
-            labels["swarm_stack_name"],
-            labels["study_id"],
-            labels["node_uuid"],
-            labels["run_id"],
-            _get_dir_name(volume_name),
-        )
+def _get_s3_path(s3_bucket: str, labels: DynamicServiceVolumeLabels) -> Path:
+    return (
+        Path(s3_bucket)
+        / labels.swarm_stack_name
+        / f"{labels.study_id}"
+        / f"{labels.node_uuid}"
+        / labels.run_id
+        / labels.directory_name
     )
-    return Path(f"/{joint_key}")
 
 
 async def _read_stream(stream: StreamReader) -> str:
@@ -68,7 +61,7 @@ async def _read_stream(stream: StreamReader) -> str:
     while line := await stream.readline():
         message = line.decode()
         output += message
-        logger.debug(message.strip("\n"))
+        _logger.debug(message.strip("\n"))
     return output
 
 
@@ -79,12 +72,12 @@ def _get_r_clone_str_command(command: list[str], exclude_files: list[str]) -> st
         command.append(to_exclude)
 
     str_command = " ".join(command)
-    logger.info(str_command)
+    _logger.info(str_command)
     return str_command
 
 
 def _log_expected_operation(
-    dyv_volume_labels: dict[str, str],
+    labels: DynamicServiceVolumeLabels,
     s3_path: Path,
     r_clone_ls_output: str,
     volume_name: str,
@@ -101,50 +94,38 @@ def _log_expected_operation(
         ---
         volume_name         {volume_name}
         destination_path    {s3_path}
-        study_id:           {dyv_volume_labels['study_id']}
-        node_id:            {dyv_volume_labels['node_uuid']}
-        user_id:            {dyv_volume_labels['user_id']}
-        run_id:             {dyv_volume_labels['run_id']}
+        study_id:           {labels.study_id}
+        node_id:            {labels.node_uuid}
+        user_id:            {labels.user_id}
+        run_id:             {labels.run_id}
         ---
         Files to sync by rclone
         ---\n{r_clone_ls_output.rstrip()}
         ---
     """
     )
-    logger.log(log_level, formatted_message)
+    _logger.log(log_level, formatted_message)
 
 
-async def store_to_s3(  # pylint:disable=too-many-locals,too-many-arguments
-    volume_name: str,
-    dyv_volume: dict,
-    s3_endpoint: AnyHttpUrl | None,
-    s3_access_key: str,
-    s3_secret_key: str,
-    s3_bucket: str,
-    s3_region: str,
-    s3_provider: S3Provider,
-    s3_retries: int,
-    s3_parallelism: int,
-    exclude_files: list[str],
+async def _store_in_s3(
+    settings: ApplicationSettings, volume_name: str, volume_details: VolumeDetails
 ) -> None:
-    config_file_path = get_config_file_path(
-        s3_endpoint=s3_endpoint,
-        s3_access_key=s3_access_key,
-        s3_secret_key=s3_secret_key,
-        s3_region=s3_region,
-        s3_provider=s3_provider,
-    )
+    exclude_files = settings.AGENT_VOLUMES_CLEANUP_EXCLUDE_FILES
 
-    source_dir = dyv_volume["Mountpoint"]
+    config_file_path = _get_config_file_path(settings)
+
+    source_dir = volume_details.mountpoint
     if not Path(source_dir).exists():
-        logger.info(
+        _logger.info(
             "Volume mountpoint %s does not exist. Skipping backup, volume %s will be removed.",
             source_dir,
             volume_name,
         )
         return
 
-    s3_path = _get_s3_path(s3_bucket, dyv_volume["Labels"], volume_name)
+    s3_path = _get_s3_path(
+        settings.AGENT_VOLUMES_CLEANUP_S3_BUCKET, volume_details.labels
+    )
 
     # listing files rclone will sync
     r_clone_ls = [
@@ -159,11 +140,12 @@ async def store_to_s3(  # pylint:disable=too-many-locals,too-many-arguments
         stdout=asyncio.subprocess.PIPE,
         stderr=asyncio.subprocess.STDOUT,
     )
+
     assert process.stdout  # nosec
     r_clone_ls_output = await _read_stream(process.stdout)
     await process.wait()
     _log_expected_operation(
-        dyv_volume["Labels"], s3_path, r_clone_ls_output, volume_name
+        volume_details.labels, s3_path, r_clone_ls_output, volume_name
     )
 
     # sync files via rclone
@@ -174,9 +156,9 @@ async def store_to_s3(  # pylint:disable=too-many-locals,too-many-arguments
         "--low-level-retries",
         "3",
         "--retries",
-        f"{s3_retries}",
+        f"{settings.AGENT_VOLUMES_CLEANUP_RETRIES}",
         "--transfers",
-        f"{s3_parallelism}",
+        f"{settings.AGENT_VOLUMES_CLEANUP_PARALLELISM}",
         # below two options reduce to a minimum the memory footprint
         # https://forum.rclone.org/t/how-to-set-a-memory-limit/10230/4
         "--use-mmap",  # docs https://rclone.org/docs/#use-mmap
@@ -197,13 +179,24 @@ async def store_to_s3(  # pylint:disable=too-many-locals,too-many-arguments
         stdout=asyncio.subprocess.PIPE,
         stderr=asyncio.subprocess.STDOUT,
     )
+
     assert process.stdout  # nosec
     r_clone_sync_output = await _read_stream(process.stdout)
     await process.wait()
-    logger.info("Sync result:\n%s", r_clone_sync_output)
+    _logger.info("Sync result:\n%s", r_clone_sync_output)
 
     if process.returncode != 0:
-        raise RuntimeError(
+        msg = (
             f"Shell subprocesses yielded nonzero error code {process.returncode} "
             f"for command {str_r_clone_sync}\n{r_clone_sync_output}"
         )
+        raise RuntimeError(msg)
+
+
+async def backup_volume(
+    app: FastAPI, volume_details: VolumeDetails, volume_name: str
+) -> None:
+    settings: ApplicationSettings = app.state.settings
+    await _store_in_s3(
+        settings=settings, volume_name=volume_name, volume_details=volume_details
+    )
diff --git a/services/agent/src/simcore_service_agent/services/docker_utils.py b/services/agent/src/simcore_service_agent/services/docker_utils.py
new file mode 100644
index 000000000000..181fe13a2759
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/services/docker_utils.py
@@ -0,0 +1,108 @@
+import logging
+from collections.abc import Iterator
+from contextlib import contextmanager
+from typing import Final
+
+from aiodocker import DockerError
+from aiodocker.docker import Docker
+from aiodocker.volumes import DockerVolume
+from fastapi import FastAPI
+from models_library.api_schemas_directorv2.services import (
+    CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME,
+)
+from servicelib.docker_constants import PREFIX_DYNAMIC_SIDECAR_VOLUMES
+from servicelib.logging_utils import log_catch, log_context
+from simcore_service_agent.core.settings import ApplicationSettings
+from starlette import status
+
+from ..models.volumes import VolumeDetails
+from .backup import backup_volume
+from .instrumentation import get_instrumentation
+
+_logger = logging.getLogger(__name__)
+
+
+def _reverse_string(to_reverse: str) -> str:
+    return to_reverse[::-1]
+
+
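+# NOTE: volume names embed the service's directory path in reversed form (see
+# `_does_volume_require_backup` below); keeping these entries pre-reversed allows
+# a plain `str.startswith` check on the reversed tail of the volume name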
+_VOLUMES_NOT_TO_BACKUP: Final[tuple[str, ...]] = (
+    _reverse_string("inputs"),
+    _reverse_string("shared-store"),
+)
+
+
+def _does_volume_require_backup(volume_name: str) -> bool:
+    # from    `dyv_1726228407_891aa1a7-eb31-459f-8aed-8c902f5f5fb0_dd84f39e-7154-4a13-ba1d-50068d723104_stupni_www_`
+    # returns `stupni_www_`
+    inverse_name_part = volume_name[CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME:]
+    return not inverse_name_part.startswith(_VOLUMES_NOT_TO_BACKUP)
+
+
+async def get_unused_dynamc_sidecar_volumes(docker: Docker) -> set[str]:
+    """Returns all volumes unused by sidecars"""
+    volumes = await docker.volumes.list()
+    all_volumes: set[str] = {volume["Name"] for volume in volumes["Volumes"]}
+
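+    # containers are listed including stopped ones: any volume still referenced
+    # by a mount is considered in use and must not be removed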
+    containers = await docker.containers.list(all=True)
+
+    used_volumes: set[str] = set()
+    for container in containers:
+        container_info = await container.show()
+        mounts = container_info.get("Mounts", [])
+        for mount in mounts:
+            if mount["Type"] == "volume":
+                used_volumes.add(mount["Name"])
+
+    unused_volumes = all_volumes - used_volumes
+    return {v for v in unused_volumes if v.startswith(PREFIX_DYNAMIC_SIDECAR_VOLUMES)}
+
+
+async def get_volume_details(docker: Docker, *, volume_name: str) -> VolumeDetails:
+    volume_details = await DockerVolume(docker, volume_name).show()
+    return VolumeDetails.parse_obj(volume_details)
+
+
+@contextmanager
+def _log_volume_not_found(volume_name: str) -> Iterator[None]:
+    try:
+        yield
+    except DockerError as e:
+        if e.status == status.HTTP_404_NOT_FOUND:
+            _logger.info("Volume not found '%s'", volume_name)
+        else:
+            raise
+
+
+async def _backup_volume(app: FastAPI, docker: Docker, *, volume_name: str) -> None:
+    """Backs up only volumes which require a backup"""
+    if _does_volume_require_backup(volume_name):
+        with log_context(
+            _logger, logging.INFO, f"backup '{volume_name}'", log_duration=True
+        ):
+            volume_details = await get_volume_details(docker, volume_name=volume_name)
+            settings: ApplicationSettings = app.state.settings
+            get_instrumentation(app).agent_metrics.backedup_volumes(
+                settings.AGENT_DOCKER_NODE_ID
+            )
+            await backup_volume(app, volume_details, volume_name)
+    else:
+        _logger.debug("No backup is required for '%s'", volume_name)
+
+
+async def remove_volume(
+    app: FastAPI, docker: Docker, *, volume_name: str, requires_backup: bool
+) -> None:
+    """Removes a volume and backs data up if required"""
+    with log_context(
+        _logger, logging.DEBUG, f"removing '{volume_name}'", log_duration=True
+    ), log_catch(_logger, reraise=False), _log_volume_not_found(volume_name):
+        if requires_backup:
+            await _backup_volume(app, docker, volume_name=volume_name)
+
+        await DockerVolume(docker, volume_name).delete()
+
+        settings: ApplicationSettings = app.state.settings
+        get_instrumentation(app).agent_metrics.remove_volumes(
+            settings.AGENT_DOCKER_NODE_ID
+        )
diff --git a/services/agent/src/simcore_service_agent/services/instrumentation/__init__.py b/services/agent/src/simcore_service_agent/services/instrumentation/__init__.py
new file mode 100644
index 000000000000..49d7b66b0793
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/services/instrumentation/__init__.py
@@ -0,0 +1,6 @@
+from ._setup import get_instrumentation, setup_instrumentation
+
+__all__: tuple[str, ...] = (
+    "get_instrumentation",
+    "setup_instrumentation",
+)
diff --git a/services/agent/src/simcore_service_agent/services/instrumentation/_models.py b/services/agent/src/simcore_service_agent/services/instrumentation/_models.py
new file mode 100644
index 000000000000..bf5543745954
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/services/instrumentation/_models.py
@@ -0,0 +1,52 @@
+from dataclasses import dataclass, field
+from typing import Final
+
+from prometheus_client import CollectorRegistry, Counter
+from servicelib.instrumentation import MetricsBase, get_metrics_namespace
+
+from ..._meta import APP_NAME
+
+_METRICS_NAMESPACE: Final[str] = get_metrics_namespace(APP_NAME)
+_LABELS_COUNTERS: Final[tuple[str, ...]] = ("docker_node_id",)
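+# all counters are labelled with the docker node the agent runs on, so values
+# can be aggregated or filtered per node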
+
+
+@dataclass(slots=True, kw_only=True)
+class AgentMetrics(MetricsBase):
+    volumes_removed: Counter = field(init=False)
+    volumes_backedup: Counter = field(init=False)
+
+    def __post_init__(self) -> None:
+        self.volumes_removed = Counter(
+            "volumes_removed_total",
+            "Number of removed volumes by the agent",
+            labelnames=_LABELS_COUNTERS,
+            namespace=_METRICS_NAMESPACE,
+            subsystem=self.subsystem,
+            registry=self.registry,
+        )
+
+        self.volumes_backedup = Counter(
+            "volumes_backedup_total",
+            "Number of removed volumes who's content was uplaoded by the agent",
+            labelnames=_LABELS_COUNTERS,
+            namespace=_METRICS_NAMESPACE,
+            subsystem=self.subsystem,
+            registry=self.registry,
+        )
+
+    def remove_volumes(self, docker_node_id: str) -> None:
+        self.volumes_removed.labels(docker_node_id=docker_node_id).inc()
+
+    def backedup_volumes(self, docker_node_id: str) -> None:
+        self.volumes_backedup.labels(docker_node_id=docker_node_id).inc()
+
+
+@dataclass(slots=True, kw_only=True)
+class AgentInstrumentation:
+    registry: CollectorRegistry
+    agent_metrics: AgentMetrics = field(init=False)
+
+    def __post_init__(self) -> None:
+        self.agent_metrics = AgentMetrics(  # pylint: disable=unexpected-keyword-arg
+            subsystem="agent", registry=self.registry
+        )
diff --git a/services/agent/src/simcore_service_agent/services/instrumentation/_setup.py b/services/agent/src/simcore_service_agent/services/instrumentation/_setup.py
new file mode 100644
index 000000000000..ad4e2f3cf2eb
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/services/instrumentation/_setup.py
@@ -0,0 +1,30 @@
+from fastapi import FastAPI
+from servicelib.fastapi.prometheus_instrumentation import (
+    setup_prometheus_instrumentation,
+)
+from simcore_service_agent.core.settings import ApplicationSettings
+
+from ._models import AgentInstrumentation
+
+
+def setup_instrumentation(app: FastAPI) -> None:
+    settings: ApplicationSettings = app.state.settings
+    if not settings.AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED:
+        return
+
+    instrumentator = setup_prometheus_instrumentation(app)
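+    # the instrumentator's registry is reused below so the agent's custom
+    # counters are exposed on the same /metrics endpoint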
+
+    async def on_startup() -> None:
+        app.state.instrumentation = AgentInstrumentation(
+            registry=instrumentator.registry
+        )
+
+    app.add_event_handler("startup", on_startup)
+
+
+def get_instrumentation(app: FastAPI) -> AgentInstrumentation:
+    assert (
+        app.state.instrumentation
+    ), "Instrumentation not setup. Please check the configuration"  # nosec
+    instrumentation: AgentInstrumentation = app.state.instrumentation
+    return instrumentation
diff --git a/services/agent/src/simcore_service_agent/services/rabbitmq.py b/services/agent/src/simcore_service_agent/services/rabbitmq.py
new file mode 100644
index 000000000000..3c548fb0b241
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/services/rabbitmq.py
@@ -0,0 +1,29 @@
+from typing import cast
+
+from fastapi import FastAPI
+from servicelib.rabbitmq import RabbitMQRPCClient, wait_till_rabbitmq_responsive
+from settings_library.rabbit import RabbitSettings
+
+
+def setup_rabbitmq(app: FastAPI) -> None:
+    settings: RabbitSettings = app.state.settings.AGENT_RABBITMQ
+    app.state.rabbitmq_rpc_server = None
+
+    async def _on_startup() -> None:
+        await wait_till_rabbitmq_responsive(settings.dsn)
+
+        app.state.rabbitmq_rpc_server = await RabbitMQRPCClient.create(
+            client_name="dynamic_scheduler_rpc_server", settings=settings
+        )
+
+    async def _on_shutdown() -> None:
+        if app.state.rabbitmq_rpc_server:
+            await app.state.rabbitmq_rpc_server.close()
+
+    app.add_event_handler("startup", _on_startup)
+    app.add_event_handler("shutdown", _on_shutdown)
+
+
+def get_rabbitmq_rpc_server(app: FastAPI) -> RabbitMQRPCClient:
+    assert app.state.rabbitmq_rpc_server  # nosec
+    return cast(RabbitMQRPCClient, app.state.rabbitmq_rpc_server)
diff --git a/services/agent/src/simcore_service_agent/services/volumes_manager.py b/services/agent/src/simcore_service_agent/services/volumes_manager.py
new file mode 100644
index 000000000000..526589a2c9cd
--- /dev/null
+++ b/services/agent/src/simcore_service_agent/services/volumes_manager.py
@@ -0,0 +1,188 @@
+import logging
+from asyncio import Lock, Task
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from typing import Final
+
+import arrow
+from aiodocker.docker import Docker
+from fastapi import FastAPI
+from models_library.projects_nodes_io import NodeID
+from pydantic import NonNegativeFloat
+from servicelib.background_task import start_periodic_task, stop_periodic_task
+from servicelib.fastapi.app_state import SingletonInAppStateMixin
+from servicelib.logging_utils import log_context
+from servicelib.rabbitmq.rpc_interfaces.agent.errors import (
+    NoServiceVolumesFoundRPCError,
+)
+from tenacity import AsyncRetrying, before_sleep_log, stop_after_delay, wait_fixed
+
+from ..core.settings import ApplicationSettings
+from .docker_utils import get_unused_dynamc_sidecar_volumes, remove_volume
+
+_logger = logging.getLogger(__name__)
+
+_WAIT_FOR_UNUSED_SERVICE_VOLUMES: Final[timedelta] = timedelta(minutes=1)
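+# upper bound on how long `remove_service_volumes` waits for a stopping
+# service's volumes to show up as unused before raising NoServiceVolumesFoundRPCError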
+
+
+@dataclass
+class VolumesManager(  # pylint:disable=too-many-instance-attributes
+    SingletonInAppStateMixin
+):
+    app: FastAPI
+    book_keeping_interval: timedelta
+    volume_cleanup_interval: timedelta
+    remove_volumes_inactive_for: NonNegativeFloat
+
+    docker: Docker = field(default_factory=Docker)
+    removal_lock: Lock = field(default_factory=Lock)
+
+    _task_bookkeeping: Task | None = None
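+    # maps an unused volume's name to the moment it was first detected as unused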
+    _unused_volumes: dict[str, datetime] = field(default_factory=dict)
+
+    _task_periodic_volume_cleanup: Task | None = None
+
+    app_state_name: str = "volumes_manager"
+
+    async def setup(self) -> None:
+        self._task_bookkeeping = start_periodic_task(
+            self._bookkeeping_task,
+            interval=self.book_keeping_interval,
+            task_name="volumes bookkeeping",
+        )
+        self._task_periodic_volume_cleanup = start_periodic_task(
+            self._periodic_volume_cleanup_task,
+            interval=self.volume_cleanup_interval,
+            task_name="volume cleanup",
+        )
+
+    async def shutdown(self) -> None:
+        await self.docker.close()
+
+        if self._task_bookkeeping:
+            await stop_periodic_task(self._task_bookkeeping)
+
+        if self._task_periodic_volume_cleanup:
+            await stop_periodic_task(self._task_periodic_volume_cleanup)
+
+    async def _bookkeeping_task(self) -> None:
+        with log_context(_logger, logging.DEBUG, "volume bookkeeping"):
+            current_unused_volumes = await get_unused_dynamc_sidecar_volumes(
+                self.docker
+            )
+            old_unused_volumes = set(self._unused_volumes.keys())
+
+            # volumes that are no longer reported as unused (in use again or already removed)
+            to_remove = old_unused_volumes - current_unused_volumes
+            for volume in to_remove:
+                self._unused_volumes.pop(volume, None)
+
+            # volumes which have just been detected as inactive
+            to_add = current_unused_volumes - old_unused_volumes
+            for volume in to_add:
+                self._unused_volumes[volume] = arrow.utcnow().datetime
+
+    async def _remove_volume_safe(
+        self, *, volume_name: str, requires_backup: bool
+    ) -> None:
+        # NOTE: to avoid race conditions only one volume can be removed
+        # also avoids issues with accessing the docker API in parallel
+        async with self.removal_lock:
+            await remove_volume(
+                self.app,
+                self.docker,
+                volume_name=volume_name,
+                requires_backup=requires_backup,
+            )
+
+    async def _periodic_volume_cleanup_task(self) -> None:
+        with log_context(_logger, logging.DEBUG, "volume cleanup"):
+            volumes_to_remove: set[str] = set()
+            for volume_name, inactive_since in self._unused_volumes.items():
+                volume_inactive_since = (
+                    arrow.utcnow().datetime - inactive_since
+                ).total_seconds()
+                if volume_inactive_since > self.remove_volumes_inactive_for:
+                    volumes_to_remove.add(volume_name)
+
+            for volume in volumes_to_remove:
+                await self._remove_volume_safe(volume_name=volume, requires_backup=True)
+
+    async def _wait_for_service_volumes_to_become_unused(
+        self, node_id: NodeID
+    ) -> set[str]:
+        # NOTE: it usually takes a few seconds for volumes to become unused;
+        # if the agent does not wait for this to happen, the volumes will
+        # instead be removed and backed up by the background task, causing
+        # unnecessary data transfer to S3
+        async for attempt in AsyncRetrying(
+            reraise=True,
+            stop=stop_after_delay(_WAIT_FOR_UNUSED_SERVICE_VOLUMES.total_seconds()),
+            wait=wait_fixed(1),
+            before_sleep=before_sleep_log(_logger, logging.DEBUG),
+        ):
+            with attempt:
+                current_unused_volumes = await get_unused_dynamc_sidecar_volumes(
+                    self.docker
+                )
+
+                service_volumes = {
+                    v for v in current_unused_volumes if f"{node_id}" in v
+                }
+                _logger.debug(
+                    "service %s found volumes to remove: %s", node_id, service_volumes
+                )
+                if len(service_volumes) == 0:
+                    raise NoServiceVolumesFoundRPCError(
+                        period=_WAIT_FOR_UNUSED_SERVICE_VOLUMES.total_seconds(),
+                        node_id=node_id,
+                    )
+
+        return service_volumes
+
+    async def remove_service_volumes(self, node_id: NodeID) -> None:
+        # bookkept volumes might not be up to date
+        service_volumes = await self._wait_for_service_volumes_to_become_unused(node_id)
+        _logger.debug(
+            "will remove volumes for %s from service_volumes=%s",
+            node_id,
+            service_volumes,
+        )
+
+        for volume_name in service_volumes:
+            # these volumes were already saved to S3 by the sidecar and no longer require a backup
+            await self._remove_volume_safe(
+                volume_name=volume_name, requires_backup=False
+            )
+
+    async def remove_all_volumes(self) -> None:
+        # bookkept volumes might not be up to date
+        current_unused_volumes = await get_unused_dynamc_sidecar_volumes(self.docker)
+
+        with log_context(_logger, logging.INFO, "remove all volumes"):
+            for volume in current_unused_volumes:
+                await self._remove_volume_safe(volume_name=volume, requires_backup=True)
+
+
+def get_volumes_manager(app: FastAPI) -> VolumesManager:
+    return VolumesManager.get_from_app_state(app)
+
+
+def setup_volume_manager(app: FastAPI) -> None:
+    async def _on_startup() -> None:
+        settings: ApplicationSettings = app.state.settings
+
+        volumes_manager = VolumesManager(
+            app=app,
+            book_keeping_interval=settings.AGENT_VOLUMES_CLENUP_BOOK_KEEPING_INTERVAL,
+            volume_cleanup_interval=settings.AGENT_VOLUMES_CLEANUP_INTERVAL,
+            remove_volumes_inactive_for=settings.AGENT_VOLUMES_CLENUP_REMOVE_VOLUMES_INACTIVE_FOR.total_seconds(),
+        )
+        volumes_manager.set_to_app_state(app)
+        await volumes_manager.setup()
+
+    async def _on_shutdown() -> None:
+        await VolumesManager.get_from_app_state(app).shutdown()
+
+    app.add_event_handler("startup", _on_startup)
+    app.add_event_handler("shutdown", _on_shutdown)
diff --git a/services/agent/tests/conftest.py b/services/agent/tests/conftest.py
index 5e85be5a0354..c71656e2c085 100644
--- a/services/agent/tests/conftest.py
+++ b/services/agent/tests/conftest.py
@@ -1,190 +1,66 @@
 # pylint: disable=redefined-outer-name
 # pylint: disable=unused-argument
 
-import contextlib
-import logging
-from pathlib import Path
-from typing import AsyncIterator, Iterable
-from uuid import uuid4
 
-import aiodocker
 import pytest
-import simcore_service_agent
-from aiodocker.volumes import DockerVolume
 from common_library.pydantic_networks_extension import HttpUrlLegacy
+from faker import Faker
 from models_library.basic_types import BootModeEnum
-from models_library.services import RunID
 from moto.server import ThreadedMotoServer
 from pydantic import HttpUrl, TypeAdapter
+from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict
 from settings_library.r_clone import S3Provider
-from simcore_service_agent.core.settings import ApplicationSettings
 
 pytest_plugins = [
     "pytest_simcore.aws_server",
+    "pytest_simcore.docker_compose",
+    "pytest_simcore.docker_swarm",
+    "pytest_simcore.rabbit_service",
     "pytest_simcore.repository_paths",
 ]
 
 
-@pytest.fixture(scope="session")
-def project_slug_dir(osparc_simcore_root_dir: Path) -> Path:
-    # fixtures in pytest_simcore.environs
-    service_folder = osparc_simcore_root_dir / "services" / "agent"
-    assert service_folder.exists()
-    assert any(service_folder.glob("src/simcore_service_agent"))
-    return service_folder
-
-
-@pytest.fixture(scope="session")
-def installed_package_dir() -> Path:
-    dirpath = Path(simcore_service_agent.__file__).resolve().parent
-    assert dirpath.exists()
-    return dirpath
-
-
 @pytest.fixture
 def swarm_stack_name() -> str:
     return "test-simcore"
 
 
 @pytest.fixture
-def study_id() -> str:
-    return f"{uuid4()}"
-
-
-@pytest.fixture
-def node_uuid() -> str:
-    return f"{uuid4()}"
-
-
-@pytest.fixture
-def run_id() -> RunID:
-    return RunID.create()
-
-
-@pytest.fixture
-def bucket() -> str:
-    return f"test-bucket-{uuid4()}"
-
-
-@pytest.fixture
-def used_volume_path(tmp_path: Path) -> Path:
-    return tmp_path / "used_volume"
-
-
-@pytest.fixture
-def unused_volume_path(tmp_path: Path) -> Path:
-    return tmp_path / "unused_volume"
-
-
-def _get_source(run_id: str, node_uuid: str, volume_path: Path) -> str:
-    reversed_path = f"{volume_path}"[::-1].replace("/", "_")
-    return f"dyv_{run_id}_{node_uuid}_{reversed_path}"
+def docker_node_id() -> str:
+    return "test-node-id"
 
 
 @pytest.fixture
-async def unused_volume(
-    swarm_stack_name: str,
-    study_id: str,
-    node_uuid: str,
-    run_id: RunID,
-    unused_volume_path: Path,
-) -> AsyncIterator[DockerVolume]:
-    async with aiodocker.Docker() as docker_client:
-        source = _get_source(run_id, node_uuid, unused_volume_path)
-        volume = await docker_client.volumes.create(
-            {
-                "Name": source,
-                "Labels": {
-                    "node_uuid": node_uuid,
-                    "run_id": run_id,
-                    "source": source,
-                    "study_id": study_id,
-                    "swarm_stack_name": swarm_stack_name,
-                    "user_id": "1",
-                },
-            }
-        )
-
-        # attach to volume and create some files!!!
-
-        yield volume
-
-        with contextlib.suppress(aiodocker.DockerError):
-            await volume.delete()
+def bucket(faker: Faker) -> str:
+    return f"test-bucket-{faker.uuid4()}"
 
 
 @pytest.fixture
-async def used_volume(
-    swarm_stack_name: str,
-    study_id: str,
-    node_uuid: str,
-    run_id: RunID,
-    used_volume_path: Path,
-) -> AsyncIterator[DockerVolume]:
-    async with aiodocker.Docker() as docker_client:
-        source = _get_source(run_id, node_uuid, used_volume_path)
-        volume = await docker_client.volumes.create(
-            {
-                "Name": source,
-                "Labels": {
-                    "node_uuid": node_uuid,
-                    "run_id": run_id,
-                    "source": source,
-                    "study_id": study_id,
-                    "swarm_stack_name": swarm_stack_name,
-                    "user_id": "1",
-                },
-            }
-        )
-
-        container = await docker_client.containers.run(
-            config={
-                "Cmd": ["/bin/ash", "-c", "sleep 10000"],
-                "Image": "alpine:latest",
-                "HostConfig": {"Binds": [f"{volume.name}:{used_volume_path}"]},
-            },
-            name=f"using_volume_{volume.name}",
-        )
-        await container.start()
-
-        yield volume
-
-        await container.delete(force=True)
-        await volume.delete()
-
-
-@pytest.fixture
-def env(  # noqa: PT004
+def mock_environment(
     monkeypatch: pytest.MonkeyPatch,
     mocked_s3_server_url: HttpUrl,
     bucket: str,
     swarm_stack_name: str,
-) -> None:
-    mock_dict = {
-        "LOGLEVEL": "DEBUG",
-        "SC_BOOT_MODE": BootModeEnum.DEBUG,
-        "AGENT_VOLUMES_CLEANUP_TARGET_SWARM_STACK_NAME": swarm_stack_name,
-        "AGENT_VOLUMES_CLEANUP_S3_ENDPOINT": mocked_s3_server_url,
-        "AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY": "xxx",
-        "AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY": "xxx",
-        "AGENT_VOLUMES_CLEANUP_S3_BUCKET": bucket,
-        "AGENT_VOLUMES_CLEANUP_S3_PROVIDER": S3Provider.MINIO,
-    }
-    for key, value in mock_dict.items():
-        monkeypatch.setenv(key, value)
-
-
-@pytest.fixture
-def settings(env: None) -> ApplicationSettings:
-    return ApplicationSettings.create_from_envs()
-
-
-@pytest.fixture()
-def caplog_info_debug(
-    caplog: pytest.LogCaptureFixture,
-) -> Iterable[pytest.LogCaptureFixture]:
-    with caplog.at_level(logging.DEBUG):
-        yield caplog
+    docker_node_id: str,
+) -> EnvVarsDict:
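+    # minimal environment to build ApplicationSettings in tests: S3 points at the
+    # moto server and the RabbitMQ values are placeholders (overridden where a
+    # real broker is needed)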
+    return setenvs_from_dict(
+        monkeypatch,
+        {
+            "LOGLEVEL": "DEBUG",
+            "SC_BOOT_MODE": BootModeEnum.DEBUG,
+            "AGENT_VOLUMES_CLEANUP_TARGET_SWARM_STACK_NAME": swarm_stack_name,
+            "AGENT_VOLUMES_CLEANUP_S3_ENDPOINT": mocked_s3_server_url,
+            "AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY": "xxx",
+            "AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY": "xxx",
+            "AGENT_VOLUMES_CLEANUP_S3_BUCKET": bucket,
+            "AGENT_VOLUMES_CLEANUP_S3_PROVIDER": S3Provider.MINIO,
+            "RABBIT_HOST": "test",
+            "RABBIT_PASSWORD": "test",
+            "RABBIT_SECURE": "false",
+            "RABBIT_USER": "test",
+            "AGENT_DOCKER_NODE_ID": docker_node_id,
+        },
+    )
 
 
 @pytest.fixture(scope="module")
diff --git a/services/agent/tests/unit/conftest.py b/services/agent/tests/unit/conftest.py
new file mode 100644
index 000000000000..1a49ce6ba576
--- /dev/null
+++ b/services/agent/tests/unit/conftest.py
@@ -0,0 +1,144 @@
+# pylint: disable=redefined-outer-name
+# pylint: disable=unused-argument
+
+from collections.abc import AsyncIterable, AsyncIterator, Awaitable, Callable
+from contextlib import suppress
+from pathlib import Path
+from uuid import uuid4
+
+import aiodocker
+import pytest
+from aiodocker.containers import DockerContainer
+from aiodocker.volumes import DockerVolume
+from asgi_lifespan import LifespanManager
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from models_library.projects import ProjectID
+from models_library.projects_nodes_io import NodeID
+from models_library.services_types import RunID
+from models_library.users import UserID
+from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict
+from settings_library.rabbit import RabbitSettings
+from simcore_service_agent.core.application import create_app
+from utils import VOLUMES_TO_CREATE, get_source
+
+
+@pytest.fixture
+def service_env(
+    monkeypatch: pytest.MonkeyPatch,
+    mock_environment: EnvVarsDict,
+    rabbit_service: RabbitSettings,
+) -> EnvVarsDict:
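+    # replace the placeholder RabbitMQ values from `mock_environment` with the
+    # settings of the broker started by the pytest_simcore rabbit_service fixture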
+    return setenvs_from_dict(
+        monkeypatch,
+        {
+            **mock_environment,
+            "RABBIT_HOST": rabbit_service.RABBIT_HOST,
+            "RABBIT_PASSWORD": rabbit_service.RABBIT_PASSWORD.get_secret_value(),
+            "RABBIT_PORT": f"{rabbit_service.RABBIT_PORT}",
+            "RABBIT_SECURE": f"{rabbit_service.RABBIT_SECURE}",
+            "RABBIT_USER": rabbit_service.RABBIT_USER,
+        },
+    )
+
+
+@pytest.fixture
+async def initialized_app(service_env: EnvVarsDict) -> AsyncIterator[FastAPI]:
+    app: FastAPI = create_app()
+
+    async with LifespanManager(app):
+        yield app
+
+
+@pytest.fixture
+def test_client(initialized_app: FastAPI) -> TestClient:
+    return TestClient(initialized_app)
+
+
+@pytest.fixture
+def run_id() -> RunID:
+    return RunID.create()
+
+
+@pytest.fixture
+def project_id() -> ProjectID:
+    return uuid4()
+
+
+@pytest.fixture
+def user_id() -> UserID:
+    return 1
+
+
+@pytest.fixture
+def volumes_path(tmp_path: Path) -> Path:
+    return tmp_path / "volumes"
+
+
+@pytest.fixture
+async def create_dynamic_sidecar_volume(
+    run_id: RunID,
+    project_id: ProjectID,
+    swarm_stack_name: str,
+    user_id: UserID,
+    volumes_path: Path,
+) -> AsyncIterable[Callable[[NodeID, bool, str], Awaitable[str]]]:
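+    # factory fixture: creates a volume labelled like a dynamic-sidecar volume and,
+    # when `in_use` is True, also starts a throw-away container mounting it; all
+    # created resources are removed on teardown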
+    volumes_to_cleanup: list[DockerVolume] = []
+    containers_to_cleanup: list[DockerContainer] = []
+
+    async with aiodocker.Docker() as docker_client:
+
+        async def _(node_id: NodeID, in_use: bool, volume_name: str) -> str:
+            source = get_source(run_id, node_id, volumes_path / volume_name)
+            volume = await docker_client.volumes.create(
+                {
+                    "Name": source,
+                    "Labels": {
+                        "node_uuid": f"{node_id}",
+                        "run_id": run_id,
+                        "source": source,
+                        "study_id": f"{project_id}",
+                        "swarm_stack_name": swarm_stack_name,
+                        "user_id": f"{user_id}",
+                    },
+                }
+            )
+            volumes_to_cleanup.append(volume)
+
+            if in_use:
+                container = await docker_client.containers.run(
+                    config={
+                        "Cmd": ["/bin/ash", "-c", "sleep 10000"],
+                        "Image": "alpine:latest",
+                        "HostConfig": {"Binds": [f"{volume.name}:{volumes_path}"]},
+                    },
+                    name=f"using_volume_{volume.name}",
+                )
+                await container.start()
+                containers_to_cleanup.append(container)
+
+            return source
+
+        yield _
+
+        for container in containers_to_cleanup:
+            with suppress(aiodocker.DockerError):
+                await container.delete(force=True)
+        for volume in volumes_to_cleanup:
+            with suppress(aiodocker.DockerError):
+                await volume.delete()
+
+
+@pytest.fixture
+def create_dynamic_sidecar_volumes(
+    create_dynamic_sidecar_volume: Callable[[NodeID, bool, str], Awaitable[str]]
+) -> Callable[[NodeID, bool], Awaitable[set[str]]]:
+    async def _(node_id: NodeID, in_use: bool) -> set[str]:
+        volume_names: set[str] = set()
+        for volume_name in VOLUMES_TO_CREATE:
+            name = await create_dynamic_sidecar_volume(node_id, in_use, volume_name)
+            volume_names.add(name)
+
+        return volume_names
+
+    return _
diff --git a/services/agent/tests/unit/test_api_rest__health.py b/services/agent/tests/unit/test_api_rest__health.py
new file mode 100644
index 000000000000..6e690daa7884
--- /dev/null
+++ b/services/agent/tests/unit/test_api_rest__health.py
@@ -0,0 +1,17 @@
+# pylint: disable=protected-access
+# pylint: disable=redefined-outer-name
+
+
+from fastapi import status
+from fastapi.testclient import TestClient
+from models_library.api_schemas__common.health import HealthCheckGet
+
+pytest_simcore_core_services_selection = [
+    "rabbit",
+]
+
+
+def test_health_ok(test_client: TestClient):
+    response = test_client.get("/health")
+    assert response.status_code == status.HTTP_200_OK
+    assert HealthCheckGet.parse_obj(response.json())
diff --git a/services/agent/tests/unit/test_api_rpc__volumes.py b/services/agent/tests/unit/test_api_rpc__volumes.py
new file mode 100644
index 000000000000..df7121d1418d
--- /dev/null
+++ b/services/agent/tests/unit/test_api_rpc__volumes.py
@@ -0,0 +1,67 @@
+# pylint:disable=redefined-outer-name
+# pylint:disable=unused-argument
+
+from collections.abc import Awaitable, Callable
+from unittest.mock import AsyncMock
+from uuid import uuid4
+
+import pytest
+import pytest_mock
+from fastapi import FastAPI
+from servicelib.rabbitmq import RabbitMQRPCClient
+from servicelib.rabbitmq.rpc_interfaces.agent import volumes
+
+pytest_simcore_core_services_selection = [
+    "rabbit",
+]
+
+
+@pytest.fixture
+async def rpc_client(
+    initialized_app: FastAPI,
+    rabbitmq_rpc_client: Callable[[str], Awaitable[RabbitMQRPCClient]],
+) -> RabbitMQRPCClient:
+    return await rabbitmq_rpc_client("client")
+
+
+@pytest.fixture
+def mocked_remove_service_volumes(mocker: pytest_mock.MockerFixture) -> AsyncMock:
+    return mocker.patch(
+        "simcore_service_agent.services.volumes_manager.VolumesManager.remove_service_volumes"
+    )
+
+
+@pytest.fixture
+def mocked_remove_all_volumes(mocker: pytest_mock.MockerFixture) -> AsyncMock:
+    return mocker.patch(
+        "simcore_service_agent.services.volumes_manager.VolumesManager.remove_all_volumes"
+    )
+
+
+async def test_backup_and_remove_volumes_for_all_services(
+    rpc_client: RabbitMQRPCClient,
+    swarm_stack_name: str,
+    docker_node_id: str,
+    mocked_remove_all_volumes: AsyncMock,
+):
+    assert mocked_remove_all_volumes.call_count == 0
+    await volumes.backup_and_remove_volumes_for_all_services(
+        rpc_client, docker_node_id=docker_node_id, swarm_stack_name=swarm_stack_name
+    )
+    assert mocked_remove_all_volumes.call_count == 1
+
+
+async def test_remove_volumes_without_backup_for_service(
+    rpc_client: RabbitMQRPCClient,
+    swarm_stack_name: str,
+    docker_node_id: str,
+    mocked_remove_service_volumes: AsyncMock,
+):
+    assert mocked_remove_service_volumes.call_count == 0
+    await volumes.remove_volumes_without_backup_for_service(
+        rpc_client,
+        docker_node_id=docker_node_id,
+        swarm_stack_name=swarm_stack_name,
+        node_id=uuid4(),
+    )
+    assert mocked_remove_service_volumes.call_count == 1
diff --git a/services/agent/tests/unit/test_cli.py b/services/agent/tests/unit/test_cli.py
index 97de609dd92b..a205dadb47b4 100644
--- a/services/agent/tests/unit/test_cli.py
+++ b/services/agent/tests/unit/test_cli.py
@@ -5,12 +5,13 @@
 
 import pytest
 from click.testing import Result
+from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict
 from simcore_service_agent.cli import main
 from typer.testing import CliRunner
 
 
 @pytest.fixture
-def cli_runner() -> CliRunner:
+def cli_runner(mock_environment: EnvVarsDict) -> CliRunner:
     return CliRunner()
 
 
@@ -20,7 +21,7 @@ def _format_cli_error(result: Result) -> str:
     return f"Below exception was raised by the cli:\n{tb_message}"
 
 
-def test_process_cli_options(env: None, cli_runner: CliRunner):
+def test_process_cli_options(cli_runner: CliRunner):
     result = cli_runner.invoke(main, ["--help"])
     print(result.stdout)
     assert result.exit_code == 0, _format_cli_error(result)
diff --git a/services/agent/tests/unit/test_core_routes.py b/services/agent/tests/unit/test_core_routes.py
deleted file mode 100644
index 1fd0252d1aaa..000000000000
--- a/services/agent/tests/unit/test_core_routes.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# pylint: disable=redefined-outer-name
-# pylint: disable=unused-argument
-# pylint: disable=protected-access
-
-from time import time
-from typing import AsyncIterator
-
-import pytest
-from fastapi import FastAPI, status
-from fastapi.testclient import TestClient
-from simcore_service_agent.core.application import create_app
-from simcore_service_agent.modules.task_monitor import TaskMonitor
-
-
-@pytest.fixture
-async def initialized_app() -> AsyncIterator[FastAPI]:
-    app: FastAPI = create_app()
-
-    await app.router.startup()
-    yield app
-    await app.router.shutdown()
-
-
-@pytest.fixture
-def test_client(initialized_app: FastAPI) -> TestClient:
-    return TestClient(initialized_app)
-
-
-def test_health_ok(env: None, test_client: TestClient):
-    response = test_client.get("/health")
-    assert response.status_code == status.HTTP_200_OK
-    assert response.json() == None
-
-
-def test_health_fails_not_started(
-    env: None, initialized_app: FastAPI, test_client: TestClient
-):
-    task_monitor: TaskMonitor = initialized_app.state.task_monitor
-    # emulate monitor not being started
-    task_monitor._was_started = False
-
-    response = test_client.get("/health")
-    assert response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
-    assert response.json() == {"detail": "unhealthy"}
-
-
-def test_health_fails_hanging_tasks(
-    env: None, initialized_app: FastAPI, test_client: TestClient
-):
-    task_monitor: TaskMonitor = initialized_app.state.task_monitor
-
-    # emulate tasks hanging
-    for task_data in task_monitor._to_start.values():
-        task_data._start_time = time() - 1e6
-
-    response = test_client.get("/health")
-    assert response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
-    assert response.json() == {"detail": "unhealthy"}
diff --git a/services/agent/tests/unit/test_modules_task_monitor.py b/services/agent/tests/unit/test_modules_task_monitor.py
deleted file mode 100644
index e13b73c5d007..000000000000
--- a/services/agent/tests/unit/test_modules_task_monitor.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# pylint:disable=protected-access
-
-import asyncio
-from typing import Final
-
-import pytest
-from pydantic import PositiveFloat
-from simcore_service_agent.modules.task_monitor import TaskMonitor
-
-REPEAT_TASK_INTERVAL_S: Final[PositiveFloat] = 0.05
-
-
-async def _job_which_raises_error() -> None:
-    raise RuntimeError("raised expected error")
-
-
-async def _job_which_hangs() -> None:
-    print("I will be hanging....")
-    await asyncio.sleep(REPEAT_TASK_INTERVAL_S * 10000)
-
-
-@pytest.mark.parametrize("repeat_interval_s", [REPEAT_TASK_INTERVAL_S, None])
-async def test_task_monitor_recovers_from_error(
-    caplog_info_debug: pytest.LogCaptureFixture,
-    repeat_interval_s: PositiveFloat | None,
-):
-
-    task_monitor = TaskMonitor()
-    task_monitor.register_job(
-        _job_which_raises_error, repeat_interval_s=repeat_interval_s
-    )
-
-    await task_monitor.start()
-
-    await asyncio.sleep(REPEAT_TASK_INTERVAL_S * 2)
-
-    await task_monitor.shutdown()
-    assert len(task_monitor._tasks) == 0
-    assert len(task_monitor._to_start) == 0
-
-    log_messages = caplog_info_debug.text
-    print(log_messages)
-
-    assert f"Starting '{_job_which_raises_error.__name__}' ..." in log_messages
-    assert 'RuntimeError("raised expected error")' in log_messages
-    assert (
-        f"Will run '{_job_which_raises_error.__name__}' again in {repeat_interval_s} seconds"
-        in log_messages
-    )
-    if repeat_interval_s is None:
-        assert (
-            f"Unexpected termination of '{_job_which_raises_error.__name__}'; it will be restarted"
-            in log_messages
-        )
-
-
-async def test_add_same_task_fails():
-    task_monitor = TaskMonitor()
-    task_monitor.register_job(_job_which_raises_error, repeat_interval_s=1)
-    with pytest.raises(RuntimeError) as exe_info:
-        task_monitor.register_job(_job_which_raises_error, repeat_interval_s=1)
-    assert (
-        f"{exe_info.value}"
-        == f"{_job_which_raises_error.__name__} is already registered"
-    )
-
-
-async def test_add_task_after_start_fails():
-    task_monitor = TaskMonitor()
-    await task_monitor.start()
-
-    with pytest.raises(RuntimeError) as exe_info:
-        task_monitor.register_job(_job_which_raises_error, repeat_interval_s=1)
-    assert (
-        f"{exe_info.value}" == "Cannot add more tasks, monitor already running with: []"
-    )
-    await task_monitor.shutdown()
-
-
-async def test_hanging_jobs_are_detected():
-    task_monitor = TaskMonitor()
-    task_monitor.register_job(
-        _job_which_hangs, repeat_interval_s=REPEAT_TASK_INTERVAL_S
-    )
-    await task_monitor.start()
-
-    assert task_monitor.are_tasks_hanging is False
-
-    await asyncio.sleep(REPEAT_TASK_INTERVAL_S * 2)
-
-    assert task_monitor.are_tasks_hanging is True
diff --git a/services/agent/tests/unit/test_modules_volumes_cleanup.py b/services/agent/tests/unit/test_modules_volumes_cleanup.py
deleted file mode 100644
index f6f25945d5b8..000000000000
--- a/services/agent/tests/unit/test_modules_volumes_cleanup.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# pylint: disable=redefined-outer-name
-# pylint: disable=unused-argument
-
-
-from pathlib import Path
-
-import pytest
-from aiodocker.volumes import DockerVolume
-from pytest import LogCaptureFixture
-from pytest_mock.plugin import MockerFixture
-from simcore_service_agent.core.settings import ApplicationSettings
-from simcore_service_agent.modules.volumes_cleanup import backup_and_remove_volumes
-
-
-@pytest.fixture
-async def mock_volumes_folders(
-    mocker: MockerFixture,
-    unused_volume: DockerVolume,
-    used_volume: DockerVolume,
-    unused_volume_path: Path,
-    used_volume_path: Path,
-) -> None:
-
-    unused_volume_path.mkdir(parents=True, exist_ok=True)
-    used_volume_path.mkdir(parents=True, exist_ok=True)
-
-    # root permissions are required to access the /var/docker data
-    # overwriting with a mocked path for this test
-    unused_volume_data = await unused_volume.show()
-    unused_volume_data["Mountpoint"] = f"{unused_volume_path}"
-    used_volume_data = await used_volume.show()
-    used_volume_data["Mountpoint"] = f"{used_volume_path}"
-
-    volumes_inspect = [unused_volume_data, used_volume_data]
-
-    # patch the function here
-    mocker.patch(
-        "aiodocker.volumes.DockerVolumes.list",
-        return_value={"Volumes": volumes_inspect},
-    )
-
-
-@pytest.fixture
-async def used_volume_name(used_volume: DockerVolume) -> str:
-    return (await used_volume.show())["Name"]
-
-
-@pytest.fixture
-async def unused_volume_name(unused_volume: DockerVolume) -> str:
-    return (await unused_volume.show())["Name"]
-
-
-async def test_workflow(
-    mock_volumes_folders: None,
-    caplog_info_debug: pytest.LogCaptureFixture,
-    settings: ApplicationSettings,
-    used_volume_name: str,
-    unused_volume_name: str,
-):
-    await backup_and_remove_volumes(settings)
-
-    log_messages = caplog_info_debug.messages
-    assert f"Removed docker volume: '{unused_volume_name}'" in log_messages
-    assert f"Skipped in use docker volume: '{used_volume_name}'" in log_messages
-
-
-@pytest.mark.parametrize(
-    "error_class, error_message",
-    [
-        (RuntimeError, "this was already handled"),
-        (Exception, "also capture all other generic errors"),
-    ],
-)
-async def test_regression_error_handling(
-    mock_volumes_folders: None,
-    caplog_info_debug: LogCaptureFixture,
-    settings: ApplicationSettings,
-    used_volume_name: str,
-    unused_volume_name: str,
-    mocker: MockerFixture,
-    error_class: type[BaseException],
-    error_message: str,
-):
-    mocker.patch(
-        "simcore_service_agent.modules.volumes_cleanup._core.store_to_s3",
-        side_effect=error_class(error_message),
-    )
-
-    await backup_and_remove_volumes(settings)
-
-    log_messages = caplog_info_debug.messages
-    assert error_message in log_messages
diff --git a/services/agent/tests/unit/test_modules_volumes_cleanup_docker.py b/services/agent/tests/unit/test_modules_volumes_cleanup_docker.py
deleted file mode 100644
index e2e74088bd67..000000000000
--- a/services/agent/tests/unit/test_modules_volumes_cleanup_docker.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# pylint: disable=redefined-outer-name)
-
-from typing import Any, AsyncIterator
-
-import aiodocker
-import pytest
-from aiodocker.volumes import DockerVolume
-from pytest_mock import MockerFixture
-from servicelib.docker_constants import PREFIX_DYNAMIC_SIDECAR_VOLUMES
-from simcore_service_agent.modules.volumes_cleanup._docker import (
-    docker_client,
-    get_dyv_volumes,
-    is_volume_used,
-)
-
-# UTILS
-
-
-async def _create_volume(
-    docker_client: aiodocker.Docker,
-    swarm_stack_name: str,
-    study_id: str,
-    node_uuid: str,
-    run_id: str,
-) -> DockerVolume:
-    mocked_source = f"{PREFIX_DYNAMIC_SIDECAR_VOLUMES}_a_test_ok"
-    volume = await docker_client.volumes.create(
-        {
-            "Name": mocked_source,
-            "Labels": {
-                "node_uuid": node_uuid,
-                "run_id": run_id,
-                "source": mocked_source,
-                "study_id": study_id,
-                "swarm_stack_name": swarm_stack_name,
-                "user_id": "1",
-            },
-        }
-    )
-    return volume
-
-
-# FIXTURES
-
-
-@pytest.fixture
-async def volume_with_correct_target(
-    swarm_stack_name: str,
-    study_id: str,
-    node_uuid: str,
-    run_id: str,
-) -> AsyncIterator[dict[str, Any]]:
-    async with aiodocker.Docker() as docker_client:
-        volume = await _create_volume(
-            docker_client, swarm_stack_name, study_id, node_uuid, run_id
-        )
-
-        yield await volume.show()
-
-        try:
-            await volume.delete()
-        except aiodocker.DockerError:
-            pass
-
-
-@pytest.fixture
-def wrong_swarm_stack_name() -> str:
-    return "a_different_swarm_stack_name"
-
-
-@pytest.fixture
-async def volume_with_wrong_target(
-    study_id: str, node_uuid: str, run_id: str, wrong_swarm_stack_name: str
-) -> None:
-    async with aiodocker.Docker() as docker_client:
-        volume = await _create_volume(
-            docker_client, wrong_swarm_stack_name, study_id, node_uuid, run_id
-        )
-
-        yield await volume.show()
-
-        try:
-            await volume.delete()
-        except aiodocker.DockerError:
-            pass
-
-
-# TESTS
-
-
-async def test_get_dyv_volumes_expect_a_volume(
-    volume_with_correct_target: dict[str, Any], swarm_stack_name: str
-):
-    async with aiodocker.Docker() as docker_client:
-        volumes = await get_dyv_volumes(docker_client, swarm_stack_name)
-        assert len(volumes) == 1
-        assert volumes[0] == volume_with_correct_target
-
-
-async def test_get_dyv_volumes_expect_no_volume(
-    volume_with_wrong_target: dict[str, Any],
-    swarm_stack_name: str,
-    wrong_swarm_stack_name: str,
-):
-    async with aiodocker.Docker() as docker_client:
-        volumes = await get_dyv_volumes(docker_client, swarm_stack_name)
-        assert len(volumes) == 0
-
-    async with aiodocker.Docker() as docker_client:
-        volumes = await get_dyv_volumes(docker_client, wrong_swarm_stack_name)
-        assert len(volumes) == 1
-        assert volumes[0] == volume_with_wrong_target
-
-
-async def test_is_volume_mounted_true_(used_volume: DockerVolume):
-    async with docker_client() as client:
-        assert await is_volume_used(client, used_volume.name) is True
-
-
-async def test_is_volume_mounted_false(unused_volume: DockerVolume):
-    async with docker_client() as client:
-        assert await is_volume_used(client, unused_volume.name) is False
-
-
-async def test_regression_volume_labels_are_none(mocker: MockerFixture):
-    mocked_volumes = {
-        "Volumes": [{"Name": f"{PREFIX_DYNAMIC_SIDECAR_VOLUMES}_test", "Labels": None}]
-    }
-
-    async with docker_client() as client:
-        mocker.patch.object(client.volumes, "list", return_value=mocked_volumes)
-
-        await get_dyv_volumes(client, "test")
diff --git a/services/agent/tests/unit/test_modules_volumes_cleanup_s3.py b/services/agent/tests/unit/test_modules_volumes_cleanup_s3.py
deleted file mode 100644
index 862f447fc1a9..000000000000
--- a/services/agent/tests/unit/test_modules_volumes_cleanup_s3.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# pylint: disable=redefined-outer-name
-# pylint: disable=unused-argument
-# pylint: disable=too-many-locals
-
-import hashlib
-from pathlib import Path
-
-import aioboto3
-import pytest
-from aiodocker.volumes import DockerVolume
-from pydantic import HttpUrl
-from pytest import LogCaptureFixture
-from simcore_service_agent.core.settings import ApplicationSettings
-from simcore_service_agent.modules.volumes_cleanup._s3 import (
-    S3Provider,
-    _get_dir_name,
-    _get_s3_path,
-    store_to_s3,
-)
-
-# UTILS
-
-
-def _get_file_hashes_in_path(
-    path_to_hash: Path, exclude_files: set[Path] | None = None
-) -> set[tuple[Path, str]]:
-    def _hash_path(path: Path):
-        sha256_hash = hashlib.sha256()
-        with path.open("rb") as file:
-            # Read and update hash string value in blocks of 4K
-            for byte_block in iter(lambda: file.read(4096), b""):
-                sha256_hash.update(byte_block)
-        return sha256_hash.hexdigest()
-
-    if path_to_hash.is_file():
-        return {(path_to_hash.relative_to(path_to_hash), _hash_path(path_to_hash))}
-
-    if exclude_files is None:
-        exclude_files = set()
-
-    return {
-        (path.relative_to(path_to_hash), _hash_path(path))
-        for path in path_to_hash.rglob("*")
-        if path.is_file() and path.relative_to(path_to_hash) not in exclude_files
-    }
-
-
-async def _download_files_from_bucket(
-    endpoint: str,
-    access_key: str,
-    secret_key: str,
-    bucket_name: str,
-    save_to: Path,
-    swarm_stack_name: str,
-    study_id: str,
-    node_uuid: str,
-    run_id: str,
-) -> None:
-    session = aioboto3.Session(
-        aws_access_key_id=access_key, aws_secret_access_key=secret_key
-    )
-    async with session.resource("s3", endpoint_url=endpoint, use_ssl=False) as s_3:
-        bucket = await s_3.Bucket(bucket_name)
-        async for s3_object in bucket.objects.all():
-            key_path = f"{swarm_stack_name}/{study_id}/{node_uuid}/{run_id}/"
-            if s3_object.key.startswith(key_path):
-                file_object = await s3_object.get()
-                file_path: Path = save_to / s3_object.key.replace(key_path, "")
-                file_path.parent.mkdir(parents=True, exist_ok=True)
-                print(f"Saving file to {file_path}")
-                file_content = await file_object["Body"].read()
-                file_path.write_bytes(file_content)
-
-
-def _create_data(folder: Path) -> None:
-    for file in {  # pylint:disable=use-sequence-for-iteration
-        ".hidden_do_not_remove",
-        "key_values.json",
-        "f1.txt",
-        "f2.txt",
-        "f3.txt",
-        "d1/f1.txt",
-        "d1/f2.txt",
-        "d1/f3.txt",
-        "d1/sd1/f1.txt",
-        "d1/sd1/f2.txt",
-        "d1/sd1/f3.txt",
-    }:
-        file_path = folder / file
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-        file_path.write_text("test")
-
-
-# FIXTURES
-
-
-@pytest.fixture
-def save_to(tmp_path: Path) -> Path:
-    return tmp_path / "save_to"
-
-
-# TESTS
-
-
-async def test_get_s3_path(
-    unused_volume: DockerVolume,
-    swarm_stack_name: str,
-    study_id: str,
-    node_uuid: str,
-    run_id: str,
-    bucket: str,
-):
-    volume_data = await unused_volume.show()
-    assert _get_s3_path(bucket, volume_data["Labels"], unused_volume.name) == Path(
-        f"/{bucket}/{swarm_stack_name}/{study_id}/{node_uuid}/{run_id}/{_get_dir_name(unused_volume.name)}"
-    )
-
-
-async def test_store_to_s3(
-    unused_volume: DockerVolume,
-    mocked_s3_server_url: HttpUrl,
-    unused_volume_path: Path,
-    save_to: Path,
-    study_id: str,
-    node_uuid: str,
-    run_id: str,
-    bucket: str,
-    settings: ApplicationSettings,
-):
-    _create_data(unused_volume_path)
-    dyv_volume = await unused_volume.show()
-
-    # overwrite to test locally not against volume
-    # root permissions are required to access this
-    dyv_volume["Mountpoint"] = unused_volume_path
-
-    await store_to_s3(
-        volume_name=unused_volume.name,
-        dyv_volume=dyv_volume,
-        s3_access_key="xxx",
-        s3_secret_key="xxx",
-        s3_bucket=bucket,
-        s3_endpoint=mocked_s3_server_url,
-        s3_region="us-east-1",
-        s3_provider=S3Provider.MINIO,
-        s3_parallelism=3,
-        s3_retries=1,
-        exclude_files=settings.AGENT_VOLUMES_CLEANUP_EXCLUDE_FILES,
-    )
-
-    await _download_files_from_bucket(
-        endpoint=f"{mocked_s3_server_url}",
-        access_key="xxx",
-        secret_key="xxx",
-        bucket_name=bucket,
-        save_to=save_to,
-        swarm_stack_name=dyv_volume["Labels"]["swarm_stack_name"],
-        study_id=study_id,
-        node_uuid=node_uuid,
-        run_id=run_id,
-    )
-
-    hashes_on_disk = _get_file_hashes_in_path(
-        unused_volume_path, set(map(Path, settings.AGENT_VOLUMES_CLEANUP_EXCLUDE_FILES))
-    )
-    volume_path_without_source_dir = save_to / _get_dir_name(unused_volume.name)
-    hashes_in_s3 = _get_file_hashes_in_path(volume_path_without_source_dir)
-    assert len(hashes_on_disk) > 0
-    assert len(hashes_in_s3) > 0
-    assert hashes_on_disk == hashes_in_s3
-
-
-@pytest.mark.parametrize("provider", [S3Provider.CEPH, S3Provider.MINIO])
-async def test_regression_non_aws_providers(
-    unused_volume: DockerVolume,
-    mocked_s3_server_url: HttpUrl,
-    unused_volume_path: Path,
-    bucket: str,
-    settings: ApplicationSettings,
-    caplog_info_debug: pytest.LogCaptureFixture,
-    provider: S3Provider,
-):
-    _create_data(unused_volume_path)
-    dyv_volume = await unused_volume.show()
-
-    # overwrite to test locally not against volume
-    # root permissions are required to access this
-    dyv_volume["Mountpoint"] = unused_volume_path
-
-    await store_to_s3(
-        volume_name=unused_volume.name,
-        dyv_volume=dyv_volume,
-        s3_access_key="xxx",
-        s3_secret_key="xxx",
-        s3_bucket=bucket,
-        s3_endpoint=mocked_s3_server_url,
-        s3_region="us-east-1",
-        s3_provider=provider,
-        s3_parallelism=3,
-        s3_retries=1,
-        exclude_files=settings.AGENT_VOLUMES_CLEANUP_EXCLUDE_FILES,
-    )
-
-    assert f'provider "{provider}" not known' not in caplog_info_debug.text
-
-
-async def test_regression_store_to_s3_volume_mountpoint_not_found(
-    unused_volume: DockerVolume,
-    mocked_s3_server_url: HttpUrl,
-    unused_volume_path: Path,
-    bucket: str,
-    settings: ApplicationSettings,
-    caplog_info_debug: LogCaptureFixture,
-):
-    dyv_volume = await unused_volume.show()
-    assert unused_volume_path.exists() is False
-
-    # overwrite to test locally not against volume
-    # root permissions are required to access this
-    dyv_volume["Mountpoint"] = unused_volume_path
-
-    await store_to_s3(
-        volume_name=unused_volume.name,
-        dyv_volume=dyv_volume,
-        s3_access_key="xxx",
-        s3_secret_key="xxx",
-        s3_bucket=bucket,
-        s3_endpoint=mocked_s3_server_url,
-        s3_region="us-east-1",
-        s3_provider=S3Provider.MINIO,
-        s3_parallelism=3,
-        s3_retries=1,
-        exclude_files=settings.AGENT_VOLUMES_CLEANUP_EXCLUDE_FILES,
-    )
-    assert f"mountpoint {unused_volume_path} does not exist" in caplog_info_debug.text
-    assert f"{unused_volume.name}" in caplog_info_debug.text
diff --git a/services/agent/tests/unit/test_services_backup.py b/services/agent/tests/unit/test_services_backup.py
new file mode 100644
index 000000000000..67a1203ea796
--- /dev/null
+++ b/services/agent/tests/unit/test_services_backup.py
@@ -0,0 +1,105 @@
+# pylint: disable=redefined-outer-name
+
+import asyncio
+from collections.abc import Awaitable, Callable
+from pathlib import Path
+from typing import Final
+from uuid import uuid4
+
+import aioboto3
+import pytest
+from fastapi import FastAPI
+from models_library.projects import ProjectID
+from models_library.projects_nodes_io import NodeID
+from models_library.services_types import RunID
+from pydantic import NonNegativeInt
+from simcore_service_agent.core.settings import ApplicationSettings
+from simcore_service_agent.services.backup import backup_volume
+from simcore_service_agent.services.docker_utils import get_volume_details
+from simcore_service_agent.services.volumes_manager import VolumesManager
+from utils import VOLUMES_TO_CREATE
+
+pytest_simcore_core_services_selection = [
+    "rabbit",
+]
+
+_FILES_TO_CREATE_IN_VOLUME: Final[NonNegativeInt] = 10
+
+
+@pytest.fixture
+def volume_content(tmpdir: Path) -> Path:
+    path = Path(tmpdir) / "to_copy"
+    path.mkdir(parents=True, exist_ok=True)
+
+    for i in range(_FILES_TO_CREATE_IN_VOLUME):
+        (path / f"f{i}").write_text(f"some text for file {i}\n" * (i + 1))
+
+    return path
+
+
+@pytest.fixture
+def downloaded_from_s3(tmpdir: Path) -> Path:
+    path = Path(tmpdir) / "downloaded_from_s3"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+async def test_backup_volume(
+    volume_content: Path,
+    project_id: ProjectID,
+    swarm_stack_name: str,
+    run_id: RunID,
+    downloaded_from_s3: Path,
+    create_dynamic_sidecar_volumes: Callable[[NodeID, bool], Awaitable[set[str]]],
+    initialized_app: FastAPI,
+):
+    node_id = uuid4()
+    volumes: set[str] = await create_dynamic_sidecar_volumes(
+        node_id, True  # noqa: FBT003
+    )
+
+    for volume in volumes:
+        volume_details = await get_volume_details(
+            VolumesManager.get_from_app_state(initialized_app).docker,
+            volume_name=volume,
+        )
+        # root permissions are required to access the /var/docker data
+        # overwriting with a mocked path for this test
+        volume_details.mountpoint = volume_content
+        await backup_volume(initialized_app, volume_details, volume)
+
+    settings: ApplicationSettings = initialized_app.state.settings
+
+    session = aioboto3.Session(
+        aws_access_key_id=settings.AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY,
+        aws_secret_access_key=settings.AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY,
+    )
+
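+    # every volume was backed up with the same mocked content, so expect one S3 object per file per volume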
+    expected_files = _FILES_TO_CREATE_IN_VOLUME * len(VOLUMES_TO_CREATE)
+
+    async with session.client("s3", endpoint_url=settings.AGENT_VOLUMES_CLEANUP_S3_ENDPOINT) as s3_client:  # type: ignore
+        list_response = await s3_client.list_objects_v2(
+            Bucket=settings.AGENT_VOLUMES_CLEANUP_S3_BUCKET,
+            Prefix=f"{swarm_stack_name}/{project_id}/{node_id}/{run_id}",
+        )
+        synced_keys: list[str] = [o["Key"] for o in list_response["Contents"]]
+
+        assert len(synced_keys) == expected_files
+
+        async def _download_file(key: str) -> None:
+            key_path = Path(key)
+            (downloaded_from_s3 / key_path.parent.name).mkdir(
+                parents=True, exist_ok=True
+            )
+            await s3_client.download_file(
+                settings.AGENT_VOLUMES_CLEANUP_S3_BUCKET,
+                key,
+                downloaded_from_s3 / key_path.parent.name / key_path.name,
+            )
+
+        await asyncio.gather(*[_download_file(key) for key in synced_keys])
+
+        assert (
+            len([x for x in downloaded_from_s3.rglob("*") if x.is_file()])
+            == expected_files
+        )
diff --git a/services/agent/tests/unit/test_services_docker_utils.py b/services/agent/tests/unit/test_services_docker_utils.py
new file mode 100644
index 000000000000..40f86529edb6
--- /dev/null
+++ b/services/agent/tests/unit/test_services_docker_utils.py
@@ -0,0 +1,148 @@
+# pylint: disable=protected-access
+# pylint: disable=redefined-outer-name
+
+from collections.abc import Awaitable, Callable
+from pathlib import Path
+from unittest.mock import AsyncMock
+from uuid import uuid4
+
+import pytest
+from aiodocker.docker import Docker
+from fastapi import FastAPI
+from models_library.projects_nodes_io import NodeID
+from models_library.services_types import RunID
+from pytest_mock import MockerFixture
+from servicelib.docker_constants import PREFIX_DYNAMIC_SIDECAR_VOLUMES
+from simcore_service_agent.services.docker_utils import (
+    _VOLUMES_NOT_TO_BACKUP,
+    _does_volume_require_backup,
+    _reverse_string,
+    get_unused_dynamc_sidecar_volumes,
+    get_volume_details,
+    remove_volume,
+)
+from simcore_service_agent.services.volumes_manager import VolumesManager
+from utils import VOLUMES_TO_CREATE, get_source
+
+pytest_simcore_core_services_selection = [
+    "rabbit",
+]
+
+
+def test__reverse_string():
+    assert _reverse_string("abcd") == "dcba"
+
+
+@pytest.mark.parametrize(
+    "volume_path_part, expected",
+    [
+        ("inputs", False),
+        ("shared-store", False),
+        ("outputs", True),
+        ("workdir", True),
+    ],
+)
+def test__does_volume_require_backup(
+    run_id: RunID, volume_path_part: str, expected: bool
+) -> None:
+    volume_name = get_source(run_id, uuid4(), Path("/apath") / volume_path_part)
+    print(volume_name)
+    assert _does_volume_require_backup(volume_name) is expected
+
+
+@pytest.fixture
+def volumes_manager_docker_client(initialized_app: FastAPI) -> Docker:
+    volumes_manager = VolumesManager.get_from_app_state(initialized_app)
+    return volumes_manager.docker
+
+
+@pytest.fixture
+def mock_backup_volume(mocker: MockerFixture) -> AsyncMock:
+    return mocker.patch("simcore_service_agent.services.docker_utils.backup_volume")
+
+
+@pytest.mark.parametrize("volume_count", [2])
+@pytest.mark.parametrize("requires_backup", [True, False])
+async def test_docker_utils_workflow(
+    volume_count: int,
+    requires_backup: bool,
+    initialized_app: FastAPI,
+    volumes_manager_docker_client: Docker,
+    create_dynamic_sidecar_volumes: Callable[[NodeID, bool], Awaitable[set[str]]],
+    mock_backup_volume: AsyncMock,
+):
+    created_volumes: set[str] = set()
+    for _ in range(volume_count):
+        created_volume = await create_dynamic_sidecar_volumes(
+            uuid4(), False  # noqa: FBT003
+        )
+        created_volumes.update(created_volume)
+
+    volumes = await get_unused_dynamc_sidecar_volumes(volumes_manager_docker_client)
+    assert volumes == created_volumes, (
+        "Most likely you have a dirty working state, please check "
+        "that there are no previous docker volumes named `dyv_...` "
+        "currently present on the machine"
+    )
+
+    assert len(volumes) == len(VOLUMES_TO_CREATE) * volume_count
+
+    count_volumes_to_backup = 0
+    count_volumes_to_skip = 0
+
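+    # classify and remove every unused volume; backup_volume is mocked, so only its call count is checked below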
+    for volume in volumes:
+        if _does_volume_require_backup(volume):
+            count_volumes_to_backup += 1
+        else:
+            count_volumes_to_skip += 1
+
+        assert volume.startswith(PREFIX_DYNAMIC_SIDECAR_VOLUMES)
+        await remove_volume(
+            initialized_app,
+            volumes_manager_docker_client,
+            volume_name=volume,
+            requires_backup=requires_backup,
+        )
+
+    assert (
+        count_volumes_to_backup
+        == (len(VOLUMES_TO_CREATE) - len(_VOLUMES_NOT_TO_BACKUP)) * volume_count
+    )
+    assert count_volumes_to_skip == len(_VOLUMES_NOT_TO_BACKUP) * volume_count
+
+    assert mock_backup_volume.call_count == (
+        count_volumes_to_backup if requires_backup else 0
+    )
+
+    volumes = await get_unused_dynamc_sidecar_volumes(volumes_manager_docker_client)
+    assert len(volumes) == 0
+
+
+@pytest.mark.parametrize("requires_backup", [True, False])
+async def test_remove_missing_volume_does_not_raise_error(
+    requires_backup: bool,
+    initialized_app: FastAPI,
+    volumes_manager_docker_client: Docker,
+):
+    await remove_volume(
+        initialized_app,
+        volumes_manager_docker_client,
+        volume_name="this-volume-does-not-exist",
+        requires_backup=requires_backup,
+    )
+
+
+async def test_get_volume_details(
+    volumes_path: Path,
+    volumes_manager_docker_client: Docker,
+    create_dynamic_sidecar_volumes: Callable[[NodeID, bool], Awaitable[set[str]]],
+):
+
+    volume_names = await create_dynamic_sidecar_volumes(uuid4(), False)  # noqa: FBT003
+    for volume_name in volume_names:
+        volume_details = await get_volume_details(
+            volumes_manager_docker_client, volume_name=volume_name
+        )
+        print(volume_details)
+        volume_prefix = f"{volumes_path}".replace("/", "_").strip("_")
+        assert volume_details.labels.directory_name.startswith(volume_prefix)
diff --git a/services/agent/tests/unit/test_services_volumes_manager.py b/services/agent/tests/unit/test_services_volumes_manager.py
new file mode 100644
index 000000000000..0dfc29ceb83a
--- /dev/null
+++ b/services/agent/tests/unit/test_services_volumes_manager.py
@@ -0,0 +1,185 @@
+# pylint: disable=protected-access
+# pylint: disable=redefined-outer-name
+# pylint: disable=unused-argument
+
+from copy import deepcopy
+from dataclasses import dataclass, field
+from datetime import timedelta
+from pathlib import Path
+from unittest.mock import AsyncMock
+from uuid import uuid4
+
+import pytest
+import pytest_mock
+from aiodocker.docker import Docker
+from fastapi import FastAPI
+from models_library.projects_nodes_io import NodeID
+from models_library.services_types import RunID
+from servicelib.rabbitmq.rpc_interfaces.agent.errors import (
+    NoServiceVolumesFoundRPCError,
+)
+from simcore_service_agent.services.volumes_manager import VolumesManager
+from tenacity import (
+    AsyncRetrying,
+    retry_if_exception_type,
+    stop_after_delay,
+    wait_fixed,
+)
+from utils import VOLUMES_TO_CREATE, get_source
+
+
+@dataclass
+class MockedVolumesProxy:
+    run_id: RunID
+    volumes: set[str] = field(default_factory=set)
+
+    def add_unused_volumes_for_service(self, node_id: NodeID) -> None:
+        for folder_name in VOLUMES_TO_CREATE:
+            volume_name = get_source(self.run_id, node_id, Path("/apath") / folder_name)
+            self.volumes.add(volume_name)
+
+    def remove_volume(self, volume_name: str) -> None:
+        self.volumes.remove(volume_name)
+
+    def get_unused_dynamc_sidecar_volumes(self) -> set[str]:
+        return deepcopy(self.volumes)
+
+
+@pytest.fixture
+async def mock_docker_utils(
+    mocker: pytest_mock.MockerFixture, run_id: RunID
+) -> MockedVolumesProxy:
+    proxy = MockedVolumesProxy(run_id)
+
+    async def _remove_volume(
+        app: FastAPI, docker: Docker, *, volume_name: str, requires_backup: bool
+    ) -> None:
+        proxy.remove_volume(volume_name)
+
+    async def _get_unused_dynamc_sidecar_volumes(app: FastAPI) -> set[str]:
+        return proxy.get_unused_dynamc_sidecar_volumes()
+
+    mocker.patch(
+        "simcore_service_agent.services.volumes_manager.remove_volume",
+        side_effect=_remove_volume,
+    )
+
+    mocker.patch(
+        "simcore_service_agent.services.volumes_manager.get_unused_dynamc_sidecar_volumes",
+        side_effect=_get_unused_dynamc_sidecar_volumes,
+    )
+
+    return proxy
+
+
+@pytest.fixture
+def spy_remove_volume(
+    mocker: pytest_mock.MockerFixture, mock_docker_utils: MockedVolumesProxy
+) -> AsyncMock:
+    return mocker.spy(mock_docker_utils, "remove_volume")
+
+
+@pytest.fixture
+async def volumes_manager() -> VolumesManager:
+    # NOTE: background tasks are disabled on purpose
+    return VolumesManager(
+        app=FastAPI(),
+        book_keeping_interval=timedelta(seconds=1),
+        volume_cleanup_interval=timedelta(seconds=1),
+        remove_volumes_inactive_for=timedelta(seconds=0.1).total_seconds(),
+    )
+
+
+@pytest.mark.parametrize("service_count", [1, 3])
+async def test_volumes_manager_remove_all_volumes(
+    service_count: int,
+    mock_docker_utils: MockedVolumesProxy,
+    spy_remove_volume: AsyncMock,
+    volumes_manager: VolumesManager,
+):
+    assert spy_remove_volume.call_count == 0
+
+    for _ in range(service_count):
+        mock_docker_utils.add_unused_volumes_for_service(uuid4())
+    assert spy_remove_volume.call_count == 0
+    assert (
+        len(mock_docker_utils.get_unused_dynamc_sidecar_volumes())
+        == len(VOLUMES_TO_CREATE) * service_count
+    )
+
+    await volumes_manager.remove_all_volumes()
+    assert spy_remove_volume.call_count == len(VOLUMES_TO_CREATE) * service_count
+    assert len(mock_docker_utils.get_unused_dynamc_sidecar_volumes()) == 0
+
+
+async def test_volumes_manager_remove_service_volumes(
+    mock_docker_utils: MockedVolumesProxy,
+    spy_remove_volume: AsyncMock,
+    volumes_manager: VolumesManager,
+):
+    assert spy_remove_volume.call_count == 0
+    mock_docker_utils.add_unused_volumes_for_service(uuid4())
+    node_id_to_remove = uuid4()
+    mock_docker_utils.add_unused_volumes_for_service(node_id_to_remove)
+
+    assert spy_remove_volume.call_count == 0
+    assert (
+        len(mock_docker_utils.get_unused_dynamc_sidecar_volumes())
+        == len(VOLUMES_TO_CREATE) * 2
+    )
+
+    await volumes_manager.remove_service_volumes(node_id_to_remove)
+
+    assert spy_remove_volume.call_count == len(VOLUMES_TO_CREATE)
+    unused_volumes = mock_docker_utils.get_unused_dynamc_sidecar_volumes()
+    assert len(unused_volumes) == len(VOLUMES_TO_CREATE)
+    for volume_name in unused_volumes:
+        assert f"{node_id_to_remvoe}" not in volume_name
+
+
+@pytest.fixture
+async def mock_wait_for_unused_service_volumes(
+    mocker: pytest_mock.MockerFixture,
+) -> None:
+    mocker.patch(
+        "simcore_service_agent.services.volumes_manager._WAIT_FOR_UNUSED_SERVICE_VOLUMES",
+        timedelta(seconds=2),
+    )
+
+
+async def test_volumes_manager_remove_service_volumes_when_volume_does_not_exist(
+    mock_wait_for_unused_service_volumes: None,
+    volumes_manager: VolumesManager,
+):
+    not_existing_service = uuid4()
+    with pytest.raises(NoServiceVolumesFoundRPCError):
+        await volumes_manager.remove_service_volumes(not_existing_service)
+
+
+async def test_volumes_manager_periodic_task_cleanup(
+    mock_docker_utils: MockedVolumesProxy,
+    spy_remove_volume: AsyncMock,
+    volumes_manager: VolumesManager,
+):
+    async def _run_volumes_cleanup() -> None:
+        await volumes_manager._bookkeeping_task()  # noqa: SLF001
+        await volumes_manager._periodic_volmue_cleanup_task()  # noqa: SLF001
+
+    await _run_volumes_cleanup()
+    assert spy_remove_volume.call_count == 0
+
+    mock_docker_utils.add_unused_volumes_for_service(uuid4())
+    await _run_volumes_cleanup()
+    assert spy_remove_volume.call_count == 0
+
+    # wait until the inactivity interval (remove_volumes_inactive_for) has elapsed so the cleanup removes the volumes
+    async for attempt in AsyncRetrying(
+        wait=wait_fixed(0.1),
+        stop=stop_after_delay(1),
+        reraise=True,
+        retry=retry_if_exception_type(AssertionError),
+    ):
+        with attempt:
+            await _run_volumes_cleanup()
+            assert spy_remove_volume.call_count == len(VOLUMES_TO_CREATE)
+            assert len(mock_docker_utils.get_unused_dynamc_sidecar_volumes()) == 0
diff --git a/services/agent/tests/unit/utils.py b/services/agent/tests/unit/utils.py
new file mode 100644
index 000000000000..8eeb23138d47
--- /dev/null
+++ b/services/agent/tests/unit/utils.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+from typing import Final
+
+from models_library.projects_nodes_io import NodeID
+
+
+def get_source(run_id: str, node_id: NodeID, full_volume_path: Path) -> str:
+    # NOTE: volume name is not trimmed here, but it's ok for the tests
+    reversed_path = f"{full_volume_path}"[::-1].replace("/", "_")
+    return f"dyv_{run_id}_{node_id}_{reversed_path}"
+
+
+VOLUMES_TO_CREATE: Final[list[str]] = [
+    "inputs",
+    "outputs",
+    "workspace",
+    "work",
+    "shared-store",
+]
diff --git a/services/api-server/VERSION b/services/api-server/VERSION
index a918a2aa18d5..faef31a4357c 100644
--- a/services/api-server/VERSION
+++ b/services/api-server/VERSION
@@ -1 +1 @@
-0.6.0
+0.7.0
diff --git a/services/api-server/openapi.json b/services/api-server/openapi.json
index 8ade6cff6147..7965ae507f20 100644
--- a/services/api-server/openapi.json
+++ b/services/api-server/openapi.json
@@ -3,7 +3,7 @@
   "info": {
     "title": "osparc.io public API",
     "description": "osparc-simcore public API specifications",
-    "version": "0.6.0"
+    "version": "0.7.0"
   },
   "paths": {
     "/v0/meta": {
@@ -1869,13 +1869,132 @@
         ]
       }
     },
+    "/v0/solvers/{solver_key}/releases/{version}/pricing_plan": {
+      "get": {
+        "tags": [
+          "solvers"
+        ],
+        "summary": "Get Solver Pricing Plan",
+        "description": "Gets solver pricing plan\n\nNew in *version 0.7*",
+        "operationId": "get_solver_pricing_plan",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ServicePricingPlanGet"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
     "/v0/solvers/{solver_key}/releases/{version}/jobs": {
       "get": {
         "tags": [
           "solvers"
         ],
         "summary": "List Jobs",
-        "description": "List of jobs in a specific released solver (limited to 20 jobs)\n\nSEE `get_jobs_page` for paginated version of this function",
+        "description": "List of jobs in a specific released solver (limited to 20 jobs)\n\n- DEPRECATION: This implementation and returned values are deprecated and the will be replaced by that of get_jobs_page\n- SEE `get_jobs_page` for paginated version of this function",
         "operationId": "list_jobs",
         "parameters": [
           {
@@ -2169,14 +2288,14 @@
         ]
       }
     },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}:start": {
-      "post": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}": {
+      "get": {
         "tags": [
           "solvers"
         ],
-        "summary": "Start Job",
-        "description": "Starts job job_id created with the solver solver_key:version\n\nAdded in *version 0.4.3*: query parameter `cluster_id`\nAdded in *version 0.6*: responds with a 202 when successfully starting a computation",
-        "operationId": "start_job",
+        "summary": "Get Job",
+        "description": "Gets job of a given solver",
+        "operationId": "get_job",
         "parameters": [
           {
             "required": true,
@@ -2207,25 +2326,15 @@
             },
             "name": "job_id",
             "in": "path"
-          },
-          {
-            "required": false,
-            "schema": {
-              "type": "integer",
-              "minimum": 0,
-              "title": "Cluster Id"
-            },
-            "name": "cluster_id",
-            "in": "query"
           }
         ],
         "responses": {
-          "202": {
+          "200": {
             "description": "Successful Response",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
+                  "$ref": "#/components/schemas/Job"
                 }
               }
             }
@@ -2300,32 +2409,12 @@
               }
             }
           },
-          "200": {
-            "description": "Job already started",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
-                }
-              }
-            }
-          },
-          "406": {
-            "description": "Cluster not found",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ErrorGet"
-                }
-              }
-            }
-          },
           "422": {
-            "description": "Configuration error",
+            "description": "Validation Error",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ErrorGet"
+                  "$ref": "#/components/schemas/HTTPValidationError"
                 }
               }
             }
@@ -2336,15 +2425,14 @@
             "HTTPBasic": []
           }
         ]
-      }
-    },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}:stop": {
-      "post": {
+      },
+      "delete": {
         "tags": [
           "solvers"
         ],
-        "summary": "Stop Job",
-        "operationId": "stop_job",
+        "summary": "Delete Job",
+        "description": "Deletes an existing solver job\n\nNew in *version 0.7*",
+        "operationId": "delete_job",
         "parameters": [
           {
             "required": true,
@@ -2378,15 +2466,8 @@
           }
         ],
         "responses": {
-          "200": {
-            "description": "Successful Response",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
-                }
-              }
-            }
+          "204": {
+            "description": "Successful Response"
           },
           "402": {
             "description": "Payment required",
@@ -2476,13 +2557,14 @@
         ]
       }
     },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}:inspect": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}:start": {
       "post": {
         "tags": [
           "solvers"
         ],
-        "summary": "Inspect Job",
-        "operationId": "inspect_job",
+        "summary": "Start Job",
+        "description": "Starts job job_id created with the solver solver_key:version\n\nAdded in *version 0.4.3*: query parameter `cluster_id`\nAdded in *version 0.6*: responds with a 202 when successfully starting a computation",
+        "operationId": "start_job",
         "parameters": [
           {
             "required": true,
@@ -2513,10 +2595,20 @@
             },
             "name": "job_id",
             "in": "path"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "integer",
+              "minimum": 0,
+              "title": "Cluster Id"
+            },
+            "name": "cluster_id",
+            "in": "query"
           }
         ],
         "responses": {
-          "200": {
+          "202": {
             "description": "Successful Response",
             "content": {
               "application/json": {
@@ -2596,12 +2688,32 @@
               }
             }
           },
+          "200": {
+            "description": "Job already started",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/JobStatus"
+                }
+              }
+            }
+          },
+          "406": {
+            "description": "Cluster not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
           "422": {
-            "description": "Validation Error",
+            "description": "Configuration error",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
@@ -2614,14 +2726,13 @@
         ]
       }
     },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}": {
-      "get": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}:stop": {
+      "post": {
         "tags": [
           "solvers"
         ],
-        "summary": "Get Job",
-        "description": "Gets job of a given solver",
-        "operationId": "get_job",
+        "summary": "Stop Job",
+        "operationId": "stop_job",
         "parameters": [
           {
             "required": true,
@@ -2660,7 +2771,7 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/Job"
+                  "$ref": "#/components/schemas/JobStatus"
                 }
               }
             }
@@ -2753,13 +2864,13 @@
         ]
       }
     },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/outputs": {
-      "get": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}:inspect": {
+      "post": {
         "tags": [
           "solvers"
         ],
-        "summary": "Get Job Outputs",
-        "operationId": "get_job_outputs",
+        "summary": "Inspect Job",
+        "operationId": "inspect_job",
         "parameters": [
           {
             "required": true,
@@ -2798,7 +2909,7 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobOutputs"
+                  "$ref": "#/components/schemas/JobStatus"
                 }
               }
             }
@@ -2814,7 +2925,7 @@
             }
           },
           "404": {
-            "description": "Job not found",
+            "description": "Job/wallet/pricing details not found",
             "content": {
               "application/json": {
                 "schema": {
@@ -2891,14 +3002,14 @@
         ]
       }
     },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/outputs/logfile": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/metadata": {
       "get": {
         "tags": [
           "solvers"
         ],
-        "summary": "Get Job Output Logfile",
-        "description": "Special extra output with persistent logs file for the solver run.\n\n**NOTE**: this is not a log stream but a predefined output that is only\navailable after the job is done.\n\nNew in *version 0.4.0*",
-        "operationId": "get_job_output_logfile",
+        "summary": "Get Job Custom Metadata",
+        "description": "Gets custom metadata from a job\n\nNew in *version 0.7*",
+        "operationId": "get_job_custom_metadata",
         "parameters": [
           {
             "required": true,
@@ -2932,33 +3043,25 @@
           }
         ],
         "responses": {
-          "307": {
-            "description": "Successful Response"
-          },
           "200": {
-            "description": "Returns a log file",
+            "description": "Successful Response",
             "content": {
-              "application/octet-stream": {
-                "schema": {
-                  "type": "string",
-                  "format": "binary"
-                }
-              },
-              "application/zip": {
-                "schema": {
-                  "type": "string",
-                  "format": "binary"
-                }
-              },
-              "text/plain": {
+              "application/json": {
                 "schema": {
-                  "type": "string"
+                  "$ref": "#/components/schemas/JobMetadata"
                 }
               }
             }
           },
           "404": {
-            "description": "Log not found"
+            "description": "Metadata not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
           },
           "429": {
             "description": "Too many requests",
@@ -3026,15 +3129,14 @@
             "HTTPBasic": []
           }
         ]
-      }
-    },
-    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/logstream": {
-      "get": {
+      },
+      "patch": {
         "tags": [
           "solvers"
         ],
-        "summary": "Get Log Stream",
-        "operationId": "get_log_stream",
+        "summary": "Replace Job Custom Metadata",
+        "description": "Updates custom metadata from a job\n\nNew in *version 0.7*",
+        "operationId": "replace_job_custom_metadata",
         "parameters": [
           {
             "required": true,
@@ -3067,30 +3169,31 @@
             "in": "path"
           }
         ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/JobMetadataUpdate"
+              }
+            }
+          },
+          "required": true
+        },
         "responses": {
           "200": {
-            "description": "Returns a JobLog or an ErrorGet",
+            "description": "Successful Response",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
-                  "anyOf": [
-                    {
-                      "$ref": "#/components/schemas/JobLog"
-                    },
-                    {
-                      "$ref": "#/components/schemas/ErrorGet"
-                    }
-                  ],
-                  "type": "string",
-                  "title": "Response 200 Get Log Stream V0 Solvers  Solver Key  Releases  Version  Jobs  Job Id  Logstream Get"
+                  "$ref": "#/components/schemas/JobMetadata"
                 }
               }
             }
           },
-          "409": {
-            "description": "Conflict: Logs are already being streamed",
+          "404": {
+            "description": "Metadata not found",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/ErrorGet"
                 }
@@ -3100,7 +3203,7 @@
           "429": {
             "description": "Too many requests",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/ErrorGet"
                 }
@@ -3110,7 +3213,7 @@
           "500": {
             "description": "Internal server error",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/ErrorGet"
                 }
@@ -3120,7 +3223,7 @@
           "502": {
             "description": "Unexpected error when communicating with backend service",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/ErrorGet"
                 }
@@ -3130,7 +3233,7 @@
           "503": {
             "description": "Service unavailable",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/ErrorGet"
                 }
@@ -3140,7 +3243,7 @@
           "504": {
             "description": "Request to a backend service timed out.",
             "content": {
-              "application/x-ndjson": {
+              "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/ErrorGet"
                 }
@@ -3165,15 +3268,35 @@
         ]
       }
     },
-    "/v0/studies": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/page": {
       "get": {
         "tags": [
-          "studies"
+          "solvers"
         ],
-        "summary": "List Studies",
-        "description": "New in *version 0.5.0*",
-        "operationId": "list_studies",
+        "summary": "Get Jobs Page",
+        "description": "List of jobs on a specific released solver (includes pagination)\n\nNew in *version 0.7*",
+        "operationId": "get_jobs_page",
         "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          },
           {
             "required": false,
             "schema": {
@@ -3204,62 +3327,33 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/Page_Study_"
+                  "$ref": "#/components/schemas/Page_Job_"
                 }
               }
             }
           },
-          "422": {
-            "description": "Validation Error",
+          "402": {
+            "description": "Payment required",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
-          }
-        },
-        "security": [
-          {
-            "HTTPBasic": []
-          }
-        ]
-      }
-    },
-    "/v0/studies/{study_id}": {
-      "get": {
-        "tags": [
-          "studies"
-        ],
-        "summary": "Get Study",
-        "description": "New in *version 0.5.0*",
-        "operationId": "get_study",
-        "parameters": [
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Study Id"
-            },
-            "name": "study_id",
-            "in": "path"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "Successful Response",
+          },
+          "404": {
+            "description": "Job/wallet/pricing details not found",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/Study"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
           },
-          "404": {
-            "description": "Study not found",
+          "429": {
+            "description": "Too many requests",
             "content": {
               "application/json": {
                 "schema": {
@@ -3268,76 +3362,38 @@
               }
             }
           },
-          "422": {
-            "description": "Validation Error",
+          "500": {
+            "description": "Internal server error",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
-          }
-        },
-        "security": [
-          {
-            "HTTPBasic": []
-          }
-        ]
-      }
-    },
-    "/v0/studies/{study_id}:clone": {
-      "post": {
-        "tags": [
-          "studies"
-        ],
-        "summary": "Clone Study",
-        "operationId": "clone_study",
-        "parameters": [
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Study Id"
-            },
-            "name": "study_id",
-            "in": "path"
           },
-          {
-            "required": false,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "X-Simcore-Parent-Project-Uuid"
-            },
-            "name": "x-simcore-parent-project-uuid",
-            "in": "header"
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
           },
-          {
-            "required": false,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "X-Simcore-Parent-Node-Id"
-            },
-            "name": "x-simcore-parent-node-id",
-            "in": "header"
-          }
-        ],
-        "responses": {
-          "201": {
-            "description": "Successful Response",
+          "503": {
+            "description": "Service unavailable",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/Study"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
           },
-          "404": {
-            "description": "Study not found",
+          "504": {
+            "description": "Request to a backend service timed out.",
             "content": {
               "application/json": {
                 "schema": {
@@ -3364,23 +3420,42 @@
         ]
       }
     },
-    "/v0/studies/{study_id}/ports": {
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/outputs": {
       "get": {
         "tags": [
-          "studies"
+          "solvers"
         ],
-        "summary": "List Study Ports",
-        "description": "Lists metadata on ports of a given study\n\nNew in *version 0.5.0*",
-        "operationId": "list_study_ports",
+        "summary": "Get Job Outputs",
+        "operationId": "get_job_outputs",
         "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          },
           {
             "required": true,
             "schema": {
               "type": "string",
               "format": "uuid",
-              "title": "Study Id"
+              "title": "Job Id"
             },
-            "name": "study_id",
+            "name": "job_id",
             "in": "path"
           }
         ],
@@ -3390,13 +3465,13 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/OnePage_StudyPort_"
+                  "$ref": "#/components/schemas/JobOutputs"
                 }
               }
             }
           },
-          "404": {
-            "description": "Study not found",
+          "402": {
+            "description": "Payment required",
             "content": {
               "application/json": {
                 "schema": {
@@ -3405,9 +3480,1237 @@
               }
             }
           },
-          "422": {
-            "description": "Validation Error",
-            "content": {
+          "404": {
+            "description": "Job not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/outputs/logfile": {
+      "get": {
+        "tags": [
+          "solvers"
+        ],
+        "summary": "Get Job Output Logfile",
+        "description": "Special extra output with persistent logs file for the solver run.\n\n**NOTE**: this is not a log stream but a predefined output that is only\navailable after the job is done.\n\nNew in *version 0.4.0*",
+        "operationId": "get_job_output_logfile",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "307": {
+            "description": "Successful Response"
+          },
+          "200": {
+            "description": "Returns a log file",
+            "content": {
+              "application/octet-stream": {
+                "schema": {
+                  "type": "string",
+                  "format": "binary"
+                }
+              },
+              "application/zip": {
+                "schema": {
+                  "type": "string",
+                  "format": "binary"
+                }
+              },
+              "text/plain": {
+                "schema": {
+                  "type": "string"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Log not found"
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/wallet": {
+      "get": {
+        "tags": [
+          "solvers"
+        ],
+        "summary": "Get Job Wallet",
+        "description": "Get job wallet\n\nNew in *version 0.7*",
+        "operationId": "get_job_wallet",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/WalletGetWithAvailableCredits"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Wallet not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "403": {
+            "description": "Access to wallet is not allowed",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/pricing_unit": {
+      "get": {
+        "tags": [
+          "solvers"
+        ],
+        "summary": "Get Job Pricing Unit",
+        "description": "Get job pricing unit\n\nNew in *version 0.7*",
+        "operationId": "get_job_pricing_unit",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/PricingUnitGet"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Pricing unit not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/solvers/{solver_key}/releases/{version}/jobs/{job_id}/logstream": {
+      "get": {
+        "tags": [
+          "solvers"
+        ],
+        "summary": "Get Log Stream",
+        "operationId": "get_log_stream",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^simcore/services/comp/([a-z0-9][a-z0-9_.-]*/)*([a-z0-9-_]+[a-z0-9])$",
+              "title": "Solver Key"
+            },
+            "name": "solver_key",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "pattern": "^(0|[1-9]\\d*)(\\.(0|[1-9]\\d*)){2}(-(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*)(\\.(0|[1-9]\\d*|\\d*[-a-zA-Z][-\\da-zA-Z]*))*)?(\\+[-\\da-zA-Z]+(\\.[-\\da-zA-Z-]+)*)?$",
+              "title": "Version"
+            },
+            "name": "version",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Returns a JobLog or an ErrorGet",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "anyOf": [
+                    {
+                      "$ref": "#/components/schemas/JobLog"
+                    },
+                    {
+                      "$ref": "#/components/schemas/ErrorGet"
+                    }
+                  ],
+                  "type": "string",
+                  "title": "Response 200 Get Log Stream V0 Solvers  Solver Key  Releases  Version  Jobs  Job Id  Logstream Get"
+                }
+              }
+            }
+          },
+          "409": {
+            "description": "Conflict: Logs are already being streamed",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/x-ndjson": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies": {
+      "get": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "List Studies",
+        "description": "New in *version 0.5.0*",
+        "operationId": "list_studies",
+        "parameters": [
+          {
+            "required": false,
+            "schema": {
+              "type": "integer",
+              "maximum": 100,
+              "minimum": 1,
+              "title": "Limit",
+              "default": 50
+            },
+            "name": "limit",
+            "in": "query"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "integer",
+              "minimum": 0,
+              "title": "Offset",
+              "default": 0
+            },
+            "name": "offset",
+            "in": "query"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Page_Study_"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}": {
+      "get": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Get Study",
+        "description": "New in *version 0.5.0*",
+        "operationId": "get_study",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Study"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Study not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}:clone": {
+      "post": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Clone Study",
+        "operationId": "clone_study",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "X-Simcore-Parent-Project-Uuid"
+            },
+            "name": "x-simcore-parent-project-uuid",
+            "in": "header"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "X-Simcore-Parent-Node-Id"
+            },
+            "name": "x-simcore-parent-node-id",
+            "in": "header"
+          }
+        ],
+        "responses": {
+          "201": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Study"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Study not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/ports": {
+      "get": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "List Study Ports",
+        "description": "Lists metadata on ports of a given study\n\nNew in *version 0.5.0*",
+        "operationId": "list_study_ports",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/OnePage_StudyPort_"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Study not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/jobs": {
+      "post": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Create Study Job",
+        "description": "hidden -- if True (default) hides project from UI",
+        "operationId": "create_study_job",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "boolean",
+              "title": "Hidden",
+              "default": true
+            },
+            "name": "hidden",
+            "in": "query"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "X-Simcore-Parent-Project-Uuid"
+            },
+            "name": "x-simcore-parent-project-uuid",
+            "in": "header"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "X-Simcore-Parent-Node-Id"
+            },
+            "name": "x-simcore-parent-node-id",
+            "in": "header"
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/JobInputs"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Job"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/jobs/{job_id}": {
+      "delete": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Delete Study Job",
+        "description": "Deletes an existing study job",
+        "operationId": "delete_study_job",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "204": {
+            "description": "Successful Response"
+          },
+          "404": {
+            "description": "Not Found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/jobs/{job_id}:start": {
+      "post": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Start Study Job",
+        "description": "Changed in *version 0.6.0*: Now responds with a 202 when successfully starting a computation",
+        "operationId": "start_study_job",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          },
+          {
+            "required": false,
+            "schema": {
+              "type": "integer",
+              "minimum": 0,
+              "title": "Cluster Id"
+            },
+            "name": "cluster_id",
+            "in": "query"
+          }
+        ],
+        "responses": {
+          "202": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/JobStatus"
+                }
+              }
+            }
+          },
+          "402": {
+            "description": "Payment required",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Job/wallet/pricing details not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Too many requests",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "503": {
+            "description": "Service unavailable",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "504": {
+            "description": "Request to a backend service timed out.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "200": {
+            "description": "Job already started",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/JobStatus"
+                }
+              }
+            }
+          },
+          "406": {
+            "description": "Cluster not found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Configuration error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/jobs/{job_id}:stop": {
+      "post": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Stop Study Job",
+        "operationId": "stop_study_job",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/JobStatus"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
               "application/json": {
                 "schema": {
                   "$ref": "#/components/schemas/HTTPValidationError"
@@ -3423,14 +4726,129 @@
         ]
       }
     },
-    "/v0/studies/{study_id}/jobs": {
+    "/v0/studies/{study_id}/jobs/{job_id}:inspect": {
+      "post": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Inspect Study Job",
+        "operationId": "inspect_study_job",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/JobStatus"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/jobs/{job_id}/outputs": {
       "post": {
         "tags": [
           "studies"
         ],
-        "summary": "Create Study Job",
-        "description": "hidden -- if True (default) hides project from UI",
-        "operationId": "create_study_job",
+        "summary": "Get Study Job Outputs",
+        "operationId": "get_study_job_outputs",
+        "parameters": [
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Study Id"
+            },
+            "name": "study_id",
+            "in": "path"
+          },
+          {
+            "required": true,
+            "schema": {
+              "type": "string",
+              "format": "uuid",
+              "title": "Job Id"
+            },
+            "name": "job_id",
+            "in": "path"
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful Response",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/JobOutputs"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Validation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HTTPValidationError"
+                }
+              }
+            }
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/studies/{study_id}/jobs/{job_id}/outputs/log-links": {
+      "get": {
+        "tags": [
+          "studies"
+        ],
+        "summary": "Get download links for study job log files",
+        "operationId": "get_study_job_output_logfile",
         "parameters": [
           {
             "required": true,
@@ -3443,53 +4861,23 @@
             "in": "path"
           },
           {
-            "required": false,
-            "schema": {
-              "type": "boolean",
-              "title": "Hidden",
-              "default": true
-            },
-            "name": "hidden",
-            "in": "query"
-          },
-          {
-            "required": false,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "X-Simcore-Parent-Project-Uuid"
-            },
-            "name": "x-simcore-parent-project-uuid",
-            "in": "header"
-          },
-          {
-            "required": false,
+            "required": true,
             "schema": {
               "type": "string",
               "format": "uuid",
-              "title": "X-Simcore-Parent-Node-Id"
+              "title": "Job Id"
             },
-            "name": "x-simcore-parent-node-id",
-            "in": "header"
+            "name": "job_id",
+            "in": "path"
           }
         ],
-        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/JobInputs"
-              }
-            }
-          },
-          "required": true
-        },
         "responses": {
           "200": {
             "description": "Successful Response",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/Job"
+                  "$ref": "#/components/schemas/JobLogsMap"
                 }
               }
             }
@@ -3512,14 +4900,14 @@
         ]
       }
     },
-    "/v0/studies/{study_id}/jobs/{job_id}": {
-      "delete": {
+    "/v0/studies/{study_id}/jobs/{job_id}/metadata": {
+      "get": {
         "tags": [
           "studies"
         ],
-        "summary": "Delete Study Job",
-        "description": "Deletes an existing study job",
-        "operationId": "delete_study_job",
+        "summary": "Get Study Job Custom Metadata",
+        "description": "Get custom metadata from a study's job\n\nNew in *version 0.7*",
+        "operationId": "get_study_job_custom_metadata",
         "parameters": [
           {
             "required": true,
@@ -3543,15 +4931,12 @@
           }
         ],
         "responses": {
-          "204": {
-            "description": "Successful Response"
-          },
-          "404": {
-            "description": "Not Found",
+          "200": {
+            "description": "Successful Response",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ErrorGet"
+                  "$ref": "#/components/schemas/JobMetadata"
                 }
               }
             }
@@ -3572,16 +4957,14 @@
             "HTTPBasic": []
           }
         ]
-      }
-    },
-    "/v0/studies/{study_id}/jobs/{job_id}:start": {
-      "post": {
+      },
+      "put": {
         "tags": [
           "studies"
         ],
-        "summary": "Start Study Job",
-        "description": "Changed in *version 0.6.0*: Now responds with a 202 when successfully starting a computation",
-        "operationId": "start_study_job",
+        "summary": "Replace Study Job Custom Metadata",
+        "description": "Changes custom metadata of a study's job\n\nNew in *version 0.7*",
+        "operationId": "replace_study_job_custom_metadata",
         "parameters": [
           {
             "required": true,
@@ -3602,61 +4985,68 @@
             },
             "name": "job_id",
             "in": "path"
-          },
-          {
-            "required": false,
-            "schema": {
-              "type": "integer",
-              "minimum": 0,
-              "title": "Cluster Id"
-            },
-            "name": "cluster_id",
-            "in": "query"
           }
         ],
-        "responses": {
-          "202": {
-            "description": "Successful Response",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
-                }
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/JobMetadataUpdate"
               }
             }
           },
-          "402": {
-            "description": "Payment required",
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful Response",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ErrorGet"
+                  "$ref": "#/components/schemas/JobMetadata"
                 }
               }
             }
           },
-          "404": {
-            "description": "Job/wallet/pricing details not found",
+          "422": {
+            "description": "Validation Error",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ErrorGet"
+                  "$ref": "#/components/schemas/HTTPValidationError"
                 }
               }
             }
-          },
-          "429": {
-            "description": "Too many requests",
+          }
+        },
+        "security": [
+          {
+            "HTTPBasic": []
+          }
+        ]
+      }
+    },
+    "/v0/wallets/default": {
+      "get": {
+        "tags": [
+          "wallets"
+        ],
+        "summary": "Get Default Wallet",
+        "description": "Get default wallet\n\nNew in *version 0.7*",
+        "operationId": "get_default_wallet",
+        "responses": {
+          "200": {
+            "description": "Successful Response",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ErrorGet"
+                  "$ref": "#/components/schemas/WalletGetWithAvailableCredits"
                 }
               }
             }
           },
-          "500": {
-            "description": "Internal server error",
+          "404": {
+            "description": "Wallet not found",
             "content": {
               "application/json": {
                 "schema": {
@@ -3665,8 +5055,8 @@
               }
             }
           },
-          "502": {
-            "description": "Unexpected error when communicating with backend service",
+          "403": {
+            "description": "Access to wallet is not allowed",
             "content": {
               "application/json": {
                 "schema": {
@@ -3675,8 +5065,8 @@
               }
             }
           },
-          "503": {
-            "description": "Service unavailable",
+          "429": {
+            "description": "Too many requests",
             "content": {
               "application/json": {
                 "schema": {
@@ -3685,8 +5075,8 @@
               }
             }
           },
-          "504": {
-            "description": "Request to a backend service timed out.",
+          "500": {
+            "description": "Internal server error",
             "content": {
               "application/json": {
                 "schema": {
@@ -3695,18 +5085,18 @@
               }
             }
           },
-          "200": {
-            "description": "Job already started",
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
           },
-          "406": {
-            "description": "Cluster not found",
+          "503": {
+            "description": "Service unavailable",
             "content": {
               "application/json": {
                 "schema": {
@@ -3715,8 +5105,8 @@
               }
             }
           },
-          "422": {
-            "description": "Configuration error",
+          "504": {
+            "description": "Request to a backend service timed out.",
             "content": {
               "application/json": {
                 "schema": {
@@ -3733,32 +5123,22 @@
         ]
       }
     },
-    "/v0/studies/{study_id}/jobs/{job_id}:stop": {
-      "post": {
+    "/v0/wallets/{wallet_id}": {
+      "get": {
         "tags": [
-          "studies"
+          "wallets"
         ],
-        "summary": "Stop Study Job",
-        "operationId": "stop_study_job",
+        "summary": "Get Wallet",
+        "description": "Get wallet\n\nNew in *version 0.7*",
+        "operationId": "get_wallet",
         "parameters": [
           {
             "required": true,
             "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Study Id"
-            },
-            "name": "study_id",
-            "in": "path"
-          },
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Job Id"
+              "type": "integer",
+              "title": "Wallet Id"
             },
-            "name": "job_id",
+            "name": "wallet_id",
             "in": "path"
           }
         ],
@@ -3768,181 +5148,77 @@
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
+                  "$ref": "#/components/schemas/WalletGetWithAvailableCredits"
                 }
               }
             }
           },
-          "422": {
-            "description": "Validation Error",
+          "404": {
+            "description": "Wallet not found",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
-          }
-        },
-        "security": [
-          {
-            "HTTPBasic": []
-          }
-        ]
-      }
-    },
-    "/v0/studies/{study_id}/jobs/{job_id}:inspect": {
-      "post": {
-        "tags": [
-          "studies"
-        ],
-        "summary": "Inspect Study Job",
-        "operationId": "inspect_study_job",
-        "parameters": [
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Study Id"
-            },
-            "name": "study_id",
-            "in": "path"
           },
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Job Id"
-            },
-            "name": "job_id",
-            "in": "path"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "Successful Response",
+          "403": {
+            "description": "Access to wallet is not allowed",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobStatus"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
           },
-          "422": {
-            "description": "Validation Error",
+          "429": {
+            "description": "Too many requests",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
+                  "$ref": "#/components/schemas/ErrorGet"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
-          }
-        },
-        "security": [
-          {
-            "HTTPBasic": []
-          }
-        ]
-      }
-    },
-    "/v0/studies/{study_id}/jobs/{job_id}/outputs": {
-      "post": {
-        "tags": [
-          "studies"
-        ],
-        "summary": "Get Study Job Outputs",
-        "operationId": "get_study_job_outputs",
-        "parameters": [
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Study Id"
-            },
-            "name": "study_id",
-            "in": "path"
           },
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Job Id"
-            },
-            "name": "job_id",
-            "in": "path"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "Successful Response",
+          "502": {
+            "description": "Unexpected error when communicating with backend service",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobOutputs"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
           },
-          "422": {
-            "description": "Validation Error",
+          "503": {
+            "description": "Service unavailable",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/HTTPValidationError"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
-          }
-        },
-        "security": [
-          {
-            "HTTPBasic": []
-          }
-        ]
-      }
-    },
-    "/v0/studies/{study_id}/jobs/{job_id}/outputs/log-links": {
-      "get": {
-        "tags": [
-          "studies"
-        ],
-        "summary": "Get download links for study job log files",
-        "operationId": "get_study_job_output_logfile",
-        "parameters": [
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Study Id"
-            },
-            "name": "study_id",
-            "in": "path"
           },
-          {
-            "required": true,
-            "schema": {
-              "type": "string",
-              "format": "uuid",
-              "title": "Job Id"
-            },
-            "name": "job_id",
-            "in": "path"
-          }
-        ],
-        "responses": {
-          "200": {
-            "description": "Successful Response",
+          "504": {
+            "description": "Request to a backend service timed out.",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/JobLogsMap"
+                  "$ref": "#/components/schemas/ErrorGet"
                 }
               }
             }
@@ -4438,6 +5714,79 @@
         ],
         "title": "JobLogsMap"
       },
+      "JobMetadata": {
+        "properties": {
+          "job_id": {
+            "type": "string",
+            "format": "uuid",
+            "title": "Job Id",
+            "description": "Parent Job"
+          },
+          "metadata": {
+            "additionalProperties": {
+              "anyOf": [
+                {
+                  "type": "boolean"
+                },
+                {
+                  "type": "integer"
+                },
+                {
+                  "type": "number"
+                },
+                {
+                  "type": "string"
+                }
+              ]
+            },
+            "type": "object",
+            "title": "Metadata",
+            "description": "Custom key-value map"
+          },
+          "url": {
+            "type": "string",
+            "maxLength": 2083,
+            "minLength": 1,
+            "format": "uri",
+            "title": "Url",
+            "description": "Link to get this resource (self)"
+          }
+        },
+        "type": "object",
+        "required": [
+          "job_id",
+          "metadata",
+          "url"
+        ],
+        "title": "JobMetadata"
+      },
+      "JobMetadataUpdate": {
+        "properties": {
+          "metadata": {
+            "additionalProperties": {
+              "anyOf": [
+                {
+                  "type": "boolean"
+                },
+                {
+                  "type": "integer"
+                },
+                {
+                  "type": "number"
+                },
+                {
+                  "type": "string"
+                }
+              ]
+            },
+            "type": "object",
+            "title": "Metadata",
+            "description": "Custom key-value map"
+          }
+        },
+        "type": "object",
+        "title": "JobMetadataUpdate"
+      },
       "JobOutputs": {
         "properties": {
           "job_id": {
@@ -4730,6 +6079,41 @@
         ],
         "title": "Page[File]"
       },
+      "Page_Job_": {
+        "properties": {
+          "items": {
+            "items": {
+              "$ref": "#/components/schemas/Job"
+            },
+            "type": "array",
+            "title": "Items"
+          },
+          "total": {
+            "type": "integer",
+            "minimum": 0,
+            "title": "Total"
+          },
+          "limit": {
+            "type": "integer",
+            "minimum": 1,
+            "title": "Limit"
+          },
+          "offset": {
+            "type": "integer",
+            "minimum": 0,
+            "title": "Offset"
+          },
+          "links": {
+            "$ref": "#/components/schemas/Links"
+          }
+        },
+        "type": "object",
+        "required": [
+          "items",
+          "links"
+        ],
+        "title": "Page[Job]"
+      },
       "Page_Study_": {
         "properties": {
           "items": {
@@ -4765,6 +6149,49 @@
         ],
         "title": "Page[Study]"
       },
+      "PricingPlanClassification": {
+        "type": "string",
+        "enum": [
+          "TIER"
+        ],
+        "title": "PricingPlanClassification",
+        "description": "An enumeration."
+      },
+      "PricingUnitGet": {
+        "properties": {
+          "pricingUnitId": {
+            "type": "integer",
+            "exclusiveMinimum": true,
+            "title": "Pricingunitid",
+            "minimum": 0
+          },
+          "unitName": {
+            "type": "string",
+            "title": "Unitname"
+          },
+          "unitExtraInfo": {
+            "type": "object",
+            "title": "Unitextrainfo"
+          },
+          "currentCostPerUnit": {
+            "type": "number",
+            "title": "Currentcostperunit"
+          },
+          "default": {
+            "type": "boolean",
+            "title": "Default"
+          }
+        },
+        "type": "object",
+        "required": [
+          "pricingUnitId",
+          "unitName",
+          "unitExtraInfo",
+          "currentCostPerUnit",
+          "default"
+        ],
+        "title": "PricingUnitGet"
+      },
       "Profile": {
         "properties": {
           "first_name": {
@@ -4867,6 +6294,54 @@
         "title": "RunningState",
         "description": "State of execution of a project's computational workflow\n\nSEE StateType for task state"
       },
+      "ServicePricingPlanGet": {
+        "properties": {
+          "pricingPlanId": {
+            "type": "integer",
+            "exclusiveMinimum": true,
+            "title": "Pricingplanid",
+            "minimum": 0
+          },
+          "displayName": {
+            "type": "string",
+            "title": "Displayname"
+          },
+          "description": {
+            "type": "string",
+            "title": "Description"
+          },
+          "classification": {
+            "$ref": "#/components/schemas/PricingPlanClassification"
+          },
+          "createdAt": {
+            "type": "string",
+            "format": "date-time",
+            "title": "Createdat"
+          },
+          "pricingPlanKey": {
+            "type": "string",
+            "title": "Pricingplankey"
+          },
+          "pricingUnits": {
+            "items": {
+              "$ref": "#/components/schemas/PricingUnitGet"
+            },
+            "type": "array",
+            "title": "Pricingunits"
+          }
+        },
+        "type": "object",
+        "required": [
+          "pricingPlanId",
+          "displayName",
+          "description",
+          "classification",
+          "createdAt",
+          "pricingPlanKey",
+          "pricingUnits"
+        ],
+        "title": "ServicePricingPlanGet"
+      },
       "Solver": {
         "properties": {
           "id": {
@@ -5129,6 +6604,73 @@
           "type"
         ],
         "title": "ValidationError"
+      },
+      "WalletGetWithAvailableCredits": {
+        "properties": {
+          "walletId": {
+            "type": "integer",
+            "exclusiveMinimum": true,
+            "title": "Walletid",
+            "minimum": 0
+          },
+          "name": {
+            "type": "string",
+            "maxLength": 100,
+            "minLength": 1,
+            "title": "Name"
+          },
+          "description": {
+            "type": "string",
+            "title": "Description"
+          },
+          "owner": {
+            "type": "integer",
+            "exclusiveMinimum": true,
+            "title": "Owner",
+            "minimum": 0
+          },
+          "thumbnail": {
+            "type": "string",
+            "title": "Thumbnail"
+          },
+          "status": {
+            "$ref": "#/components/schemas/WalletStatus"
+          },
+          "created": {
+            "type": "string",
+            "format": "date-time",
+            "title": "Created"
+          },
+          "modified": {
+            "type": "string",
+            "format": "date-time",
+            "title": "Modified"
+          },
+          "availableCredits": {
+            "type": "number",
+            "title": "Availablecredits"
+          }
+        },
+        "type": "object",
+        "required": [
+          "walletId",
+          "name",
+          "owner",
+          "status",
+          "created",
+          "modified",
+          "availableCredits"
+        ],
+        "title": "WalletGetWithAvailableCredits"
+      },
+      "WalletStatus": {
+        "type": "string",
+        "enum": [
+          "ACTIVE",
+          "INACTIVE"
+        ],
+        "title": "WalletStatus",
+        "description": "An enumeration."
       }
     },
     "securitySchemes": {
diff --git a/services/api-server/requirements/_base.txt b/services/api-server/requirements/_base.txt
index a132d7fb2a64..76bdd46ba363 100644
--- a/services/api-server/requirements/_base.txt
+++ b/services/api-server/requirements/_base.txt
@@ -1,16 +1,56 @@
 aio-pika==9.4.1
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 aiocache==0.12.2
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
 aiodebug==2.3.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 aiodocker==0.21.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 aiofiles==23.2.1
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
+    #   -r requirements/_base.in
 aiohttp==3.9.3
-    # via aiodocker
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
+    #   aiodocker
 aiopg==1.4.0
+    # via
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
+    #   -r requirements/_base.in
 aiormq==6.8.0
     # via aio-pika
 aiosignal==1.3.1
     # via aiohttp
 alembic==1.13.1
+    # via
+    #   -r requirements/../../../packages/postgres-database/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in
 anyio==4.3.0
     # via
     #   fast-depends
@@ -19,12 +59,19 @@ anyio==4.3.0
     #   starlette
     #   watchfiles
 arrow==1.3.0
+    # via
+    #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+asgiref==3.8.1
+    # via opentelemetry-instrumentation-asgi
 async-timeout==4.0.3
     # via
-    #   aiohttp
     #   aiopg
     #   asyncpg
-    #   redis
 asyncpg==0.29.0
     # via sqlalchemy
 attrs==23.2.0
@@ -33,37 +80,102 @@ attrs==23.2.0
     #   jsonschema
 certifi==2024.2.2
     # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
     #   httpcore
     #   httpx
+    #   requests
 cffi==1.16.0
     # via cryptography
+charset-normalizer==3.3.2
+    # via requests
 click==8.1.7
     # via
     #   typer
     #   uvicorn
 cryptography==42.0.5
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/_base.in
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
 dnspython==2.6.1
     # via email-validator
 email-validator==2.1.1
     # via
     #   fastapi
     #   pydantic
-exceptiongroup==1.2.0
-    # via anyio
 fast-depends==2.4.2
     # via faststream
 fastapi==0.99.1
     # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
+    #   -r requirements/_base.in
     #   fastapi-pagination
     #   prometheus-fastapi-instrumentator
 fastapi-pagination==0.12.17
+    # via
+    #   -c requirements/./constraints.txt
+    #   -r requirements/_base.in
 faststream==0.5.10
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 frozenlist==1.4.1
     # via
     #   aiohttp
     #   aiosignal
+googleapis-common-protos==1.65.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
 greenlet==3.0.3
     # via sqlalchemy
+grpcio==1.66.0
+    # via opentelemetry-exporter-otlp-proto-grpc
 h11==0.14.0
     # via
     #   httpcore
@@ -73,20 +185,76 @@ httpcore==1.0.5
 httptools==0.6.1
     # via uvicorn
 httpx==0.27.0
-    # via fastapi
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
+    #   -r requirements/_base.in
+    #   fastapi
 idna==3.6
     # via
     #   anyio
     #   email-validator
     #   httpx
+    #   requests
     #   yarl
+importlib-metadata==8.0.0
+    # via opentelemetry-api
 itsdangerous==2.1.2
     # via fastapi
 jinja2==3.1.3
-    # via fastapi
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   fastapi
 jsonschema==3.2.0
+    # via
+    #   -c requirements/./constraints.txt
+    #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
 mako==1.3.2
-    # via alembic
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   alembic
 markdown-it-py==3.0.0
     # via rich
 markupsafe==2.1.5
@@ -99,16 +267,111 @@ multidict==6.0.5
     # via
     #   aiohttp
     #   yarl
+opentelemetry-api==1.26.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.26.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-exporter-otlp-proto-common==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.26.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.47b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+opentelemetry-instrumentation-asgi==0.47b0
+    # via opentelemetry-instrumentation-fastapi
+opentelemetry-instrumentation-fastapi==0.47b0
+    # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
+opentelemetry-instrumentation-requests==0.47b0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-proto==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.26.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.47b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+opentelemetry-util-http==0.47b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
 orjson==3.10.0
-    # via fastapi
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/_base.in
+    #   fastapi
 packaging==24.0
+    # via
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
+    #   -r requirements/_base.in
 pamqp==3.3.0
     # via aiormq
 parse==1.20.2
+    # via -r requirements/_base.in
 pint==0.23
+    # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in
 prometheus-client==0.20.0
-    # via prometheus-fastapi-instrumentator
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
+    #   prometheus-fastapi-instrumentator
 prometheus-fastapi-instrumentator==6.1.0
+    # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
+protobuf==4.25.4
+    # via
+    #   googleapis-common-protos
+    #   opentelemetry-proto
+psutil==6.0.0
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 psycopg2-binary==2.9.9
     # via
     #   aiopg
@@ -117,12 +380,44 @@ pycparser==2.22
     # via cffi
 pydantic==1.10.14
     # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/postgres-database/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
+    #   -r requirements/_base.in
     #   fast-depends
     #   fastapi
     #   fastapi-pagination
 pygments==2.17.2
     # via rich
 pyinstrument==4.6.2
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 pyrsistent==0.20.0
     # via jsonschema
 python-dateutil==2.9.0.post0
@@ -135,13 +430,60 @@ python-multipart==0.0.9
     # via fastapi
 pyyaml==6.0.1
     # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/_base.in
     #   fastapi
     #   uvicorn
 redis==5.0.4
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+repro-zipfile==0.3.1
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+requests==2.32.3
+    # via opentelemetry-exporter-otlp-proto-http
 rich==13.7.1
-    # via typer
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/_base.in
+    #   typer
 setuptools==69.2.0
-    # via jsonschema
+    # via
+    #   jsonschema
+    #   opentelemetry-instrumentation
 shellingham==1.5.4
     # via typer
 six==1.16.0
@@ -154,6 +496,22 @@ sniffio==1.3.1
     #   httpx
 sqlalchemy==1.4.52
     # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   -r requirements/../../../packages/postgres-database/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in
     #   aiopg
     #   alembic
 starlette==0.27.0
@@ -180,9 +538,22 @@ tenacity==8.5.0
     #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
     #   -r requirements/_base.in
 toolz==0.12.1
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
 tqdm==4.66.2
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/_base.in
 typer==0.12.3
-    # via faststream
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/_base.in
+    #   -r requirements/_base.in
+    #   faststream
 types-python-dateutil==2.9.0.20240316
     # via arrow
 typing-extensions==4.10.0
@@ -190,26 +561,67 @@ typing-extensions==4.10.0
     #   aiodebug
     #   aiodocker
     #   alembic
-    #   anyio
     #   fastapi
     #   fastapi-pagination
     #   faststream
+    #   opentelemetry-sdk
     #   pint
     #   pydantic
     #   typer
-    #   uvicorn
 ujson==5.9.0
-    # via fastapi
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   fastapi
+urllib3==2.2.2
+    # via
+    #   -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt
+    #   -c requirements/../../../requirements/constraints.txt
+    #   requests
 uvicorn==0.29.0
-    # via fastapi
+    # via
+    #   -r requirements/../../../packages/service-library/requirements/_fastapi.in
+    #   fastapi
 uvloop==0.19.0
     # via uvicorn
 watchfiles==0.21.0
     # via uvicorn
 websockets==12.0
     # via uvicorn
+wrapt==1.16.0
+    # via
+    #   deprecated
+    #   opentelemetry-instrumentation
 yarl==1.9.4
     # via
+    #   -r requirements/../../../packages/postgres-database/requirements/_base.in
+    #   -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in
     #   aio-pika
     #   aiohttp
     #   aiormq
+zipp==3.20.1
+    # via importlib-metadata
diff --git a/services/api-server/requirements/_test.txt b/services/api-server/requirements/_test.txt
index 8a1ddb9caa4e..f4201ab9d4d6 100644
--- a/services/api-server/requirements/_test.txt
+++ b/services/api-server/requirements/_test.txt
@@ -19,10 +19,6 @@ anyio==4.3.0
     #   httpx
 asgi-lifespan==2.1.0
     # via -r requirements/_test.in
-async-timeout==4.0.3
-    # via
-    #   -c requirements/_base.txt
-    #   aiohttp
 attrs==23.2.0
     # via
     #   -c requirements/_base.txt
@@ -37,19 +33,19 @@ aws-sam-translator==1.55.0
     #   cfn-lint
 aws-xray-sdk==2.14.0
     # via moto
-boto3==1.35.2
+boto3==1.35.25
     # via
     #   aws-sam-translator
     #   moto
-boto3-stubs==1.35.2
+boto3-stubs==1.35.25
     # via types-boto3
-botocore==1.35.2
+botocore==1.35.25
     # via
     #   aws-xray-sdk
     #   boto3
     #   moto
     #   s3transfer
-botocore-stubs==1.35.2
+botocore-stubs==1.35.25
     # via boto3-stubs
 certifi==2024.2.2
     # via
@@ -67,7 +63,9 @@ cfn-lint==0.72.0
     #   -c requirements/./constraints.txt
     #   moto
 charset-normalizer==3.3.2
-    # via requests
+    # via
+    #   -c requirements/_base.txt
+    #   requests
 click==8.1.7
     # via
     #   -c requirements/_base.txt
@@ -91,25 +89,20 @@ ecdsa==0.19.0
     #   moto
     #   python-jose
     #   sshpubkeys
-exceptiongroup==1.2.0
-    # via
-    #   -c requirements/_base.txt
-    #   anyio
-    #   pytest
-faker==27.0.0
+faker==29.0.0
     # via -r requirements/_test.in
 flask==2.1.3
     # via
     #   flask-cors
     #   moto
-flask-cors==4.0.1
+flask-cors==5.0.0
     # via moto
 frozenlist==1.4.1
     # via
     #   -c requirements/_base.txt
     #   aiohttp
     #   aiosignal
-graphql-core==3.2.3
+graphql-core==3.2.4
     # via moto
 greenlet==3.0.3
     # via
@@ -154,11 +147,11 @@ jmespath==1.0.1
     #   botocore
 jschema-to-python==1.2.3
     # via cfn-lint
-jsondiff==2.2.0
+jsondiff==2.2.1
     # via moto
 jsonpatch==1.33
     # via cfn-lint
-jsonpickle==3.2.2
+jsonpickle==3.3.0
     # via jschema-to-python
 jsonpointer==3.0.0
     # via jsonpatch
@@ -194,7 +187,7 @@ multidict==6.0.5
     #   -c requirements/_base.txt
     #   aiohttp
     #   yarl
-mypy==1.11.1
+mypy==1.11.2
     # via sqlalchemy
 mypy-extensions==1.0.0
     # via mypy
@@ -210,13 +203,13 @@ packaging==24.0
     # via
     #   -c requirements/_base.txt
     #   pytest
-pbr==6.0.0
+pbr==6.1.0
     # via
     #   jschema-to-python
     #   sarif-om
 pluggy==1.5.0
     # via pytest
-pyasn1==0.6.0
+pyasn1==0.6.1
     # via
     #   python-jose
     #   rsa
@@ -228,13 +221,13 @@ pyinstrument==4.6.2
     # via
     #   -c requirements/_base.txt
     #   -r requirements/_test.in
-pyparsing==3.1.2
+pyparsing==3.1.4
     # via moto
 pyrsistent==0.20.0
     # via
     #   -c requirements/_base.txt
     #   jsonschema
-pytest==8.3.2
+pytest==8.3.3
     # via
     #   -r requirements/_test.in
     #   pytest-asyncio
@@ -261,7 +254,7 @@ python-dateutil==2.9.0.post0
     #   moto
 python-jose==3.3.0
     # via moto
-pytz==2024.1
+pytz==2024.2
     # via moto
 pyyaml==6.0.1
     # via
@@ -274,6 +267,7 @@ pyyaml==6.0.1
     #   responses
 requests==2.32.3
     # via
+    #   -c requirements/_base.txt
     #   docker
     #   moto
     #   responses
@@ -318,30 +312,25 @@ sqlalchemy2-stubs==0.0.2a38
     # via sqlalchemy
 sshpubkeys==3.3.1
     # via moto
-tomli==2.0.1
-    # via
-    #   coverage
-    #   mypy
-    #   pytest
 types-aiofiles==24.1.0.20240626
     # via -r requirements/_test.in
-types-awscrt==0.21.2
+types-awscrt==0.21.5
     # via botocore-stubs
 types-boto3==1.0.2
     # via -r requirements/_test.in
-types-s3transfer==0.10.1
+types-s3transfer==0.10.2
     # via boto3-stubs
 typing-extensions==4.10.0
     # via
     #   -c requirements/_base.txt
     #   alembic
-    #   anyio
     #   boto3-stubs
     #   mypy
     #   sqlalchemy2-stubs
 urllib3==2.2.2
     # via
     #   -c requirements/../../../requirements/constraints.txt
+    #   -c requirements/_base.txt
     #   botocore
     #   docker
     #   requests
@@ -351,7 +340,9 @@ werkzeug==2.1.2
     #   flask
     #   moto
 wrapt==1.16.0
-    # via aws-xray-sdk
+    # via
+    #   -c requirements/_base.txt
+    #   aws-xray-sdk
 xmltodict==0.13.0
     # via moto
 yarl==1.9.4
diff --git a/services/api-server/requirements/_tools.txt b/services/api-server/requirements/_tools.txt
index ee67b7d505f4..f7033c1523f0 100644
--- a/services/api-server/requirements/_tools.txt
+++ b/services/api-server/requirements/_tools.txt
@@ -1,8 +1,8 @@
-astroid==3.2.4
+astroid==3.3.4
     # via pylint
 black==24.8.0
     # via -r requirements/../../../requirements/devenv.txt
-build==1.2.1
+build==1.2.2
     # via pip-tools
 bump2version==1.0.1
     # via -r requirements/../../../requirements/devenv.txt
@@ -20,9 +20,9 @@ dill==0.3.8
     # via pylint
 distlib==0.3.8
     # via virtualenv
-filelock==3.15.4
+filelock==3.16.1
     # via virtualenv
-identify==2.6.0
+identify==2.6.1
     # via pre-commit
 isort==5.13.2
     # via
@@ -41,7 +41,7 @@ markupsafe==2.1.5
     #   jinja2
 mccabe==0.7.0
     # via pylint
-mypy==1.11.1
+mypy==1.11.2
     # via
     #   -c requirements/_test.txt
     #   -r requirements/../../../requirements/devenv.txt
@@ -64,14 +64,14 @@ pip==24.2
     # via pip-tools
 pip-tools==7.4.1
     # via -r requirements/../../../requirements/devenv.txt
-platformdirs==4.2.2
+platformdirs==4.3.6
     # via
     #   black
     #   pylint
     #   virtualenv
 pre-commit==3.8.0
     # via -r requirements/../../../requirements/devenv.txt
-pylint==3.2.6
+pylint==3.3.0
     # via -r requirements/../../../requirements/devenv.txt
 pyproject-hooks==1.1.0
     # via
@@ -84,33 +84,23 @@ pyyaml==6.0.1
     #   -c requirements/_test.txt
     #   pre-commit
     #   watchdog
-ruff==0.6.1
+ruff==0.6.7
     # via -r requirements/../../../requirements/devenv.txt
 setuptools==69.2.0
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
     #   pip-tools
-tomli==2.0.1
-    # via
-    #   -c requirements/_test.txt
-    #   black
-    #   build
-    #   mypy
-    #   pip-tools
-    #   pylint
 tomlkit==0.13.2
     # via pylint
 typing-extensions==4.10.0
     # via
     #   -c requirements/_base.txt
     #   -c requirements/_test.txt
-    #   astroid
-    #   black
     #   mypy
-virtualenv==20.26.3
+virtualenv==20.26.5
     # via pre-commit
-watchdog==4.0.2
+watchdog==5.0.2
     # via -r requirements/_tools.in
 wheel==0.44.0
     # via pip-tools
diff --git a/services/api-server/setup.cfg b/services/api-server/setup.cfg
index 5cef2727de76..da01c1bbd3ea 100644
--- a/services/api-server/setup.cfg
+++ b/services/api-server/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.6.0
+current_version = 0.7.0
 commit = True
 message = services/api-server version: {current_version} → {new_version}
 tag = False
@@ -10,12 +10,12 @@ commit_args = --no-verify
 [tool:pytest]
 asyncio_mode = auto
 addopts = --strict-markers
-markers =
+markers = 
 	slow: marks tests as slow (deselect with '-m "not slow"')
 	acceptance_test: "marks tests as 'acceptance tests' i.e. does the system do what the user expects? Typically those are workflows."
 	testit: "marks test to run during development"
 
 [mypy]
-plugins =
+plugins = 
 	pydantic.mypy
 	sqlalchemy.ext.mypy.plugin
diff --git a/services/api-server/src/simcore_service_api_server/_meta.py b/services/api-server/src/simcore_service_api_server/_meta.py
index 6d44a73765df..33a0480a25ce 100644
--- a/services/api-server/src/simcore_service_api_server/_meta.py
+++ b/services/api-server/src/simcore_service_api_server/_meta.py
@@ -17,6 +17,7 @@
 VERSION: Final[Version] = info.version
 API_VERSION: Final[VersionStr] = info.__version__
 API_VTAG: Final[str] = info.api_prefix_path_tag
+APP_NAME: Final[str] = PROJECT_NAME
 SUMMARY: Final[str] = info.get_summary()
 
 
diff --git a/services/api-server/src/simcore_service_api_server/api/routes/files.py b/services/api-server/src/simcore_service_api_server/api/routes/files.py
index a89771dc490c..76110352782d 100644
--- a/services/api-server/src/simcore_service_api_server/api/routes/files.py
+++ b/services/api-server/src/simcore_service_api_server/api/routes/files.py
@@ -2,7 +2,6 @@
 import datetime
 import io
 import logging
-from textwrap import dedent
 from typing import IO, Annotated, Any
 from uuid import UUID
 
@@ -10,7 +9,6 @@
 from fastapi import File as FileParam
 from fastapi import Header, Request, UploadFile, status
 from fastapi.exceptions import HTTPException
-from fastapi.responses import HTMLResponse
 from fastapi_pagination.api import create_page
 from models_library.api_schemas_storage import ETag, FileUploadCompletionBody, LinkType
 from models_library.basic_types import SHA256Str
@@ -29,7 +27,6 @@
 from starlette.datastructures import URL
 from starlette.responses import RedirectResponse
 
-from ..._meta import API_VTAG
 from ...exceptions.service_errors_utils import DEFAULT_BACKEND_SERVICE_STATUS_CODES
 from ...models.pagination import Page, PaginationParams
 from ...models.schemas.errors import ErrorGet
@@ -158,7 +155,11 @@ def _get_spooled_file_size(file_io: IO) -> int:
     return file_size
 
 
-@router.put("/content", response_model=File, responses=_FILE_STATUS_CODES)
+@router.put(
+    "/content",
+    response_model=File,
+    responses=_FILE_STATUS_CODES,
+)
 @cancel_on_disconnect
 async def upload_file(
     request: Request,
@@ -433,24 +434,3 @@ async def download_file(
 
     _logger.info("Downloading %s to %s ...", file_meta, presigned_download_link)
     return RedirectResponse(presigned_download_link)
-
-
-async def files_upload_multiple_view():
-    """Extra **Web form** to upload multiple files at http://localhost:8000/v0/files/upload-form-view
-        and overcomes the limitations of Swagger-UI view
-
-    NOTE: Only enabled if DEBUG=1
-    NOTE: As of 2020-10-07, Swagger UI doesn't support multiple file uploads in the same form field
-    """
-    return HTMLResponse(
-        content=dedent(
-            f"""
-        <body>
-        <form action="/{API_VTAG}/files:upload" enctype="multipart/form-data" method="post">
-        <input name="files" type="file" multiple>
-        <input type="submit">
-        </form>
-        </body>
-        """
-        )
-    )
diff --git a/services/api-server/src/simcore_service_api_server/api/routes/solvers.py b/services/api-server/src/simcore_service_api_server/api/routes/solvers.py
index 18e238208266..01e58dc2653d 100644
--- a/services/api-server/src/simcore_service_api_server/api/routes/solvers.py
+++ b/services/api-server/src/simcore_service_api_server/api/routes/solvers.py
@@ -265,7 +265,8 @@ async def list_solver_ports(
 @router.get(
     "/{solver_key:path}/releases/{version}/pricing_plan",
     response_model=ServicePricingPlanGet,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
+    description="Gets solver pricing plan\n\n"
+    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7"),
     responses=_SOLVER_STATUS_CODES,
 )
 async def get_solver_pricing_plan(
diff --git a/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs.py b/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs.py
index 560d656d5d4e..0cdbfdf7e0a3 100644
--- a/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs.py
+++ b/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs.py
@@ -40,7 +40,6 @@
 from ..dependencies.authentication import get_current_user_id, get_product_name
 from ..dependencies.services import get_api_client
 from ..dependencies.webserver import AuthSession, get_webserver_session
-from ._common import API_SERVER_DEV_FEATURES_ENABLED
 from ._constants import FMSG_CHANGELOG_ADDED_IN_VERSION, FMSG_CHANGELOG_NEW_IN_VERSION
 
 _logger = logging.getLogger(__name__)
@@ -145,9 +144,8 @@ async def create_job(
     "/{solver_key:path}/releases/{version}/jobs/{job_id:uuid}",
     status_code=status.HTTP_204_NO_CONTENT,
     responses=JOBS_STATUS_CODES,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
     description="Deletes an existing solver job\n\n"
-    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.5"),
+    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7"),
 )
 async def delete_job(
     solver_key: SolverKeyId,
@@ -271,9 +269,8 @@ async def inspect_job(
     "/{solver_key:path}/releases/{version}/jobs/{job_id:uuid}/metadata",
     response_model=JobMetadata,
     responses=METADATA_STATUS_CODES,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
     description="Updates custom metadata from a job\n\n"
-    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.5"),
+    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7"),
 )
 async def replace_job_custom_metadata(
     solver_key: SolverKeyId,
diff --git a/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py b/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py
index 2be5ac934d11..fb98a858e475 100644
--- a/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py
+++ b/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py
@@ -52,7 +52,6 @@
 from ..dependencies.rabbitmq import get_log_check_timeout, get_log_distributor
 from ..dependencies.services import get_api_client
 from ..dependencies.webserver import AuthSession, get_webserver_session
-from ._common import API_SERVER_DEV_FEATURES_ENABLED
 from ._constants import FMSG_CHANGELOG_NEW_IN_VERSION
 from .solvers_jobs import (
     JOBS_STATUS_CODES,
@@ -128,7 +127,8 @@ async def list_jobs(
 ):
     """List of jobs in a specific released solver (limited to 20 jobs)
 
-    SEE `get_jobs_page` for paginated version of this function
+    - DEPRECATION: This implementation and returned values are deprecated and the will be replaced by that of get_jobs_page
+    - SEE `get_jobs_page` for paginated version of this function
     """
 
     solver = await catalog_client.get_service(
@@ -158,7 +158,6 @@ async def list_jobs(
     "/{solver_key:path}/releases/{version}/jobs/page",
     response_model=Page[Job],
     responses=JOBS_STATUS_CODES,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
     description=(
         "List of jobs on a specific released solver (includes pagination)\n\n"
         + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7")
@@ -349,9 +348,8 @@ async def get_job_output_logfile(
     "/{solver_key:path}/releases/{version}/jobs/{job_id:uuid}/metadata",
     response_model=JobMetadata,
     responses=METADATA_STATUS_CODES,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
     description="Gets custom metadata from a job\n\n"
-    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.5"),
+    + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7"),
 )
 async def get_job_custom_metadata(
     solver_key: SolverKeyId,
@@ -380,7 +378,7 @@ async def get_job_custom_metadata(
     "/{solver_key:path}/releases/{version}/jobs/{job_id:uuid}/wallet",
     response_model=WalletGetWithAvailableCredits | None,
     responses=WALLET_STATUS_CODES,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
+    description=("Get job wallet\n\n" + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7")),
 )
 async def get_job_wallet(
     solver_key: SolverKeyId,
@@ -400,7 +398,9 @@ async def get_job_wallet(
     "/{solver_key:path}/releases/{version}/jobs/{job_id:uuid}/pricing_unit",
     response_model=PricingUnitGet | None,
     responses=_PRICING_UNITS_STATUS_CODES,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
+    description=(
+        "Get job pricing unit\n\n" + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7")
+    ),
 )
 async def get_job_pricing_unit(
     solver_key: SolverKeyId,
diff --git a/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py b/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py
index 60916446809b..177b50d1e6cc 100644
--- a/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py
+++ b/services/api-server/src/simcore_service_api_server/api/routes/studies_jobs.py
@@ -52,7 +52,7 @@
 from ...services.webserver import AuthSession
 from ..dependencies.application import get_reverse_url_mapper
 from ._common import API_SERVER_DEV_FEATURES_ENABLED
-from ._constants import FMSG_CHANGELOG_CHANGED_IN_VERSION
+from ._constants import FMSG_CHANGELOG_CHANGED_IN_VERSION, FMSG_CHANGELOG_NEW_IN_VERSION
 
 _logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -333,7 +333,10 @@ async def get_study_job_output_logfile(
 @router.get(
     "/{study_id}/jobs/{job_id}/metadata",
     response_model=JobMetadata,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
+    description=(
+        "Get custom metadata from a study's job\n\n"
+        + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7")
+    ),
 )
 async def get_study_job_custom_metadata(
     study_id: StudyID,
@@ -341,7 +344,6 @@ async def get_study_job_custom_metadata(
     webserver_api: Annotated[AuthSession, Depends(get_webserver_session)],
     url_for: Annotated[Callable, Depends(get_reverse_url_mapper)],
 ):
-    """Gets custom metadata from a job"""
     job_name = _compose_job_resource_name(study_id, job_id)
     msg = f"Gets metadata attached to study_id={study_id!r} job_id={job_id!r}.\njob_name={job_name!r}.\nSEE https://github.com/ITISFoundation/osparc-simcore/issues/4313"
     _logger.debug(msg)
@@ -361,7 +363,10 @@ async def get_study_job_custom_metadata(
 @router.put(
     "/{study_id}/jobs/{job_id}/metadata",
     response_model=JobMetadata,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
+    description=(
+        "Changes custom metadata of a study's job\n\n"
+        + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7")
+    ),
 )
 async def replace_study_job_custom_metadata(
     study_id: StudyID,
@@ -370,7 +375,6 @@ async def replace_study_job_custom_metadata(
     webserver_api: Annotated[AuthSession, Depends(get_webserver_session)],
     url_for: Annotated[Callable, Depends(get_reverse_url_mapper)],
 ):
-    """Changes job's custom metadata"""
     job_name = _compose_job_resource_name(study_id, job_id)
     msg = f"Attaches metadata={replace.metadata!r} to study_id={study_id!r} job_id={job_id!r}.\njob_name={job_name!r}.\nSEE https://github.com/ITISFoundation/osparc-simcore/issues/4313"
 
diff --git a/services/api-server/src/simcore_service_api_server/api/routes/wallets.py b/services/api-server/src/simcore_service_api_server/api/routes/wallets.py
index 0043dce61575..0b3df66b1d54 100644
--- a/services/api-server/src/simcore_service_api_server/api/routes/wallets.py
+++ b/services/api-server/src/simcore_service_api_server/api/routes/wallets.py
@@ -7,7 +7,7 @@
 from ...exceptions.service_errors_utils import DEFAULT_BACKEND_SERVICE_STATUS_CODES
 from ...models.schemas.errors import ErrorGet
 from ..dependencies.webserver import AuthSession, get_webserver_session
-from ._common import API_SERVER_DEV_FEATURES_ENABLED
+from ._constants import FMSG_CHANGELOG_NEW_IN_VERSION
 
 _logger = logging.getLogger(__name__)
 
@@ -28,8 +28,8 @@
 
 @router.get(
     "/default",
+    description="Get default wallet\n\n" + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7"),
     response_model=WalletGetWithAvailableCredits,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
     responses=WALLET_STATUS_CODES,
 )
 async def get_default_wallet(
@@ -41,8 +41,8 @@ async def get_default_wallet(
 @router.get(
     "/{wallet_id}",
     response_model=WalletGetWithAvailableCredits,
-    include_in_schema=API_SERVER_DEV_FEATURES_ENABLED,
     responses=WALLET_STATUS_CODES,
+    description="Get wallet\n\n" + FMSG_CHANGELOG_NEW_IN_VERSION.format("0.7"),
 )
 async def get_wallet(
     wallet_id: int,
diff --git a/services/api-server/src/simcore_service_api_server/core/application.py b/services/api-server/src/simcore_service_api_server/core/application.py
index 82612bfe86cb..39b05720a6d2 100644
--- a/services/api-server/src/simcore_service_api_server/core/application.py
+++ b/services/api-server/src/simcore_service_api_server/core/application.py
@@ -5,10 +5,11 @@
 from models_library.basic_types import BootModeEnum
 from packaging.version import Version
 from servicelib.fastapi.profiler_middleware import ProfilerMiddleware
+from servicelib.fastapi.tracing import setup_tracing
 from servicelib.logging_utils import config_all_loggers
 
 from .. import exceptions
-from .._meta import API_VERSION, API_VTAG
+from .._meta import API_VERSION, API_VTAG, APP_NAME
 from ..api.root import create_router
 from ..api.routes.health import router as health_router
 from ..services import catalog, director_v2, storage, webserver
@@ -82,6 +83,8 @@ def init_app(settings: ApplicationSettings | None = None) -> FastAPI:
 
     if settings.API_SERVER_WEBSERVER:
         webserver.setup(app, settings.API_SERVER_WEBSERVER)
+    if app.state.settings.API_SERVER_TRACING:
+        setup_tracing(app, app.state.settings.API_SERVER_TRACING, APP_NAME)
     if settings.API_SERVER_CATALOG:
         catalog.setup(app, settings.API_SERVER_CATALOG)
 
diff --git a/services/api-server/src/simcore_service_api_server/core/settings.py b/services/api-server/src/simcore_service_api_server/core/settings.py
index 9cc61e1c11ad..3c00b3489b9f 100644
--- a/services/api-server/src/simcore_service_api_server/core/settings.py
+++ b/services/api-server/src/simcore_service_api_server/core/settings.py
@@ -9,6 +9,7 @@
 from settings_library.postgres import PostgresSettings
 from settings_library.rabbit import RabbitSettings
 from settings_library.storage import StorageSettings
+from settings_library.tracing import TracingSettings
 from settings_library.utils_logging import MixinLoggingSettings
 from settings_library.utils_session import (
     DEFAULT_SESSION_COOKIE_NAME,
@@ -86,6 +87,9 @@ class ApplicationSettings(BasicSettings):
     API_SERVER_ALLOWED_HEALTH_CHECK_FAILURES: PositiveInt = 5
     API_SERVER_PROMETHEUS_INSTRUMENTATION_COLLECT_SECONDS: PositiveInt = 5
     API_SERVER_PROFILING: bool = False
+    API_SERVER_TRACING: TracingSettings | None = Field(
+        auto_default_from_env=True, description="settings for opentelemetry tracing"
+    )
 
     @cached_property
     def debug(self) -> bool:
diff --git a/services/api-server/src/simcore_service_api_server/exceptions/backend_errors.py b/services/api-server/src/simcore_service_api_server/exceptions/backend_errors.py
index ffaa1fd5618f..0a23d0400f75 100644
--- a/services/api-server/src/simcore_service_api_server/exceptions/backend_errors.py
+++ b/services/api-server/src/simcore_service_api_server/exceptions/backend_errors.py
@@ -12,7 +12,9 @@ class BaseBackEndError(ApiServerBaseError):
 
     @classmethod
     def named_fields(cls) -> set[str]:
-        return set(parse.compile(cls.msg_template).named_fields)
+        return set(
+            parse.compile(cls.msg_template).named_fields  # pylint: disable=no-member
+        )
 
 
 class ListSolversOrStudiesError(BaseBackEndError):
diff --git a/services/api-server/src/simcore_service_api_server/models/api_resources.py b/services/api-server/src/simcore_service_api_server/models/api_resources.py
index 3f64fd323c03..82a62aa068cd 100644
--- a/services/api-server/src/simcore_service_api_server/models/api_resources.py
+++ b/services/api-server/src/simcore_service_api_server/models/api_resources.py
@@ -2,8 +2,8 @@
 import urllib.parse
 from typing import Any
 
+from common_library.pydantic_basic_types import ConstrainedStr
 from pydantic import BaseModel, Field
-from pydantic.types import ConstrainedStr
 
 # RESOURCE NAMES https://cloud.google.com/apis/design/resource_names
 #
@@ -32,6 +32,7 @@ class RelativeResourceName(ConstrainedStr):
 
     regex = re.compile(_RELATIVE_RESOURCE_NAME_RE)
 
+    # TODO: no frozen here with the new type, WHAT TO GCR?
     class Config:
         frozen = True
diff --git a/services/api-server/src/simcore_service_api_server/models/basic_types.py b/services/api-server/src/simcore_service_api_server/models/basic_types.py
index 53ea6fe31ce7..d209897d67c5 100644
--- a/services/api-server/src/simcore_service_api_server/models/basic_types.py
+++ b/services/api-server/src/simcore_service_api_server/models/basic_types.py
@@ -1,8 +1,8 @@
 import re
 
+from common_library.pydantic_basic_types import ConstrainedStr
 from fastapi.responses import StreamingResponse
 from models_library.basic_regex import SIMPLE_VERSION_RE
-from pydantic import ConstrainedStr
 
 
 class VersionStr(ConstrainedStr):
diff --git a/services/api-server/src/simcore_service_api_server/models/schemas/files.py b/services/api-server/src/simcore_service_api_server/models/schemas/files.py
index eece67dfa594..d62128f1bd4b 100644
--- a/services/api-server/src/simcore_service_api_server/models/schemas/files.py
+++ b/services/api-server/src/simcore_service_api_server/models/schemas/files.py
@@ -6,19 +6,12 @@
 from uuid import UUID, uuid3
 
 import aiofiles
+from common_library.pydantic_basic_types import ConstrainedStr
 from fastapi import UploadFile
 from models_library.api_schemas_storage import ETag
 from models_library.basic_types import SHA256Str
 from models_library.projects_nodes_io import StorageFileID
-from pydantic import (
-    AnyUrl,
-    BaseModel,
-    ByteSize,
-    ConstrainedStr,
-    Field,
-    parse_obj_as,
-    validator,
-)
+from pydantic import AnyUrl, BaseModel, ByteSize, Field, parse_obj_as, validator
 from servicelib.file_utils import create_sha256_checksum
 
 _NAMESPACE_FILEID_KEY = UUID("aa154444-d22d-4290-bb15-df37dba87865")
diff --git a/services/autoscaling/requirements/_base.txt b/services/autoscaling/requirements/_base.txt
index 0716ffccf418..ee6878dd43b0 100644
--- a/services/autoscaling/requirements/_base.txt
+++ b/services/autoscaling/requirements/_base.txt
@@ -8,7 +8,9 @@ aiobotocore==2.13.1
     # via aioboto3
 aiocache==0.12.2
     # via
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
     #   -r requirements/../../../packages/aws-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
     #   -r requirements/_base.in
 aiodebug==2.3.0
     # via
@@ -57,13 +59,12 @@ arrow==1.3.0
     #   -r requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/_base.in
     #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
     #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/aws-library/requirements/_base.in
     #   -r requirements/../../../packages/models-library/requirements/_base.in
     #   -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in
     #   -r requirements/../../../packages/service-library/requirements/_base.in
-async-timeout==4.0.3
-    # via
-    #   aiohttp
-    #   redis
+asgiref==3.8.1
+    # via opentelemetry-instrumentation-asgi
 attrs==23.2.0
     # via
     #   aiohttp
@@ -94,6 +95,9 @@ certifi==2024.2.2
     #   -c requirements/../../../requirements/constraints.txt
     #   httpcore
     #   httpx
+    #   requests
+charset-normalizer==3.3.2
+    # via requests
 click==8.1.7
     # via
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
@@ -111,6 +115,12 @@ dask==2024.5.1
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
     #   -r requirements/_base.in
     #   distributed
+deprecated==1.2.14
+    # via
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-semantic-conventions
 distributed==2024.5.1
     # via
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
@@ -119,8 +129,6 @@ dnspython==2.6.1
     # via email-validator
 email-validator==2.1.1
     # via pydantic
-exceptiongroup==1.2.1
-    # via anyio
 fast-depends==2.4.2
     # via faststream
 fastapi==0.99.1
@@ -152,6 +160,12 @@ fsspec==2024.5.0
     # via
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
     #   dask
+googleapis-common-protos==1.65.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+grpcio==1.66.0
+    # via opentelemetry-exporter-otlp-proto-grpc
 h11==0.14.0
     # via
     #   httpcore
@@ -178,11 +192,13 @@ idna==3.7
     #   anyio
     #   email-validator
     #   httpx
+    #   requests
     #   yarl
 importlib-metadata==7.1.0
     # via
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
     #   dask
+    #   opentelemetry-api
 jinja2==3.1.4
     # via
     #   -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -232,6 +248,65 @@ multidict==6.0.5
     # via
     #   aiohttp
     #   yarl
+opentelemetry-api==1.26.0
+    # via
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+    #   opentelemetry-instrumentation
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp==1.26.0
+    # via
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-exporter-otlp-proto-common==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-exporter-otlp-proto-http==1.26.0
+    # via opentelemetry-exporter-otlp
+opentelemetry-instrumentation==0.47b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+opentelemetry-instrumentation-asgi==0.47b0
+    # via opentelemetry-instrumentation-fastapi
+opentelemetry-instrumentation-fastapi==0.47b0
+    # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
+opentelemetry-instrumentation-requests==0.47b0
+    # via
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+opentelemetry-proto==1.26.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-sdk==1.26.0
+    # via
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-exporter-otlp-proto-http
+opentelemetry-semantic-conventions==0.47b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
+    #   opentelemetry-sdk
+opentelemetry-util-http==0.47b0
+    # via
+    #   opentelemetry-instrumentation-asgi
+    #   opentelemetry-instrumentation-fastapi
+    #   opentelemetry-instrumentation-requests
 orjson==3.10.3
     # via
     #   -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -268,9 +343,15 @@ prometheus-client==0.20.0
     #   prometheus-fastapi-instrumentator
 prometheus-fastapi-instrumentator==6.1.0
     # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
-psutil==5.9.8
+protobuf==4.25.4
+    # via
+    #   googleapis-common-protos
+    #   opentelemetry-proto
+psutil==6.0.0
     # via
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
     #   distributed
 pydantic==1.10.15
     # via
@@ -351,6 +432,12 @@ referencing==0.29.3
     #   -c requirements/../../../packages/service-library/requirements/./constraints.txt
     #   jsonschema
     #   jsonschema-specifications
+repro-zipfile==0.3.1
+    # via
+    #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in
+    #   -r requirements/../../../packages/service-library/requirements/_base.in
+requests==2.32.3
+    # via opentelemetry-exporter-otlp-proto-http
 rich==13.7.1
     # via
     #   -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in
@@ -364,6 +451,8 @@ rpds-py==0.18.1
     #   referencing
 s3transfer==0.10.1
     # via boto3
+setuptools==74.0.0
+    # via opentelemetry-instrumentation
 sh==2.0.6
     # via -r requirements/../../../packages/aws-library/requirements/_base.in
 shellingham==1.5.4
@@ -440,16 +529,15 @@ typing-extensions==4.11.0
     # via
     #   aiodebug
     #   aiodocker
-    #   anyio
     #   fastapi
     #   faststream
+    #   opentelemetry-sdk
     #   pydantic
     #   typer
     #   types-aiobotocore
     #   types-aiobotocore-ec2
     #   types-aiobotocore-s3
     #   types-aiobotocore-ssm
-    #   uvicorn
 urllib3==2.2.1
     # via
     #   -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt
@@ -467,10 +555,14 @@ urllib3==2.2.1
     #   -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt
     #   botocore
     #   distributed
+    #   requests
 uvicorn==0.29.0
     # via -r requirements/../../../packages/service-library/requirements/_fastapi.in
 wrapt==1.16.0
-    # via aiobotocore
+    # via
+    #   aiobotocore
+    #   deprecated
+    #   opentelemetry-instrumentation
 yarl==1.9.4
     # via
     #   aio-pika
diff --git a/services/autoscaling/requirements/_test.txt b/services/autoscaling/requirements/_test.txt
index dca31f29f759..47379c4d69fa 100644
--- a/services/autoscaling/requirements/_test.txt
+++ b/services/autoscaling/requirements/_test.txt
@@ -6,10 +6,6 @@ anyio==4.3.0
     #   httpx
 asgi-lifespan==2.1.0
     # via -r requirements/_test.in
-async-timeout==4.0.3
-    # via
-    #   -c requirements/_base.txt
-    #   redis
 attrs==23.2.0
     # via
     #   -c requirements/_base.txt
@@ -44,12 +40,14 @@ certifi==2024.2.2
     #   httpcore
     #   httpx
     #   requests
-cffi==1.17.0
+cffi==1.17.1
     # via cryptography
 cfn-lint==1.10.3
     # via moto
charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -58,33 +56,28 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # joserfc # moto -deepdiff==7.0.1 +deepdiff==8.0.1 # via -r requirements/_test.in docker==7.1.0 # via # -r requirements/_test.in # moto -exceptiongroup==1.2.1 - # via - # -c requirements/_base.txt - # anyio - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in -fakeredis==2.23.5 +fakeredis==2.24.1 # via -r requirements/_test.in flask==3.0.3 # via # flask-cors # moto -flask-cors==4.0.1 +flask-cors==5.0.0 # via moto -graphql-core==3.2.3 +graphql-core==3.2.4 # via moto h11==0.14.0 # via @@ -125,7 +118,7 @@ jmespath==1.0.1 # botocore joserfc==1.0.0 # via moto -jsondiff==2.2.0 +jsondiff==2.2.1 # via moto jsonpatch==1.33 # via cfn-lint @@ -155,7 +148,7 @@ markupsafe==2.1.5 # -c requirements/_base.txt # jinja2 # werkzeug -moto==5.0.13 +moto==5.0.15 # via -r requirements/_test.in mpmath==1.3.0 # via sympy @@ -165,7 +158,7 @@ openapi-schema-validator==0.6.2 # via openapi-spec-validator openapi-spec-validator==0.7.1 # via moto -ordered-set==4.1.0 +orderly-set==5.2.2 # via deepdiff packaging==24.0 # via @@ -180,11 +173,11 @@ ply==3.11 # via jsonpath-ng pprintpp==0.4.0 # via pytest-icdiff -psutil==5.9.8 +psutil==6.0.0 # via # -c requirements/_base.txt # -r requirements/_test.in -py-partiql-parser==0.5.5 +py-partiql-parser==0.5.6 # via moto pycparser==2.22 # via cffi @@ -193,9 +186,9 @@ pydantic==1.10.15 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # aws-sam-translator -pyparsing==3.1.2 +pyparsing==3.1.4 # via moto -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -245,10 +238,11 @@ referencing==0.29.3 # jsonschema # jsonschema-path # jsonschema-specifications -regex==2024.7.24 +regex==2024.9.11 # via cfn-lint requests==2.32.3 # via + # -c requirements/_base.txt # docker # jsonschema-path # moto @@ -268,8 +262,10 @@ s3transfer==0.10.1 # via # -c requirements/_base.txt # boto3 -setuptools==73.0.1 - # via moto +setuptools==74.0.0 + # via + # -c requirements/_base.txt + # moto six==1.16.0 # via # -c requirements/_base.txt @@ -285,14 +281,10 @@ sortedcontainers==2.4.0 # via # -c requirements/_base.txt # fakeredis -sympy==1.13.2 +sympy==1.13.3 # via cfn-lint termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # pytest types-aiobotocore==2.13.0 # via # -c requirements/_base.txt @@ -301,7 +293,7 @@ types-aiobotocore-ec2==2.13.0 # via # -c requirements/_base.txt # types-aiobotocore -types-aiobotocore-iam==2.13.2 +types-aiobotocore-iam==2.13.3 # via types-aiobotocore types-aiobotocore-s3==2.13.0 # via @@ -315,15 +307,13 @@ types-awscrt==0.20.9 # via # -c requirements/_base.txt # botocore-stubs -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in typing-extensions==4.11.0 # via # -c requirements/_base.txt - # anyio # aws-sam-translator # cfn-lint - # fakeredis # pydantic # types-aiobotocore # types-aiobotocore-ec2 @@ -338,7 +328,7 @@ urllib3==2.2.1 # docker # requests # responses -werkzeug==3.0.3 +werkzeug==3.0.4 # via # flask # moto diff --git a/services/autoscaling/requirements/_tools.txt b/services/autoscaling/requirements/_tools.txt index db86636a373b..97a49efc2ebd 100644 --- a/services/autoscaling/requirements/_tools.txt +++ 
b/services/autoscaling/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -48,14 +48,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,32 +68,23 @@ pyyaml==6.0.1 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==74.0.0 # via + # -c requirements/_base.txt # -c requirements/_test.txt # pip-tools -tomli==2.0.1 - # via - # -c requirements/_test.txt - # black - # build - # mypy - # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.11.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/autoscaling/src/simcore_service_autoscaling/constants.py b/services/autoscaling/src/simcore_service_autoscaling/constants.py index 4415d3eca2ca..086c47b906fb 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/constants.py +++ b/services/autoscaling/src/simcore_service_autoscaling/constants.py @@ -12,9 +12,15 @@ ) PREPULL_COMMAND_NAME: Final[str] = "docker images pulling" -DOCKER_PULL_COMMAND: Final[ - str -] = "docker compose -f /docker-pull.compose.yml -p buffering pull" +DOCKER_JOIN_COMMAND_NAME: Final[str] = "docker swarm join" +DOCKER_JOIN_COMMAND_EC2_TAG_KEY: Final[AWSTagKey] = parse_obj_as( + AWSTagKey, "io.simcore.autoscaling.joined_command_sent" +) + + +DOCKER_PULL_COMMAND: Final[str] = ( + "docker compose -f /docker-pull.compose.yml -p buffering pull" +) PRE_PULLED_IMAGES_EC2_TAG_KEY: Final[AWSTagKey] = parse_obj_as( AWSTagKey, "io.simcore.autoscaling.pre_pulled_images" diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/application.py b/services/autoscaling/src/simcore_service_autoscaling/core/application.py index 403d72e854b3..ce10d22f7825 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/application.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/application.py @@ -1,6 +1,7 @@ import logging from fastapi import FastAPI +from servicelib.fastapi.tracing import setup_tracing from .._meta import ( API_VERSION, @@ -69,6 +70,8 @@ def create_app(settings: ApplicationSettings) -> FastAPI: setup_auto_scaler_background_task(app) setup_buffer_machines_pool_task(app) + if app.state.settings.AUTOSCALING_TRACING: + setup_tracing(app, app.state.settings.AUTOSCALING_TRACING, APP_NAME) # ERROR HANDLERS diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/settings.py 
b/services/autoscaling/src/simcore_service_autoscaling/core/settings.py index 299293056d80..23af4b958bf6 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/settings.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/settings.py @@ -28,6 +28,7 @@ from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings from settings_library.ssm import SSMSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from types_aiobotocore_ec2.literals import InstanceTypeType @@ -112,7 +113,7 @@ class EC2InstancesSettings(BaseCustomSettings): ) EC2_INSTANCES_TIME_BEFORE_TERMINATION: datetime.timedelta = Field( default=datetime.timedelta(minutes=1), - description="Time after which an EC2 instance may being the termination process (0<=T<=59 minutes, is automatically capped)" + description="Time after which an EC2 instance may begin the termination process (0<=T<=59 minutes, is automatically capped)" "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", ) EC2_INSTANCES_TIME_BEFORE_FINAL_TERMINATION: datetime.timedelta = Field( @@ -271,6 +272,20 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): " are maintained as active (in the docker terminology) " "but a docker node label named osparc-services-ready is attached", ) + AUTOSCALING_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) + + AUTOSCALING_DOCKER_JOIN_DRAINED: bool = Field( + default=True, + description="If true, new nodes join the swarm as drained. If false, as active.", + ) + + AUTOSCALING_WAIT_FOR_CLOUD_INIT_BEFORE_WARM_BUFFER_ACTIVATION: bool = Field( + default=False, + description="If True, then explicitly wait for the cloud-init process to be completed before issuing commands. 
" + "TIP: might be useful when cheap machines are used", + ) @cached_property def LOG_LEVEL(self): # noqa: N802 diff --git a/services/autoscaling/src/simcore_service_autoscaling/models.py b/services/autoscaling/src/simcore_service_autoscaling/models.py index c51c05680ec2..c77f9fe349cc 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/models.py +++ b/services/autoscaling/src/simcore_service_autoscaling/models.py @@ -98,6 +98,11 @@ class Cluster: # pylint: disable=too-many-instance-attributes "description": "This is a EC2-backed docker node which is docker drained and waiting for termination" } ) + retired_nodes: list[AssociatedInstance] = field( + metadata={ + "description": "This is a EC2-backed docker node which was retired and waiting to be drained and eventually terminated or re-used" + } + ) terminated_instances: list[NonAssociatedInstance] def can_scale_down(self) -> bool: @@ -107,6 +112,7 @@ def can_scale_down(self) -> bool: or self.drained_nodes or self.pending_ec2s or self.terminating_nodes + or self.retired_nodes ) def total_number_of_machines(self) -> int: @@ -119,6 +125,7 @@ def total_number_of_machines(self) -> int: + len(self.pending_ec2s) + len(self.broken_ec2s) + len(self.terminating_nodes) + + len(self.retired_nodes) ) def __repr__(self) -> str: @@ -137,6 +144,7 @@ def _get_instance_ids( f"buffer-ec2s: count={len(self.buffer_ec2s)} {_get_instance_ids(self.buffer_ec2s)}, " f"disconnected-nodes: count={len(self.disconnected_nodes)}, " f"terminating-nodes: count={len(self.terminating_nodes)} {_get_instance_ids(self.terminating_nodes)}, " + f"retired-nodes: count={len(self.retired_nodes)} {_get_instance_ids(self.retired_nodes)}, " f"terminated-ec2s: count={len(self.terminated_instances)} {_get_instance_ids(self.terminated_instances)})" ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py index 571c4bd24ba0..b1e629f4e7af 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py @@ -15,6 +15,7 @@ Resources, ) from aws_library.ec2._errors import EC2TooManyInstancesError +from aws_library.ec2._models import AWSTagValue from fastapi import FastAPI from models_library.generated_models.docker_rest_api import Node, NodeState from servicelib.logging_utils import log_catch, log_context @@ -22,6 +23,7 @@ from servicelib.utils_formatting import timedelta_as_minute_second from types_aiobotocore_ec2.literals import InstanceTypeType +from ..constants import DOCKER_JOIN_COMMAND_EC2_TAG_KEY, DOCKER_JOIN_COMMAND_NAME from ..core.errors import ( Ec2InvalidDnsNameError, TaskBestFittingInstanceNotFoundError, @@ -121,7 +123,7 @@ async def _analyze_current_cluster( ] # analyse attached ec2s - active_nodes, pending_nodes, all_drained_nodes = [], [], [] + active_nodes, pending_nodes, all_drained_nodes, retired_nodes = [], [], [], [] for instance in attached_ec2s: if await auto_scaling_mode.is_instance_active(app, instance): node_used_resources = await auto_scaling_mode.compute_node_used_resources( @@ -136,6 +138,9 @@ async def _analyze_current_cluster( ) elif auto_scaling_mode.is_instance_drained(instance): all_drained_nodes.append(instance) + elif await auto_scaling_mode.is_instance_retired(app, instance): + # it should be drained, but it is not, so we force it to be drained such that it might be re-used if needed + 
retired_nodes.append(instance) else: pending_nodes.append(instance) @@ -157,6 +162,7 @@ async def _analyze_current_cluster( NonAssociatedInstance(ec2_instance=i) for i in terminated_ec2_instances ], disconnected_nodes=[n for n in docker_nodes if _node_not_ready(n)], + retired_nodes=retired_nodes, ) _logger.info("current state: %s", f"{cluster!r}") return cluster @@ -200,13 +206,17 @@ async def _make_pending_buffer_ec2s_join_cluster( app: FastAPI, cluster: Cluster, ) -> Cluster: + ec2_client = get_ec2_client(app) if buffer_ec2s_pending := [ i.ec2_instance for i in cluster.pending_ec2s if is_buffer_machine(i.ec2_instance.tags) + and (DOCKER_JOIN_COMMAND_EC2_TAG_KEY not in i.ec2_instance.tags) ]: # started buffer instance shall be asked to join the cluster once they are running + app_settings = get_application_settings(app) ssm_client = get_ssm_client(app) + buffer_ec2_connection_state = await limited_gather( *[ ssm_client.is_instance_connected_to_ssm_server(i.id) @@ -223,27 +233,42 @@ async def _make_pending_buffer_ec2s_join_cluster( ) if c is True ] - buffer_ec2_initialized = await limited_gather( - *[ - ssm_client.wait_for_has_instance_completed_cloud_init(i.id) - for i in buffer_ec2_connected_to_ssm_server - ], - reraise=False, - log=_logger, - limit=20, - ) - buffer_ec2_ready_for_command = [ - i - for i, r in zip( - buffer_ec2_connected_to_ssm_server, buffer_ec2_initialized, strict=True + buffer_ec2_ready_for_command = buffer_ec2_connected_to_ssm_server + if app_settings.AUTOSCALING_WAIT_FOR_CLOUD_INIT_BEFORE_WARM_BUFFER_ACTIVATION: + buffer_ec2_initialized = await limited_gather( + *[ + ssm_client.wait_for_has_instance_completed_cloud_init(i.id) + for i in buffer_ec2_connected_to_ssm_server + ], + reraise=False, + log=_logger, + limit=20, + ) + buffer_ec2_ready_for_command = [ + i + for i, r in zip( + buffer_ec2_connected_to_ssm_server, + buffer_ec2_initialized, + strict=True, + ) + if r is True + ] + if buffer_ec2_ready_for_command: + ssm_command = await ssm_client.send_command( + [i.id for i in buffer_ec2_ready_for_command], + command=await utils_docker.get_docker_swarm_join_bash_command( + join_as_drained=app_settings.AUTOSCALING_DOCKER_JOIN_DRAINED + ), + command_name=DOCKER_JOIN_COMMAND_NAME, + ) + await ec2_client.set_instances_tags( + buffer_ec2_ready_for_command, + tags={ + DOCKER_JOIN_COMMAND_EC2_TAG_KEY: AWSTagValue( + ssm_command.command_id + ), + }, ) - if r is True - ] - await ssm_client.send_command( - [i.id for i in buffer_ec2_ready_for_command], - command=await utils_docker.get_docker_swarm_join_bash_command(), - command_name="docker swarm join", - ) return cluster @@ -1060,6 +1085,43 @@ async def _notify_machine_creation_progress( ) +async def _drain_retired_nodes( + app: FastAPI, + cluster: Cluster, +) -> Cluster: + if not cluster.retired_nodes: + return cluster + + app_settings = get_application_settings(app) + docker_client = get_docker_client(app) + # drain this empty nodes + updated_nodes: list[Node] = await asyncio.gather( + *( + utils_docker.set_node_osparc_ready( + app_settings, + docker_client, + node.node, + ready=False, + ) + for node in cluster.retired_nodes + ) + ) + if updated_nodes: + _logger.info( + "following nodes were set to drain: '%s'", + f"{[node.Description.Hostname for node in updated_nodes if node.Description]}", + ) + newly_drained_instances = [ + AssociatedInstance(node=node, ec2_instance=instance.ec2_instance) + for instance, node in zip(cluster.retired_nodes, updated_nodes, strict=True) + ] + return dataclasses.replace( + cluster, + 
retired_nodes=[], + drained_nodes=cluster.drained_nodes + newly_drained_instances, + ) + + async def _autoscale_cluster( app: FastAPI, cluster: Cluster, @@ -1166,6 +1228,7 @@ async def auto_scale_cluster( cluster = await _try_attach_pending_ec2s( app, cluster, auto_scaling_mode, allowed_instance_types ) + cluster = await _drain_retired_nodes(app, cluster) cluster = await _autoscale_cluster( app, cluster, auto_scaling_mode, allowed_instance_types diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py index 7c30b1c61dbd..921af2cdd01b 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py @@ -86,6 +86,11 @@ async def compute_cluster_total_resources( async def is_instance_active(app: FastAPI, instance: AssociatedInstance) -> bool: ... + @staticmethod + @abstractmethod + async def is_instance_retired(app: FastAPI, instance: AssociatedInstance) -> bool: + ... + @staticmethod def is_instance_drained(instance: AssociatedInstance) -> bool: return not utils_docker.is_node_osparc_ready(instance.node) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py index 0e3862e51b1b..ecddfc5e8ec1 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py @@ -177,6 +177,14 @@ async def is_instance_active(app: FastAPI, instance: AssociatedInstance) -> bool _scheduler_url(app), _scheduler_auth(app), instance.ec2_instance ) + @staticmethod + async def is_instance_retired(app: FastAPI, instance: AssociatedInstance) -> bool: + if not utils_docker.is_node_osparc_ready(instance.node): + return False + return await dask.is_worker_retired( + _scheduler_url(app), _scheduler_auth(app), instance.ec2_instance + ) + @staticmethod async def try_retire_nodes(app: FastAPI) -> None: await dask.try_retire_nodes(_scheduler_url(app), _scheduler_auth(app)) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py index 4f6cd21006b9..f267ebe013cc 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_dynamic.py @@ -102,6 +102,13 @@ async def is_instance_active(app: FastAPI, instance: AssociatedInstance) -> bool assert app # nosec return utils_docker.is_node_osparc_ready(instance.node) + @staticmethod + async def is_instance_retired(app: FastAPI, instance: AssociatedInstance) -> bool: + assert app # nosec + assert instance # nosec + # nothing to do here + return False + @staticmethod async def try_retire_nodes(app: FastAPI) -> None: assert app # nosec diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py index 3133c8c27fc0..d9f1c5505685 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py +++ 
b/services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py @@ -33,10 +33,6 @@ from fastapi import FastAPI from pydantic import NonNegativeInt from servicelib.logging_utils import log_context -from simcore_service_autoscaling.modules.instrumentation import ( - get_instrumentation, - has_instrumentation, -) from types_aiobotocore_ec2.literals import InstanceTypeType from ..constants import ( @@ -55,6 +51,7 @@ ) from .auto_scaling_mode_base import BaseAutoscaling from .ec2 import get_ec2_client +from .instrumentation import get_instrumentation, has_instrumentation from .ssm import get_ssm_client _logger = logging.getLogger(__name__) @@ -197,8 +194,9 @@ async def _terminate_instances_with_invalid_pre_pulled_images( ].pre_pulled_instances() for instance in all_pre_pulled_instances: + pre_pulled_images = load_pre_pulled_images_from_tags(instance.tags) if ( - pre_pulled_images := load_pre_pulled_images_from_tags(instance.tags) + pre_pulled_images is not None ) and pre_pulled_images != ec2_boot_config.pre_pull_images: _logger.info( "%s", diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index edb02d40e2f5..5e1c7e2f0c75 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -11,6 +11,7 @@ import distributed.scheduler from aws_library.ec2 import EC2InstanceData, Resources from dask_task_models_library.resource_constraints import DaskTaskResources +from distributed.core import Status from models_library.clusters import InternalClusterAuthentication, TLSAuthentication from pydantic import AnyUrl, ByteSize, parse_obj_as @@ -120,8 +121,28 @@ async def is_worker_connected( ) -> bool: with contextlib.suppress(DaskNoWorkersError, DaskWorkerNotFoundError): async with _scheduler_client(scheduler_url, authentication) as client: - _dask_worker_from_ec2_instance(client, worker_ec2_instance) - return True + _, worker_details = _dask_worker_from_ec2_instance( + client, worker_ec2_instance + ) + return Status(worker_details["status"]) == Status.running + return False + + +async def is_worker_retired( + scheduler_url: AnyUrl, + authentication: InternalClusterAuthentication, + worker_ec2_instance: EC2InstanceData, +) -> bool: + with contextlib.suppress(DaskNoWorkersError, DaskWorkerNotFoundError): + async with _scheduler_client(scheduler_url, authentication) as client: + _, worker_details = _dask_worker_from_ec2_instance( + client, worker_ec2_instance + ) + return Status(worker_details["status"]) in { + Status.closed, + Status.closing, + Status.closing_gracefully, + } return False @@ -152,9 +173,9 @@ def _list_tasks( } async with _scheduler_client(scheduler_url, authentication) as client: - list_of_tasks: dict[ - dask.typing.Key, DaskTaskResources - ] = await _wrap_client_async_routine(client.run_on_scheduler(_list_tasks)) + list_of_tasks: dict[dask.typing.Key, DaskTaskResources] = ( + await _wrap_client_async_routine(client.run_on_scheduler(_list_tasks)) + ) _logger.debug("found unrunnable tasks: %s", list_of_tasks) return [ DaskTask( @@ -186,10 +207,10 @@ def _list_processing_tasks( return worker_to_processing_tasks async with _scheduler_client(scheduler_url, authentication) as client: - worker_to_tasks: dict[ - str, list[tuple[dask.typing.Key, DaskTaskResources]] - ] = await _wrap_client_async_routine( - client.run_on_scheduler(_list_processing_tasks) + worker_to_tasks: 
dict[str, list[tuple[dask.typing.Key, DaskTaskResources]]] = ( + await _wrap_client_async_routine( + client.run_on_scheduler(_list_processing_tasks) + ) ) _logger.debug("found processing tasks: %s", worker_to_tasks) tasks_per_worker = defaultdict(list) @@ -255,12 +276,12 @@ def _list_processing_tasks_on_worker( _logger.debug("looking for processing tasksfor %s", f"{worker_url=}") # now get the used resources - worker_processing_tasks: list[ - tuple[dask.typing.Key, DaskTaskResources] - ] = await _wrap_client_async_routine( - client.run_on_scheduler( - _list_processing_tasks_on_worker, worker_url=worker_url - ), + worker_processing_tasks: list[tuple[dask.typing.Key, DaskTaskResources]] = ( + await _wrap_client_async_routine( + client.run_on_scheduler( + _list_processing_tasks_on_worker, worker_url=worker_url + ), + ) ) total_resources_used: collections.Counter[str] = collections.Counter() diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_constants.py b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_constants.py index 4d8f53025b5a..1224ea719078 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_constants.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_constants.py @@ -1,8 +1,10 @@ from typing import Final +from servicelib.instrumentation import get_metrics_namespace + from ..._meta import APP_NAME -METRICS_NAMESPACE: Final[str] = APP_NAME.replace("-", "_") +METRICS_NAMESPACE: Final[str] = get_metrics_namespace(APP_NAME) EC2_INSTANCE_LABELS: Final[tuple[str, ...]] = ("instance_type",) CLUSTER_METRICS_DEFINITIONS: Final[dict[str, tuple[str, tuple[str, ...]]]] = { @@ -42,6 +44,10 @@ "Number of EC2-backed docker nodes that started the termination process", EC2_INSTANCE_LABELS, ), + "retired_nodes": ( + "Number of EC2-backed docker nodes that were actively retired and waiting for draining and termination or re-use", + EC2_INSTANCE_LABELS, + ), "terminated_instances": ( "Number of EC2 instances that were terminated (they are typically visible 1 hour)", EC2_INSTANCE_LABELS, diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py index f7aaadbdc2a2..e3bc20ef5183 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py @@ -22,8 +22,10 @@ async def on_startup() -> None: metrics_subsystem = ( "dynamic" if app_settings.AUTOSCALING_NODES_MONITORING else "computational" ) - app.state.instrumentation = AutoscalingInstrumentation( - registry=instrumentator.registry, subsystem=metrics_subsystem + app.state.instrumentation = ( + AutoscalingInstrumentation( # pylint: disable=unexpected-keyword-arg + registry=instrumentator.registry, subsystem=metrics_subsystem + ) ) async def on_shutdown() -> None: diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py index 70b96cabfe2a..3831b33b826d 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py @@ -2,6 +2,7 @@ from typing import Final from prometheus_client import CollectorRegistry, 
Counter, Histogram +from servicelib.instrumentation import MetricsBase from ...models import BufferPoolManager, Cluster from ._constants import ( @@ -13,11 +14,6 @@ from ._utils import TrackedGauge, create_gauge -@dataclass(slots=True, kw_only=True) -class MetricsBase: - subsystem: str - - @dataclass(slots=True, kw_only=True) class ClusterMetrics(MetricsBase): # pylint: disable=too-many-instance-attributes active_nodes: TrackedGauge = field(init=False) @@ -29,13 +25,19 @@ class ClusterMetrics(MetricsBase): # pylint: disable=too-many-instance-attribut buffer_ec2s: TrackedGauge = field(init=False) disconnected_nodes: TrackedGauge = field(init=False) terminating_nodes: TrackedGauge = field(init=False) + retired_nodes: TrackedGauge = field(init=False) terminated_instances: TrackedGauge = field(init=False) def __post_init__(self) -> None: cluster_subsystem = f"{self.subsystem}_cluster" # Creating and assigning gauges using the field names and the metric definitions for field_name, definition in CLUSTER_METRICS_DEFINITIONS.items(): - gauge = create_gauge(field_name, definition, cluster_subsystem) + gauge = create_gauge( + field_name=field_name, + definition=definition, + subsystem=cluster_subsystem, + registry=self.registry, + ) setattr(self, field_name, gauge) def update_from_cluster(self, cluster: Cluster) -> None: @@ -64,6 +66,7 @@ def __post_init__(self) -> None: labelnames=EC2_INSTANCE_LABELS, namespace=METRICS_NAMESPACE, subsystem=self.subsystem, + registry=self.registry, ) self.started_instances = Counter( "started_instances_total", @@ -71,6 +74,7 @@ def __post_init__(self) -> None: labelnames=EC2_INSTANCE_LABELS, namespace=METRICS_NAMESPACE, subsystem=self.subsystem, + registry=self.registry, ) self.stopped_instances = Counter( "stopped_instances_total", @@ -78,6 +82,7 @@ def __post_init__(self) -> None: labelnames=EC2_INSTANCE_LABELS, namespace=METRICS_NAMESPACE, subsystem=self.subsystem, + registry=self.registry, ) self.terminated_instances = Counter( "terminated_instances_total", @@ -85,6 +90,7 @@ def __post_init__(self) -> None: labelnames=EC2_INSTANCE_LABELS, namespace=METRICS_NAMESPACE, subsystem=self.subsystem, + registry=self.registry, ) def instance_started(self, instance_type: str) -> None: @@ -122,7 +128,12 @@ def __post_init__(self) -> None: setattr( self, field_name, - create_gauge(field_name, definition, buffer_pools_subsystem), + create_gauge( + field_name=field_name, + definition=definition, + subsystem=buffer_pools_subsystem, + registry=self.registry, + ), ) self.instances_ready_to_pull_seconds = Histogram( "instances_ready_to_pull_duration_seconds", @@ -131,6 +142,7 @@ def __post_init__(self) -> None: namespace=METRICS_NAMESPACE, subsystem=buffer_pools_subsystem, buckets=(10, 20, 30, 40, 50, 60, 120), + registry=self.registry, ) self.instances_completed_pulling_seconds = Histogram( "instances_completed_pulling_duration_seconds", @@ -149,6 +161,7 @@ def __post_init__(self) -> None: 30 * _MINUTE, 40 * _MINUTE, ), + registry=self.registry, ) def update_from_buffer_pool_manager( @@ -173,8 +186,16 @@ class AutoscalingInstrumentation(MetricsBase): buffer_machines_pools_metrics: BufferPoolsMetrics = field(init=False) def __post_init__(self) -> None: - self.cluster_metrics = ClusterMetrics(subsystem=self.subsystem) - self.ec2_client_metrics = EC2ClientMetrics(subsystem=self.subsystem) - self.buffer_machines_pools_metrics = BufferPoolsMetrics( - subsystem=self.subsystem + self.cluster_metrics = ClusterMetrics( # pylint: disable=unexpected-keyword-arg + subsystem=self.subsystem, 
registry=self.registry + ) + self.ec2_client_metrics = ( + EC2ClientMetrics( # pylint: disable=unexpected-keyword-arg + subsystem=self.subsystem, registry=self.registry + ) + ) + self.buffer_machines_pools_metrics = ( + BufferPoolsMetrics( # pylint: disable=unexpected-keyword-arg + subsystem=self.subsystem, registry=self.registry + ) ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_utils.py b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_utils.py index 2d991b71cc75..8f80b1f05e81 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_utils.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_utils.py @@ -3,7 +3,7 @@ from dataclasses import dataclass, field from aws_library.ec2._models import EC2InstanceData -from prometheus_client import Gauge +from prometheus_client import CollectorRegistry, Gauge from ._constants import METRICS_NAMESPACE @@ -27,9 +27,11 @@ def update_from_instances(self, instances: Iterable[EC2InstanceData]) -> None: def create_gauge( + *, field_name: str, definition: tuple[str, tuple[str, ...]], subsystem: str, + registry: CollectorRegistry, ) -> TrackedGauge: description, labelnames = definition return TrackedGauge( @@ -39,5 +41,6 @@ def create_gauge( labelnames=labelnames, namespace=METRICS_NAMESPACE, subsystem=subsystem, + registry=registry, ) ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py index 953c81573340..d5fca4c3bb68 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/auto_scaling_core.py @@ -80,7 +80,11 @@ async def ec2_startup_script( ec2_boot_specific: EC2InstanceBootSpecific, app_settings: ApplicationSettings ) -> str: startup_commands = ec2_boot_specific.custom_boot_scripts.copy() - startup_commands.append(await utils_docker.get_docker_swarm_join_bash_command()) + startup_commands.append( + await utils_docker.get_docker_swarm_join_bash_command( + join_as_drained=app_settings.AUTOSCALING_DOCKER_JOIN_DRAINED + ) + ) if app_settings.AUTOSCALING_REGISTRY: # noqa: SIM102 if pull_image_cmd := utils_docker.get_docker_pull_images_on_start_bash_command( ec2_boot_specific.pre_pull_images diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py index fe782f66b2a4..6449952decd8 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py @@ -392,7 +392,7 @@ async def compute_cluster_used_resources( _DOCKER_SWARM_JOIN_PATTERN = re.compile(_DOCKER_SWARM_JOIN_RE) -async def get_docker_swarm_join_bash_command() -> str: +async def get_docker_swarm_join_bash_command(*, join_as_drained: bool) -> str: """this assumes we are on a manager node""" command = ["docker", "swarm", "join-token", "worker"] process = await asyncio.create_subprocess_exec( @@ -409,7 +409,7 @@ async def get_docker_swarm_join_bash_command() -> str: decoded_stdout = stdout.decode() if match := re.search(_DOCKER_SWARM_JOIN_PATTERN, decoded_stdout): capture = match.groupdict() - return f"{capture['command']} --availability=drain {capture['token']} {capture['address']}" + return f"{capture['command']} --availability={'drain' 
if join_as_drained else 'active'} {capture['token']} {capture['address']}" msg = f"expected docker '{_DOCKER_SWARM_JOIN_RE}' command not found: received {decoded_stdout}!" raise RuntimeError(msg) diff --git a/services/autoscaling/tests/manual/.env-devel b/services/autoscaling/tests/manual/.env-devel index 72096488618b..a7069054e6af 100644 --- a/services/autoscaling/tests/manual/.env-devel +++ b/services/autoscaling/tests/manual/.env-devel @@ -1,6 +1,9 @@ AUTOSCALING_DEBUG=true +AUTOSCALING_DRAIN_NODES_WITH_LABELS=False +AUTOSCALING_DOCKER_JOIN_DRAINED=True +AUTOSCALING_WAIT_FOR_CLOUD_INIT_BEFORE_WARM_BUFFER_ACTIVATION=False AUTOSCALING_LOGLEVEL=INFO -AUTOSCALING_TASK_INTERVAL=30 +AUTOSCALING_POLL_INTERVAL=10 AUTOSCALING_EC2_ACCESS_KEY_ID=XXXXXXXXXX AUTOSCALING_EC2_SECRET_ACCESS_KEY=XXXXXXXXXX AUTOSCALING_EC2_ENDPOINT=null @@ -13,20 +16,21 @@ EC2_INSTANCES_MACHINES_BUFFER=0 EC2_INSTANCES_MAX_INSTANCES=20 EC2_INSTANCES_TIME_BEFORE_DRAINING="00:00:10" EC2_INSTANCES_TIME_BEFORE_TERMINATION="00:03:00" -EC2_INSTANCES_ALLOWED_TYPES='{"t2.micro": {"ami_id": "XXXXXXXX", "custom_boot_scripts": ["whoami"], "pre_pull_images": ["ubuntu:latest"]}}' +EC2_INSTANCES_ALLOWED_TYPES={"t2.micro": {"ami_id": "XXXXXXXX", "custom_boot_scripts": ["whoami"], "pre_pull_images": ["ubuntu:latest"]}} EC2_INSTANCES_ATTACHED_IAM_PROFILE=XXXXXXXXX EC2_INSTANCES_KEY_NAME=XXXXXXXXXX EC2_INSTANCES_NAME_PREFIX=testing-osparc-computational-cluster -EC2_INSTANCES_SECURITY_GROUP_IDS="[\"XXXXXXXXXX\"]" +EC2_INSTANCES_SECURITY_GROUP_IDS=["XXXXXXXXXX"] EC2_INSTANCES_SUBNET_ID=XXXXXXXXXX -EC2_INSTANCES_CUSTOM_TAGS='{"special": "testing"}' +EC2_INSTANCES_CUSTOM_TAGS={"special": "testing"} EC2_INSTANCES_TIME_BEFORE_DRAINING=00:00:20 EC2_INSTANCES_TIME_BEFORE_TERMINATION=00:01:00 LOG_FORMAT_LOCAL_DEV_ENABLED=True # define the following to activate dynamic autoscaling -# NODES_MONITORING_NEW_NODES_LABELS="[\"testing.autoscaled-node\"]" -# NODES_MONITORING_NODE_LABELS="[\"testing.monitored-node\"]" -# NODES_MONITORING_SERVICE_LABELS="[\"testing.monitored-service\"]" +# AUTOSCALING_NODES_MONITORING={} +# NODES_MONITORING_NEW_NODES_LABELS=["testing.autoscaled-node"] +# NODES_MONITORING_NODE_LABELS=["testing.monitored-node"] +# NODES_MONITORING_SERVICE_LABELS=["testing.monitored-service"] # may be activated or not # RABBIT_HOST=rabbit diff --git a/services/autoscaling/tests/manual/README.md b/services/autoscaling/tests/manual/README.md index 9f4497c5e510..d1819c72eef6 100644 --- a/services/autoscaling/tests/manual/README.md +++ b/services/autoscaling/tests/manual/README.md @@ -109,8 +109,9 @@ make up-devel # this will deploy the autoscaling stack docker service create \ --name=test-service \ --reserve-cpu=1 \ ---reserve-memory=1GiB \ +--reserve-memory=512MiB \ --constraint=node.labels.testing.monitored-node==true \ +--constraint=node.labels.io.simcore.osparc-services-ready==true \ --label=testing.monitored-service=true \ --container-label=io.simcore.runtime.user-id=99 \ --container-label=io.simcore.runtime.project-id='5054a589-3ba4-46c3-829d-2e3d1a6a043f' \ @@ -120,5 +121,5 @@ docker service create \ --container-label=io.simcore.runtime.swarm-stack-name=thestack \ --container-label=io.simcore.runtime.memory-limit=1GB \ --container-label=io.simcore.runtime.cpu-limit=1 \ -redis # will create a redis service reserving 4 CPUs and 1GiB of RAM +redis # will create a redis service reserving 1 CPU and 512MiB of RAM ``` diff --git a/services/autoscaling/tests/manual/docker-compose-computational.yml 
b/services/autoscaling/tests/manual/docker-compose-computational.yml index 29575c76f7ea..d97387ca95ba 100644 --- a/services/autoscaling/tests/manual/docker-compose-computational.yml +++ b/services/autoscaling/tests/manual/docker-compose-computational.yml @@ -1,8 +1,9 @@ services: autoscaling: environment: + - AUTOSCALING_DASK={} - DASK_MONITORING_URL=tcp://dask-scheduler:8786 - - DASK_SCHEDULER_AUTH='{}' + - DASK_SCHEDULER_AUTH={} dask-sidecar: image: itisfoundation/dask-sidecar:master-github-latest init: true diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py index b8ad45dca785..b705ea85b783 100644 --- a/services/autoscaling/tests/unit/conftest.py +++ b/services/autoscaling/tests/unit/conftest.py @@ -147,6 +147,30 @@ def with_labelize_drain_nodes( ) +@pytest.fixture( + params=[ + "with_AUTOSCALING_DOCKER_JOIN_DRAINED", + "without_AUTOSCALING_DOCKER_JOIN_DRAINED", + ] +) +def with_docker_join_drained(request: pytest.FixtureRequest) -> bool: + return bool(request.param == "with_AUTOSCALING_DOCKER_JOIN_DRAINED") + + +@pytest.fixture +def app_with_docker_join_drained( + app_environment: EnvVarsDict, + monkeypatch: pytest.MonkeyPatch, + with_docker_join_drained: bool, +) -> EnvVarsDict: + return app_environment | setenvs_from_dict( + monkeypatch, + { + "AUTOSCALING_DOCKER_JOIN_DRAINED": f"{with_docker_join_drained}", + }, + ) + + @pytest.fixture(scope="session") def fake_ssm_settings() -> SSMSettings: return SSMSettings(**SSMSettings.Config.schema_extra["examples"][0]) @@ -354,11 +378,17 @@ def enabled_rabbitmq( return rabbit_service +_LIFESPAN_TIMEOUT: Final[int] = 10 + + @pytest.fixture async def initialized_app(app_environment: EnvVarsDict) -> AsyncIterator[FastAPI]: settings = ApplicationSettings.create_from_envs() app = create_app(settings) - async with LifespanManager(app): + # NOTE: the timeout is sometimes too small for CI machines, and even for larger machines + async with LifespanManager( + app, startup_timeout=_LIFESPAN_TIMEOUT, shutdown_timeout=_LIFESPAN_TIMEOUT + ): yield app @@ -775,6 +805,7 @@ def _creator(**cluter_overrides) -> Cluster: buffer_ec2s=[], disconnected_nodes=[], terminating_nodes=[], + retired_nodes=[], terminated_instances=[], ), **cluter_overrides, diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py index a1b411fa9881..5811b43b2f06 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py @@ -1,9 +1,10 @@ # pylint: disable=no-value-for-parameter # pylint: disable=redefined-outer-name -# pylint: disable=unused-argument -# pylint: disable=unused-variable # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=too-many-statements +# pylint: disable=unused-argument +# pylint: disable=unused-variable import asyncio @@ -70,6 +71,7 @@ def local_dask_scheduler_server_envs( @pytest.fixture def minimal_configuration( with_labelize_drain_nodes: EnvVarsDict, + app_with_docker_join_drained: EnvVarsDict, docker_swarm: None, mocked_ec2_server_envs: EnvVarsDict, mocked_ssm_server_envs: EnvVarsDict, diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py index 3b2c9be59dcc..3a79a11c853c 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py +++ 
b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py @@ -1,9 +1,10 @@ # pylint: disable=no-value-for-parameter # pylint: disable=redefined-outer-name -# pylint: disable=unused-argument -# pylint: disable=unused-variable # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=too-many-statements +# pylint: disable=unused-argument +# pylint: disable=unused-variable import asyncio import datetime @@ -181,6 +182,7 @@ async def drained_host_node( @pytest.fixture def minimal_configuration( with_labelize_drain_nodes: EnvVarsDict, + app_with_docker_join_drained: EnvVarsDict, docker_swarm: None, mocked_ec2_server_envs: EnvVarsDict, mocked_ssm_server_envs: EnvVarsDict, @@ -1010,6 +1012,7 @@ async def test_cluster_scaling_up_and_down_against_aws( skip_if_external_envfile_dict: None, external_ec2_instances_allowed_types: None | dict[str, EC2InstanceBootSpecific], with_labelize_drain_nodes: EnvVarsDict, + app_with_docker_join_drained: EnvVarsDict, docker_swarm: None, disabled_rabbitmq: None, disable_dynamic_service_background_task: None, diff --git a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py b/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py index 4ed2452d1cdb..26ac271db298 100644 --- a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py +++ b/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py @@ -1,8 +1,9 @@ # pylint: disable=no-value-for-parameter # pylint: disable=redefined-outer-name +# pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable -# pylint: disable=too-many-arguments import datetime import json @@ -114,6 +115,7 @@ def with_ec2_instance_allowed_types_env( @pytest.fixture def minimal_configuration( disabled_rabbitmq: None, + disable_dynamic_service_background_task: None, disable_buffers_pool_background_task: None, enabled_dynamic_mode: EnvVarsDict, mocked_ec2_server_envs: EnvVarsDict, @@ -454,6 +456,7 @@ class _BufferMachineParams: ) async def test_monitor_buffer_machines_terminates_supernumerary_instances( minimal_configuration: None, + fake_pre_pull_images: list[DockerGenericTag], ec2_client: EC2Client, buffer_count: int, ec2_instances_allowed_types_with_only_1_buffered: dict[InstanceTypeType, Any], @@ -466,12 +469,15 @@ async def test_monitor_buffer_machines_terminates_supernumerary_instances( ], expected_buffer_params: _BufferMachineParams, ): + # dirty hack + if expected_buffer_params.pre_pulled_images == []: + expected_buffer_params.pre_pulled_images = fake_pre_pull_images # have too many machines of accepted type buffer_machines = await create_buffer_machines( buffer_count + 5, next(iter(list(ec2_instances_allowed_types_with_only_1_buffered))), expected_buffer_params.instance_state_name, - [], + fake_pre_pull_images, ) await assert_autoscaled_dynamic_warm_pools_ec2_instances( ec2_client, @@ -688,6 +694,7 @@ def pre_pull_images( async def test_monitor_buffer_machines_against_aws( skip_if_external_envfile_dict: None, disable_buffers_pool_background_task: None, + disable_dynamic_service_background_task: None, disabled_rabbitmq: None, mocked_redis_server: None, external_envfile_dict: EnvVarsDict, diff --git a/services/autoscaling/tests/unit/test_modules_instrumentation_utils.py b/services/autoscaling/tests/unit/test_modules_instrumentation_utils.py index f72fa262a975..31a19701f8eb 100644 --- 
a/services/autoscaling/tests/unit/test_modules_instrumentation_utils.py +++ b/services/autoscaling/tests/unit/test_modules_instrumentation_utils.py @@ -2,6 +2,7 @@ from typing import TypedDict from aws_library.ec2._models import EC2InstanceData +from prometheus_client import CollectorRegistry from prometheus_client.metrics import MetricWrapperBase from simcore_service_autoscaling.modules.instrumentation._constants import ( EC2_INSTANCE_LABELS, @@ -40,10 +41,12 @@ def test_update_gauge_sets_old_entries_to_0( fake_ec2_instance_data: Callable[..., EC2InstanceData] ): # Create a Gauge with example labels + registry = CollectorRegistry() tracked_gauge = create_gauge( - "example_gauge", + field_name="example_gauge", definition=("An example gauge", EC2_INSTANCE_LABELS), subsystem="whatever", + registry=registry, ) ec2_instance_type_1 = fake_ec2_instance_data() diff --git a/services/autoscaling/tests/unit/test_utils_docker.py b/services/autoscaling/tests/unit/test_utils_docker.py index 03985cfba78c..8e5b8cd90a82 100644 --- a/services/autoscaling/tests/unit/test_utils_docker.py +++ b/services/autoscaling/tests/unit/test_utils_docker.py @@ -821,10 +821,14 @@ async def test_compute_cluster_used_resources_with_services_running( async def test_get_docker_swarm_join_script(host_node: Node): - join_script = await get_docker_swarm_join_bash_command() + join_script = await get_docker_swarm_join_bash_command(join_as_drained=True) assert join_script.startswith("docker swarm join") assert "--availability=drain" in join_script + join_script = await get_docker_swarm_join_bash_command(join_as_drained=False) + assert join_script.startswith("docker swarm join") + assert "--availability=active" in join_script + async def test_get_docker_swarm_join_script_bad_return_code_raises( host_node: Node, @@ -840,7 +844,7 @@ async def test_get_docker_swarm_join_script_bad_return_code_raises( ) mocked_asyncio_process.return_value.returncode = 137 with pytest.raises(RuntimeError, match=r"unexpected error .+"): - await get_docker_swarm_join_bash_command() + await get_docker_swarm_join_bash_command(join_as_drained=True) # NOTE: the sleep here is to provide some time for asyncio to properly close its process communication # to silence the warnings await asyncio.sleep(2) @@ -860,7 +864,7 @@ async def test_get_docker_swarm_join_script_returning_unexpected_command_raises( ) mocked_asyncio_process.return_value.returncode = 0 with pytest.raises(RuntimeError, match=r"expected docker .+"): - await get_docker_swarm_join_bash_command() + await get_docker_swarm_join_bash_command(join_as_drained=True) # NOTE: the sleep here is to provide some time for asyncio to properly close its process communication # to silence the warnings await asyncio.sleep(2) diff --git a/services/autoscaling/tests/unit/test_utils_rabbitmq.py b/services/autoscaling/tests/unit/test_utils_rabbitmq.py index 6b6308399d08..1c5920f9dc74 100644 --- a/services/autoscaling/tests/unit/test_utils_rabbitmq.py +++ b/services/autoscaling/tests/unit/test_utils_rabbitmq.py @@ -1,7 +1,8 @@ -# pylint:disable=unused-variable -# pylint:disable=unused-argument +# pylint: disable=too-many-positional-arguments # pylint:disable=redefined-outer-name # pylint:disable=too-many-arguments +# pylint:disable=unused-argument +# pylint:disable=unused-variable from collections.abc import Awaitable, Callable diff --git a/services/catalog/requirements/_base.txt b/services/catalog/requirements/_base.txt index cf3adce5de01..5ed5819a1250 100644 --- a/services/catalog/requirements/_base.txt +++ 
b/services/catalog/requirements/_base.txt @@ -1,7 +1,9 @@ aio-pika==9.4.1 # via -r requirements/../../../packages/service-library/requirements/_base.in aiocache==0.12.2 - # via -r requirements/_base.in + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.21.0 @@ -36,11 +38,10 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi async-timeout==4.0.3 - # via - # aiohttp - # asyncpg - # redis + # via asyncpg asyncpg==0.29.0 # via # -r requirements/_base.in @@ -61,18 +62,25 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer # uvicorn +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via # fastapi # pydantic -exceptiongroup==1.2.0 - # via anyio fast-depends==2.4.2 # via faststream fastapi==0.99.1 @@ -93,8 +101,14 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http greenlet==3.0.3 # via sqlalchemy +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -120,7 +134,10 @@ idna==3.6 # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api itsdangerous==2.1.2 # via fastapi jinja2==3.1.3 @@ -163,6 +180,59 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # 
opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -185,6 +255,12 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.9 # via sqlalchemy pydantic==1.10.14 @@ -247,6 +323,10 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -256,6 +336,8 @@ rpds-py==0.18.0 # via # jsonschema # referencing +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer six==1.16.0 @@ -305,12 +387,11 @@ typing-extensions==4.10.0 # aiodebug # aiodocker # alembic - # anyio # fastapi # faststream + # opentelemetry-sdk # pydantic # typer - # uvicorn ujson==5.9.0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -321,6 +402,16 @@ ujson==5.9.0 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # fastapi +urllib3==2.2.2 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests uvicorn==0.29.0 # via # -r requirements/../../../packages/service-library/requirements/_fastapi.in @@ -331,9 +422,15 @@ watchfiles==0.21.0 # via uvicorn websockets==12.0 # via uvicorn +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation yarl==1.9.4 # via # -r requirements/../../../packages/postgres-database/requirements/_base.in # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git 
a/services/catalog/requirements/_test.txt b/services/catalog/requirements/_test.txt index ee25905a2e3e..6fdd398def33 100644 --- a/services/catalog/requirements/_test.txt +++ b/services/catalog/requirements/_test.txt @@ -17,10 +17,6 @@ anyio==4.3.0 # httpx asgi-lifespan==2.1.0 # via -r requirements/_test.in -async-timeout==4.0.3 - # via - # -c requirements/_base.txt - # aiohttp attrs==23.2.0 # via # -c requirements/_base.txt @@ -36,7 +32,9 @@ certifi==2024.2.2 # httpx # requests charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -45,12 +43,7 @@ coverage==7.6.1 # via pytest-cov docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.0 - # via - # -c requirements/_base.txt - # anyio - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in frozenlist==1.4.1 # via @@ -105,7 +98,7 @@ multidict==6.0.5 # -c requirements/_base.txt # aiohttp # yarl -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -119,7 +112,7 @@ ptvsd==4.3.2 # via -r requirements/_test.in py-cpuinfo==9.0.0 # via pytest-benchmark -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-aiohttp @@ -154,7 +147,9 @@ referencing==0.29.3 # jsonschema # jsonschema-specifications requests==2.32.3 - # via docker + # via + # -c requirements/_base.txt + # docker respx==0.21.1 # via -r requirements/_test.in rpds-py==0.18.0 @@ -180,25 +175,20 @@ sqlalchemy==1.4.52 # alembic sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy -tomli==2.0.1 - # via - # coverage - # mypy - # pytest types-psycopg2==2.9.21.20240819 # via -r requirements/_test.in -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in typing-extensions==4.10.0 # via # -c requirements/_base.txt # alembic - # anyio # mypy # sqlalchemy2-stubs urllib3==2.2.2 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # docker # requests yarl==1.9.4 diff --git a/services/catalog/requirements/_tools.txt b/services/catalog/requirements/_tools.txt index bc04c03d0dd6..c0a526c13100 100644 --- a/services/catalog/requirements/_tools.txt +++ b/services/catalog/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -51,14 +51,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -69,30 +69,22 @@ pyyaml==6.0.1 # -c requirements/_base.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools -tomli==2.0.1 +setuptools==74.0.0 # via - # -c requirements/_test.txt - # black - # build - # 
mypy + # -c requirements/_base.txt # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.10.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/catalog/src/simcore_service_catalog/_meta.py b/services/catalog/src/simcore_service_catalog/_meta.py index bc1b13173a5b..6aab1be93b27 100644 --- a/services/catalog/src/simcore_service_catalog/_meta.py +++ b/services/catalog/src/simcore_service_catalog/_meta.py @@ -15,6 +15,7 @@ VERSION: Final[Version] = info.version API_VERSION: Final[VersionStr] = info.__version__ API_VTAG: Final[str] = info.api_prefix_path_tag +APP_NAME: Final[str] = info.project_name SUMMARY: Final[str] = info.get_summary() diff --git a/services/catalog/src/simcore_service_catalog/core/application.py b/services/catalog/src/simcore_service_catalog/core/application.py index d68036da081d..a28dc8c5a321 100644 --- a/services/catalog/src/simcore_service_catalog/core/application.py +++ b/services/catalog/src/simcore_service_catalog/core/application.py @@ -9,9 +9,10 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from starlette.middleware.base import BaseHTTPMiddleware -from .._meta import API_VERSION, API_VTAG, PROJECT_NAME, SUMMARY +from .._meta import API_VERSION, API_VTAG, APP_NAME, PROJECT_NAME, SUMMARY from ..api.rest.routes import setup_rest_api_routes from ..api.rpc.routes import setup_rpc_api_routes from ..exceptions.handlers import setup_exception_handlers @@ -64,6 +65,8 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: app.add_middleware( BaseHTTPMiddleware, dispatch=timing_middleware.add_process_time_header ) + if app.state.settings.CATALOG_TRACING: + setup_tracing(app, app.state.settings.CATALOG_TRACING, APP_NAME) app.add_middleware(GZipMiddleware) diff --git a/services/catalog/src/simcore_service_catalog/core/background_tasks.py b/services/catalog/src/simcore_service_catalog/core/background_tasks.py index fa1c4cbb6591..e6eb7c59fcbc 100644 --- a/services/catalog/src/simcore_service_catalog/core/background_tasks.py +++ b/services/catalog/src/simcore_service_catalog/core/background_tasks.py @@ -15,12 +15,14 @@ from pprint import pformat from typing import Final -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException from models_library.services import ServiceMetaDataPublished from models_library.services_types import ServiceKey, ServiceVersion from packaging.version import Version +from pydantic import ValidationError from simcore_service_catalog.api.dependencies.director import get_director_api from simcore_service_catalog.services import manifest +from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncEngine from ..db.repositories.groups import GroupsRepository @@ -62,32 +64,45 @@ def _by_version(t: tuple[ServiceKey, ServiceVersion]) -> Version: sorted_services = sorted(service_keys, key=_by_version) for service_key, service_version in sorted_services: + service_metadata: ServiceMetaDataPublished = services_in_registry[ (service_key, service_version) ] - ## Set deprecation date to null (is valid date value for postgres) + try: + ## Set deprecation date to null (is valid date value for postgres) - # DEFAULT policies - ( - owner_gid, - service_access_rights, - ) = await 
access_rights.evaluate_default_policy(app, service_metadata) + # DEFAULT policies + ( + owner_gid, + service_access_rights, + ) = await access_rights.evaluate_default_policy(app, service_metadata) - # AUTO-UPGRADE PATCH policy - inherited_access_rights = await access_rights.evaluate_auto_upgrade_policy( - service_metadata, services_repo - ) + # AUTO-UPGRADE PATCH policy + inherited_access_rights = await access_rights.evaluate_auto_upgrade_policy( + service_metadata, services_repo + ) - service_access_rights += inherited_access_rights - service_access_rights = access_rights.reduce_access_rights( - service_access_rights - ) + service_access_rights += inherited_access_rights + service_access_rights = access_rights.reduce_access_rights( + service_access_rights + ) - # set the service in the DB - await services_repo.create_or_update_service( - ServiceMetaDataAtDB(**service_metadata.dict(), owner=owner_gid), - service_access_rights, - ) + # set the service in the DB + await services_repo.create_or_update_service( + ServiceMetaDataAtDB(**service_metadata.dict(), owner=owner_gid), + service_access_rights, + ) + + except (HTTPException, ValidationError, SQLAlchemyError) as err: + # Resilient to single failures: errors in individual (service,key) should not prevent the evaluation of the rest + # and stop the background task from running. + # SEE https://github.com/ITISFoundation/osparc-simcore/issues/6318 + _logger.warning( + "Skipping '%s:%s' due to %s", + service_key, + service_version, + err, + ) async def _ensure_registry_and_database_are_synced(app: FastAPI) -> None: diff --git a/services/catalog/src/simcore_service_catalog/core/events.py b/services/catalog/src/simcore_service_catalog/core/events.py index fb2329019b52..f22adbba4ece 100644 --- a/services/catalog/src/simcore_service_catalog/core/events.py +++ b/services/catalog/src/simcore_service_catalog/core/events.py @@ -3,7 +3,7 @@ from typing import TypeAlias from fastapi import FastAPI -from servicelib.db_async_engine import close_db_connection, connect_to_db +from servicelib.fastapi.db_asyncpg_engine import close_db_connection, connect_to_db from servicelib.logging_utils import log_context from .._meta import APP_FINISHED_BANNER_MSG, APP_STARTED_BANNER_MSG diff --git a/services/catalog/src/simcore_service_catalog/core/settings.py b/services/catalog/src/simcore_service_catalog/core/settings.py index 01781d7ded61..6235dcfd37fb 100644 --- a/services/catalog/src/simcore_service_catalog/core/settings.py +++ b/services/catalog/src/simcore_service_catalog/core/settings.py @@ -12,6 +12,7 @@ from settings_library.http_client_request import ClientRequestSettings from settings_library.postgres import PostgresSettings from settings_library.rabbit import RabbitSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings _logger = logging.getLogger(__name__) @@ -87,3 +88,6 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): CATALOG_SERVICES_DEFAULT_SPECIFICATIONS: ServiceSpecifications = ( _DEFAULT_SERVICE_SPECIFICATIONS ) + CATALOG_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) diff --git a/services/catalog/src/simcore_service_catalog/exceptions/errors.py b/services/catalog/src/simcore_service_catalog/exceptions/errors.py index c507e6574174..8729cb437f58 100644 --- a/services/catalog/src/simcore_service_catalog/exceptions/errors.py +++ 
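The try/except introduced above makes the registry-to-database sync resilient to a single broken service entry (see issue #6318 referenced in the comment): the error is logged and the loop continues instead of aborting the whole background task. A minimal sketch of that skip-and-continue pattern, with illustrative names rather than the catalog's actual helpers:

```python
import logging

_logger = logging.getLogger(__name__)


async def sync_services(services: dict[tuple[str, str], dict], process_one) -> None:
    # one faulty (key, version) entry is logged and skipped; the sync keeps going
    for (key, version), metadata in sorted(services.items(), key=lambda kv: kv[0]):
        try:
            await process_one(key, version, metadata)
        except (ValueError, RuntimeError) as err:
            # the real code narrows this to HTTPException, ValidationError, SQLAlchemyError
            _logger.warning("Skipping '%s:%s' due to %s", key, version, err)
```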
b/services/catalog/src/simcore_service_catalog/exceptions/errors.py @@ -16,5 +16,13 @@ class UninitializedGroupError(RepositoryError): msg_tempalte = "{group} groups was never initialized" -class DirectorUnresponsiveError(CatalogBaseError): +class BaseDirectorError(CatalogBaseError): + ... + + +class DirectorUnresponsiveError(BaseDirectorError): msg_template = "Director-v0 is not responsive" + + +class DirectorStatusError(BaseDirectorError): + ... diff --git a/services/catalog/src/simcore_service_catalog/services/access_rights.py b/services/catalog/src/simcore_service_catalog/services/access_rights.py index 8e1f485b0d7e..d35e7d4e5c0e 100644 --- a/services/catalog/src/simcore_service_catalog/services/access_rights.py +++ b/services/catalog/src/simcore_service_catalog/services/access_rights.py @@ -5,7 +5,7 @@ import logging import operator from collections.abc import Callable -from datetime import datetime, timezone +from datetime import UTC, datetime from typing import Any, cast from urllib.parse import quote_plus @@ -25,9 +25,7 @@ _logger = logging.getLogger(__name__) -_LEGACY_SERVICES_DATE: datetime = datetime( - year=2020, month=8, day=19, tzinfo=timezone.utc -) +_LEGACY_SERVICES_DATE: datetime = datetime(year=2020, month=8, day=19, tzinfo=UTC) def _is_frontend_service(service: ServiceMetaDataPublished) -> bool: @@ -46,9 +44,6 @@ async def _is_old_service(app: FastAPI, service: ServiceMetaDataPublished) -> bo ) if not data or "build_date" not in data: return True - - _logger.debug("retrieved service extras are %s", data) - service_build_data = arrow.get(data["build_date"]).datetime return bool(service_build_data < _LEGACY_SERVICES_DATE) @@ -63,6 +58,12 @@ async def evaluate_default_policy( 1. All services published in osparc prior 19.08.2020 will be visible to everyone (refered as 'old service'). 2. Services published after 19.08.2020 will be visible ONLY to his/her owner 3. Front-end services are have execute-access to everyone + + + Raises: + HTTPException: from calls to director's rest API. 
Maps director errors into catalog's server error + SQLAlchemyError: from access to pg database + ValidationError: from pydantic model errors """ db_engine: AsyncEngine = app.state.engine diff --git a/services/catalog/src/simcore_service_catalog/services/director.py b/services/catalog/src/simcore_service_catalog/services/director.py index 0b48a1f3856d..7c6925902f4a 100644 --- a/services/catalog/src/simcore_service_catalog/services/director.py +++ b/services/catalog/src/simcore_service_catalog/services/director.py @@ -156,8 +156,8 @@ async def setup_director(app: FastAPI) -> None: with log_context( _logger, logging.DEBUG, "Setup director at %s", f"{settings.base_url=}" ): - async for attempt in AsyncRetrying(**_director_startup_retry_policy): + client = DirectorApi(base_url=settings.base_url, app=app) with attempt: client = DirectorApi(base_url=settings.base_url, app=app) if not await client.is_responsive(): diff --git a/services/catalog/src/simcore_service_catalog/services/services_api.py b/services/catalog/src/simcore_service_catalog/services/services_api.py index 93a2886362ec..032909f4853c 100644 --- a/services/catalog/src/simcore_service_catalog/services/services_api.py +++ b/services/catalog/src/simcore_service_catalog/services/services_api.py @@ -97,18 +97,19 @@ async def list_services_paginated( product_name=product_name, user_id=user_id, limit=limit, offset=offset ) - # injects access-rights - access_rights: dict[ - tuple[str, str], list[ServiceAccessRightsAtDB] - ] = await repo.list_services_access_rights( - ((s.key, s.version) for s in services), product_name=product_name - ) - if not access_rights: - raise CatalogForbiddenError( - name="any service", - user_id=user_id, - product_name=product_name, + if services: + # injects access-rights + access_rights: dict[ + tuple[str, str], list[ServiceAccessRightsAtDB] + ] = await repo.list_services_access_rights( + ((s.key, s.version) for s in services), product_name=product_name ) + if not access_rights: + raise CatalogForbiddenError( + name="any service", + user_id=user_id, + product_name=product_name, + ) # get manifest of those with access rights got = await manifest.get_batch_services( diff --git a/services/catalog/tests/unit/with_dbs/conftest.py b/services/catalog/tests/unit/with_dbs/conftest.py index 1681622314f9..e31913ab9bbf 100644 --- a/services/catalog/tests/unit/with_dbs/conftest.py +++ b/services/catalog/tests/unit/with_dbs/conftest.py @@ -1,6 +1,7 @@ # pylint: disable=not-context-manager # pylint: disable=protected-access # pylint: disable=redefined-outer-name +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable @@ -109,7 +110,7 @@ async def product( """ # NOTE: this fixture ignores products' group-id but it is fine for this test context assert product["group_id"] is None - async with insert_and_get_row_lifespan( + async with insert_and_get_row_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup sqlalchemy_async_engine, table=products, values=product, @@ -149,7 +150,7 @@ async def user( injects a user in db """ assert user_id == user["id"] - async with insert_and_get_row_lifespan( + async with insert_and_get_row_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup sqlalchemy_async_engine, table=users, values=user, @@ -442,9 +443,9 @@ def _fake_factory( @pytest.fixture -def create_director_list_services_from() -> Callable[ - [list[dict[str, Any]], list], list[dict[str, Any]] -]: +def create_director_list_services_from() -> ( + 
Callable[[list[dict[str, Any]], list], list[dict[str, Any]]] +): """Convenience function to merge outputs of - `create_fake_service_data` callable with those of - `expected_director_list_services` fixture diff --git a/services/catalog/tests/unit/with_dbs/test_api_rest_services_specifications.py b/services/catalog/tests/unit/with_dbs/test_api_rest_services_specifications.py index f8515b572983..394ea9123ad3 100644 --- a/services/catalog/tests/unit/with_dbs/test_api_rest_services_specifications.py +++ b/services/catalog/tests/unit/with_dbs/test_api_rest_services_specifications.py @@ -1,7 +1,8 @@ # pylint: disable=redefined-outer-name +# pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable -# pylint: disable=too-many-arguments import asyncio diff --git a/services/catalog/tests/unit/with_dbs/test_api_rpc.py b/services/catalog/tests/unit/with_dbs/test_api_rpc.py index dfbf9c4adc84..3aeaaf4ef73a 100644 --- a/services/catalog/tests/unit/with_dbs/test_api_rpc.py +++ b/services/catalog/tests/unit/with_dbs/test_api_rpc.py @@ -1,7 +1,8 @@ # pylint: disable=redefined-outer-name +# pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable -# pylint: disable=too-many-arguments from collections.abc import AsyncIterator, Callable @@ -245,7 +246,7 @@ async def other_user( ) -> AsyncIterator[dict[str, Any]]: _user = random_user(fake=faker, id=user_id + 1) - async with insert_and_get_row_lifespan( + async with insert_and_get_row_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup sqlalchemy_async_engine, table=users, values=_user, diff --git a/services/catalog/tests/unit/with_dbs/test_core_background_task__sync.py b/services/catalog/tests/unit/with_dbs/test_core_background_task__sync.py index 0093c5dd70c3..a2927eefb418 100644 --- a/services/catalog/tests/unit/with_dbs/test_core_background_task__sync.py +++ b/services/catalog/tests/unit/with_dbs/test_core_background_task__sync.py @@ -9,7 +9,8 @@ from typing import Any import pytest -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException, status +from pytest_mock import MockerFixture from respx.router import MockRouter from simcore_postgres_database.models.services import services_meta_data from simcore_service_catalog.core.background_tasks import _run_sync_services @@ -40,6 +41,7 @@ async def cleanup_service_meta_data_db_content(sqlalchemy_async_engine: AsyncEng await conn.execute(services_meta_data.delete()) +@pytest.mark.parametrize("director_fails", [False, True]) async def test_registry_sync_task( background_tasks_setup_disabled: None, rabbitmq_and_rpc_setup_disabled: None, @@ -49,10 +51,20 @@ async def test_registry_sync_task( app: FastAPI, services_repo: ServicesRepository, cleanup_service_meta_data_db_content: None, + mocker: MockerFixture, + director_fails: bool, ): - assert app.state + if director_fails: + # Emulates issue https://github.com/ITISFoundation/osparc-simcore/issues/6318 + mocker.patch( + "simcore_service_catalog.services.access_rights._is_old_service", + side_effect=HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail="fake director error" + ), + ) + service_key = expected_director_list_services[0]["key"] service_version = expected_director_list_services[0]["version"] @@ -75,6 +87,10 @@ async def test_registry_sync_task( key=service_key, version=service_version, ) - assert got_from_db - assert 
got_from_db.key == service_key - assert got_from_db.version == service_version + + if director_fails: + assert not got_from_db + else: + assert got_from_db + assert got_from_db.key == service_key + assert got_from_db.version == service_version diff --git a/services/clusters-keeper/requirements/_base.txt b/services/clusters-keeper/requirements/_base.txt index 2b7f7c6b2341..448a09d5af5d 100644 --- a/services/clusters-keeper/requirements/_base.txt +++ b/services/clusters-keeper/requirements/_base.txt @@ -7,7 +7,10 @@ aioboto3==13.1.0 aiobotocore==2.13.1 # via aioboto3 aiocache==0.12.2 - # via -r requirements/../../../packages/aws-library/requirements/_base.in + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in aiodebug==2.3.0 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in @@ -60,6 +63,8 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi attrs==23.2.0 # via # aiohttp @@ -90,6 +95,9 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt @@ -107,6 +115,12 @@ dask==2024.5.1 # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # -r requirements/_base.in # distributed +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distributed==2024.5.1 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt @@ -134,6 +148,12 @@ fsspec==2024.5.0 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # dask +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -160,11 +180,13 @@ idna==3.7 # anyio # email-validator # httpx + # requests # yarl importlib-metadata==7.1.0 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # dask + # opentelemetry-api jinja2==3.1.4 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -214,6 +236,65 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions 
+opentelemetry-exporter-otlp==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.3 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -250,9 +331,15 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in -psutil==5.9.8 +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in # distributed pydantic==2.9.2 # via @@ -354,6 +441,12 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -367,6 +460,8 @@ rpds-py==0.18.1 # referencing s3transfer==0.10.1 # 
via boto3 +setuptools==74.0.0 + # via opentelemetry-instrumentation sh==2.0.6 # via -r requirements/../../../packages/aws-library/requirements/_base.in shellingham==1.5.4 @@ -445,6 +540,7 @@ typing-extensions==4.11.0 # aiodocker # fastapi # faststream + # opentelemetry-sdk # pydantic # pydantic-core # typer @@ -469,10 +565,14 @@ urllib3==2.2.1 # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # botocore # distributed + # requests uvicorn==0.29.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in wrapt==1.16.0 - # via aiobotocore + # via + # aiobotocore + # deprecated + # opentelemetry-instrumentation yarl==1.9.4 # via # aio-pika diff --git a/services/clusters-keeper/requirements/_test.txt b/services/clusters-keeper/requirements/_test.txt index 491a8027fd1a..a43369668cc5 100644 --- a/services/clusters-keeper/requirements/_test.txt +++ b/services/clusters-keeper/requirements/_test.txt @@ -54,12 +54,14 @@ certifi==2024.2.2 # httpcore # httpx # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography cfn-lint==1.10.3 # via moto charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -68,35 +70,35 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # joserfc # moto debugpy==1.8.5 # via -r requirements/_test.in -deepdiff==7.0.1 +deepdiff==8.0.1 # via -r requirements/_test.in docker==7.1.0 # via # -r requirements/_test.in # moto -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in -fakeredis==2.23.5 +fakeredis==2.24.1 # via -r requirements/_test.in flask==3.0.3 # via # flask-cors # moto -flask-cors==4.0.1 +flask-cors==5.0.0 # via moto frozenlist==1.4.1 # via # -c requirements/_base.txt # aiohttp # aiosignal -graphql-core==3.2.3 +graphql-core==3.2.4 # via moto h11==0.14.0 # via @@ -136,7 +138,7 @@ jmespath==1.0.1 # botocore joserfc==1.0.0 # via moto -jsondiff==2.2.0 +jsondiff==2.2.1 # via moto jsonpatch==1.33 # via cfn-lint @@ -166,7 +168,7 @@ markupsafe==2.1.5 # -c requirements/_base.txt # jinja2 # werkzeug -moto==5.0.13 +moto==5.0.15 # via -r requirements/_test.in mpmath==1.3.0 # via sympy @@ -181,7 +183,7 @@ openapi-schema-validator==0.6.2 # via openapi-spec-validator openapi-spec-validator==0.7.1 # via moto -ordered-set==4.1.0 +orderly-set==5.2.2 # via deepdiff packaging==24.0 # via @@ -195,11 +197,11 @@ pluggy==1.5.0 # via pytest ply==3.11 # via jsonpath-ng -psutil==5.9.8 +psutil==6.0.0 # via # -c requirements/_base.txt # -r requirements/_test.in -py-partiql-parser==0.5.5 +py-partiql-parser==0.5.6 # via moto pycparser==2.22 # via cffi @@ -212,9 +214,9 @@ pydantic-core==2.23.4 # via # -c requirements/_base.txt # pydantic -pyparsing==3.1.2 +pyparsing==3.1.4 # via moto -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -260,10 +262,11 @@ referencing==0.29.3 # jsonschema # jsonschema-path # jsonschema-specifications -regex==2024.7.24 +regex==2024.9.11 # via cfn-lint requests==2.32.3 # via + # -c requirements/_base.txt # docker # jsonschema-path # moto @@ -283,8 +286,10 @@ s3transfer==0.10.1 # via # -c requirements/_base.txt # boto3 -setuptools==73.0.1 - # via moto +setuptools==74.0.0 + # via + # -c requirements/_base.txt + # moto six==1.16.0 # via # -c requirements/_base.txt @@ -300,9 +305,9 @@ sortedcontainers==2.4.0 # via # -c requirements/_base.txt # fakeredis -sympy==1.13.2 +sympy==1.13.3 
# via cfn-lint -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in typing-extensions==4.11.0 # via @@ -320,7 +325,7 @@ urllib3==2.2.1 # docker # requests # responses -werkzeug==3.0.3 +werkzeug==3.0.4 # via # flask # moto diff --git a/services/clusters-keeper/requirements/_tools.txt b/services/clusters-keeper/requirements/_tools.txt index 7a2c3f9d91b9..97a49efc2ebd 100644 --- a/services/clusters-keeper/requirements/_tools.txt +++ b/services/clusters-keeper/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -48,14 +48,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,10 +68,11 @@ pyyaml==6.0.1 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==74.0.0 # via + # -c requirements/_base.txt # -c requirements/_test.txt # pip-tools tomlkit==0.13.2 @@ -81,9 +82,9 @@ typing-extensions==4.11.0 # -c requirements/_base.txt # -c requirements/_test.txt # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/api/health.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/api/health.py index ad2882da3c84..a971a551e4e0 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/api/health.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/api/health.py @@ -21,7 +21,7 @@ @router.get("/", include_in_schema=True, response_class=PlainTextResponse) async def health_check(): # NOTE: sync url in docker/healthcheck.py with this entrypoint! 
- return f"{__name__}.health_check@{datetime.datetime.now(datetime.timezone.utc).isoformat()}" + return f"{__name__}.health_check@{datetime.datetime.now(datetime.UTC).isoformat()}" class _ComponentStatus(BaseModel): @@ -33,6 +33,7 @@ class _StatusGet(BaseModel): rabbitmq: _ComponentStatus ec2: _ComponentStatus redis_client_sdk: _ComponentStatus + ssm: _ComponentStatus @router.get("/status", include_in_schema=True, response_model=_StatusGet) @@ -40,18 +41,26 @@ async def get_status(app: Annotated[FastAPI, Depends(get_app)]) -> _StatusGet: return _StatusGet( rabbitmq=_ComponentStatus( is_enabled=is_rabbitmq_enabled(app), - is_responsive=await get_rabbitmq_client(app).ping() - if is_rabbitmq_enabled(app) - else False, + is_responsive=( + await get_rabbitmq_client(app).ping() + if is_rabbitmq_enabled(app) + else False + ), ), ec2=_ComponentStatus( is_enabled=bool(app.state.ec2_client), - is_responsive=await app.state.ec2_client.ping() - if app.state.ec2_client - else False, + is_responsive=( + await app.state.ec2_client.ping() if app.state.ec2_client else False + ), ), redis_client_sdk=_ComponentStatus( is_enabled=bool(app.state.redis_client_sdk), is_responsive=await get_redis_client(app).ping(), ), + ssm=_ComponentStatus( + is_enabled=(app.state.ssm_client is not None), + is_responsive=( + await app.state.ssm_client.ping() if app.state.ssm_client else False + ), + ), ) diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/constants.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/constants.py new file mode 100644 index 000000000000..7f970665f25e --- /dev/null +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/constants.py @@ -0,0 +1,15 @@ +from typing import Final + +from aws_library.ec2._models import AWSTagKey, AWSTagValue +from pydantic import parse_obj_as + +DOCKER_STACK_DEPLOY_COMMAND_NAME: Final[str] = "private cluster docker deploy" +DOCKER_STACK_DEPLOY_COMMAND_EC2_TAG_KEY: Final[AWSTagKey] = parse_obj_as( + AWSTagKey, "io.simcore.clusters-keeper.private_cluster_docker_deploy" +) + +USER_ID_TAG_KEY: Final[AWSTagKey] = parse_obj_as(AWSTagKey, "user_id") +WALLET_ID_TAG_KEY: Final[AWSTagKey] = parse_obj_as(AWSTagKey, "wallet_id") +ROLE_TAG_KEY: Final[AWSTagKey] = parse_obj_as(AWSTagKey, "role") +WORKER_ROLE_TAG_VALUE: Final[AWSTagValue] = parse_obj_as(AWSTagValue, "worker") +MANAGER_ROLE_TAG_VALUE: Final[AWSTagValue] = parse_obj_as(AWSTagValue, "manager") diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/application.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/application.py index 2e528e1b30b8..ac3955a3f253 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/application.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/application.py @@ -4,6 +4,7 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from .._meta import ( API_VERSION, @@ -18,6 +19,7 @@ from ..modules.ec2 import setup as setup_ec2 from ..modules.rabbitmq import setup as setup_rabbitmq from ..modules.redis import setup as setup_redis +from ..modules.ssm import setup as setup_ssm from ..rpc.rpc_routes import setup_rpc_routes from .settings import ApplicationSettings @@ -42,12 +44,19 @@ def create_app(settings: ApplicationSettings) -> FastAPI: if app.state.settings.CLUSTERS_KEEPER_PROMETHEUS_INSTRUMENTATION_ENABLED: setup_prometheus_instrumentation(app) + if 
app.state.settings.CLUSTERS_KEEPER_TRACING: + setup_tracing( + app, + app.state.settings.CLUSTERS_KEEPER_TRACING, + APP_NAME, + ) # PLUGINS SETUP setup_api_routes(app) setup_rabbitmq(app) setup_rpc_routes(app) setup_ec2(app) + setup_ssm(app) setup_redis(app) setup_clusters_management(app) diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py index 106ca40b5a7e..01e51037c357 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py @@ -29,6 +29,8 @@ from settings_library.ec2 import EC2Settings from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings +from settings_library.ssm import SSMSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from types_aiobotocore_ec2.literals import InstanceTypeType @@ -53,6 +55,21 @@ class ClustersKeeperEC2Settings(EC2Settings): ) +class ClustersKeeperSSMSettings(SSMSettings): + class Config(SSMSettings.Config): + env_prefix = CLUSTERS_KEEPER_ENV_PREFIX + + schema_extra: ClassVar[dict[str, Any]] = { # type: ignore[misc] + "examples": [ + { + f"{CLUSTERS_KEEPER_ENV_PREFIX}{key}": var + for key, var in example.items() + } + for example in SSMSettings.Config.schema_extra["examples"] + ], + } + + class WorkersEC2InstancesSettings(BaseCustomSettings): WORKERS_EC2_INSTANCES_ALLOWED_TYPES: dict[str, EC2InstanceBootSpecific] = Field( ..., @@ -186,6 +203,13 @@ class PrimaryEC2InstancesSettings(BaseCustomSettings): "that take longer than this time will be terminated as sometimes it happens that EC2 machine fail on start.", ) + PRIMARY_EC2_INSTANCES_DOCKER_DEFAULT_ADDRESS_POOL: str = Field( + default="172.20.0.0/14", + description="defines the docker swarm default address pool in CIDR format " + "(see https://docs.docker.com/reference/cli/docker/swarm/init/)", + ) + + @field_validator("PRIMARY_EC2_INSTANCES_ALLOWED_TYPES") @classmethod def check_valid_instance_names( @@ -258,6 +282,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): json_schema_extra={"auto_default_from_env": True} ) + CLUSTERS_KEEPER_SSM_ACCESS: ClustersKeeperSSMSettings | None = Field( + auto_default_from_env=True + ) + CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES: PrimaryEC2InstancesSettings | None = Field( json_schema_extra={"auto_default_from_env": True} ) @@ -297,9 +325,11 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): "(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)", ) - CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION: NonNegativeInt = Field( - default=5, - description="Max number of missed heartbeats before a cluster is terminated", + CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION: NonNegativeInt = ( + Field( + default=5, + description="Max number of missed heartbeats before a cluster is terminated", + ) ) CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG: str = Field( @@ -324,6 +354,9 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): description="override the dask scheduler 'worker-saturation' field" ", see https://selectfrom.dev/deep-dive-into-dask-distributed-scheduler-9fdb3b36b7c7", ) + CLUSTERS_KEEPER_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, 
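The ClustersKeeperSSMSettings override above reuses the shared SSMSettings but reads its environment variables under the CLUSTERS_KEEPER_ prefix. A stand-alone sketch of that env-prefix mechanism, written here with pydantic-settings purely for illustration (the real base classes come from osparc's settings_library and may use a different pydantic flavor):

```python
from pydantic import SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict


class SSMSettingsSketch(BaseSettings):
    SSM_ACCESS_KEY_ID: str
    SSM_SECRET_ACCESS_KEY: SecretStr


class ClustersKeeperSSMSettingsSketch(SSMSettingsSketch):
    # same fields, but resolved from CLUSTERS_KEEPER_SSM_* variables
    model_config = SettingsConfigDict(env_prefix="CLUSTERS_KEEPER_")
```

Instantiating ClustersKeeperSSMSettingsSketch() would then expect e.g. CLUSTERS_KEEPER_SSM_ACCESS_KEY_ID, which matches the variables the clusters-keeper tests inject in their conftest.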
description="settings for opentelemetry tracing" + ) SWARM_STACK_NAME: str = Field( ..., description="Stack name defined upon deploy (see main Makefile)" diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters.py index 38246f3008a0..89860549fd3a 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters.py @@ -49,7 +49,7 @@ async def _get_primary_ec2_params( ec2_instance_types: list[ EC2InstanceType ] = await ec2_client.get_ec2_instance_capabilities( - instance_type_names=[ec2_type_name] + instance_type_names={ec2_type_name} ) assert ec2_instance_types # nosec assert len(ec2_instance_types) == 1 # nosec @@ -72,15 +72,7 @@ async def create_cluster( tags=creation_ec2_tags(app_settings, user_id=user_id, wallet_id=wallet_id), startup_script=create_startup_script( app_settings, - cluster_machines_name_prefix=get_cluster_name( - app_settings, user_id=user_id, wallet_id=wallet_id, is_manager=False - ), ec2_boot_specific=ec2_instance_boot_specs, - additional_custom_tags={ - AWSTagKey("user_id"): AWSTagValue(f"{user_id}"), - AWSTagKey("wallet_id"): AWSTagValue(f"{wallet_id}"), - AWSTagKey("role"): AWSTagValue("worker"), - }, ), ami_id=ec2_instance_boot_specs.ami_id, key_name=app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_KEY_NAME, diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_core.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_core.py index 799b724cdcf9..f3ebe712b9a3 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_core.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_core.py @@ -5,12 +5,22 @@ import arrow from aws_library.ec2 import AWSTagKey, EC2InstanceData +from aws_library.ec2._models import AWSTagValue from fastapi import FastAPI from models_library.users import UserID from models_library.wallets import WalletID from pydantic import TypeAdapter from servicelib.logging_utils import log_catch - +from servicelib.utils import limited_gather + +from ..constants import ( + DOCKER_STACK_DEPLOY_COMMAND_EC2_TAG_KEY, + DOCKER_STACK_DEPLOY_COMMAND_NAME, + ROLE_TAG_KEY, + USER_ID_TAG_KEY, + WALLET_ID_TAG_KEY, + WORKER_ROLE_TAG_VALUE, +) from ..core.settings import get_application_settings from ..modules.clusters import ( delete_clusters, @@ -18,9 +28,17 @@ get_cluster_workers, set_instance_heartbeat, ) +from ..utils.clusters import create_deploy_cluster_stack_script from ..utils.dask import get_scheduler_auth, get_scheduler_url -from ..utils.ec2 import HEARTBEAT_TAG_KEY +from ..utils.ec2 import ( + HEARTBEAT_TAG_KEY, + get_cluster_name, + user_id_from_instance_tags, + wallet_id_from_instance_tags, +) from .dask import is_scheduler_busy, ping_scheduler +from .ec2 import get_ec2_client +from .ssm import get_ssm_client _logger = logging.getLogger(__name__) @@ -44,8 +62,8 @@ def _get_instance_last_heartbeat(instance: EC2InstanceData) -> datetime.datetime async def _get_all_associated_worker_instances( app: FastAPI, primary_instances: Iterable[EC2InstanceData], -) -> list[EC2InstanceData]: - worker_instances = [] +) -> set[EC2InstanceData]: + worker_instances: set[EC2InstanceData] = set() for instance in primary_instances: assert "user_id" in 
instance.tags # nosec user_id = UserID(instance.tags[_USER_ID_TAG_KEY]) @@ -57,7 +75,7 @@ async def _get_all_associated_worker_instances( else None ) - worker_instances.extend( + worker_instances.update( await get_cluster_workers(app, user_id=user_id, wallet_id=wallet_id) ) return worker_instances @@ -65,12 +83,12 @@ async def _get_all_associated_worker_instances( async def _find_terminateable_instances( app: FastAPI, instances: Iterable[EC2InstanceData] -) -> list[EC2InstanceData]: +) -> set[EC2InstanceData]: app_settings = get_application_settings(app) assert app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES # nosec # get the corresponding ec2 instance data - terminateable_instances: list[EC2InstanceData] = [] + terminateable_instances: set[EC2InstanceData] = set() time_to_wait_before_termination = ( app_settings.CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION @@ -84,7 +102,7 @@ async def _find_terminateable_instances( elapsed_time_since_heartbeat = arrow.utcnow().datetime - last_heartbeat allowed_time_to_wait = time_to_wait_before_termination if elapsed_time_since_heartbeat >= allowed_time_to_wait: - terminateable_instances.append(instance) + terminateable_instances.add(instance) else: _logger.info( "%s has still %ss before being terminateable", @@ -95,14 +113,14 @@ async def _find_terminateable_instances( elapsed_time_since_startup = arrow.utcnow().datetime - instance.launch_time allowed_time_to_wait = startup_delay if elapsed_time_since_startup >= allowed_time_to_wait: - terminateable_instances.append(instance) + terminateable_instances.add(instance) # get all terminateable instances associated worker instances worker_instances = await _get_all_associated_worker_instances( app, terminateable_instances ) - return terminateable_instances + worker_instances + return terminateable_instances.union(worker_instances) async def check_clusters(app: FastAPI) -> None: @@ -114,6 +132,7 @@ async def check_clusters(app: FastAPI) -> None: if await ping_scheduler(get_scheduler_url(instance), get_scheduler_auth(app)) } + # set intance heartbeat if scheduler is busy for instance in connected_intances: with log_catch(_logger, reraise=False): # NOTE: some connected instance could in theory break between these 2 calls, therefore this is silenced and will @@ -126,6 +145,7 @@ async def check_clusters(app: FastAPI) -> None: f"{instance.id=} for {instance.tags=}", ) await set_instance_heartbeat(app, instance=instance) + # clean any cluster that is not doing anything if terminateable_instances := await _find_terminateable_instances( app, connected_intances ): @@ -140,7 +160,7 @@ async def check_clusters(app: FastAPI) -> None: for instance in disconnected_instances if _get_instance_last_heartbeat(instance) is None } - + # remove instances that were starting for too long if terminateable_instances := await _find_terminateable_instances( app, starting_instances ): @@ -151,7 +171,72 @@ async def check_clusters(app: FastAPI) -> None: ) await delete_clusters(app, instances=terminateable_instances) - # the other instances are broken (they were at some point connected but now not anymore) + # NOTE: transmit command to start docker swarm/stack if needed + # once the instance is connected to the SSM server, + # use ssm client to send the command to these instances, + # we send a command that contain: + # the docker-compose file in binary, + # the call to init the docker swarm and the call to deploy the stack + instances_in_need_of_deployment = { + i + for i in starting_instances - 
terminateable_instances + if DOCKER_STACK_DEPLOY_COMMAND_EC2_TAG_KEY not in i.tags + } + + if instances_in_need_of_deployment: + app_settings = get_application_settings(app) + ssm_client = get_ssm_client(app) + ec2_client = get_ec2_client(app) + instances_in_need_of_deployment_ssm_connection_state = await limited_gather( + *[ + ssm_client.is_instance_connected_to_ssm_server(i.id) + for i in instances_in_need_of_deployment + ], + reraise=False, + log=_logger, + limit=20, + ) + ec2_connected_to_ssm_server = [ + i + for i, c in zip( + instances_in_need_of_deployment, + instances_in_need_of_deployment_ssm_connection_state, + strict=True, + ) + if c is True + ] + started_instances_ready_for_command = ec2_connected_to_ssm_server + if started_instances_ready_for_command: + # we need to send 1 command per machine here, as the user_id/wallet_id changes + for i in started_instances_ready_for_command: + ssm_command = await ssm_client.send_command( + [i.id], + command=create_deploy_cluster_stack_script( + app_settings, + cluster_machines_name_prefix=get_cluster_name( + app_settings, + user_id=user_id_from_instance_tags(i.tags), + wallet_id=wallet_id_from_instance_tags(i.tags), + is_manager=False, + ), + additional_custom_tags={ + USER_ID_TAG_KEY: i.tags[USER_ID_TAG_KEY], + WALLET_ID_TAG_KEY: i.tags[WALLET_ID_TAG_KEY], + ROLE_TAG_KEY: WORKER_ROLE_TAG_VALUE, + }, + ), + command_name=DOCKER_STACK_DEPLOY_COMMAND_NAME, + ) + await ec2_client.set_instances_tags( + started_instances_ready_for_command, + tags={ + DOCKER_STACK_DEPLOY_COMMAND_EC2_TAG_KEY: AWSTagValue( + ssm_command.command_id + ), + }, + ) + + # the remaining instances are broken (they were at some point connected but now not anymore) broken_instances = disconnected_instances - starting_instances if terminateable_instances := await _find_terminateable_instances( app, broken_instances diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_task.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_task.py index 806cb6d472c1..410edba1efbc 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_task.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/clusters_management_task.py @@ -47,6 +47,7 @@ def setup(app: FastAPI): for s in [ app_settings.CLUSTERS_KEEPER_EC2_ACCESS, app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES, + app_settings.CLUSTERS_KEEPER_SSM_ACCESS, ] ): logger.warning( diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/ssm.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/ssm.py new file mode 100644 index 000000000000..218812d55232 --- /dev/null +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/modules/ssm.py @@ -0,0 +1,56 @@ +import logging +from typing import cast + +from aws_library.ssm import SimcoreSSMAPI +from aws_library.ssm._errors import SSMNotConnectedError +from fastapi import FastAPI +from settings_library.ssm import SSMSettings +from tenacity.asyncio import AsyncRetrying +from tenacity.before_sleep import before_sleep_log +from tenacity.stop import stop_after_delay +from tenacity.wait import wait_random_exponential + +from ..core.errors import ConfigurationError +from ..core.settings import get_application_settings + +_logger = logging.getLogger(__name__) + + +def setup(app: FastAPI) -> None: + async def on_startup() -> None: + app.state.ssm_client = None + settings: SSMSettings | None = 
get_application_settings(
+            app
+        ).CLUSTERS_KEEPER_SSM_ACCESS
+
+        if not settings:
+            _logger.warning("SSM client is de-activated in the settings")
+            return
+
+        app.state.ssm_client = client = await SimcoreSSMAPI.create(settings)
+
+        async for attempt in AsyncRetrying(
+            reraise=True,
+            stop=stop_after_delay(120),
+            wait=wait_random_exponential(max=30),
+            before_sleep=before_sleep_log(_logger, logging.WARNING),
+        ):
+            with attempt:
+                connected = await client.ping()
+                if not connected:
+                    raise SSMNotConnectedError  # pragma: no cover
+
+    async def on_shutdown() -> None:
+        if app.state.ssm_client:
+            await cast(SimcoreSSMAPI, app.state.ssm_client).close()
+
+    app.add_event_handler("startup", on_startup)
+    app.add_event_handler("shutdown", on_shutdown)
+
+
+def get_ssm_client(app: FastAPI) -> SimcoreSSMAPI:
+    if not app.state.ssm_client:
+        raise ConfigurationError(
+            msg="SSM client is not available. Please check the configuration."
+        )
+    return cast(SimcoreSSMAPI, app.state.ssm_client)
diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py
index a2c037075cbb..d91a6b3df78f 100644
--- a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py
+++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py
@@ -8,6 +8,7 @@
 import arrow
 import yaml
 from aws_library.ec2 import EC2InstanceBootSpecific, EC2InstanceData, EC2Tags
+from aws_library.ec2._models import CommandStr
 from fastapi.encoders import jsonable_encoder
 from models_library.api_schemas_clusters_keeper.clusters import (
     ClusterState,
@@ -107,35 +108,43 @@ def _convert_to_env_dict(entries: dict[str, Any]) -> str:
 def create_startup_script(
     app_settings: ApplicationSettings,
     *,
-    cluster_machines_name_prefix: str,
     ec2_boot_specific: EC2InstanceBootSpecific,
-    additional_custom_tags: EC2Tags,
 ) -> str:
     assert app_settings.CLUSTERS_KEEPER_EC2_ACCESS  # nosec
     assert app_settings.CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES  # nosec
 
-    environment_variables = _prepare_environment_variables(
-        app_settings,
-        cluster_machines_name_prefix=cluster_machines_name_prefix,
-        additional_custom_tags=additional_custom_tags,
-    )
-
     startup_commands = ec2_boot_specific.custom_boot_scripts.copy()
+    return "\n".join(startup_commands)
+
+
+def create_deploy_cluster_stack_script(
+    app_settings: ApplicationSettings,
+    *,
+    cluster_machines_name_prefix: str,
+    additional_custom_tags: EC2Tags,
+) -> str:
+    deploy_script: list[CommandStr] = []
     assert app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES  # nosec
     if isinstance(
         app_settings.CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH,
         TLSAuthentication,
     ):
-
+        # get the dask certificates
         download_certificates_commands = [
             f"mkdir --parents {_HOST_CERTIFICATES_BASE_PATH}",
             f'aws ssm get-parameter --name "{app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_SSM_TLS_DASK_CA}" --region us-east-1 --with-decryption --query "Parameter.Value" --output text > {_HOST_TLS_CA_FILE_PATH}',
             f'aws ssm get-parameter --name "{app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_SSM_TLS_DASK_CERT}" --region us-east-1 --with-decryption --query "Parameter.Value" --output text > {_HOST_TLS_CERT_FILE_PATH}',
             f'aws ssm get-parameter --name "{app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_SSM_TLS_DASK_KEY}" --region us-east-1 --with-decryption --query "Parameter.Value" --output text > {_HOST_TLS_KEY_FILE_PATH}',
         ]
-        startup_commands.extend(download_certificates_commands)
+        deploy_script.extend(download_certificates_commands)
+
+    environment_variables = _prepare_environment_variables(
+        app_settings,
+        cluster_machines_name_prefix=cluster_machines_name_prefix,
+        additional_custom_tags=additional_custom_tags,
+    )
 
-    startup_commands.extend(
+    deploy_script.extend(
         [
             # NOTE: https://stackoverflow.com/questions/41203492/solving-redis-warnings-on-overcommit-memory-and-transparent-huge-pages-for-ubunt
             "sysctl vm.overcommit_memory=1",
@@ -143,11 +152,11 @@ def create_startup_script(
             f"echo '{_prometheus_yml_base64_encoded()}' | base64 -d > {_HOST_PROMETHEUS_PATH}",
             f"echo '{_prometheus_basic_auth_yml_base64_encoded(app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_PROMETHEUS_USERNAME, app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_PROMETHEUS_PASSWORD.get_secret_value())}' | base64 -d > {_HOST_PROMETHEUS_WEB_PATH}",
             # NOTE: --default-addr-pool is necessary in order to prevent conflicts with AWS node IPs
-            "docker swarm init --default-addr-pool 172.20.0.0/14",
+            f"docker swarm init --default-addr-pool {app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_DOCKER_DEFAULT_ADDRESS_POOL}",
             f"{' '.join(environment_variables)} docker stack deploy --with-registry-auth --compose-file={_HOST_DOCKER_COMPOSE_PATH} dask_stack",
         ]
     )
-    return "\n".join(startup_commands)
+    return "\n".join(deploy_script)
 
 
 def _convert_ec2_state_to_cluster_state(
diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/ec2.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/ec2.py
index a19dac17d322..1d4534ff0258 100644
--- a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/ec2.py
+++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/ec2.py
@@ -7,6 +7,12 @@
 from pydantic import TypeAdapter
 
 from .._meta import VERSION
+from ..constants import (
+    MANAGER_ROLE_TAG_VALUE,
+    ROLE_TAG_KEY,
+    USER_ID_TAG_KEY,
+    WALLET_ID_TAG_KEY,
+)
 from ..core.settings import ApplicationSettings
 
 _APPLICATION_TAG_KEY: Final[str] = "io.simcore.clusters-keeper"
@@ -52,9 +58,9 @@ def creation_ec2_tags(
                    app_settings, user_id=user_id, wallet_id=wallet_id, is_manager=True
                )
            ),
-            AWSTagKey("user_id"): AWSTagValue(f"{user_id}"),
-            AWSTagKey("wallet_id"): AWSTagValue(f"{wallet_id}"),
-            AWSTagKey("role"): AWSTagValue("manager"),
+            USER_ID_TAG_KEY: AWSTagValue(f"{user_id}"),
+            WALLET_ID_TAG_KEY: AWSTagValue(f"{wallet_id}"),
+            ROLE_TAG_KEY: MANAGER_ROLE_TAG_VALUE,
        }
        | app_settings.CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES.PRIMARY_EC2_INSTANCES_CUSTOM_TAGS
    )
@@ -69,8 +75,8 @@ def ec2_instances_for_user_wallet_filter(
 ) -> EC2Tags:
     return (
         _minimal_identification_tag(app_settings)
-        | {AWSTagKey("user_id"): AWSTagValue(f"{user_id}")}
-        | {AWSTagKey("wallet_id"): AWSTagValue(f"{wallet_id}")}
+        | {USER_ID_TAG_KEY: AWSTagValue(f"{user_id}")}
+        | {WALLET_ID_TAG_KEY: AWSTagValue(f"{wallet_id}")}
     )
 
 
@@ -83,3 +89,14 @@ def compose_user_data(bash_command: str) -> str:
 echo "completed user data bash script"
 """
     )
+
+
+def wallet_id_from_instance_tags(tags: EC2Tags) -> WalletID | None:
+    wallet_id_str = tags[WALLET_ID_TAG_KEY]
+    if wallet_id_str == "None":
+        return None
+    return WalletID(wallet_id_str)
+
+
+def user_id_from_instance_tags(tags: EC2Tags) -> UserID:
+    return UserID(tags[USER_ID_TAG_KEY])
diff --git a/services/clusters-keeper/tests/unit/conftest.py b/services/clusters-keeper/tests/unit/conftest.py
index 3f5a72cca06d..14c5e0d93c1e
100644 --- a/services/clusters-keeper/tests/unit/conftest.py +++ b/services/clusters-keeper/tests/unit/conftest.py @@ -22,11 +22,13 @@ from fastapi import FastAPI from models_library.users import UserID from models_library.wallets import WalletID +from pydantic import SecretStr from pytest_mock.plugin import MockerFixture from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict from servicelib.rabbitmq import RabbitMQRPCClient from settings_library.ec2 import EC2Settings from settings_library.rabbit import RabbitSettings +from settings_library.ssm import SSMSettings from simcore_service_clusters_keeper.core.application import create_app from simcore_service_clusters_keeper.core.settings import ( CLUSTERS_KEEPER_ENV_PREFIX, @@ -86,6 +88,21 @@ def mocked_ec2_server_envs( return setenvs_from_dict(monkeypatch, changed_envs) +@pytest.fixture +def mocked_ssm_server_envs( + mocked_ssm_server_settings: SSMSettings, + monkeypatch: pytest.MonkeyPatch, +) -> EnvVarsDict: + # NOTE: overrides the SSMSettings with what clusters-keeper expects + changed_envs: EnvVarsDict = { + f"{CLUSTERS_KEEPER_ENV_PREFIX}{k}": ( + v.get_secret_value() if isinstance(v, SecretStr) else v + ) + for k, v in mocked_ssm_server_settings.dict().items() + } + return setenvs_from_dict(monkeypatch, changed_envs) + + @pytest.fixture def ec2_settings(mocked_ec2_server_settings: EC2Settings) -> EC2Settings: return mocked_ec2_server_settings @@ -105,6 +122,9 @@ def app_environment( "CLUSTERS_KEEPER_EC2_ACCESS": "{}", "CLUSTERS_KEEPER_EC2_ACCESS_KEY_ID": faker.pystr(), "CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY": faker.pystr(), + "CLUSTERS_KEEPER_SSM_ACCESS": "{}", + "CLUSTERS_KEEPER_SSM_ACCESS_KEY_ID": faker.pystr(), + "CLUSTERS_KEEPER_SSM_SECRET_ACCESS_KEY": faker.pystr(), "CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES": "{}", "CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX": faker.pystr(), "CLUSTERS_KEEPER_DASK_NTHREADS": f"{faker.pyint(min_value=0)}", @@ -210,6 +230,11 @@ def disabled_ec2(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("CLUSTERS_KEEPER_EC2_ACCESS", "null") +@pytest.fixture +def disabled_ssm(app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("CLUSTERS_KEEPER_SSM_ACCESS", "null") + + @pytest.fixture def enabled_rabbitmq( app_environment: EnvVarsDict, rabbit_service: RabbitSettings diff --git a/services/clusters-keeper/tests/unit/test_api_health.py b/services/clusters-keeper/tests/unit/test_api_health.py index a2a14bcf72c0..e1a5de4c6ced 100644 --- a/services/clusters-keeper/tests/unit/test_api_health.py +++ b/services/clusters-keeper/tests/unit/test_api_health.py @@ -21,6 +21,7 @@ def app_environment( app_environment: EnvVarsDict, enabled_rabbitmq: None, mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, mocked_redis_server: None, ) -> EnvVarsDict: return app_environment @@ -69,6 +70,9 @@ async def test_status( assert status_response.ec2.is_enabled is True assert status_response.ec2.is_responsive is False + assert status_response.ssm.is_enabled is True + assert status_response.ssm.is_responsive is False + # restart the server mocked_aws_server.start() @@ -83,3 +87,6 @@ async def test_status( assert status_response.ec2.is_enabled is True assert status_response.ec2.is_responsive is True + + assert status_response.ssm.is_enabled is True + assert status_response.ssm.is_responsive is True diff --git a/services/clusters-keeper/tests/unit/test_modules_clusters.py b/services/clusters-keeper/tests/unit/test_modules_clusters.py 
index 16cfbde04b2c..497b9e447e74 100644 --- a/services/clusters-keeper/tests/unit/test_modules_clusters.py +++ b/services/clusters-keeper/tests/unit/test_modules_clusters.py @@ -49,6 +49,7 @@ def _base_configuration( mocked_redis_server: None, mocked_ec2_server_envs: EnvVarsDict, mocked_primary_ec2_instances_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, ) -> None: ... diff --git a/services/clusters-keeper/tests/unit/test_modules_clusters_management_core.py b/services/clusters-keeper/tests/unit/test_modules_clusters_management_core.py index 09720632fd4b..438e69ee72eb 100644 --- a/services/clusters-keeper/tests/unit/test_modules_clusters_management_core.py +++ b/services/clusters-keeper/tests/unit/test_modules_clusters_management_core.py @@ -60,6 +60,7 @@ def _base_configuration( mocked_redis_server: None, mocked_ec2_server_envs: EnvVarsDict, mocked_primary_ec2_instances_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, ) -> None: ... diff --git a/services/clusters-keeper/tests/unit/test_modules_clusters_management_task.py b/services/clusters-keeper/tests/unit/test_modules_clusters_management_task.py index 0c9c52eab4c3..d22bdce1f765 100644 --- a/services/clusters-keeper/tests/unit/test_modules_clusters_management_task.py +++ b/services/clusters-keeper/tests/unit/test_modules_clusters_management_task.py @@ -37,6 +37,7 @@ def mock_background_task(mocker: MockerFixture) -> mock.Mock: async def test_clusters_management_task_created_and_deleted( disabled_rabbitmq: None, mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, mocked_redis_server: None, mock_background_task: mock.Mock, initialized_app: FastAPI, diff --git a/services/clusters-keeper/tests/unit/test_modules_ec2.py b/services/clusters-keeper/tests/unit/test_modules_ec2.py index 0820ada58183..439e54aaa2d9 100644 --- a/services/clusters-keeper/tests/unit/test_modules_ec2.py +++ b/services/clusters-keeper/tests/unit/test_modules_ec2.py @@ -5,13 +5,16 @@ import pytest from fastapi import FastAPI +from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict from simcore_service_clusters_keeper.core.errors import ConfigurationError from simcore_service_clusters_keeper.modules.ec2 import get_ec2_client +from simcore_service_clusters_keeper.modules.ssm import get_ssm_client -async def test_ec2_does_not_initialize_if_deactivated( +async def test_ec2_does_not_initialize_if_ec2_deactivated( disabled_rabbitmq: None, disabled_ec2: None, + mocked_ssm_server_envs: EnvVarsDict, mocked_redis_server: None, initialized_app: FastAPI, ): @@ -19,3 +22,5 @@ async def test_ec2_does_not_initialize_if_deactivated( assert initialized_app.state.ec2_client is None with pytest.raises(ConfigurationError): get_ec2_client(initialized_app) + + assert get_ssm_client(initialized_app) diff --git a/services/clusters-keeper/tests/unit/test_modules_rabbitmq.py b/services/clusters-keeper/tests/unit/test_modules_rabbitmq.py index 110d4fe48271..e1ef5f850dc5 100644 --- a/services/clusters-keeper/tests/unit/test_modules_rabbitmq.py +++ b/services/clusters-keeper/tests/unit/test_modules_rabbitmq.py @@ -43,8 +43,8 @@ def rabbit_log_message(faker: Faker) -> LoggerRabbitMessage: return LoggerRabbitMessage( user_id=faker.pyint(min_value=1), - project_id=faker.uuid4(), - node_id=faker.uuid4(), + project_id=faker.uuid4(cast_to=None), + node_id=faker.uuid4(cast_to=None), messages=faker.pylist(allowed_types=(str,)), ) @@ -62,6 +62,7 @@ def rabbit_message( def test_rabbitmq_does_not_initialize_if_deactivated( disabled_rabbitmq: None, 
disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, ): @@ -78,6 +79,7 @@ def test_rabbitmq_does_not_initialize_if_deactivated( def test_rabbitmq_initializes( enabled_rabbitmq: RabbitSettings, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, ): @@ -95,6 +97,7 @@ def test_rabbitmq_initializes( async def test_post_message( enabled_rabbitmq: RabbitSettings, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, rabbit_message: RabbitMessageBase, @@ -124,6 +127,7 @@ async def test_post_message( async def test_post_message_with_disabled_rabbit_does_not_raise( disabled_rabbitmq: None, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, rabbit_message: RabbitMessageBase, @@ -135,6 +139,7 @@ async def test_post_message_when_rabbit_disconnected_does_not_raise( paused_container: Callable[[str], AbstractAsyncContextManager[None]], enabled_rabbitmq: RabbitSettings, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, rabbit_log_message: LoggerRabbitMessage, diff --git a/services/clusters-keeper/tests/unit/test_modules_redis.py b/services/clusters-keeper/tests/unit/test_modules_redis.py index f6b760f27fbc..44fb9a9f6ace 100644 --- a/services/clusters-keeper/tests/unit/test_modules_redis.py +++ b/services/clusters-keeper/tests/unit/test_modules_redis.py @@ -10,6 +10,7 @@ async def test_redis_raises_if_missing( disabled_rabbitmq: None, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, ): diff --git a/services/clusters-keeper/tests/unit/test_modules_remote_debug.py b/services/clusters-keeper/tests/unit/test_modules_remote_debug.py index dbb5a91922e1..3fe8b823d136 100644 --- a/services/clusters-keeper/tests/unit/test_modules_remote_debug.py +++ b/services/clusters-keeper/tests/unit/test_modules_remote_debug.py @@ -23,6 +23,7 @@ def app_environment( def test_application_with_debug_enabled( disabled_rabbitmq: None, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, initialized_app: FastAPI, ): diff --git a/services/clusters-keeper/tests/unit/test_modules_ssm.py b/services/clusters-keeper/tests/unit/test_modules_ssm.py new file mode 100644 index 000000000000..3bcffb72661b --- /dev/null +++ b/services/clusters-keeper/tests/unit/test_modules_ssm.py @@ -0,0 +1,22 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable + + +import pytest +from fastapi import FastAPI +from simcore_service_clusters_keeper.core.errors import ConfigurationError +from simcore_service_clusters_keeper.modules.ssm import get_ssm_client + + +async def test_ssm_does_not_initialize_if_ssm_deactivated( + disabled_rabbitmq: None, + disabled_ec2: None, + disabled_ssm: None, + mocked_redis_server: None, + initialized_app: FastAPI, +): + assert hasattr(initialized_app.state, "ssm_client") + assert initialized_app.state.ssm_client is None + with pytest.raises(ConfigurationError): + get_ssm_client(initialized_app) diff --git a/services/clusters-keeper/tests/unit/test_rpc_clusters.py b/services/clusters-keeper/tests/unit/test_rpc_clusters.py index 41146c827bd7..a280cbb5338f 100644 --- a/services/clusters-keeper/tests/unit/test_rpc_clusters.py +++ b/services/clusters-keeper/tests/unit/test_rpc_clusters.py @@ -43,6 +43,7 @@ def _base_configuration( mocked_redis_server: None, mocked_ec2_server_envs: EnvVarsDict, 
mocked_primary_ec2_instances_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, initialized_app: FastAPI, ensure_run_in_sequence_context_is_empty: None, ) -> None: diff --git a/services/clusters-keeper/tests/unit/test_rpc_ec2_instances.py b/services/clusters-keeper/tests/unit/test_rpc_ec2_instances.py index d03b6b74502d..f4eea132cdf8 100644 --- a/services/clusters-keeper/tests/unit/test_rpc_ec2_instances.py +++ b/services/clusters-keeper/tests/unit/test_rpc_ec2_instances.py @@ -24,6 +24,7 @@ def _base_configuration( enabled_rabbitmq: None, mocked_redis_server: None, mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, initialized_app: FastAPI, ) -> None: ... diff --git a/services/clusters-keeper/tests/unit/test_utils_clusters.py b/services/clusters-keeper/tests/unit/test_utils_clusters.py index 7191b1a23f98..20cde00c0476 100644 --- a/services/clusters-keeper/tests/unit/test_utils_clusters.py +++ b/services/clusters-keeper/tests/unit/test_utils_clusters.py @@ -29,6 +29,7 @@ from simcore_service_clusters_keeper.utils.clusters import ( _prepare_environment_variables, create_cluster_from_ec2_instance, + create_deploy_cluster_stack_script, create_startup_script, ) from types_aiobotocore_ec2.literals import InstanceStateNameType @@ -51,16 +52,26 @@ def ec2_boot_specs(app_settings: ApplicationSettings) -> EC2InstanceBootSpecific return ec2_boot_specs +@pytest.fixture(params=[TLSAuthentication, NoAuthentication]) +def backend_cluster_auth( + request: pytest.FixtureRequest, +) -> InternalClusterAuthentication: + return request.param + + @pytest.fixture def app_environment( app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch, + backend_cluster_auth: InternalClusterAuthentication, ) -> EnvVarsDict: return app_environment | setenvs_from_dict( monkeypatch, { "CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH": json_dumps( TLSAuthentication.model_config["json_schema_extra"]["examples"][0] + if isinstance(backend_cluster_auth, TLSAuthentication) + else NoAuthentication.model_config["json_schema_extra"]["examples"][0] ) }, ) @@ -69,38 +80,52 @@ def app_environment( def test_create_startup_script( disabled_rabbitmq: None, mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, mocked_redis_server: None, app_settings: ApplicationSettings, - cluster_machines_name_prefix: str, - clusters_keeper_docker_compose: dict[str, Any], ec2_boot_specs: EC2InstanceBootSpecific, ): - additional_custom_tags = { - AWSTagKey("pytest-tag-key"): AWSTagValue("pytest-tag-value") - } startup_script = create_startup_script( app_settings, - cluster_machines_name_prefix=cluster_machines_name_prefix, ec2_boot_specific=ec2_boot_specs, - additional_custom_tags=additional_custom_tags, ) assert isinstance(startup_script, str) assert len(ec2_boot_specs.custom_boot_scripts) > 0 for boot_script in ec2_boot_specs.custom_boot_scripts: assert boot_script in startup_script + + +def test_create_deploy_cluster_stack_script( + disabled_rabbitmq: None, + mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, + mocked_redis_server: None, + app_settings: ApplicationSettings, + cluster_machines_name_prefix: str, + clusters_keeper_docker_compose: dict[str, Any], +): + additional_custom_tags = { + AWSTagKey("pytest-tag-key"): AWSTagValue("pytest-tag-value") + } + deploy_script = create_deploy_cluster_stack_script( + app_settings, + cluster_machines_name_prefix=cluster_machines_name_prefix, + additional_custom_tags=additional_custom_tags, + ) + assert 
isinstance(deploy_script, str) # we have commands to pipe into a docker-compose file - assert " | base64 -d > /docker-compose.yml" in startup_script + assert " | base64 -d > /docker-compose.yml" in deploy_script # we have commands to init a docker-swarm - assert "docker swarm init" in startup_script + assert "docker swarm init --default-addr-pool" in deploy_script # we have commands to deploy a stack assert ( "docker stack deploy --with-registry-auth --compose-file=/docker-compose.yml dask_stack" - in startup_script + in deploy_script ) # before that we have commands that setup ENV variables, let's check we have all of them as defined in the docker-compose # let's get what was set in the startup script and compare with the expected one of the docker-compose startup_script_envs_definition = ( - startup_script.splitlines()[-1].split("docker stack deploy")[0].strip() + deploy_script.splitlines()[-1].split("docker stack deploy")[0].strip() ) assert startup_script_envs_definition # Use regular expression to split the string into key-value pairs (courtesy of chatGPT) @@ -137,7 +162,7 @@ def test_create_startup_script( "WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS", ] assert all( - re.search(rf"{i}=\[(\\\".+\\\")*\]", startup_script) for i in list_settings + re.search(rf"{i}=\[(\\\".+\\\")*\]", deploy_script) for i in list_settings ) # check dicts have \' in front @@ -146,35 +171,55 @@ def test_create_startup_script( "WORKERS_EC2_INSTANCES_CUSTOM_TAGS", ] assert all( - re.search(rf"{i}=\'{{(\".+\":\s\".*\")+}}\'", startup_script) + re.search(rf"{i}=\'{{(\".+\":\s\".*\")+}}\'", deploy_script) for i in dict_settings ) # check the additional tags are in assert all( - f'"{key}": "{value}"' in startup_script + f'"{key}": "{value}"' in deploy_script for key, value in additional_custom_tags.items() ) -def test_create_startup_script_script_size_below_16kb( +def test_create_deploy_cluster_stack_script_below_64kb( disabled_rabbitmq: None, mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, mocked_redis_server: None, app_settings: ApplicationSettings, cluster_machines_name_prefix: str, clusters_keeper_docker_compose: dict[str, Any], - ec2_boot_specs: EC2InstanceBootSpecific, ): additional_custom_tags = { AWSTagKey("pytest-tag-key"): AWSTagValue("pytest-tag-value") } - startup_script = create_startup_script( + deploy_script = create_deploy_cluster_stack_script( app_settings, cluster_machines_name_prefix=cluster_machines_name_prefix, - ec2_boot_specific=ec2_boot_specs, additional_custom_tags=additional_custom_tags, ) + deploy_script_size_in_bytes = len(deploy_script.encode("utf-8")) + assert deploy_script_size_in_bytes < 64000, ( + f"script size is {deploy_script_size_in_bytes} bytes that exceeds the SSM command of 64KB. " + "TIP: split commands or reduce size." 
+ ) + + +def test_create_startup_script_script_size_below_16kb( + disabled_rabbitmq: None, + mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, + mocked_redis_server: None, + app_settings: ApplicationSettings, + cluster_machines_name_prefix: str, + clusters_keeper_docker_compose: dict[str, Any], + ec2_boot_specs: EC2InstanceBootSpecific, +): + startup_script = create_startup_script( + app_settings, + ec2_boot_specific=ec2_boot_specs, + ) script_size_in_bytes = len(startup_script.encode("utf-8")) print( @@ -184,13 +229,13 @@ def test_create_startup_script_script_size_below_16kb( assert script_size_in_bytes < 15 * 1024 -def test_startup_script_defines_all_envs_for_docker_compose( +def test__prepare_environment_variables_defines_all_envs_for_docker_compose( disabled_rabbitmq: None, mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, mocked_redis_server: None, app_settings: ApplicationSettings, cluster_machines_name_prefix: str, - ec2_boot_specs: EC2InstanceBootSpecific, clusters_keeper_docker_compose_file: Path, ): additional_custom_tags = { @@ -202,8 +247,8 @@ def test_startup_script_defines_all_envs_for_docker_compose( additional_custom_tags=additional_custom_tags, ) assert environment_variables - process = subprocess.run( - [ # noqa: S603, S607 + process = subprocess.run( # noqa: S603 + [ # noqa: S607 "docker", "compose", "--dry-run", diff --git a/services/clusters-keeper/tests/unit/test_utils_ec2.py b/services/clusters-keeper/tests/unit/test_utils_ec2.py index cc466d113ac4..125670475dba 100644 --- a/services/clusters-keeper/tests/unit/test_utils_ec2.py +++ b/services/clusters-keeper/tests/unit/test_utils_ec2.py @@ -25,6 +25,7 @@ def wallet_id(faker: Faker) -> WalletID: def test_get_cluster_name( disabled_rabbitmq: None, disabled_ec2: None, + disabled_ssm: None, mocked_redis_server: None, app_settings: ApplicationSettings, user_id: UserID, @@ -46,9 +47,21 @@ def test_get_cluster_name( == f"{app_settings.CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX}osparc-computational-cluster-worker-{app_settings.SWARM_STACK_NAME}-user_id:{user_id}-wallet_id:{wallet_id}" ) + assert ( + get_cluster_name(app_settings, user_id=user_id, wallet_id=None, is_manager=True) + == f"{app_settings.CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX}osparc-computational-cluster-manager-{app_settings.SWARM_STACK_NAME}-user_id:{user_id}-wallet_id:None" + ) + assert ( + get_cluster_name( + app_settings, user_id=user_id, wallet_id=None, is_manager=False + ) + == f"{app_settings.CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX}osparc-computational-cluster-worker-{app_settings.SWARM_STACK_NAME}-user_id:{user_id}-wallet_id:None" + ) + def test_creation_ec2_tags( mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, disabled_rabbitmq: None, mocked_redis_server: None, app_settings: ApplicationSettings, @@ -78,6 +91,7 @@ def test_creation_ec2_tags( def test_all_created_ec2_instances_filter( mocked_ec2_server_envs: EnvVarsDict, + mocked_ssm_server_envs: EnvVarsDict, disabled_rabbitmq: None, mocked_redis_server: None, app_settings: ApplicationSettings, diff --git a/services/dask-sidecar/requirements/_base.txt b/services/dask-sidecar/requirements/_base.txt index cca6d21ea7aa..a8a80bd33011 100644 --- a/services/dask-sidecar/requirements/_base.txt +++ b/services/dask-sidecar/requirements/_base.txt @@ -2,6 +2,8 @@ aio-pika==9.4.1 # via -r requirements/../../../packages/service-library/requirements/_base.in aiobotocore==2.13.0 # via s3fs +aiocache==0.12.2 + # via -r 
requirements/../../../packages/service-library/requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.21.0 @@ -44,10 +46,6 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in -async-timeout==4.0.3 - # via - # aiohttp - # redis attrs==23.2.0 # via # aiohttp @@ -59,6 +57,20 @@ bokeh==3.4.1 # via dask botocore==1.34.106 # via aiobotocore +certifi==2024.7.4 + # via + # -c requirements/../../../packages/dask-task-models-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/dask-task-models-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/dask-task-models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # dask @@ -80,6 +92,12 @@ dask==2024.5.1 # distributed dask-gateway==2024.1.0 # via -r requirements/_base.in +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distributed==2024.5.1 # via # dask @@ -88,8 +106,6 @@ dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via pydantic -exceptiongroup==1.2.1 - # via anyio fast-depends==2.4.2 # via faststream faststream==0.5.10 @@ -103,13 +119,22 @@ fsspec==2024.5.0 # -r requirements/_base.in # dask # s3fs +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc idna==3.7 # via # anyio # email-validator + # requests # yarl importlib-metadata==7.1.0 - # via dask + # via + # dask + # opentelemetry-api jinja2==3.1.4 # via # -c requirements/../../../packages/dask-task-models-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -156,6 +181,45 @@ numpy==1.26.4 # bokeh # contourpy # pandas +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # 
opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via opentelemetry-instrumentation-requests +opentelemetry-instrumentation-requests==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via opentelemetry-instrumentation-requests orjson==3.10.3 # via # -c requirements/../../../packages/dask-task-models-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -185,8 +249,14 @@ pillow==10.3.0 # via bokeh prometheus-client==0.20.0 # via -r requirements/_base.in -psutil==5.9.8 - # via distributed +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # distributed pydantic==1.10.15 # via # -c requirements/../../../packages/dask-task-models-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -254,6 +324,10 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/dask-task-models-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -266,6 +340,8 @@ rpds-py==0.18.1 # referencing s3fs==2024.5.0 # via fsspec +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer six==1.16.0 @@ -303,8 +379,8 @@ typing-extensions==4.11.0 # via # aiodebug # aiodocker - # anyio # faststream + # opentelemetry-sdk # pydantic # typer tzdata==2024.1 @@ -322,8 +398,12 @@ urllib3==2.2.1 # -c requirements/../../../requirements/constraints.txt # botocore # distributed + # requests wrapt==1.16.0 - # via aiobotocore + # via + # aiobotocore + # deprecated + # opentelemetry-instrumentation xyzservices==2024.4.0 # via bokeh yarl==1.9.4 diff --git a/services/dask-sidecar/requirements/_dask-distributed.txt b/services/dask-sidecar/requirements/_dask-distributed.txt index b4ff09e80033..e9ebbb2a0f58 100644 --- a/services/dask-sidecar/requirements/_dask-distributed.txt +++ b/services/dask-sidecar/requirements/_dask-distributed.txt @@ -63,7 +63,7 @@ partd==1.4.2 # via # -c requirements/./_base.txt # dask -psutil==5.9.8 +psutil==6.0.0 # via # -c requirements/./_base.txt # distributed diff --git a/services/dask-sidecar/requirements/_test.txt b/services/dask-sidecar/requirements/_test.txt index 787ef0f3be82..7f13a97ad899 100644 --- a/services/dask-sidecar/requirements/_test.txt +++ b/services/dask-sidecar/requirements/_test.txt @@ -25,13 +25,16 @@ botocore==1.34.106 
certifi==2024.7.4 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography cfn-lint==1.10.3 # via moto charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -40,7 +43,7 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # joserfc @@ -50,19 +53,15 @@ docker==7.1.0 # via # -r requirements/_test.in # moto -exceptiongroup==1.2.1 - # via - # -c requirements/_base.txt - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in flask==3.0.3 # via # flask-cors # moto -flask-cors==4.0.1 +flask-cors==5.0.0 # via moto -graphql-core==3.2.3 +graphql-core==3.2.4 # via moto icdiff==2.0.7 # via pytest-icdiff @@ -87,7 +86,7 @@ jmespath==1.0.1 # botocore joserfc==1.0.0 # via moto -jsondiff==2.2.0 +jsondiff==2.2.1 # via moto jsonpatch==1.33 # via cfn-lint @@ -115,7 +114,7 @@ markupsafe==2.1.5 # -c requirements/_base.txt # jinja2 # werkzeug -moto==5.0.13 +moto==5.0.15 # via -r requirements/_test.in mpmath==1.3.0 # via sympy @@ -138,7 +137,7 @@ ply==3.11 # via jsonpath-ng pprintpp==0.4.0 # via pytest-icdiff -py-partiql-parser==0.5.5 +py-partiql-parser==0.5.6 # via moto pycparser==2.22 # via cffi @@ -147,13 +146,13 @@ pydantic==1.10.15 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # aws-sam-translator -pyftpdlib==1.5.10 +pyftpdlib==2.0.0 # via pytest-localftpserver pyopenssl==24.2.1 # via pytest-localftpserver -pyparsing==3.1.2 +pyparsing==3.1.4 # via moto -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -204,10 +203,11 @@ referencing==0.29.3 # jsonschema # jsonschema-path # jsonschema-specifications -regex==2024.7.24 +regex==2024.9.11 # via cfn-lint requests==2.32.3 # via + # -c requirements/_base.txt # docker # jsonschema-path # moto @@ -223,21 +223,19 @@ rpds-py==0.18.1 # referencing s3transfer==0.10.2 # via boto3 -setuptools==73.0.1 - # via moto +setuptools==74.0.0 + # via + # -c requirements/_base.txt + # moto six==1.16.0 # via # -c requirements/_base.txt # python-dateutil # rfc3339-validator -sympy==1.13.2 +sympy==1.13.3 # via cfn-lint termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # pytest types-aiofiles==24.1.0.20240626 # via -r requirements/_test.in typing-extensions==4.11.0 @@ -254,7 +252,7 @@ urllib3==2.2.1 # docker # requests # responses -werkzeug==3.0.3 +werkzeug==3.0.4 # via # flask # moto diff --git a/services/dask-sidecar/requirements/_tools.txt b/services/dask-sidecar/requirements/_tools.txt index db86636a373b..97a49efc2ebd 100644 --- a/services/dask-sidecar/requirements/_tools.txt +++ b/services/dask-sidecar/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -48,14 +48,14 @@ pip==24.2 # via 
pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,32 +68,23 @@ pyyaml==6.0.1 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==74.0.0 # via + # -c requirements/_base.txt # -c requirements/_test.txt # pip-tools -tomli==2.0.1 - # via - # -c requirements/_test.txt - # black - # build - # mypy - # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.11.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py index 63e9bc97a1b2..7e2228750864 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py @@ -12,11 +12,11 @@ from uuid import uuid4 from aiodocker import Docker +from common_library.pydantic_basic_types import IDStr from dask_task_models_library.container_tasks.docker import DockerBasicAuth from dask_task_models_library.container_tasks.errors import ServiceRuntimeError from dask_task_models_library.container_tasks.io import FileUrl, TaskOutputData from dask_task_models_library.container_tasks.protocol import ContainerTaskParameters -from models_library.basic_types import IDStr from models_library.progress_bar import ProgressReport from packaging import version from pydantic import ValidationError diff --git a/services/datcore-adapter/requirements/_base.txt b/services/datcore-adapter/requirements/_base.txt index b384e50d8b3f..ed00053e60d5 100644 --- a/services/datcore-adapter/requirements/_base.txt +++ b/services/datcore-adapter/requirements/_base.txt @@ -1,7 +1,9 @@ aio-pika==9.4.1 # via -r requirements/../../../packages/service-library/requirements/_base.in aiocache==0.12.2 - # via -r requirements/_base.in + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.21.0 @@ -35,10 +37,8 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in -async-timeout==4.0.3 - # via - # aiohttp - # redis +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi attrs==23.2.0 # via # aiohttp @@ -60,16 +60,23 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer # uvicorn +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via 
email-validator email-validator==2.1.1 # via pydantic -exceptiongroup==1.2.0 - # via anyio fast-depends==2.4.2 # via faststream fastapi==0.99.1 @@ -92,6 +99,12 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -121,7 +134,10 @@ idna==3.6 # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jmespath==1.0.1 # via # boto3 @@ -140,6 +156,59 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -158,6 +227,12 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in pydantic==1.10.14 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -212,6 +287,10 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications 
+repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -223,6 +302,8 @@ rpds-py==0.18.0 # referencing s3transfer==0.10.1 # via boto3 +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer six==1.16.0 @@ -257,13 +338,12 @@ typing-extensions==4.10.0 # via # aiodebug # aiodocker - # anyio # fastapi # fastapi-pagination # faststream + # opentelemetry-sdk # pydantic # typer - # uvicorn urllib3==2.2.1 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -273,6 +353,7 @@ urllib3==2.2.1 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # botocore + # requests uvicorn==0.29.0 # via # -r requirements/../../../packages/service-library/requirements/_fastapi.in @@ -283,8 +364,14 @@ watchfiles==0.21.0 # via uvicorn websockets==12.0 # via uvicorn +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation yarl==1.9.4 # via # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git a/services/datcore-adapter/requirements/_test.txt b/services/datcore-adapter/requirements/_test.txt index f006d59e4ed4..b09942fe970d 100644 --- a/services/datcore-adapter/requirements/_test.txt +++ b/services/datcore-adapter/requirements/_test.txt @@ -4,9 +4,9 @@ anyio==4.3.0 # httpx asgi-lifespan==2.1.0 # via -r requirements/_test.in -boto3-stubs==1.35.2 +boto3-stubs==1.35.25 # via types-boto3 -botocore-stubs==1.35.2 +botocore-stubs==1.35.25 # via # boto3-stubs # types-botocore @@ -18,19 +18,16 @@ certifi==2024.2.2 # httpx # requests charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -exceptiongroup==1.2.0 - # via - # -c requirements/_base.txt - # anyio - # pytest execnet==2.1.1 # via pytest-xdist -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in h11==0.14.0 # via @@ -63,7 +60,7 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -96,7 +93,9 @@ python-dateutil==2.9.0.post0 # -c requirements/_base.txt # faker requests==2.32.3 - # via -r requirements/_test.in + # via + # -c requirements/_base.txt + # -r requirements/_test.in respx==0.21.1 # via -r requirements/_test.in six==1.16.0 @@ -111,22 +110,17 @@ sniffio==1.3.1 # httpx termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # pytest -types-awscrt==0.21.2 +types-awscrt==0.21.5 # via botocore-stubs types-boto3==1.0.2 # via -r requirements/_test.in types-botocore==1.0.2 # via -r requirements/_test.in -types-s3transfer==0.10.1 +types-s3transfer==0.10.2 # via boto3-stubs typing-extensions==4.10.0 # via # -c requirements/_base.txt - # anyio # boto3-stubs urllib3==2.2.1 # via diff --git a/services/datcore-adapter/requirements/_tools.txt b/services/datcore-adapter/requirements/_tools.txt index b833085b8821..508da70431fe 100644 --- a/services/datcore-adapter/requirements/_tools.txt +++ b/services/datcore-adapter/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt 
-build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -46,14 +46,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -65,30 +65,22 @@ pyyaml==6.0.1 # -c requirements/_base.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools -tomli==2.0.1 +setuptools==74.0.0 # via - # -c requirements/_test.txt - # black - # build - # mypy + # -c requirements/_base.txt # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.10.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/datcore-adapter/src/simcore_service_datcore_adapter/_meta.py b/services/datcore-adapter/src/simcore_service_datcore_adapter/_meta.py index 3d9f59a1841f..673b3bec726e 100644 --- a/services/datcore-adapter/src/simcore_service_datcore_adapter/_meta.py +++ b/services/datcore-adapter/src/simcore_service_datcore_adapter/_meta.py @@ -14,6 +14,7 @@ API_VERSION: Final[VersionStr] = parse_obj_as(VersionStr, __version__) MAJOR, MINOR, PATCH = __version__.split(".") API_VTAG: Final[str] = f"v{MAJOR}" +APP_NAME: Final[str] = current_distribution.metadata["Name"] PROJECT_NAME: Final[str] = current_distribution.metadata["Name"] diff --git a/services/datcore-adapter/src/simcore_service_datcore_adapter/core/application.py b/services/datcore-adapter/src/simcore_service_datcore_adapter/core/application.py index 2b57a46ed14c..8c4fb44e8e91 100644 --- a/services/datcore-adapter/src/simcore_service_datcore_adapter/core/application.py +++ b/services/datcore-adapter/src/simcore_service_datcore_adapter/core/application.py @@ -6,9 +6,10 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from servicelib.logging_utils import config_all_loggers -from .._meta import API_VERSION, API_VTAG +from .._meta import API_VERSION, API_VTAG, APP_NAME from ..api.errors.http_error import http_error_handler from ..api.errors.validation_error import http422_error_handler from ..api.module_setup import setup_api @@ -66,6 +67,12 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: if app.state.settings.DATCORE_ADAPTER_PROMETHEUS_INSTRUMENTATION_ENABLED: setup_prometheus_instrumentation(app) + if app.state.settings.DATCORE_ADAPTER_TRACING: + setup_tracing( + app, + app.state.settings.DATCORE_ADAPTER_TRACING, + APP_NAME, + ) # events app.add_event_handler("startup", on_startup) diff --git a/services/datcore-adapter/src/simcore_service_datcore_adapter/core/settings.py 
b/services/datcore-adapter/src/simcore_service_datcore_adapter/core/settings.py index a550589571f2..68e879807abd 100644 --- a/services/datcore-adapter/src/simcore_service_datcore_adapter/core/settings.py +++ b/services/datcore-adapter/src/simcore_service_datcore_adapter/core/settings.py @@ -4,6 +4,7 @@ from pydantic import Field, parse_obj_as, validator from pydantic.networks import AnyUrl from settings_library.base import BaseCustomSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings @@ -40,6 +41,9 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): description="Enables local development log format. WARNING: make sure it is disabled if you want to have structured logs!", ) DATCORE_ADAPTER_PROMETHEUS_INSTRUMENTATION_ENABLED: bool = True + DATCORE_ADAPTER_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) @cached_property def debug(self) -> bool: diff --git a/services/director-v2/.env-devel b/services/director-v2/.env-devel index 021acb4620af..9244f2d8723a 100644 --- a/services/director-v2/.env-devel +++ b/services/director-v2/.env-devel @@ -65,6 +65,5 @@ R_CLONE_OPTION_TRANSFERS=5 R_CLONE_OPTION_RETRIES=3 R_CLONE_OPTION_BUFFER_SIZE=0M -TRACING_ENABLED=True -TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 +TRACING_OBSERVABILITY_BACKEND_ENDPOINT=http://jaeger:9411 TRAEFIK_SIMCORE_ZONE=internal_simcore_stack diff --git a/services/director-v2/openapi.json b/services/director-v2/openapi.json index b2e27ac6a703..cdd6d4eca051 100644 --- a/services/director-v2/openapi.json +++ b/services/director-v2/openapi.json @@ -2608,6 +2608,15 @@ "description": "set True if the dy-sidecar saves the state and uploads the outputs", "default": false }, + "instrumentation": { + "allOf": [ + { + "$ref": "#/components/schemas/ServicesInstrumentation" + } + ], + "title": "Instrumentation", + "description": "keeps track times for various operations" + }, "dynamic_sidecar_id": { "type": "string", "maxLength": 25, @@ -3613,17 +3622,36 @@ }, "ServiceState": { "enum": [ + "failed", "pending", "pulling", "starting", "running", + "stopping", "complete", - "failed", - "stopping" + "idle" ], "title": "ServiceState", "description": "An enumeration." 
}, + "ServicesInstrumentation": { + "properties": { + "start_requested_at": { + "type": "string", + "format": "date-time", + "title": "Start Requested At", + "description": "moment in which the process of starting the service was requested" + }, + "close_requested_at": { + "type": "string", + "format": "date-time", + "title": "Close Requested At", + "description": "moment in which the process of stopping the service was requested" + } + }, + "type": "object", + "title": "ServicesInstrumentation" + }, "SimpleAuthentication": { "properties": { "type": { diff --git a/services/director-v2/requirements/_base.txt b/services/director-v2/requirements/_base.txt index 41696797fe88..a151f9fb0ab2 100644 --- a/services/director-v2/requirements/_base.txt +++ b/services/director-v2/requirements/_base.txt @@ -5,6 +5,8 @@ aio-pika==9.4.1 # -r requirements/_base.in aiocache==0.12.2 # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/_base.in # -r requirements/_base.in aiodebug==2.3.0 @@ -71,12 +73,12 @@ arrow==1.3.0 # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi async-timeout==4.0.3 # via - # aiohttp # aiopg # asyncpg - # redis asyncpg==0.29.0 # via sqlalchemy attrs==23.2.0 @@ -109,6 +111,9 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt @@ -130,6 +135,12 @@ dask==2024.5.1 # distributed dask-gateway==2024.1.0 # via -r requirements/_base.in +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distributed==2024.5.1 # via # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt @@ -141,8 +152,6 @@ email-validator==2.1.1 # via # fastapi # pydantic -exceptiongroup==1.2.1 - # via anyio fast-depends==2.4.2 # via faststream fastapi==0.99.1 @@ -179,8 +188,14 @@ fsspec==2024.5.0 # via # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # dask +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http greenlet==3.0.3 # via sqlalchemy +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -217,11 +232,13 @@ idna==3.7 # anyio # email-validator # httpx + # requests # yarl importlib-metadata==7.1.0 # via # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # dask + # opentelemetry-api itsdangerous==2.2.0 # via fastapi jinja2==3.1.4 @@ -304,6 +321,65 @@ networkx==3.3 # via -r requirements/_base.in numpy==1.26.4 # via -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt +opentelemetry-api==1.26.0 + # via + # -r 
requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests ordered-set==4.1.0 # via -r requirements/_base.in orjson==3.10.3 @@ -352,8 +428,14 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in -psutil==5.9.8 +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # distributed psycopg2-binary==2.9.9 @@ -473,6 +555,12 @@ referencing==0.29.3 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # 
-r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/dask-task-models-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -486,6 +574,8 @@ rpds-py==0.18.1 # via # jsonschema # referencing +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer simple-websocket==1.0.0 @@ -586,13 +676,12 @@ typing-extensions==4.11.0 # aiodebug # aiodocker # alembic - # anyio # fastapi # faststream + # opentelemetry-sdk # pint # pydantic # typer - # uvicorn ujson==5.10.0 # via # -c requirements/../../../packages/dask-task-models-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -634,6 +723,7 @@ urllib3==2.2.1 # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt # distributed + # requests uvicorn==0.29.0 # via # -r requirements/../../../packages/service-library/requirements/_fastapi.in @@ -644,6 +734,10 @@ watchfiles==0.21.0 # via uvicorn websockets==12.0 # via uvicorn +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation wsproto==1.2.0 # via simple-websocket yarl==1.9.4 diff --git a/services/director-v2/requirements/_test.txt b/services/director-v2/requirements/_test.txt index 4d1d11143d3f..22d12c69c200 100644 --- a/services/director-v2/requirements/_test.txt +++ b/services/director-v2/requirements/_test.txt @@ -16,7 +16,7 @@ aiohttp==3.9.5 # -c requirements/_base.txt # aiobotocore # dask-gateway-server -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore aiormq==6.8.0 # via @@ -38,16 +38,12 @@ asgi-lifespan==2.1.0 # via -r requirements/_test.in async-asgi-testclient==1.4.11 # via -r requirements/_test.in -async-timeout==4.0.3 - # via - # -c requirements/_base.txt - # aiohttp attrs==23.2.0 # via # -c requirements/_base.txt # aiohttp # pytest-docker -bokeh==3.5.1 +bokeh==3.5.2 # via dask boto3==1.34.131 # via aiobotocore @@ -63,10 +59,12 @@ certifi==2024.2.2 # httpcore # httpx # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -79,11 +77,11 @@ cloudpickle==3.0.0 # distributed colorlog==6.8.2 # via dask-gateway-server -contourpy==1.2.1 +contourpy==1.3.0 # via bokeh coverage==7.6.1 # via pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # dask-gateway-server @@ -100,14 +98,9 @@ distributed==2024.5.1 # dask docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.1 - # via - # -c requirements/_base.txt - # anyio - # pytest execnet==2.1.1 # via pytest-xdist -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in flaky==3.8.1 # via -r requirements/_test.in @@ -188,7 +181,7 @@ multidict==6.0.5 # aiohttp # async-asgi-testclient # yarl -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -210,7 +203,7 @@ pamqp==3.3.0 # via # -c requirements/_base.txt # aiormq -pandas==2.2.2 +pandas==2.2.3 # via bokeh partd==1.4.2 # via @@ -222,13 +215,13 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -psutil==5.9.8 +psutil==6.0.0 # via # -c requirements/_base.txt # distributed pycparser==2.22 # via cffi -pytest==8.3.2 +pytest==8.3.3 # via # -r 
requirements/_test.in # pytest-asyncio @@ -259,7 +252,7 @@ python-dateutil==2.9.0.post0 # botocore # faker # pandas -pytz==2024.1 +pytz==2024.2 # via pandas pyyaml==6.0.1 # via @@ -270,6 +263,7 @@ pyyaml==6.0.1 # distributed requests==2.32.3 # via + # -c requirements/_base.txt # async-asgi-testclient # docker respx==0.21.1 @@ -303,11 +297,6 @@ tblib==3.0.0 # via # -c requirements/_base.txt # distributed -tomli==2.0.1 - # via - # coverage - # mypy - # pytest toolz==0.12.1 # via # -c requirements/_base.txt @@ -321,20 +310,19 @@ tornado==6.4 # distributed traitlets==5.14.3 # via dask-gateway-server -types-networkx==3.2.1.20240820 +types-networkx==3.2.1.20240918 # via -r requirements/_test.in types-psycopg2==2.9.21.20240819 # via -r requirements/_test.in -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in typing-extensions==4.11.0 # via # -c requirements/_base.txt # alembic - # anyio # mypy # sqlalchemy2-stubs -tzdata==2024.1 +tzdata==2024.2 # via pandas urllib3==2.2.1 # via @@ -345,8 +333,10 @@ urllib3==2.2.1 # docker # requests wrapt==1.16.0 - # via aiobotocore -xyzservices==2024.6.0 + # via + # -c requirements/_base.txt + # aiobotocore +xyzservices==2024.9.0 # via bokeh yarl==1.9.4 # via diff --git a/services/director-v2/requirements/_tools.txt b/services/director-v2/requirements/_tools.txt index ea37071c1adc..062a460207d6 100644 --- a/services/director-v2/requirements/_tools.txt +++ b/services/director-v2/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -51,14 +51,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -71,30 +71,22 @@ pyyaml==6.0.1 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools -tomli==2.0.1 +setuptools==74.0.0 # via - # -c requirements/_test.txt - # black - # build - # mypy + # -c requirements/_base.txt # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.11.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/director-v2/src/simcore_service_director_v2/__init__.py b/services/director-v2/src/simcore_service_director_v2/__init__.py index d689cb28b1f0..f047bb136bd5 100644 --- a/services/director-v2/src/simcore_service_director_v2/__init__.py +++ b/services/director-v2/src/simcore_service_director_v2/__init__.py @@ -1,4 +1,4 @@ """ Python package for the 
simcore_service_director_v2. """ -from .meta import __version__ +from ._meta import __version__ diff --git a/services/director-v2/src/simcore_service_director_v2/meta.py b/services/director-v2/src/simcore_service_director_v2/_meta.py similarity index 93% rename from services/director-v2/src/simcore_service_director_v2/meta.py rename to services/director-v2/src/simcore_service_director_v2/_meta.py index 2d1d9d740d8c..4ebfef7135c9 100644 --- a/services/director-v2/src/simcore_service_director_v2/meta.py +++ b/services/director-v2/src/simcore_service_director_v2/_meta.py @@ -15,4 +15,5 @@ VERSION: Final[Version] = info.version API_VERSION: Final[VersionStr] = info.__version__ API_VTAG: Final[str] = info.api_prefix_path_tag +APP_NAME: Final[str] = PROJECT_NAME SUMMARY: Final[str] = info.get_summary() diff --git a/services/director-v2/src/simcore_service_director_v2/api/entrypoints.py b/services/director-v2/src/simcore_service_director_v2/api/entrypoints.py index 323675160568..671fc78d2d4c 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/entrypoints.py +++ b/services/director-v2/src/simcore_service_director_v2/api/entrypoints.py @@ -1,6 +1,6 @@ from fastapi import APIRouter -from ..meta import API_VTAG +from .._meta import API_VTAG from .routes import ( clusters, computations, diff --git a/services/director-v2/src/simcore_service_director_v2/api/routes/computations.py b/services/director-v2/src/simcore_service_director_v2/api/routes/computations.py index cea6e18770db..49fd757e8867 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/routes/computations.py +++ b/services/director-v2/src/simcore_service_director_v2/api/routes/computations.py @@ -286,7 +286,7 @@ async def _try_start_pipeline( ) # NOTE: in case of a burst of calls to that endpoint, we might end up in a weird state. 
@run_sequentially_in_context(target_args=["computation.project_id"]) -async def create_computation( # noqa: PLR0913 +async def create_computation( # noqa: PLR0913 # pylint:disable=too-many-positional-arguments computation: ComputationCreate, request: Request, project_repo: Annotated[ diff --git a/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py b/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py index a2a99f4bea3c..750f634bb3b2 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py +++ b/services/director-v2/src/simcore_service_director_v2/api/routes/dynamic_services.py @@ -136,7 +136,6 @@ async def create_dynamic_service( logger.debug("Redirecting %s", redirect_url_with_query) return RedirectResponse(str(redirect_url_with_query)) - # if not await is_sidecar_running( service.node_uuid, dynamic_services_settings.DYNAMIC_SCHEDULER.SWARM_STACK_NAME ): diff --git a/services/director-v2/src/simcore_service_director_v2/api/routes/health.py b/services/director-v2/src/simcore_service_director_v2/api/routes/health.py index 79c954c44edd..9ce8dc97ef6c 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/routes/health.py +++ b/services/director-v2/src/simcore_service_director_v2/api/routes/health.py @@ -2,7 +2,7 @@ from typing import Annotated from fastapi import APIRouter, Depends -from models_library.api_schemas_directorv2.health import HealthCheckGet +from models_library.api_schemas__common.health import HealthCheckGet from models_library.errors import RABBITMQ_CLIENT_UNHEALTHY_MSG from servicelib.rabbitmq import RabbitMQClient diff --git a/services/director-v2/src/simcore_service_director_v2/api/routes/meta.py b/services/director-v2/src/simcore_service_director_v2/api/routes/meta.py index 79e07311a70c..775a6f0b65ce 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/routes/meta.py +++ b/services/director-v2/src/simcore_service_director_v2/api/routes/meta.py @@ -2,7 +2,7 @@ from models_library.api_schemas__common.meta import BaseMeta from models_library.basic_types import VersionStr -from ...meta import API_VERSION, API_VTAG +from ..._meta import API_VERSION, API_VTAG router = APIRouter() diff --git a/services/director-v2/src/simcore_service_director_v2/cli/__init__.py b/services/director-v2/src/simcore_service_director_v2/cli/__init__.py index ce8bfea76848..eb4d050bd80b 100644 --- a/services/director-v2/src/simcore_service_director_v2/cli/__init__.py +++ b/services/director-v2/src/simcore_service_director_v2/cli/__init__.py @@ -8,8 +8,8 @@ from models_library.projects_nodes_io import NodeID from settings_library.utils_cli import create_settings_command +from .._meta import PROJECT_NAME from ..core.settings import AppSettings -from ..meta import PROJECT_NAME from ..modules.osparc_variables import substitutions from ._close_and_save_service import async_close_and_save_service from ._core import ( diff --git a/services/director-v2/src/simcore_service_director_v2/constants.py b/services/director-v2/src/simcore_service_director_v2/constants.py index 424ac151acb6..fc700254ed0b 100644 --- a/services/director-v2/src/simcore_service_director_v2/constants.py +++ b/services/director-v2/src/simcore_service_director_v2/constants.py @@ -4,7 +4,6 @@ DYNAMIC_SIDECAR_SERVICE_PREFIX: Final[str] = "dy-sidecar" DYNAMIC_PROXY_SERVICE_PREFIX: Final[str] = "dy-proxy" -DYNAMIC_VOLUME_REMOVER_PREFIX: Final[str] = "dy-volrm" # label storing scheduler_data to allow service # 
monitoring recovery after director-v2 reboots diff --git a/services/director-v2/src/simcore_service_director_v2/core/application.py b/services/director-v2/src/simcore_service_director_v2/core/application.py index a43848298a76..330717e60624 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/application.py +++ b/services/director-v2/src/simcore_service_director_v2/core/application.py @@ -7,18 +7,16 @@ override_fastapi_openapi_method, ) from servicelib.fastapi.profiler_middleware import ProfilerMiddleware -from servicelib.fastapi.prometheus_instrumentation import ( - setup_prometheus_instrumentation, -) +from servicelib.fastapi.tracing import setup_tracing from servicelib.logging_utils import config_all_loggers +from .._meta import API_VERSION, API_VTAG, APP_NAME, PROJECT_NAME, SUMMARY from ..api.entrypoints import api_router from ..api.errors.http_error import ( http_error_handler, make_http_error_handler_for_exception, ) from ..api.errors.validation_error import http422_error_handler -from ..meta import API_VERSION, API_VTAG, PROJECT_NAME, SUMMARY from ..modules import ( catalog, comp_scheduler, @@ -27,6 +25,7 @@ director_v0, dynamic_services, dynamic_sidecar, + instrumentation, notifier, rabbitmq, redis, @@ -191,7 +190,9 @@ def init_app(settings: AppSettings | None = None) -> FastAPI: resource_usage_tracker_client.setup(app) if settings.DIRECTOR_V2_PROMETHEUS_INSTRUMENTATION_ENABLED: - setup_prometheus_instrumentation(app) + instrumentation.setup(app) + if settings.DIRECTOR_V2_TRACING: + setup_tracing(app, app.state.settings.DIRECTOR_V2_TRACING, APP_NAME) if settings.DIRECTOR_V2_PROFILING: app.add_middleware(ProfilerMiddleware) diff --git a/services/director-v2/src/simcore_service_director_v2/core/dynamic_services_settings/__init__.py b/services/director-v2/src/simcore_service_director_v2/core/dynamic_services_settings/__init__.py index 276e21b40c3a..b4abd4f5b6e1 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/dynamic_services_settings/__init__.py +++ b/services/director-v2/src/simcore_service_director_v2/core/dynamic_services_settings/__init__.py @@ -1,5 +1,6 @@ from pydantic import Field from settings_library.base import BaseCustomSettings +from settings_library.webserver import WebServerSettings from .egress_proxy import EgressProxySettings from .proxy import DynamicSidecarProxySettings @@ -29,3 +30,5 @@ class DynamicServicesSettings(BaseCustomSettings): DYNAMIC_SIDECAR_PLACEMENT_SETTINGS: PlacementSettings = Field( auto_default_from_env=True ) + + WEBSERVER_SETTINGS: WebServerSettings = Field(auto_default_from_env=True) diff --git a/services/director-v2/src/simcore_service_director_v2/core/events.py b/services/director-v2/src/simcore_service_director_v2/core/events.py index 9f79abfc69b7..13af6ca3009f 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/events.py +++ b/services/director-v2/src/simcore_service_director_v2/core/events.py @@ -1,6 +1,6 @@ from servicelib.async_utils import cancel_sequential_workers -from ..meta import __version__, info +from .._meta import __version__, info # # SEE https://patorjk.com/software/taag/#p=display&f=Small&t=Director diff --git a/services/director-v2/src/simcore_service_director_v2/core/settings.py b/services/director-v2/src/simcore_service_director_v2/core/settings.py index d87b567149aa..d495dd4aeef2 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/settings.py +++ b/services/director-v2/src/simcore_service_director_v2/core/settings.py @@ -36,6 +36,7 @@ 
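
Note on the tracing hookup above: `setup_tracing(app, settings.DIRECTOR_V2_TRACING, APP_NAME)` comes from servicelib and is not part of this diff. As a minimal sketch only, assuming it performs the usual OTLP/HTTP wiring that the new `opentelemetry-*` pins in `_base.txt` enable (exporter endpoint, function and variable names here are illustrative, not the servicelib implementation):

```python
# Hedged sketch: NOT the servicelib implementation, just the typical wiring the
# new opentelemetry-* dependencies allow (OTLP/HTTP exporter + FastAPI auto-instrumentation).
from fastapi import FastAPI
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor


def setup_tracing_sketch(app: FastAPI, collector_endpoint: str, service_name: str) -> None:
    # tag every span with the emitting service's name
    provider = TracerProvider(resource=Resource.create({"service.name": service_name}))
    # export spans in batches to an OTLP/HTTP collector, e.g. http://collector:4318
    provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{collector_endpoint}/v1/traces"))
    )
    trace.set_tracer_provider(provider)
    # auto-instrument incoming requests handled by this FastAPI app
    FastAPIInstrumentor.instrument_app(app, tracer_provider=provider)
```
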
ResourceUsageTrackerSettings, ) from settings_library.storage import StorageSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from simcore_sdk.node_ports_v2 import FileLinkType @@ -224,6 +225,9 @@ class AppSettings(BaseCustomSettings, MixinLoggingSettings): ..., description="Base URL used to access the public api e.g. http://127.0.0.1:6000 for development or https://api.osparc.io", ) + DIRECTOR_V2_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) @validator("LOG_LEVEL", pre=True) @classmethod diff --git a/services/director-v2/src/simcore_service_director_v2/models/dynamic_services_scheduler.py b/services/director-v2/src/simcore_service_director_v2/models/dynamic_services_scheduler.py index 33272e9f946e..1e575486d42f 100644 --- a/services/director-v2/src/simcore_service_director_v2/models/dynamic_services_scheduler.py +++ b/services/director-v2/src/simcore_service_director_v2/models/dynamic_services_scheduler.py @@ -2,12 +2,15 @@ import logging import re from collections.abc import Mapping +from datetime import datetime from enum import Enum from functools import cached_property from pathlib import Path from typing import Any, TypeAlias from uuid import UUID +import arrow +from common_library.pydantic_basic_types import ConstrainedStr from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceCreate from models_library.api_schemas_directorv2.dynamic_services_service import ( CommonServiceDetails, @@ -25,15 +28,7 @@ from models_library.services import RunID from models_library.services_resources import ServiceResourcesDict from models_library.wallets import WalletInfo -from pydantic import ( - AnyHttpUrl, - BaseModel, - ConstrainedStr, - Extra, - Field, - parse_obj_as, - validator, -) +from pydantic import AnyHttpUrl, BaseModel, Extra, Field, parse_obj_as, validator from servicelib.error_codes import ErrorCodeStr from servicelib.exception_utils import DelayedExceptionHandler @@ -170,6 +165,28 @@ def mark_removed(self) -> None: self.was_removed = True +class ServicesInstrumentation(BaseModel): + start_requested_at: datetime | None = Field( + None, + description="moment in which the process of starting the service was requested", + ) + close_requested_at: datetime | None = Field( + None, + description="moment in which the process of stopping the service was requested", + ) + + def elapsed_since_start_request(self) -> float | None: + if self.start_requested_at is None: + return None + + return (arrow.utcnow().datetime - self.start_requested_at).total_seconds() + + def elapsed_since_close_request(self) -> float | None: + if self.close_requested_at is None: + return None + return (arrow.utcnow().datetime - self.close_requested_at).total_seconds() + + class DynamicSidecar(BaseModel): status: Status = Field( Status.create_as_initially_ok(), @@ -254,6 +271,11 @@ def compose_spec_submitted(self) -> bool: description="set True if the dy-sidecar saves the state and uploads the outputs", ) + instrumentation: ServicesInstrumentation = Field( + default_factory=lambda: ServicesInstrumentation.parse_obj({}), + description="keeps track times for various operations", + ) + # below had already been validated and # used only to start the proxy dynamic_sidecar_id: ServiceId | None = Field( diff --git a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/base_scheduler.py 
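
The new `ServicesInstrumentation` model above is the piece that turns scheduler timestamps into the start/stop duration metrics. A short usage sketch of the lifecycle (values are illustrative; the assignments mirror what `_scheduler.py` does further down in this diff):

```python
# Hedged usage sketch of the new model: stamp the request times, read back the
# elapsed seconds once the corresponding operation completes.
import arrow

from simcore_service_director_v2.models.dynamic_services_scheduler import (
    ServicesInstrumentation,
)

instrumentation = ServicesInstrumentation.parse_obj({})

# Scheduler.add_service(): remember when the start was requested
instrumentation.start_requested_at = arrow.utcnow().datetime

# ... later, once the user services reach the running state ...
start_duration = instrumentation.elapsed_since_start_request()  # seconds, or None if never requested

# Scheduler.mark_service_for_removal(): remember when the stop was requested
instrumentation.close_requested_at = arrow.utcnow().datetime
stop_duration = instrumentation.elapsed_since_close_request()
```
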
b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/base_scheduler.py index 3ba703a78b78..08396686e431 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/base_scheduler.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/base_scheduler.py @@ -27,6 +27,7 @@ from models_library.projects_state import RunningState from models_library.services import ServiceKey, ServiceType, ServiceVersion from models_library.users import UserID +from networkx.classes.reportviews import InDegreeView from pydantic import PositiveInt from servicelib.common_headers import UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient @@ -734,8 +735,10 @@ async def _schedule_tasks_to_start( # noqa: C901 if t.state == RunningState.SUCCESS } ) + dag_in_degree = dag.in_degree() + assert isinstance(dag_in_degree, InDegreeView) # nosec next_task_node_ids = [ - node_id for node_id, degree in dag.in_degree() if degree == 0 + node_id for node_id, degree in dag_in_degree if degree == 0 ] # get the tasks to start diff --git a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_core.py b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_core.py index dabb45dfb0f0..f14c08826938 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_core.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_core.py @@ -5,7 +5,7 @@ import arrow import sqlalchemy as sa from aiopg.sa.result import ResultProxy, RowProxy -from models_library.basic_types import IDStr +from common_library.pydantic_basic_types import IDStr from models_library.errors import ErrorDict from models_library.projects import ProjectAtDB, ProjectID from models_library.projects_nodes_io import NodeID diff --git a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/groups_extra_properties.py b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/groups_extra_properties.py index 8706a899d125..c08477016345 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/groups_extra_properties.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/groups_extra_properties.py @@ -1,3 +1,4 @@ +from pydantic import BaseModel from simcore_postgres_database.utils_groups_extra_properties import ( GroupExtraProperties, GroupExtraPropertiesRepo, @@ -6,6 +7,12 @@ from ._base import BaseRepository +class UserExtraProperties(BaseModel): + is_internet_enabled: bool + is_telemetry_enabled: bool + is_efs_enabled: bool + + class GroupsExtraPropertiesRepository(BaseRepository): async def _get_aggregated_properties_for_user( self, @@ -31,3 +38,15 @@ async def is_telemetry_enabled(self, *, user_id: int, product_name: str) -> bool ) telemetry_enabled: bool = group_extra_properties.enable_telemetry return telemetry_enabled + + async def get_user_extra_properties( + self, *, user_id: int, product_name: str + ) -> UserExtraProperties: + group_extra_properties = await self._get_aggregated_properties_for_user( + user_id=user_id, product_name=product_name + ) + return UserExtraProperties( + is_internet_enabled=group_extra_properties.internet_access, + is_telemetry_enabled=group_extra_properties.enable_telemetry, + is_efs_enabled=group_extra_properties.enable_efs, + ) diff --git 
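
The `InDegreeView` assertion in the scheduler change above only narrows the type for mypy (`in_degree()` called without arguments returns a view yielding `(node, degree)` pairs); the ready-task selection itself is unchanged. A self-contained example of that logic with hypothetical node names:

```python
# Nodes whose tasks already SUCCEEDED are removed from the DAG; every remaining
# node with in-degree 0 has no pending upstream dependency and can start next.
import networkx as nx

dag = nx.DiGraph(
    [("sleeper_1", "sleeper_2"), ("sleeper_1", "sleeper_3"), ("sleeper_2", "sleeper_4")]
)
dag.remove_nodes_from(["sleeper_1"])  # e.g. sleeper_1 already completed successfully

ready = [node_id for node_id, degree in dag.in_degree() if degree == 0]
print(ready)  # ['sleeper_2', 'sleeper_3'] -> next tasks to start
```
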
a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/api_client/_public.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/api_client/_public.py index 190a35a315c5..7ce782c6366a 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/api_client/_public.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/api_client/_public.py @@ -349,18 +349,20 @@ async def stop_service( progress_callback, ) - async def restore_service_state(self, dynamic_sidecar_endpoint: AnyHttpUrl) -> None: + async def restore_service_state(self, dynamic_sidecar_endpoint: AnyHttpUrl) -> int: response = await self._thin_client.post_containers_tasks_state_restore( dynamic_sidecar_endpoint ) task_id: TaskId = response.json() - await self._await_for_result( + result: Any | None = await self._await_for_result( task_id, dynamic_sidecar_endpoint, self._dynamic_services_scheduler_settings.DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT, _debug_progress_callback, ) + assert isinstance(result, int) # nosec + return result async def pull_user_services_images( self, dynamic_sidecar_endpoint: AnyHttpUrl @@ -381,18 +383,20 @@ async def save_service_state( self, dynamic_sidecar_endpoint: AnyHttpUrl, progress_callback: ProgressCallback | None = None, - ) -> None: + ) -> int: response = await self._thin_client.post_containers_tasks_state_save( dynamic_sidecar_endpoint ) task_id: TaskId = response.json() - await self._await_for_result( + result: Any | None = await self._await_for_result( task_id, dynamic_sidecar_endpoint, self._dynamic_services_scheduler_settings.DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT, progress_callback, ) + assert isinstance(result, int) # nosec + return result async def pull_service_input_ports( self, @@ -416,18 +420,20 @@ async def pull_service_output_ports( self, dynamic_sidecar_endpoint: AnyHttpUrl, port_keys: list[str] | None = None, - ) -> None: + ) -> int: response = await self._thin_client.post_containers_tasks_ports_outputs_pull( dynamic_sidecar_endpoint, port_keys ) task_id: TaskId = response.json() - await self._await_for_result( + result: Any | None = await self._await_for_result( task_id, dynamic_sidecar_endpoint, self._dynamic_services_scheduler_settings.DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT, _debug_progress_callback, ) + assert isinstance(result, int) # nosec + return result async def push_service_output_ports( self, diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/__init__.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/__init__.py index 99cab1542c53..5fb63db124bb 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/__init__.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/__init__.py @@ -16,7 +16,6 @@ try_to_remove_network, update_scheduler_data_label, ) -from ._volume import remove_pending_volume_removal_services, remove_volumes_from_node __all__: tuple[str, ...] 
= ( "are_sidecar_and_proxy_services_present", @@ -29,12 +28,10 @@ "get_or_create_networks_ids", "get_projects_networks_containers", "get_swarm_network", - "is_sidecar_running", "is_dynamic_sidecar_stack_missing", + "is_sidecar_running", "remove_dynamic_sidecar_network", "remove_dynamic_sidecar_stack", - "remove_pending_volume_removal_services", - "remove_volumes_from_node", "try_to_remove_network", "update_scheduler_data_label", ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/_volume.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/_volume.py deleted file mode 100644 index e5891bd9f6f3..000000000000 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api/_volume.py +++ /dev/null @@ -1,159 +0,0 @@ -import logging -from datetime import datetime, timezone - -from fastapi.encoders import jsonable_encoder -from models_library.projects import ProjectID -from models_library.projects_nodes_io import NodeID -from models_library.users import UserID -from servicelib.docker_utils import to_datetime -from servicelib.logging_utils import log_context -from tenacity import TryAgain -from tenacity.asyncio import AsyncRetrying -from tenacity.retry import retry_if_exception_type -from tenacity.stop import stop_after_delay -from tenacity.wait import wait_fixed - -from ....constants import DYNAMIC_VOLUME_REMOVER_PREFIX -from ..docker_service_specs.volume_remover import spec_volume_removal_service -from ._utils import docker_client - -_logger = logging.getLogger(__name__) - - -# FROM https://docs.docker.com/engine/swarm/how-swarm-mode-works/swarm-task-states/ -SERVICE_FINISHED_STATES: set[str] = { - "complete", - "failed", - "shutdown", - "rejected", - "orphaned", - "remove", -} - - -async def remove_volumes_from_node( - swarm_stack_name: str, - volume_names: list[str], - docker_node_id: str, - user_id: UserID, - project_id: ProjectID, - node_uuid: NodeID, - *, - volume_removal_attempts: int = 15, - sleep_between_attempts_s: int = 2, -) -> bool: - """ - Starts a service at target docker node which will remove - all entries in the `volumes_names` list. - """ - - async with docker_client() as client: - # Timeout for the runtime of the service is calculated based on the amount - # of attempts required to remove each individual volume, - # in the worst case scenario when all volumes are do not exit. 
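
For reference, the per-service timeout that the removed helper derives just below scales with the number of volumes to delete. A worked example using the helper's own defaults (15 attempts, 2 s between attempts) and hypothetical volume names:

```python
# Worked example of the (now removed) timeout budget for one volume-removal service.
volume_removal_attempts = 15
sleep_between_attempts_s = 2
volume_names = ["dyv_a", "dyv_b", "dyv_c", "dyv_d"]  # hypothetical names

volume_removal_timeout_s = volume_removal_attempts * sleep_between_attempts_s  # 30 s per volume
service_timeout_s = volume_removal_timeout_s * len(volume_names)  # 120 s for the whole service
```
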
- volume_removal_timeout_s = volume_removal_attempts * sleep_between_attempts_s - service_timeout_s = volume_removal_timeout_s * len(volume_names) - - service_spec = spec_volume_removal_service( - swarm_stack_name=swarm_stack_name, - docker_node_id=docker_node_id, - user_id=user_id, - project_id=project_id, - node_uuid=node_uuid, - volume_names=volume_names, - volume_removal_attempts=volume_removal_attempts, - sleep_between_attempts_s=sleep_between_attempts_s, - service_timeout_s=service_timeout_s, - ) - - volume_removal_service = await client.services.create( - **jsonable_encoder(service_spec, by_alias=True, exclude_unset=True) - ) - - service_id = volume_removal_service["ID"] - try: - async for attempt in AsyncRetrying( - stop=stop_after_delay(service_timeout_s), - wait=wait_fixed(0.5), - retry=retry_if_exception_type(TryAgain), - reraise=True, - ): - with attempt: - _logger.debug( - "Waiting for removal of %s, with service id %s", - node_uuid, - service_id, - ) - tasks = await client.tasks.list(filters={"service": service_id}) - # NOTE: the service will have at most 1 task, since there is no restart - # policy present - if len(tasks) != 1: - # Docker swarm needs a bit of time to startup the tasks - raise TryAgain( - f"Expected 1 task for service {service_id} on node {node_uuid}, found {tasks=}" - ) - - task = tasks[0] - task_status = task["Status"] - _logger.debug("Service %s, %s", service_id, f"{task_status=}") - task_state = task_status["State"] - if task_state not in SERVICE_FINISHED_STATES: - raise TryAgain( - f"Waiting for task to finish for service {service_id} on node {node_uuid}: {task_status=}" - ) - - if not ( - task_state == "complete" - and task_status["ContainerStatus"]["ExitCode"] == 0 - ): - _logger.error( - "Service %s on node %s status: %s", - service_id, - node_uuid, - f"{task_status=}", - ) - # NOTE: above implies the volumes will remain in the system and - # have to be manually removed. - return False - finally: - # NOTE: services created in swarm need to be removed, there is no way - # to instruct swarm to remove a service after it's created - # container/task finished - with log_context( - _logger, - logging.DEBUG, - f"deleting service {service_id} on node {node_uuid}", - ): - await client.services.delete(service_id) - - _logger.debug("Finished removing volumes for service %s", node_uuid) - return True - - -async def remove_pending_volume_removal_services(swarm_stack_name: str) -> None: - """ - Removes all pending volume removal services. Such a service - will be considered pending if it is running for longer than its - intended duration (defined in the `service_timeout_s` label). 
- """ - service_filters = { - "label": [f"swarm_stack_name={swarm_stack_name}"], - "name": [f"{DYNAMIC_VOLUME_REMOVER_PREFIX}"], - } - async with docker_client() as client: - volume_removal_services = await client.services.list(filters=service_filters) - - for volume_removal_service in volume_removal_services: - service_timeout_s = int( - volume_removal_service["Spec"]["Labels"]["service_timeout_s"] - ) - created_at = to_datetime(volume_removal_services[0]["CreatedAt"]) - time_diff = datetime.now(tz=timezone.utc) - created_at - service_timed_out = time_diff.seconds > (service_timeout_s * 10) - if service_timed_out: - service_id = volume_removal_service["ID"] - service_name = volume_removal_service["Spec"]["Name"] - _logger.debug( - "Removing pending volume removal service %s", service_name - ) - await client.services.delete(service_id) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/proxy.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/proxy.py index fb9df7876864..b946e71d4ea8 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/proxy.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/proxy.py @@ -9,6 +9,8 @@ ) from pydantic import ByteSize from servicelib.common_headers import X_SIMCORE_USER_AGENT +from settings_library import webserver +from settings_library.utils_session import DEFAULT_SESSION_COOKIE_NAME from ....core.dynamic_services_settings import DynamicServicesSettings from ....core.dynamic_services_settings.proxy import DynamicSidecarProxySettings @@ -43,6 +45,9 @@ def get_dynamic_proxy_spec( dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings = ( dynamic_services_settings.DYNAMIC_SCHEDULER ) + webserver_settings: webserver.WebServerSettings = ( + dynamic_services_settings.WEBSERVER_SETTINGS + ) mounts = [ # docker socket needed to use the docker api @@ -77,9 +82,11 @@ def get_dynamic_proxy_spec( "io.simcore.zone": f"{dynamic_services_scheduler_settings.TRAEFIK_SIMCORE_ZONE}", "traefik.docker.network": swarm_network_name, "traefik.enable": "true", + # security + f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.accesscontrolallowcredentials": "true", f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.customresponseheaders.Content-Security-Policy": f"frame-ancestors {scheduler_data.request_dns} {scheduler_data.node_uuid}.services.{scheduler_data.request_dns}", f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.accesscontrolallowmethods": "GET,OPTIONS,PUT,POST,DELETE,PATCH,HEAD", - f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.accesscontrolallowheaders": f"{X_SIMCORE_USER_AGENT}", + f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.accesscontrolallowheaders": f"{X_SIMCORE_USER_AGENT},Set-Cookie", f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.accessControlAllowOriginList": ",".join( [ f"{scheduler_data.request_scheme}://{scheduler_data.request_dns}", @@ -88,11 +95,22 @@ def get_dynamic_proxy_spec( ), f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.accesscontrolmaxage": "100", 
f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-security-headers.headers.addvaryheader": "true", + # auth + f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-auth.forwardauth.address": f"{webserver_settings.api_base_url}/auth:check", + f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-auth.forwardauth.trustForwardHeader": "true", + f"traefik.http.middlewares.{scheduler_data.proxy_service_name}-auth.forwardauth.authResponseHeaders": f"Set-Cookie,{DEFAULT_SESSION_COOKIE_NAME}", + # routing f"traefik.http.services.{scheduler_data.proxy_service_name}.loadbalancer.server.port": "80", f"traefik.http.routers.{scheduler_data.proxy_service_name}.entrypoints": "http", f"traefik.http.routers.{scheduler_data.proxy_service_name}.priority": "10", f"traefik.http.routers.{scheduler_data.proxy_service_name}.rule": rf"HostRegexp(`{scheduler_data.node_uuid}\.services\.(?P.+)`)", - f"traefik.http.routers.{scheduler_data.proxy_service_name}.middlewares": f"{dynamic_services_scheduler_settings.SWARM_STACK_NAME}_gzip@swarm, {scheduler_data.proxy_service_name}-security-headers", + f"traefik.http.routers.{scheduler_data.proxy_service_name}.middlewares": ",".join( + [ + f"{dynamic_services_scheduler_settings.SWARM_STACK_NAME}_gzip@swarm", + f"{scheduler_data.proxy_service_name}-security-headers", + f"{scheduler_data.proxy_service_name}-auth", + ] + ), "dynamic_type": "dynamic-sidecar", # tagged as dynamic service } | StandardSimcoreDockerLabels( diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index 78b66242355f..b788e455cf38 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -30,6 +30,7 @@ from ....core.dynamic_services_settings.sidecar import DynamicSidecarSettings from ....core.settings import AppSettings from ....models.dynamic_services_scheduler import SchedulerData +from ....modules.db.repositories.groups_extra_properties import UserExtraProperties from .._namespace import get_compose_namespace from ..volumes import DynamicSidecarVolumesPathsResolver from ._constants import DOCKER_CONTAINER_SPEC_RESTART_POLICY_DEFAULTS @@ -220,6 +221,7 @@ async def _get_mounts( app_settings: AppSettings, has_quota_support: bool, rpc_client: RabbitMQRPCClient, + is_efs_enabled: bool, ) -> list[dict[str, Any]]: mounts: list[dict[str, Any]] = [ # docker socket needed to use the docker api @@ -270,18 +272,9 @@ async def _get_mounts( ) ) - # We check whether user has access to EFS feature - use_efs = False - efs_settings = dynamic_sidecar_settings.DYNAMIC_SIDECAR_EFS_SETTINGS - if ( - efs_settings - and scheduler_data.user_id in efs_settings.EFS_ONLY_ENABLED_FOR_USERIDS - ): - use_efs = True - # state paths now get mounted via different driver and are synced to s3 automatically for path_to_mount in scheduler_data.paths_mapping.state_paths: - if use_efs: + if is_efs_enabled: assert dynamic_sidecar_settings.DYNAMIC_SIDECAR_EFS_SETTINGS # nosec _storage_directory_name = DynamicSidecarVolumesPathsResolver.volume_name( @@ -411,10 +404,9 @@ async def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: app_settings: AppSettings, *, has_quota_support: bool, - allow_internet_access: bool, hardware_info: HardwareInfo 
| None, metrics_collection_allowed: bool, - telemetry_enabled: bool, + user_extra_properties: UserExtraProperties, rpc_client: RabbitMQRPCClient, ) -> AioDockerServiceSpec: """ @@ -434,6 +426,7 @@ async def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: app_settings=app_settings, has_quota_support=has_quota_support, rpc_client=rpc_client, + is_efs_enabled=user_extra_properties.is_efs_enabled, ) ports = _get_ports( @@ -512,9 +505,9 @@ async def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: compose_namespace, scheduler_data, app_settings, - allow_internet_access=allow_internet_access, + allow_internet_access=user_extra_properties.is_internet_enabled, metrics_collection_allowed=metrics_collection_allowed, - telemetry_enabled=telemetry_enabled, + telemetry_enabled=user_extra_properties.is_telemetry_enabled, ), "Hosts": [], "Image": dynamic_sidecar_settings.DYNAMIC_SIDECAR_IMAGE, diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/volume_remover.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/volume_remover.py deleted file mode 100644 index cefbe0156ec1..000000000000 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/volume_remover.py +++ /dev/null @@ -1,185 +0,0 @@ -import json -import re -from asyncio.log import logger -from typing import Final -from uuid import uuid4 - -from models_library.aiodocker_api import AioDockerServiceSpec -from models_library.projects import ProjectID -from models_library.projects_nodes_io import NodeID -from models_library.services_resources import ( - CPU_10_PERCENT, - CPU_100_PERCENT, - MEMORY_50MB, - MEMORY_250MB, -) -from models_library.users import UserID -from pydantic import parse_obj_as - -from ....constants import DYNAMIC_VOLUME_REMOVER_PREFIX - - -class DockerVersion(str): - """ - Extracts `XX.XX.XX` where X is a range [0-9] from - a given docker version - """ - - @classmethod - def __get_validators__(cls): - yield cls.validate_docker_version - - @classmethod - def validate_docker_version(cls, docker_version: str) -> str: - try: - search_result = re.search(r"^\d\d.(\d\d|\d).(\d\d|\d)", docker_version) - assert search_result # nosec - return search_result.group() - except AttributeError: - raise ValueError( # pylint: disable=raise-missing-from - f"{docker_version} appears not to be a valid docker version" - ) - - -DIND_VERSION: Final[DockerVersion] = parse_obj_as(DockerVersion, "20.10.14") - -# NOTE: below `retry` function is inspired by -# https://gist.github.com/sj26/88e1c6584397bb7c13bd11108a579746 -SH_SCRIPT_REMOVE_VOLUMES = """ -set -e; - -error_counter=0 - -function retry {{ - local retries=$1 - shift - - local count=0 - while true; - do - - local command_result - set +e - $($@ > /tmp/command_result 2>&1) - exit_code=$? - set -e - - command_result=$(cat /tmp/command_result) - echo "$command_result" - volume_name=$4 - - case "$command_result" in - *"Error: No such volume: $volume_name"*) - return 0 - ;; - esac - - if [ $exit_code -eq 0 ]; then - return 0 - fi - - count=$(($count + 1)) - if [ $count -lt $retries ]; then - echo "Retry $count/$retries exited $exit_code, retrying in {sleep} seconds..." - sleep {sleep} - else - echo "Retry $count/$retries exited $exit_code, no more retries left." 
- let error_counter=error_counter+1 - return 0 - fi - done - return 0 -}} - -for volume_name in {volume_names_seq} -do - retry {retries} docker volume rm "$volume_name" -done - -if [ "$error_counter" -ne "0" ]; then - echo "ERROR: Please check above logs, there was/were $error_counter error/s." - exit 1 -fi -""" - - -def spec_volume_removal_service( - swarm_stack_name: str, - docker_node_id: str, - user_id: UserID, - project_id: ProjectID, - node_uuid: NodeID, - volume_names: list[str], - docker_version: DockerVersion = DIND_VERSION, - *, - volume_removal_attempts: int, - sleep_between_attempts_s: int, - service_timeout_s: int, -) -> AioDockerServiceSpec: - """ - Generates a service spec for with base image - `docker:{docker_version}-dind` running the above bash script. - - The bash script will attempt to remove each individual volume - a few times before giving up. - The script will exit with error if it is not capable of - removing the volume. - - NOTE: expect the container of the service to exit with code 0, - otherwise there was an error. - NOTE: the bash script will exit 1 if it cannot find a - volume to remove. - NOTE: service must be removed once it finishes or it will - remain in the system. - NOTE: when running docker-in-docker https://hub.docker.com/_/docker - selecting the same version as the actual docker engine running - on the current node allows to avoid possible incompatible - versions. It is assumed that the same version of docker - will be running in the entire swarm. - """ - - volume_names_seq = " ".join(volume_names) - formatted_command = SH_SCRIPT_REMOVE_VOLUMES.format( - volume_names_seq=volume_names_seq, - retries=volume_removal_attempts, - sleep=sleep_between_attempts_s, - ) - logger.debug("Service will run:\n%s", formatted_command) - command = ["sh", "-c", formatted_command] - - create_service_params = { - "labels": { - "volume_names": json.dumps(volume_names), - "volume_removal_attempts": f"{volume_removal_attempts}", - "sleep_between_attempts_s": f"{sleep_between_attempts_s}", - "service_timeout_s": f"{service_timeout_s}", - "swarm_stack_name": swarm_stack_name, - "user_id": f"{user_id}", - "study_id": f"{project_id}", - "node_id": f"{node_uuid}", - }, - "name": f"{DYNAMIC_VOLUME_REMOVER_PREFIX}_{uuid4()}", - "task_template": { - "ContainerSpec": { - "Command": command, - "Image": f"docker:{docker_version}-dind", - "Mounts": [ - { - "Source": "/var/run/docker.sock", - "Target": "/var/run/docker.sock", - "Type": "bind", - } - ], - }, - "Placement": {"Constraints": [f"node.id == {docker_node_id}"]}, - "RestartPolicy": {"Condition": "none"}, - "Resources": { - "Reservations": { - "MemoryBytes": MEMORY_50MB, - "NanoCPUs": CPU_10_PERCENT, - }, - "Limits": {"MemoryBytes": MEMORY_250MB, "NanoCPUs": CPU_100_PERCENT}, - }, - }, - } - return AioDockerServiceSpec.parse_obj(create_service_params) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_event_create_sidecars.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_event_create_sidecars.py index 6943f7a08526..0d7f2f8288ff 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_event_create_sidecars.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_event_create_sidecars.py @@ -181,7 +181,8 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: groups_extra_properties = get_repository(app, 
GroupsExtraPropertiesRepository) assert scheduler_data.product_name is not None # nosec - allow_internet_access: bool = await groups_extra_properties.has_internet_access( + + user_extra_properties = await groups_extra_properties.get_user_extra_properties( user_id=scheduler_data.user_id, product_name=scheduler_data.product_name ) @@ -194,7 +195,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: "uuid": f"{scheduler_data.node_uuid}", # needed for removal when project is closed }, "Attachable": True, - "Internal": not allow_internet_access, + "Internal": not user_extra_properties.is_internet_enabled, } dynamic_sidecar_network_id = await create_network(network_config) @@ -217,11 +218,6 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: # generate a new `run_id` to avoid resource collisions scheduler_data.run_id = RunID.create() - # telemetry configuration - is_telemetry_enabled = await groups_extra_properties.is_telemetry_enabled( - user_id=scheduler_data.user_id, product_name=scheduler_data.product_name - ) - rpc_client: RabbitMQRPCClient = app.state.rabbitmq_rpc_client # WARNING: do NOT log, this structure has secrets in the open @@ -235,9 +231,8 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: app_settings=app.state.settings, hardware_info=scheduler_data.hardware_info, has_quota_support=dynamic_services_scheduler_settings.DYNAMIC_SIDECAR_ENABLE_VOLUME_LIMITS, - allow_internet_access=allow_internet_access, metrics_collection_allowed=metrics_collection_allowed, - telemetry_enabled=is_telemetry_enabled, + user_extra_properties=user_extra_properties, rpc_client=rpc_client, ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_user_services.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_user_services.py index e88937b13b79..f708c1cb22c4 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_user_services.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_user_services.py @@ -19,6 +19,7 @@ DynamicServicesSchedulerSettings, ) from .....models.dynamic_services_scheduler import SchedulerData +from .....modules.instrumentation import get_instrumentation, get_metrics_labels from .....utils.db import get_repository from ....db.repositories.groups_extra_properties import GroupsExtraPropertiesRepository from ....db.repositories.projects import ProjectsRepository @@ -222,4 +223,15 @@ async def progress_create_containers( scheduler_data.dynamic_sidecar.were_containers_created = True + # NOTE: user services are already in running state, meaning it is safe to pull inputs + await sidecars_client.pull_service_input_ports(dynamic_sidecar_endpoint) + + start_duration = ( + scheduler_data.dynamic_sidecar.instrumentation.elapsed_since_start_request() + ) + assert start_duration is not None # nosec + get_instrumentation(app).dynamic_sidecar_metrics.start_time_duration.labels( + **get_metrics_labels(scheduler_data) + ).observe(start_duration) + _logger.info("Internal state after creating user services %s", scheduler_data) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_utils.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_utils.py index 6fc6357ec3cc..9dbe2763bc93 100644 --- 
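
The observed start-up time above lands in a labelled Prometheus histogram declared in the new `modules/instrumentation/_models.py` (shown at the end of this diff). A self-contained sketch of the same pattern; the label names and time buckets match `_models.py`, while the namespace/subsystem strings and label values are assumptions:

```python
# Hedged sketch of the start-up duration metric and how it is observed.
from prometheus_client import CollectorRegistry, Histogram

registry = CollectorRegistry()
start_time_duration = Histogram(
    "start_time_duration_seconds",
    "time to start a dynamic service",
    labelnames=("user_id", "wallet_id", "service_key", "service_version"),
    namespace="simcore_service_director_v2",  # assumed result of get_metrics_namespace(PROJECT_NAME)
    subsystem="dynamic_sidecar",              # assumed subsystem
    buckets=(10, 30, 60, 120, 180, 300, 420, 600, 900, 1200),  # _BUCKETS_TIME_S from _models.py
    registry=registry,
)

start_time_duration.labels(
    user_id="3",
    wallet_id="12",
    service_key="simcore/services/dynamic/jupyter",  # illustrative
    service_version="1.0.0",
).observe(42.0)  # seconds elapsed since the start was requested
```
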
a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_utils.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_events_utils.py @@ -27,6 +27,13 @@ from servicelib.fastapi.long_running_tasks.server import TaskProgress from servicelib.logging_utils import log_context from servicelib.rabbitmq import RabbitMQClient +from servicelib.rabbitmq._client_rpc import RabbitMQRPCClient +from servicelib.rabbitmq.rpc_interfaces.agent.errors import ( + NoServiceVolumesFoundRPCError, +) +from servicelib.rabbitmq.rpc_interfaces.agent.volumes import ( + remove_volumes_without_backup_for_service, +) from servicelib.utils import limited_gather, logged_gather from simcore_postgres_database.models.comp_tasks import NodeClass from tenacity import RetryError, TryAgain @@ -44,6 +51,12 @@ DockerStatus, SchedulerData, ) +from .....modules.instrumentation import ( + get_instrumentation, + get_metrics_labels, + get_rate, + track_duration, +) from .....utils.db import get_repository from ....db.repositories.projects import ProjectsRepository from ....db.repositories.projects_networks import ProjectsNetworksRepository @@ -61,11 +74,9 @@ get_projects_networks_containers, remove_dynamic_sidecar_network, remove_dynamic_sidecar_stack, - remove_volumes_from_node, try_to_remove_network, ) from ...errors import EntrypointContainerNotFoundError -from ...volumes import DY_SIDECAR_SHARED_STORE_PATH, DynamicSidecarVolumesPathsResolver if TYPE_CHECKING: # NOTE: TYPE_CHECKING is True when static type checkers are running, @@ -157,9 +168,15 @@ async def service_save_state( progress_callback: ProgressCallback | None = None, ) -> None: scheduler_data: SchedulerData = _get_scheduler_data(app, node_uuid) - await sidecars_client.save_service_state( - scheduler_data.endpoint, progress_callback=progress_callback - ) + + with track_duration() as duration: + size = await sidecars_client.save_service_state( + scheduler_data.endpoint, progress_callback=progress_callback + ) + get_instrumentation(app).dynamic_sidecar_metrics.push_service_state_rate.labels( + **get_metrics_labels(scheduler_data) + ).observe(get_rate(size, duration.to_flaot())) + await sidecars_client.update_volume_state( scheduler_data.endpoint, volume_category=VolumeCategory.STATES, @@ -218,30 +235,17 @@ async def service_remove_sidecar_proxy_docker_networks_and_volumes( task_progress.update( message="removing volumes", percent=ProgressPercent(0.3) ) - unique_volume_names = [ - DynamicSidecarVolumesPathsResolver.source( - path=volume_path, - node_uuid=scheduler_data.node_uuid, - run_id=scheduler_data.run_id, - ) - for volume_path in [ - DY_SIDECAR_SHARED_STORE_PATH, - scheduler_data.paths_mapping.inputs_path, - scheduler_data.paths_mapping.outputs_path, - *scheduler_data.paths_mapping.state_paths, - ] - ] - with log_context( - _logger, logging.DEBUG, f"removing volumes via service for {node_uuid}" - ): - await remove_volumes_from_node( - swarm_stack_name=swarm_stack_name, - volume_names=unique_volume_names, - docker_node_id=scheduler_data.dynamic_sidecar.docker_node_id, - user_id=scheduler_data.user_id, - project_id=scheduler_data.project_id, - node_uuid=scheduler_data.node_uuid, - ) + with log_context(_logger, logging.DEBUG, f"removing volumes '{node_uuid}'"): + rabbit_rpc_client: RabbitMQRPCClient = app.state.rabbitmq_rpc_client + try: + await remove_volumes_without_backup_for_service( + rabbit_rpc_client, + docker_node_id=scheduler_data.dynamic_sidecar.docker_node_id, + 
swarm_stack_name=swarm_stack_name, + node_id=scheduler_data.node_uuid, + ) + except NoServiceVolumesFoundRPCError as e: + _logger.info("Could not remove volumes, reason: %s", e) _logger.debug( "Removed dynamic-sidecar services and crated container for '%s'", @@ -375,6 +379,16 @@ async def attempt_pod_removal_and_data_saving( rabbitmq_client: RabbitMQClient = app.state.rabbitmq_client await rabbitmq_client.publish(message.channel_name, message) + # metrics + + stop_duration = ( + scheduler_data.dynamic_sidecar.instrumentation.elapsed_since_close_request() + ) + assert stop_duration is not None # nosec + get_instrumentation(app).dynamic_sidecar_metrics.stop_time_duration.labels( + **get_metrics_labels(scheduler_data) + ).observe(stop_duration) + async def attach_project_networks(app: FastAPI, scheduler_data: SchedulerData) -> None: _logger.debug("Attaching project networks for %s", scheduler_data.service_name) @@ -460,14 +474,44 @@ async def prepare_services_environment( ) ) + async def _pull_output_ports_with_metrics() -> None: + with track_duration() as duration: + size: int = await sidecars_client.pull_service_output_ports( + dynamic_sidecar_endpoint + ) + + get_instrumentation(app).dynamic_sidecar_metrics.output_ports_pull_rate.labels( + **get_metrics_labels(scheduler_data) + ).observe(get_rate(size, duration.to_flaot())) + + async def _pull_user_services_images_with_metrics() -> None: + with track_duration() as duration: + await sidecars_client.pull_user_services_images(dynamic_sidecar_endpoint) + + get_instrumentation( + app + ).dynamic_sidecar_metrics.pull_user_services_images_duration.labels( + **get_metrics_labels(scheduler_data) + ).observe( + duration.to_flaot() + ) + + async def _restore_service_state_with_metrics() -> None: + with track_duration() as duration: + size = await sidecars_client.restore_service_state(dynamic_sidecar_endpoint) + + get_instrumentation(app).dynamic_sidecar_metrics.pull_service_state_rate.labels( + **get_metrics_labels(scheduler_data) + ).observe(get_rate(size, duration.to_flaot())) + tasks = [ - sidecars_client.pull_user_services_images(dynamic_sidecar_endpoint), - sidecars_client.pull_service_output_ports(dynamic_sidecar_endpoint), + _pull_user_services_images_with_metrics(), + _pull_output_ports_with_metrics(), ] # When enabled no longer downloads state via nodeports # S3 is used to store state paths if not app_settings.DIRECTOR_V2_DEV_FEATURE_R_CLONE_MOUNTS_ENABLED: - tasks.append(sidecars_client.restore_service_state(dynamic_sidecar_endpoint)) + tasks.append(_restore_service_state_with_metrics()) await limited_gather(*tasks, limit=3) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler.py index b6a5571a4af2..1e66fd82c527 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler.py @@ -17,10 +17,12 @@ import contextlib import functools import logging +import time from asyncio import Lock, Queue, Task from dataclasses import dataclass, field from typing import Final +import arrow from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import ( DynamicServiceCreate, @@ -54,6 +56,11 @@ DynamicServicesSchedulerSettings, ) from .....models.dynamic_services_scheduler import 
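
`track_duration` and `get_rate`, used in the `*_with_metrics` helpers above, come from the new `modules/instrumentation/_utils.py`, which is not included in this excerpt. The sketch below only captures the assumed semantics (measure elapsed wall-clock seconds, turn a transferred byte count into bytes per second) with illustrative names, not the actual helpers:

```python
# Hedged sketch of the assumed timing/rate helpers feeding the *_rate histograms.
import time
from contextlib import contextmanager
from dataclasses import dataclass, field


@dataclass
class _Duration:
    started: float = field(default_factory=time.time)
    elapsed: float = 0.0


@contextmanager
def track_duration_sketch():
    duration = _Duration()
    try:
        yield duration
    finally:
        duration.elapsed = time.time() - duration.started


def get_rate_sketch(size_bytes: int, seconds: float) -> float:
    # bytes per second; guard against a zero-length interval
    return size_bytes / max(seconds, 1e-9)


# usage mirroring _restore_service_state_with_metrics() above
with track_duration_sketch() as duration:
    size = 734_003_200  # bytes reported by the sidecar, illustrative
rate_bps = get_rate_sketch(size, duration.elapsed)
```
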
SchedulerData, ServiceName +from .....modules.instrumentation import ( + get_instrumentation, + get_metrics_labels, + get_rate, +) from ...api_client import SidecarsClient, get_sidecars_client from ...docker_api import update_scheduler_data_label from ...errors import DynamicSidecarError, DynamicSidecarNotFoundError @@ -86,7 +93,6 @@ class Scheduler( # pylint: disable=too-many-instance-attributes, too-many-publi ) _inverse_search_mapping: dict[NodeID, ServiceName] = field(default_factory=dict) _scheduler_task: Task | None = None - _cleanup_volume_removal_services_task: Task | None = None _trigger_observation_queue_task: Task | None = None _trigger_observation_queue: Queue = field(default_factory=Queue) _observation_counter: int = 0 @@ -116,10 +122,6 @@ async def start(self) -> None: name="dynamic-scheduler-trigger-obs-queue", ) - self._cleanup_volume_removal_services_task = asyncio.create_task( - _scheduler_utils.cleanup_volume_removal_services(self.app), - name="dynamic-scheduler-cleanup-volume-removal-services", - ) await _scheduler_utils.discover_running_services(self) async def shutdown(self) -> None: @@ -127,12 +129,6 @@ async def shutdown(self) -> None: self._inverse_search_mapping = {} self._to_observe = {} - if self._cleanup_volume_removal_services_task is not None: - self._cleanup_volume_removal_services_task.cancel() - with contextlib.suppress(asyncio.CancelledError): - await self._cleanup_volume_removal_services_task - self._cleanup_volume_removal_services_task = None - if self._scheduler_task is not None: await stop_periodic_task(self._scheduler_task, timeout=5) self._scheduler_task = None @@ -255,6 +251,9 @@ async def add_service( request_simcore_user_agent=request_simcore_user_agent, can_save=can_save, ) + scheduler_data.dynamic_sidecar.instrumentation.start_requested_at = ( + arrow.utcnow().datetime + ) await self.add_service_from_scheduler_data(scheduler_data) async def add_service_from_scheduler_data( @@ -353,6 +352,10 @@ async def mark_service_for_removal( ) return + current.dynamic_sidecar.instrumentation.close_requested_at = ( + arrow.utcnow().datetime + ) + # PC-> ANE: could you please review what to do when can_save=None assert can_save is not None # nosec current.dynamic_sidecar.service_removal_state.mark_to_remove( @@ -455,9 +458,19 @@ async def retrieve_service_inputs( dynamic_sidecar_endpoint: AnyHttpUrl = scheduler_data.endpoint sidecars_client: SidecarsClient = await get_sidecars_client(self.app, node_uuid) + started = time.time() transferred_bytes = await sidecars_client.pull_service_input_ports( dynamic_sidecar_endpoint, port_keys ) + duration = time.time() - started + + get_instrumentation( + self.app + ).dynamic_sidecar_metrics.input_ports_pull_rate.labels( + **get_metrics_labels(scheduler_data) + ).observe( + get_rate(transferred_bytes, duration) + ) if scheduler_data.restart_policy == RestartPolicy.ON_INPUTS_DOWNLOADED: logger.info("Will restart containers") diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler_utils.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler_utils.py index 7438e9d996f7..b03356770845 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler_utils.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/_core/_scheduler_utils.py @@ -1,4 +1,3 @@ -import asyncio import logging from typing import Final @@ -15,11 +14,7 @@ 
) from .....models.dynamic_services_scheduler import DynamicSidecarStatus, SchedulerData from ...api_client import SidecarsClient, get_sidecars_client -from ...docker_api import ( - get_dynamic_sidecar_state, - get_dynamic_sidecars_to_observe, - remove_pending_volume_removal_services, -) +from ...docker_api import get_dynamic_sidecar_state, get_dynamic_sidecars_to_observe from ...docker_states import extract_containers_minimum_statuses from ...errors import DockerServiceNotFoundError from ._events_utils import service_push_outputs @@ -60,32 +55,6 @@ async def service_awaits_manual_interventions(scheduler_data: SchedulerData) -> return service_awaits_intervention -async def cleanup_volume_removal_services(app: FastAPI) -> None: - settings: DynamicServicesSchedulerSettings = ( - app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER - ) - - _logger.debug( - "dynamic-sidecars cleanup pending volume removal services every %s seconds", - settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_PENDING_VOLUME_REMOVAL_INTERVAL_S, - ) - while await asyncio.sleep( - settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_PENDING_VOLUME_REMOVAL_INTERVAL_S, - result=True, - ): - _logger.debug("Removing pending volume removal services...") - - try: - await remove_pending_volume_removal_services(settings.SWARM_STACK_NAME) - except asyncio.CancelledError: - _logger.info("Stopped pending volume removal services task") - raise - except Exception: # pylint: disable=broad-except - _logger.exception( - "Unexpected error while cleaning up pending volume removal services" - ) - - async def discover_running_services(scheduler: "Scheduler") -> None: # type: ignore # noqa: F821 """discover all services which were started before and add them to the scheduler""" settings: DynamicServicesSchedulerSettings = ( diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py index 8a6d85c906be..d003eec60e60 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes.py @@ -2,6 +2,9 @@ from pathlib import Path from typing import Any +from models_library.api_schemas_directorv2.services import ( + CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME, +) from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID from models_library.services import RunID @@ -133,7 +136,12 @@ def source(cls, path: Path, node_uuid: NodeID, run_id: RunID) -> str: # and state folders are very long and share the same subdirectory path. # Reversing volume name to prevent these issues from happening. 
reversed_volume_name = cls.volume_name(path)[::-1] - unique_name = f"{PREFIX_DYNAMIC_SIDECAR_VOLUMES}_{run_id}_{node_uuid}_{reversed_volume_name}" + + # ensure prefix size does not change + prefix = f"{PREFIX_DYNAMIC_SIDECAR_VOLUMES}_{run_id}_{node_uuid}" + assert len(prefix) == CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME - 1 # nosec + + unique_name = f"{prefix}_{reversed_volume_name}" return unique_name[:255] @classmethod diff --git a/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/__init__.py b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/__init__.py new file mode 100644 index 000000000000..8c08a824d3f1 --- /dev/null +++ b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/__init__.py @@ -0,0 +1,10 @@ +from ._setup import get_instrumentation, setup +from ._utils import get_metrics_labels, get_rate, track_duration + +__all__: tuple[str, ...] = ( + "get_instrumentation", + "get_metrics_labels", + "get_rate", + "setup", + "track_duration", +) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py new file mode 100644 index 000000000000..7407885af317 --- /dev/null +++ b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py @@ -0,0 +1,145 @@ +from dataclasses import dataclass, field +from typing import Final + +from prometheus_client import CollectorRegistry, Histogram +from pydantic import ByteSize, parse_obj_as +from servicelib.instrumentation import MetricsBase, get_metrics_namespace + +from ..._meta import PROJECT_NAME + +_METRICS_NAMESPACE: Final[str] = get_metrics_namespace(PROJECT_NAME) +_INSTRUMENTATION_LABELS: Final[tuple[str, ...]] = ( + "user_id", + "wallet_id", + "service_key", + "service_version", +) + +_MINUTE: Final[int] = 60 +_BUCKETS_TIME_S: Final[tuple[float, ...]] = ( + 10, + 30, + 1 * _MINUTE, + 2 * _MINUTE, + 3 * _MINUTE, + 5 * _MINUTE, + 7 * _MINUTE, + 10 * _MINUTE, + 15 * _MINUTE, + 20 * _MINUTE, +) + + +_RATE_BPS_BUCKETS: Final[tuple[float, ...]] = tuple( + parse_obj_as(ByteSize, f"{m}MiB") + for m in ( + 1, + 30, + 60, + 90, + 120, + 150, + 200, + 300, + 400, + 500, + 600, + ) +) + + +@dataclass(slots=True, kw_only=True) +class DynamiSidecarMetrics(MetricsBase): + start_time_duration: Histogram = field(init=False) + stop_time_duration: Histogram = field(init=False) + pull_user_services_images_duration: Histogram = field(init=False) + + # ingress rates + output_ports_pull_rate: Histogram = field(init=False) + input_ports_pull_rate: Histogram = field(init=False) + pull_service_state_rate: Histogram = field(init=False) + + # egress rates + # NOTE: input ports are never pushed + # NOTE: output ports are pushed by the dy-sidecar, upon change making recovering the metric very complicated + push_service_state_rate: Histogram = field(init=False) + + def __post_init__(self) -> None: + self.start_time_duration = Histogram( + "start_time_duration_seconds", + "time to start dynamic service (from start request in dv-2 till service containers are in running state (healthy))", + labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_BUCKETS_TIME_S, + subsystem=self.subsystem, + registry=self.registry, + ) + self.stop_time_duration = Histogram( + "stop_time_duration_seconds", + "time to stop dynamic service (from stop request in dv-2 till all allocated resources (services + dynamic-sidecar) are removed)", + 
labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_BUCKETS_TIME_S, + subsystem=self.subsystem, + registry=self.registry, + ) + self.pull_user_services_images_duration = Histogram( + "pull_user_services_images_duration_seconds", + "time to pull docker images", + labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, + ) + + self.output_ports_pull_rate = Histogram( + "output_ports_pull_rate_bps", + "rate at which output ports were pulled", + labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, + ) + self.input_ports_pull_rate = Histogram( + "input_ports_pull_rate_bps", + "rate at which input ports were pulled", + labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, + ) + self.pull_service_state_rate = Histogram( + "pull_service_state_rate_bps", + "rate at which service states were recovered", + labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, + ) + + self.push_service_state_rate = Histogram( + "push_service_state_rate_bps", + "rate at which service states were saved", + labelnames=_INSTRUMENTATION_LABELS, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, + ) + + +@dataclass(slots=True, kw_only=True) +class DirectorV2Instrumentation: + registry: CollectorRegistry + dynamic_sidecar_metrics: DynamiSidecarMetrics = field(init=False) + + def __post_init__(self) -> None: + self.dynamic_sidecar_metrics = ( + DynamiSidecarMetrics( # pylint: disable=unexpected-keyword-arg + subsystem="dynamic_services", registry=self.registry + ) + ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_setup.py b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_setup.py new file mode 100644 index 000000000000..889cb39a460c --- /dev/null +++ b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_setup.py @@ -0,0 +1,28 @@ +from typing import cast + +from fastapi import FastAPI +from servicelib.fastapi.prometheus_instrumentation import ( + setup_prometheus_instrumentation, +) + +from ...core.errors import ConfigurationError +from ._models import DirectorV2Instrumentation + + +def setup(app: FastAPI) -> None: + instrumentator = setup_prometheus_instrumentation(app) + + async def on_startup() -> None: + app.state.instrumentation = DirectorV2Instrumentation( + registry=instrumentator.registry + ) + + app.add_event_handler("startup", on_startup) + + +def get_instrumentation(app: FastAPI) -> DirectorV2Instrumentation: + if not app.state.instrumentation: + raise ConfigurationError( + msg="Instrumentation not setup. Please check the configuration." 
+ ) + return cast(DirectorV2Instrumentation, app.state.instrumentation) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_utils.py b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_utils.py new file mode 100644 index 000000000000..96b23ae5f1f9 --- /dev/null +++ b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_utils.py @@ -0,0 +1,61 @@ +import time +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Final + +from pydantic import NonNegativeFloat + +from ...models.dynamic_services_scheduler import SchedulerData + +_EPSILON: Final[NonNegativeFloat] = 1e9 + + +def get_metrics_labels(scheduler_data: "SchedulerData") -> dict[str, str]: + return { + "user_id": f"{scheduler_data.user_id}", + "wallet_id": ( + f"{scheduler_data.wallet_info.wallet_id}" + if scheduler_data.wallet_info + else "" + ), + "service_key": scheduler_data.key, + "service_version": scheduler_data.version, + } + + +def get_rate( + size: NonNegativeFloat | None, duration: NonNegativeFloat +) -> NonNegativeFloat: + if size is None or size <= 0: + size = _EPSILON + return size / duration + + +class DeferredFloat: + def __init__(self): + self._value: float | None = None + + def set_value(self, value): + if not isinstance(value, float | int): + msg = "Value must be a float or an int." + raise TypeError(msg) + + self._value = float(value) + + def to_flaot(self) -> float: + if not isinstance(self._value, float): + msg = "Value must be a float or an int." + raise TypeError(msg) + + return self._value + + +@contextmanager +def track_duration() -> Iterator[DeferredFloat]: + duration = DeferredFloat() + start_time = time.time() + + yield duration + + end_time = time.time() + duration.set_value(end_time - start_time) diff --git a/services/director-v2/tests/conftest.py b/services/director-v2/tests/conftest.py index eafe6bb15fcd..db64158d6d57 100644 --- a/services/director-v2/tests/conftest.py +++ b/services/director-v2/tests/conftest.py @@ -42,12 +42,12 @@ "pytest_simcore.docker_registry", "pytest_simcore.docker_swarm", "pytest_simcore.environment_configs", + "pytest_simcore.faker_projects_data", "pytest_simcore.faker_users_data", "pytest_simcore.minio_service", "pytest_simcore.postgres_service", "pytest_simcore.pydantic_models", "pytest_simcore.pytest_global_environs", - "pytest_simcore.socketio", "pytest_simcore.rabbit_service", "pytest_simcore.redis_service", "pytest_simcore.repository_paths", @@ -55,6 +55,7 @@ "pytest_simcore.simcore_dask_service", "pytest_simcore.simcore_services", "pytest_simcore.simcore_storage_service", + "pytest_simcore.socketio", ] logger = logging.getLogger(__name__) @@ -187,7 +188,7 @@ def mock_env( "REGISTRY_USER": "test", "SC_BOOT_MODE": "production", "SIMCORE_SERVICES_NETWORK_NAME": "test_network_name", - "SWARM_STACK_NAME": "test_swarm_name", + "SWARM_STACK_NAME": "pytest-simcore", "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", }, ) @@ -208,6 +209,7 @@ async def client(mock_env: EnvVarsDict) -> AsyncIterator[TestClient]: async def initialized_app(mock_env: EnvVarsDict) -> AsyncIterable[FastAPI]: settings = AppSettings.create_from_envs() app = init_app(settings) + print("Application settings\n", settings.json(indent=2)) async with LifespanManager(app): yield app diff --git a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py index 56eaecdda473..4a4340338649 100644 --- 
a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py +++ b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py @@ -52,14 +52,15 @@ logger = logging.getLogger(__name__) pytest_simcore_core_services_selection = [ + "agent", "catalog", "director", "migration", "postgres", "rabbit", "redis", - "storage", "redis", + "storage", ] pytest_simcore_ops_services_selection = [ "adminer", @@ -187,6 +188,7 @@ async def director_v2_client( "REDIS_HOST": redis_settings.REDIS_HOST, "REDIS_PORT": f"{redis_settings.REDIS_PORT}", "REDIS_PASSWORD": f"{redis_settings.REDIS_PASSWORD.get_secret_value()}", + "DIRECTOR_V2_PROMETHEUS_INSTRUMENTATION_ENABLED": "1", }, ) monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False) diff --git a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py index 2fe09c422866..17d3fe4bcca8 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py +++ b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py @@ -1,7 +1,8 @@ # pylint: disable=protected-access # pylint: disable=redefined-outer-name -# pylint: disable=unused-argument # pylint: disable=too-many-arguments +# pylint: disable=unused-argument +# pylint:disable=too-many-positional-arguments import asyncio import hashlib @@ -99,6 +100,7 @@ from yarl import URL pytest_simcore_core_services_selection = [ + "agent", "catalog", "dask-scheduler", "dask-sidecar", @@ -380,7 +382,7 @@ def mock_env( "DYNAMIC_SIDECAR_IMAGE": image_name, "DYNAMIC_SIDECAR_PROMETHEUS_SERVICE_LABELS": "{}", "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", - "SWARM_STACK_NAME": "test_swarm_name", + "SWARM_STACK_NAME": "pytest-simcore", "SC_BOOT_MODE": "production", "DYNAMIC_SIDECAR_EXPOSE_PORT": "true", "DYNAMIC_SIDECAR_LOG_LEVEL": "DEBUG", @@ -401,6 +403,7 @@ def mock_env( "COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED": "true", "COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_URL": dask_scheduler_service, "COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH": dask_scheduler_auth.json(), + "DIRECTOR_V2_PROMETHEUS_INSTRUMENTATION_ENABLED": "1", }, ) monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False) diff --git a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py index 134b9eaea74c..4d7c348a336a 100644 --- a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py +++ b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py @@ -1,6 +1,7 @@ -# pylint:disable=unused-argument # pylint:disable=redefined-outer-name # pylint:disable=too-many-arguments +# pylint:disable=too-many-positional-arguments +# pylint:disable=unused-argument import asyncio import logging @@ -39,6 +40,7 @@ pytest_simcore_core_services_selection = [ + "agent", "catalog", "director", "migration", @@ -65,11 +67,22 @@ def mock_env( minio_s3_settings_envs: EnvVarsDict, storage_service: URL, network_name: str, + services_endpoint: dict[str, URL], ) -> EnvVarsDict: + director_host = services_endpoint["director"].host + assert director_host + director_port = services_endpoint["director"].port + assert director_port + + catalog_host = services_endpoint["catalog"].host + assert catalog_host + catalog_port = services_endpoint["catalog"].port + assert catalog_port 
+ env_vars: EnvVarsDict = { "DYNAMIC_SIDECAR_PROMETHEUS_SERVICE_LABELS": "{}", "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", - "SWARM_STACK_NAME": "test_swarm_name", + "SWARM_STACK_NAME": "pytest-simcore", "DYNAMIC_SIDECAR_LOG_LEVEL": "DEBUG", "SC_BOOT_MODE": "production", "DYNAMIC_SIDECAR_EXPOSE_PORT": "true", @@ -80,6 +93,11 @@ def mock_env( "COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED": "false", "COMPUTATIONAL_BACKEND_ENABLED": "false", "R_CLONE_PROVIDER": "MINIO", + "DIRECTOR_V2_PROMETHEUS_INSTRUMENTATION_ENABLED": "1", + "DIRECTOR_HOST": director_host, + "DIRECTOR_PORT": f"{director_port}", + "CATALOG_HOST": catalog_host, + "CATALOG_PORT": f"{catalog_port}", } setenvs_from_dict(monkeypatch, env_vars) monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False) @@ -158,7 +176,7 @@ def _assemble_node_data(spec: dict, label: str) -> dict[str, str]: @pytest.fixture async def ensure_services_stopped( dy_static_file_server_project: ProjectAtDB, - minimal_app: FastAPI, + initialized_app: FastAPI, ) -> AsyncIterable[None]: yield # ensure service cleanup when done testing @@ -177,7 +195,7 @@ async def ensure_services_stopped( # pylint: disable=protected-access scheduler_interval = ( - minimal_app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL + initialized_app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL ) # sleep enough to ensure the observation cycle properly stopped the service await asyncio.sleep(2 * scheduler_interval.total_seconds()) @@ -190,9 +208,10 @@ def mock_sidecars_client(mocker: MockerFixture) -> mock.Mock: "simcore_service_director_v2.modules.dynamic_sidecar.api_client.SidecarsClient" ) for function_name, return_value in [ - ("pull_service_output_ports", None), - ("restore_service_state", None), + ("pull_service_output_ports", 0), + ("restore_service_state", 0), ("push_service_output_ports", None), + ("save_service_state", 0), ]: mocker.patch( f"{class_path}.{function_name}", @@ -214,7 +233,7 @@ async def _mocked_context_manger(*args, **kwargs) -> AsyncIterator[None]: @pytest.mark.flaky(max_runs=3) async def test_legacy_and_dynamic_sidecar_run( - minimal_app: FastAPI, + initialized_app: FastAPI, wait_for_catalog_service: Callable[[UserID, str], Awaitable[None]], dy_static_file_server_project: ProjectAtDB, user_dict: dict[str, Any], @@ -263,13 +282,7 @@ async def test_legacy_and_dynamic_sidecar_run( if is_legacy(node): continue - # NOTE: it seems the minimal_app fixture does not contain the actual data - # so we use the one in the async_client??? 
very strange - await patch_dynamic_service_url( - # pylint: disable=protected-access - app=async_client._transport.app, # noqa: SLF001 # type: ignore - node_uuid=node_id, - ) + await patch_dynamic_service_url(app=initialized_app, node_uuid=node_id) assert len(dy_static_file_server_project.workbench) == 3 diff --git a/services/director-v2/tests/unit/conftest.py b/services/director-v2/tests/unit/conftest.py index f08ffd47337a..ecd7da595445 100644 --- a/services/director-v2/tests/unit/conftest.py +++ b/services/director-v2/tests/unit/conftest.py @@ -35,10 +35,6 @@ from simcore_service_director_v2.constants import DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL from simcore_service_director_v2.core.settings import AppSettings from simcore_service_director_v2.models.dynamic_services_scheduler import SchedulerData -from simcore_service_director_v2.modules.dynamic_sidecar.docker_service_specs.volume_remover import ( - DIND_VERSION, - DockerVersion, -) @pytest.fixture @@ -341,8 +337,3 @@ def mock_docker_api(mocker: MockerFixture) -> None: async def async_docker_client() -> AsyncIterable[aiodocker.Docker]: async with aiodocker.Docker() as docker_client: yield docker_client - - -@pytest.fixture -async def docker_version() -> DockerVersion: - return parse_obj_as(DockerVersion, DIND_VERSION) diff --git a/services/director-v2/tests/unit/test_modules_dask_client.py b/services/director-v2/tests/unit/test_modules_dask_client.py index a01980027c02..f63381c538bc 100644 --- a/services/director-v2/tests/unit/test_modules_dask_client.py +++ b/services/director-v2/tests/unit/test_modules_dask_client.py @@ -284,11 +284,6 @@ def project_id() -> ProjectID: return uuid4() -@pytest.fixture -def node_id() -> NodeID: - return uuid4() - - @dataclass class ImageParams: image: Image diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_volume_remover.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_volume_remover.py deleted file mode 100644 index 4f5672b4a533..000000000000 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_volume_remover.py +++ /dev/null @@ -1,230 +0,0 @@ -# pylint: disable=redefined-outer-name - -import contextlib -from pathlib import Path -from typing import AsyncIterator - -import pytest -from aiodocker import Docker, DockerError -from aiodocker.volumes import DockerVolume -from faker import Faker -from models_library.services import RunID -from pydantic import parse_obj_as -from simcore_service_director_v2.modules.dynamic_sidecar.docker_service_specs.volume_remover import ( - SH_SCRIPT_REMOVE_VOLUMES, - DockerVersion, -) - -# UTILS - - -def _get_source(run_id: RunID, node_uuid: str, volume_path: Path) -> str: - reversed_path = f"{volume_path}"[::-1].replace("/", "_") - return f"dyv_{run_id}_{node_uuid}_{reversed_path}" - - -async def run_command( - async_docker_client: Docker, docker_version: DockerVersion, volume_names: list[str] -) -> str: - volume_names_seq = " ".join(volume_names) - formatted_command = SH_SCRIPT_REMOVE_VOLUMES.format( - volume_names_seq=volume_names_seq, retries=3, sleep=0.1 - ) - print("Container will run:\n%s", formatted_command) - command = ["sh", "-c", formatted_command] - - container = await async_docker_client.containers.run( - config={ - "Cmd": command, - "Image": f"docker:{docker_version}-dind", - "HostConfig": {"Binds": ["/var/run/docker.sock:/var/run/docker.sock"]}, - }, - ) - await container.start() - await container.wait() - - logs = await 
container.log(stderr=True, stdout=True) - - await container.delete(force=True) - - return "".join(logs) - - -# FIXTURES - - -@pytest.fixture -def swarm_stack_name() -> str: - return "test_stack" - - -@pytest.fixture -def study_id(faker: Faker) -> str: - return faker.uuid4() - - -@pytest.fixture -def node_uuid(faker: Faker) -> str: - return faker.uuid4() - - -@pytest.fixture -def run_id() -> RunID: - return RunID.create() - - -@pytest.fixture -def used_volume_path(tmp_path: Path) -> Path: - return tmp_path / "used_volume" - - -@pytest.fixture -def unused_volume_path(tmp_path: Path) -> Path: - return tmp_path / "unused_volume" - - -@pytest.fixture -async def unused_volume( - async_docker_client: Docker, - swarm_stack_name: str, - study_id: str, - node_uuid: str, - run_id: RunID, - unused_volume_path: Path, -) -> AsyncIterator[DockerVolume]: - source = _get_source(run_id, node_uuid, unused_volume_path) - volume = await async_docker_client.volumes.create( - { - "Name": source, - "Labels": { - "node_uuid": node_uuid, - "run_id": run_id, - "source": source, - "study_id": study_id, - "swarm_stack_name": swarm_stack_name, - "user_id": "1", - }, - } - ) - - yield volume - - with contextlib.suppress(DockerError): - await volume.delete() - - -@pytest.fixture -async def used_volume( - async_docker_client: Docker, - swarm_stack_name: str, - study_id: str, - node_uuid: str, - run_id: RunID, - used_volume_path: Path, -) -> AsyncIterator[DockerVolume]: - source = _get_source(run_id, node_uuid, used_volume_path) - volume = await async_docker_client.volumes.create( - { - "Name": source, - "Labels": { - "node_uuid": node_uuid, - "run_id": run_id, - "source": source, - "study_id": study_id, - "swarm_stack_name": swarm_stack_name, - "user_id": "1", - }, - } - ) - - container = await async_docker_client.containers.run( - config={ - "Cmd": ["/bin/ash", "-c", "sleep 10000"], - "Image": "alpine:latest", - "HostConfig": {"Binds": [f"{volume.name}:{used_volume_path}"]}, - }, - name=f"using_volume_{volume.name}", - ) - await container.start() - - yield volume - - await container.delete(force=True) - await volume.delete() - - -@pytest.fixture -async def used_volume_name(used_volume: DockerVolume) -> str: - volume = await used_volume.show() - return volume["Name"] - - -@pytest.fixture -async def unused_volume_name(unused_volume: DockerVolume) -> str: - volume = await unused_volume.show() - return volume["Name"] - - -@pytest.fixture -def missing_volume_name(run_id: RunID, node_uuid: str) -> str: - return _get_source(run_id, node_uuid, Path("/MISSING/PATH")) - - -# TESTS - - -async def test_sh_script_error_if_volume_is_used( - async_docker_client: Docker, used_volume_name: str, docker_version: DockerVersion -): - command_stdout = await run_command( - async_docker_client, docker_version, volume_names=[used_volume_name] - ) - print(command_stdout) - assert "ERROR: Please check above logs, there was/were 1 error/s." 
in command_stdout - - -async def test_sh_script_removes_unused_volume( - async_docker_client: Docker, unused_volume_name: str, docker_version: DockerVersion -): - command_stdout = await run_command( - async_docker_client, docker_version, volume_names=[unused_volume_name] - ) - print(command_stdout) - assert "ERROR: Please check above logs, there was/were" not in command_stdout - assert command_stdout == f"{unused_volume_name}\n" - - -async def test_sh_script_no_error_if_volume_does_not_exist( - async_docker_client: Docker, missing_volume_name: str, docker_version: DockerVersion -): - command_stdout = await run_command( - async_docker_client, docker_version, volume_names=[missing_volume_name] - ) - print(command_stdout) - assert "ERROR: Please check above logs, there was/were" not in command_stdout - - -@pytest.mark.parametrize( - "docker_version", - [ - "20.10.17", - "20.10.17+azure-1-dind", # github workers - "20.10.17.", - "20.10.17asdjasjsaddas", - ], -) -def test_docker_version_strips_unwanted(docker_version: str): - assert parse_obj_as(DockerVersion, docker_version) == "20.10.17" - - -@pytest.mark.parametrize( - "invalid_docker_version", - [ - "nope", - ".20.10.17.", - ".20.10.17", - ], -) -def test_docker_version_invalid(invalid_docker_version: str): - with pytest.raises(ValueError): - parse_obj_as(DockerVersion, invalid_docker_version) diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler_task.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler_task.py index 124b156ff0e3..5410c37f2039 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler_task.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler_task.py @@ -212,7 +212,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: @pytest.fixture def mock_remove_calls(mocker: MockerFixture) -> None: - mocker.patch.object(_events_utils, "remove_volumes_from_node") + mocker.patch.object(_events_utils, "remove_volumes_without_backup_for_service") @pytest.fixture(params=[True, False]) diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_volumes_resolver.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_volumes_resolver.py index cc64e2fd541a..b617c3da6375 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_volumes_resolver.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_volumes_resolver.py @@ -9,6 +9,9 @@ import aiodocker import pytest from faker import Faker +from models_library.api_schemas_directorv2.services import ( + CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME, +) from models_library.projects import ProjectID from models_library.services import RunID from models_library.users import UserID @@ -144,6 +147,11 @@ def test_volumes_get_truncated_as_expected(faker: Faker): node_uuid=node_uuid, run_id=run_id, ) + + # if below fails the agent will have issues please check + constant_part = unique_volume_name[: CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME - 1] + assert constant_part == f"dyv_{run_id}_{node_uuid}" + assert len(unique_volume_name) == 255 assert f"{run_id}" in unique_volume_name assert f"{node_uuid}" in unique_volume_name diff --git a/services/director-v2/tests/unit/test_modules_instrumentation__utils.py b/services/director-v2/tests/unit/test_modules_instrumentation__utils.py new file mode 100644 index 000000000000..8ebcada1fde9 --- /dev/null +++ b/services/director-v2/tests/unit/test_modules_instrumentation__utils.py @@ -0,0 +1,10 @@ 
+import time + +from simcore_service_director_v2.modules.instrumentation._utils import track_duration + + +def test_track_duration(): + with track_duration() as duration: + time.sleep(0.1) + + assert duration.to_flaot() > 0.1 diff --git a/services/director-v2/tests/unit/with_dbs/test_api_route_computations.py b/services/director-v2/tests/unit/with_dbs/test_api_route_computations.py index 7fe676662673..81034fbaee5f 100644 --- a/services/director-v2/tests/unit/with_dbs/test_api_route_computations.py +++ b/services/director-v2/tests/unit/with_dbs/test_api_route_computations.py @@ -4,6 +4,7 @@ # pylint: disable=too-many-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable +# pylint:disable=too-many-positional-arguments import datetime import json diff --git a/services/director-v2/tests/unit/with_dbs/test_api_route_dynamic_services.py b/services/director-v2/tests/unit/with_dbs/test_api_route_dynamic_services.py index 4ddb656a0b2b..cc0246bfec9b 100644 --- a/services/director-v2/tests/unit/with_dbs/test_api_route_dynamic_services.py +++ b/services/director-v2/tests/unit/with_dbs/test_api_route_dynamic_services.py @@ -81,6 +81,8 @@ def minimal_config( monkeypatch.setenv("COMPUTATIONAL_BACKEND_ENABLED", "0") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "0") + monkeypatch.setenv("DIRECTOR_V2_PROMETHEUS_INSTRUMENTATION_ENABLED", "1") + @pytest.fixture(scope="session") def dynamic_sidecar_headers() -> dict[str, str]: diff --git a/services/director-v2/tests/unit/with_dbs/test_cli.py b/services/director-v2/tests/unit/with_dbs/test_cli.py index 0322610985c3..43beec859003 100644 --- a/services/director-v2/tests/unit/with_dbs/test_cli.py +++ b/services/director-v2/tests/unit/with_dbs/test_cli.py @@ -81,6 +81,7 @@ def mock_save_service_state(mocker: MockerFixture) -> None: mocker.patch( "simcore_service_director_v2.modules.dynamic_sidecar.api_client._public.SidecarsClient.save_service_state", spec=True, + return_value=0, ) diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_api.py b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_api.py index b08c5c0c00c9..0536261ed629 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_api.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_api.py @@ -3,7 +3,6 @@ # pylint: disable=protected-access import asyncio -import contextlib import datetime import logging import sys @@ -14,9 +13,7 @@ import aiodocker import pytest from aiodocker.utils import clean_filters -from aiodocker.volumes import DockerVolume from faker import Faker -from fastapi.encoders import jsonable_encoder from models_library.docker import to_simcore_runtime_docker_label_key from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID @@ -27,7 +24,6 @@ DYNAMIC_PROXY_SERVICE_PREFIX, DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL, DYNAMIC_SIDECAR_SERVICE_PREFIX, - DYNAMIC_VOLUME_REMOVER_PREFIX, ) from simcore_service_director_v2.core.dynamic_services_settings.scheduler import ( DynamicServicesSchedulerSettings, @@ -47,10 +43,6 @@ from simcore_service_director_v2.modules.dynamic_sidecar.docker_api._utils import ( docker_client, ) -from simcore_service_director_v2.modules.dynamic_sidecar.docker_service_specs.volume_remover import ( - DockerVersion, - spec_volume_removal_service, -) from simcore_service_director_v2.modules.dynamic_sidecar.errors import ( DynamicSidecarError, GenericDockerError, @@ 
-798,194 +790,3 @@ async def test_constrain_service_to_node( label, value = node_id_constraint.split("==") assert label.strip() == "node.id" assert value.strip() == target_node_id - - -@pytest.fixture -async def named_volumes( - async_docker_client: aiodocker.Docker, faker: Faker -) -> AsyncIterator[list[str]]: - named_volumes: list[DockerVolume] = [] - volume_names: list[str] = [] - for _ in range(10): - named_volume: DockerVolume = await async_docker_client.volumes.create( - {"Name": f"named-volume-{faker.uuid4()}"} - ) - volume_names.append(named_volume.name) - named_volumes.append(named_volume) - - yield volume_names - - # remove volume if still present - for named_volume in named_volumes: - with contextlib.suppress(aiodocker.DockerError): - await named_volume.delete() - - -async def is_volume_present( - async_docker_client: aiodocker.Docker, volume_name: str -) -> bool: - list_of_volumes = await async_docker_client.volumes.list() - for volume in list_of_volumes.get("Volumes", []): - if volume["Name"] == volume_name: - return True - return False - - -async def test_remove_volume_from_node_ok( - docker_swarm: None, - async_docker_client: aiodocker.Docker, - named_volumes: list[str], - target_node_id: str, - user_id: UserID, - project_id: ProjectID, - node_uuid: NodeID, - dynamic_sidecar_settings: DynamicSidecarSettings, - dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings, -): - for named_volume in named_volumes: - assert await is_volume_present(async_docker_client, named_volume) is True - - volume_removal_result = await docker_api.remove_volumes_from_node( - swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME, - volume_names=named_volumes, - docker_node_id=target_node_id, - user_id=user_id, - project_id=project_id, - node_uuid=node_uuid, - ) - assert volume_removal_result is True - - for named_volume in named_volumes: - assert await is_volume_present(async_docker_client, named_volume) is False - - -async def test_remove_volume_from_node_no_volume_found( - docker_swarm: None, - async_docker_client: aiodocker.Docker, - named_volumes: list[str], - target_node_id: str, - user_id: UserID, - project_id: ProjectID, - node_uuid: NodeID, - dynamic_sidecar_settings: DynamicSidecarSettings, - dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings, -): - missing_volume_name = "nope-i-am-fake-and-do-not-exist" - assert await is_volume_present(async_docker_client, missing_volume_name) is False - - # put the missing one in the middle of the sequence - volumes_to_remove = named_volumes[:1] + [missing_volume_name] + named_volumes[1:] - - volume_removal_result = await docker_api.remove_volumes_from_node( - swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME, - volume_names=volumes_to_remove, - docker_node_id=target_node_id, - user_id=user_id, - project_id=project_id, - node_uuid=node_uuid, - volume_removal_attempts=2, - sleep_between_attempts_s=1, - ) - assert volume_removal_result is True - assert await is_volume_present(async_docker_client, missing_volume_name) is False - for named_volume in named_volumes: - assert await is_volume_present(async_docker_client, named_volume) is False - - -@pytest.fixture -def volume_removal_services_names(faker: Faker) -> set[str]: - return {f"{DYNAMIC_VOLUME_REMOVER_PREFIX}_{faker.uuid4()}" for _ in range(10)} - - -@pytest.fixture(params=[0, 2]) -def service_timeout_s(request: pytest.FixtureRequest) -> int: - return request.param # type: ignore - - -@pytest.fixture -async def 
ensure_fake_volume_removal_services( - async_docker_client: aiodocker.Docker, - docker_version: DockerVersion, - target_node_id: str, - user_id: UserID, - project_id: ProjectID, - node_uuid: NodeID, - volume_removal_services_names: list[str], - dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings, - service_timeout_s: int, - docker_swarm: None, -) -> AsyncIterator[None]: - started_services_ids: list[str] = [] - - for service_name in volume_removal_services_names: - service_spec = spec_volume_removal_service( - swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME, - docker_node_id=target_node_id, - user_id=user_id, - project_id=project_id, - node_uuid=node_uuid, - volume_names=[], - docker_version=docker_version, - volume_removal_attempts=0, - sleep_between_attempts_s=0, - service_timeout_s=service_timeout_s, - ) - - # replace values - service_spec.Name = service_name - # use very long sleep command - service_spec.TaskTemplate.ContainerSpec.Command = ["sh", "-c", "sleep 3600"] - - started_service = await async_docker_client.services.create( - **jsonable_encoder(service_spec, by_alias=True, exclude_unset=True) - ) - started_services_ids.append(started_service["ID"]) - - yield None - - for service_id in started_services_ids: - try: - await async_docker_client.services.delete(service_id) - except aiodocker.exceptions.DockerError as e: - assert e.message == f"service {service_id} not found" - - -async def _get_pending_services(async_docker_client: aiodocker.Docker) -> list[str]: - service_filters = {"name": [f"{DYNAMIC_VOLUME_REMOVER_PREFIX}"]} - return [ - x["Spec"]["Name"] - for x in await async_docker_client.services.list(filters=service_filters) - ] - - -async def test_get_volume_removal_services( - ensure_fake_volume_removal_services: None, - async_docker_client: aiodocker.Docker, - volume_removal_services_names: set[str], - dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings, - service_timeout_s: int, -): - # services will be detected as timed out after 1 second - sleep_for = 1.01 - await asyncio.sleep(sleep_for) - - pending_service_names = await _get_pending_services(async_docker_client) - assert len(pending_service_names) == len(volume_removal_services_names) - - # check services are present before removing timed out services - for service_name in pending_service_names: - assert service_name in volume_removal_services_names - - await docker_api.remove_pending_volume_removal_services( - dynamic_services_scheduler_settings.SWARM_STACK_NAME - ) - - # check that timed out services have been removed - pending_service_names = await _get_pending_services(async_docker_client) - services_have_timed_out = sleep_for > service_timeout_s - if services_have_timed_out: - assert len(pending_service_names) == 0 - else: - assert len(pending_service_names) == len(volume_removal_services_names) - for service_name in pending_service_names: - assert service_name in volume_removal_services_names diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py index 4ebacc3424e7..a05e4cd84dab 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py @@ -38,6 +38,9 @@ ) from simcore_service_director_v2.models.dynamic_services_scheduler import SchedulerData from 
simcore_service_director_v2.modules.catalog import CatalogClient +from simcore_service_director_v2.modules.db.repositories.groups_extra_properties import ( + UserExtraProperties, +) from simcore_service_director_v2.modules.dynamic_sidecar.docker_service_specs import ( get_dynamic_sidecar_spec, ) @@ -451,9 +454,12 @@ async def test_get_dynamic_proxy_spec( app_settings=minimal_app.state.settings, hardware_info=hardware_info, has_quota_support=False, - allow_internet_access=False, metrics_collection_allowed=True, - telemetry_enabled=True, + user_extra_properties=UserExtraProperties( + is_internet_enabled=False, + is_telemetry_enabled=True, + is_efs_enabled=False, + ), rpc_client=Mock(), ) @@ -546,9 +552,12 @@ async def test_merge_dynamic_sidecar_specs_with_user_specific_specs( app_settings=minimal_app.state.settings, hardware_info=hardware_info, has_quota_support=False, - allow_internet_access=False, metrics_collection_allowed=True, - telemetry_enabled=True, + user_extra_properties=UserExtraProperties( + is_internet_enabled=False, + is_telemetry_enabled=True, + is_efs_enabled=False, + ), rpc_client=Mock(), ) assert dynamic_sidecar_spec diff --git a/services/director/docker/boot.sh b/services/director/docker/boot.sh index f771974b0957..2a77aa40daad 100755 --- a/services/director/docker/boot.sh +++ b/services/director/docker/boot.sh @@ -18,7 +18,7 @@ if [ "${SC_BUILD_TARGET}" = "development" ]; then python --version | sed 's/^/ /' command -v python | sed 's/^/ /' cd services/director || exit 1 - # speedup for legacy service with all essential depnendcy pinned + # speedup for legacy service with all essential dependencies pinned # in this case `--no-deps` does the trick, for details see link # https://stackoverflow.com/a/65793484/2855718 pip install --no-cache-dir --no-deps -r requirements/dev.txt diff --git a/services/director/src/simcore_service_director/config.py b/services/director/src/simcore_service_director/config.py index 9bccaa0a539e..67a15cb05ac3 100644 --- a/services/director/src/simcore_service_director/config.py +++ b/services/director/src/simcore_service_director/config.py @@ -163,12 +163,6 @@ def _parse_placement_substitutions() -> Dict[str, str]: # NOTE: keep disabled for unit-testing otherwise mocks will not hold MONITORING_ENABLED: bool = strtobool(os.environ.get("MONITORING_ENABLED", "False")) -# tracing -TRACING_ENABLED: bool = strtobool(os.environ.get("TRACING_ENABLED", "True")) -TRACING_ZIPKIN_ENDPOINT: str = os.environ.get( - "TRACING_ZIPKIN_ENDPOINT", "http://jaeger:9411" # NOSONAR -) - # resources: not taken from servicelib.resources since the director uses a fixed hash of that library CPU_RESOURCE_LIMIT_KEY = "SIMCORE_NANO_CPUS_LIMIT" MEM_RESOURCE_LIMIT_KEY = "SIMCORE_MEMORY_BYTES_LIMIT" diff --git a/services/docker-compose-dev-vendors.yml b/services/docker-compose-dev-vendors.yml new file mode 100644 index 000000000000..cb2e45910eb0 --- /dev/null +++ b/services/docker-compose-dev-vendors.yml @@ -0,0 +1,36 @@ + +# NOTE: this stack is only for development and testing of vendor services. +# the actual code is deployed inside the ops repository.
+ +services: + + manual: + image: ${VENDOR_DEV_MANUAL_IMAGE} + init: true + hostname: "{{.Node.Hostname}}-{{.Task.Slot}}" + deploy: + replicas: ${VENDOR_DEV_MANUAL_REPLICAS} + labels: + - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} + - traefik.enable=true + - traefik.docker.network=${SWARM_STACK_NAME}_default + # auth + - traefik.http.middlewares.${SWARM_STACK_NAME}_manual-auth.forwardauth.address=http://${WEBSERVER_HOST}:${WEBSERVER_PORT}/v0/auth:check + - traefik.http.middlewares.${SWARM_STACK_NAME}_manual-auth.forwardauth.trustForwardHeader=true + - traefik.http.middlewares.${SWARM_STACK_NAME}_manual-auth.forwardauth.authResponseHeaders=Set-Cookie,osparc-sc + # routing + - traefik.http.services.${SWARM_STACK_NAME}_manual.loadbalancer.server.port=80 + - traefik.http.services.${SWARM_STACK_NAME}_manual.loadbalancer.healthcheck.path=/ + - traefik.http.services.${SWARM_STACK_NAME}_manual.loadbalancer.healthcheck.interval=2000ms + - traefik.http.services.${SWARM_STACK_NAME}_manual.loadbalancer.healthcheck.timeout=1000ms + - traefik.http.routers.${SWARM_STACK_NAME}_manual.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_manual.priority=10 + - traefik.http.routers.${SWARM_STACK_NAME}_manual.rule=HostRegexp(`${VENDOR_DEV_MANUAL_SUBDOMAIN}\.(?P.+)`) + - traefik.http.routers.${SWARM_STACK_NAME}_manual.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME}_manual-auth + networks: + - simcore_default + +networks: + simcore_default: + name: ${SWARM_STACK_NAME}_default + external: true diff --git a/services/docker-compose-ops.yml b/services/docker-compose-ops.yml index c5265e44d2aa..9beacf76c343 100644 --- a/services/docker-compose-ops.yml +++ b/services/docker-compose-ops.yml @@ -30,7 +30,18 @@ services: - "18080:8080" networks: - simcore_default - + jaeger: + image: jaegertracing/all-in-one:1.47 + networks: + - simcore_default + ports: + - "16686:16686" # Jaeger UI + - "14268:14268" # Jaeger HTTP Thrift + - "14250:14250" # Jaeger gRPC + - "43017:4317" # opentelemetry GRPC default port + - "43018:4318" # opentelemetry HTTP default port + environment: + COLLECTOR_OTLP_ENABLED: "true" portainer: image: portainer/portainer-ce init: true @@ -82,13 +93,28 @@ services: user_notifications:${REDIS_HOST}:${REDIS_PORT}:4:${REDIS_PASSWORD}, announcements:${REDIS_HOST}:${REDIS_PORT}:5:${REDIS_PASSWORD}, distributed_identifiers:${REDIS_HOST}:${REDIS_PORT}:6:${REDIS_PASSWORD}, - deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7:${REDIS_PASSWORD} + deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7:${REDIS_PASSWORD}, + dynamic_services:${REDIS_HOST}:${REDIS_PORT}:8:${REDIS_PASSWORD} # If you add/remove a db, do not forget to update the --databases entry in the docker-compose.yml ports: - "18081:8081" networks: - simcore_default - + opentelemetry-collector: + image: otel/opentelemetry-collector-contrib:0.105.0 + volumes: + - ./opentelemetry-collector-config.yaml:/etc/otel/config.yaml + hostname: "{{.Node.Hostname}}-{{.Task.Slot}}" + command: + - "--config=/etc/otel/config.yaml" + ports: + - "4318:4318" # OTLP HTTP receiver + networks: + - simcore_default + environment: + TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE} + TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE} + TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT} volumes: minio_data: name: ops_minio_data @@ -97,8 +123,8 @@ volumes: networks: simcore_default: - name: 
${SWARM_STACK_NAME:-simcore}_default + name: ${SWARM_STACK_NAME}_default external: true interactive_services_subnet: - name: ${SWARM_STACK_NAME:-simcore}_interactive_services_subnet + name: ${SWARM_STACK_NAME}_interactive_services_subnet external: true diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 994cb71c890f..ba8137e0e5ae 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -41,6 +41,8 @@ services: WEBSERVER_HOST: ${WB_API_WEBSERVER_HOST} WEBSERVER_PORT: ${WB_API_WEBSERVER_PORT} WEBSERVER_SESSION_SECRET_KEY: ${WEBSERVER_SESSION_SECRET_KEY} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} deploy: labels: @@ -70,6 +72,8 @@ services: AUTOSCALING_LOGLEVEL: ${AUTOSCALING_LOGLEVEL} AUTOSCALING_POLL_INTERVAL: ${AUTOSCALING_POLL_INTERVAL} AUTOSCALING_DRAIN_NODES_WITH_LABELS: ${AUTOSCALING_DRAIN_NODES_WITH_LABELS} + AUTOSCALING_DOCKER_JOIN_DRAINED: ${AUTOSCALING_DOCKER_JOIN_DRAINED} + AUTOSCALING_WAIT_FOR_CLOUD_INIT_BEFORE_WARM_BUFFER_ACTIVATION: ${AUTOSCALING_WAIT_FOR_CLOUD_INIT_BEFORE_WARM_BUFFER_ACTIVATION} AUTOSCALING_DASK: ${AUTOSCALING_DASK} # comp autoscaling DASK_MONITORING_URL: ${DASK_MONITORING_URL} @@ -114,11 +118,15 @@ services: REDIS_HOST: ${REDIS_HOST} REDIS_PASSWORD: ${REDIS_PASSWORD} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REGISTRY_USER: ${REGISTRY_USER} REGISTRY_PW: ${REGISTRY_PW} REGISTRY_URL: ${REGISTRY_URL} REGISTRY_SSL: ${REGISTRY_SSL} REGISTRY_AUTH: ${REGISTRY_AUTH} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} volumes: - "/var/run/docker.sock:/var/run/docker.sock" deploy: @@ -157,7 +165,8 @@ services: RABBIT_PORT: ${RABBIT_PORT} RABBIT_SECURE: ${RABBIT_SECURE} RABBIT_USER: ${RABBIT_USER} - TRACING_THRIFT_COMPACT_ENDPOINT: ${TRACING_THRIFT_COMPACT_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} networks: - default @@ -180,6 +189,11 @@ services: CLUSTERS_KEEPER_EC2_ENDPOINT: ${CLUSTERS_KEEPER_EC2_ENDPOINT} CLUSTERS_KEEPER_EC2_REGION_NAME: ${CLUSTERS_KEEPER_EC2_REGION_NAME} CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY: ${CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY} + CLUSTERS_KEEPER_SSM_ACCESS: ${CLUSTERS_KEEPER_SSM_ACCESS} + CLUSTERS_KEEPER_SSM_ACCESS_KEY_ID: ${CLUSTERS_KEEPER_SSM_ACCESS_KEY_ID} + CLUSTERS_KEEPER_SSM_ENDPOINT: ${CLUSTERS_KEEPER_SSM_ENDPOINT} + CLUSTERS_KEEPER_SSM_REGION_NAME: ${CLUSTERS_KEEPER_SSM_REGION_NAME} + CLUSTERS_KEEPER_SSM_SECRET_ACCESS_KEY: ${CLUSTERS_KEEPER_SSM_SECRET_ACCESS_KEY} CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX: ${CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES: ${CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES} @@ -195,6 +209,8 @@ services: PRIMARY_EC2_INSTANCES_SSM_TLS_DASK_KEY: ${PRIMARY_EC2_INSTANCES_SSM_TLS_DASK_KEY} PRIMARY_EC2_INSTANCES_PROMETHEUS_USERNAME: ${PRIMARY_EC2_INSTANCES_PROMETHEUS_USERNAME} PRIMARY_EC2_INSTANCES_PROMETHEUS_PASSWORD: ${PRIMARY_EC2_INSTANCES_PROMETHEUS_PASSWORD} + PRIMARY_EC2_INSTANCES_MAX_START_TIME: ${PRIMARY_EC2_INSTANCES_MAX_START_TIME} + PRIMARY_EC2_INSTANCES_DOCKER_DEFAULT_ADDRESS_POOL: ${PRIMARY_EC2_INSTANCES_DOCKER_DEFAULT_ADDRESS_POOL} RABBIT_HOST: ${RABBIT_HOST} 
RABBIT_PASSWORD: ${RABBIT_PASSWORD} RABBIT_PORT: ${RABBIT_PORT} @@ -202,6 +218,8 @@ services: RABBIT_USER: ${RABBIT_USER} REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} SWARM_STACK_NAME: ${SWARM_STACK_NAME} CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES: ${CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES} @@ -214,6 +232,8 @@ services: WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS: ${WORKERS_EC2_INSTANCES_SECURITY_GROUP_IDS} WORKERS_EC2_INSTANCES_SUBNET_ID: ${WORKERS_EC2_INSTANCES_SUBNET_ID} WORKERS_EC2_INSTANCES_CUSTOM_TAGS: ${WORKERS_EC2_INSTANCES_CUSTOM_TAGS} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} secrets: *dask_tls_secrets director: @@ -254,8 +274,8 @@ services: SIMCORE_SERVICES_NETWORK_NAME: interactive_services_subnet STORAGE_ENDPOINT: ${STORAGE_ENDPOINT} SWARM_STACK_NAME: ${SWARM_STACK_NAME:-simcore} - TRACING_ENABLED: ${TRACING_ENABLED:-True} - TRACING_ZIPKIN_ENDPOINT: ${TRACING_ZIPKIN_ENDPOINT:-http://jaeger:9411} + TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE} + TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT} TRAEFIK_SIMCORE_ZONE: ${TRAEFIK_SIMCORE_ZONE:-internal_simcore_stack} volumes: - "/var/run/docker.sock:/var/run/docker.sock" @@ -300,6 +320,7 @@ services: DYNAMIC_SIDECAR_LOG_LEVEL: ${DYNAMIC_SIDECAR_LOG_LEVEL} DYNAMIC_SIDECAR_PROMETHEUS_MONITORING_NETWORKS: ${DYNAMIC_SIDECAR_PROMETHEUS_MONITORING_NETWORKS} DYNAMIC_SIDECAR_PROMETHEUS_SERVICE_LABELS: ${DYNAMIC_SIDECAR_PROMETHEUS_SERVICE_LABELS} + DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT: ${DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} DIRECTOR_V2_LOGLEVEL: ${DIRECTOR_V2_LOGLEVEL} @@ -329,6 +350,8 @@ services: REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} REGISTRY_AUTH: ${REGISTRY_AUTH} @@ -355,6 +378,11 @@ services: SIMCORE_SERVICES_NETWORK_NAME: ${SIMCORE_SERVICES_NETWORK_NAME} SWARM_STACK_NAME: ${SWARM_STACK_NAME} TRAEFIK_SIMCORE_ZONE: ${TRAEFIK_SIMCORE_ZONE} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} + + WEBSERVER_HOST: ${WEBSERVER_HOST} + WEBSERVER_PORT: ${WEBSERVER_PORT} volumes: - "/var/run/docker.sock:/var/run/docker.sock" deploy: @@ -390,7 +418,8 @@ services: EFS_MOUNTED_PATH: ${EFS_MOUNTED_PATH} EFS_ONLY_ENABLED_FOR_USERIDS: ${EFS_ONLY_ENABLED_FOR_USERIDS} EFS_PROJECT_SPECIFIC_DATA_DIRECTORY: ${EFS_PROJECT_SPECIFIC_DATA_DIRECTORY} - + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} invitations: image: ${DOCKER_REGISTRY:-itisfoundation}/invitations:${DOCKER_IMAGE_TAG:-latest} init: true @@ -406,7 +435,8 @@ services: INVITATIONS_SWAGGER_API_DOC_ENABLED: ${INVITATIONS_SWAGGER_API_DOC_ENABLED} INVITATIONS_USERNAME: ${INVITATIONS_USERNAME} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} payments: image: 
${DOCKER_REGISTRY:-itisfoundation}/payments:${DOCKER_IMAGE_TAG:-latest} init: true @@ -448,6 +478,8 @@ services: SMTP_PORT: ${SMTP_PORT} SMTP_PROTOCOL: ${SMTP_PROTOCOL} SMTP_USERNAME: ${SMTP_USERNAME} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} resource-usage-tracker: image: ${DOCKER_REGISTRY:-itisfoundation}/resource-usage-tracker:${DOCKER_IMAGE_TAG:-latest} @@ -473,12 +505,16 @@ services: RABBIT_USER: ${RABBIT_USER} REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} RESOURCE_USAGE_TRACKER_LOGLEVEL: ${RESOURCE_USAGE_TRACKER_LOGLEVEL} RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_CHECK_ENABLED: ${RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_CHECK_ENABLED} RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_INTERVAL_SEC: ${RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_INTERVAL_SEC} RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_COUNTER_FAIL: ${RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_COUNTER_FAIL} RESOURCE_USAGE_TRACKER_S3: ${RESOURCE_USAGE_TRACKER_S3} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} RESOURCE_USAGE_TRACKER_PORT: ${RESOURCE_USAGE_TRACKER_PORT} dynamic-schdlr: @@ -496,12 +532,15 @@ services: RABBIT_USER: ${RABBIT_USER} REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} DIRECTOR_V2_HOST: ${DIRECTOR_V2_HOST} DIRECTOR_V2_PORT: ${DIRECTOR_V2_PORT} DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT: ${DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT} DYNAMIC_SCHEDULER_PROFILING: ${DYNAMIC_SCHEDULER_PROFILING} - + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} static-webserver: image: ${DOCKER_REGISTRY:-itisfoundation}/static-webserver:${DOCKER_IMAGE_TAG:-latest} init: true @@ -521,7 +560,7 @@ services: - traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.interval=2000ms - traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.timeout=1000ms - traefik.http.middlewares.${SWARM_STACK_NAME}_static_webserver_retry.retry.attempts=2 - - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`)) + - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || 
PathPrefix(`/resource/`)) - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.service=${SWARM_STACK_NAME}_static_webserver - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.priority=2 @@ -601,6 +640,7 @@ services: # WEBSERVER_DIAGNOSTICS WEBSERVER_DIAGNOSTICS: ${WEBSERVER_DIAGNOSTICS} + DIAGNOSTICS_HEALTHCHECK_ENABLED: ${DIAGNOSTICS_HEALTHCHECK_ENABLED} DIAGNOSTICS_MAX_AVG_LATENCY: ${DIAGNOSTICS_MAX_AVG_LATENCY} DIAGNOSTICS_MAX_TASK_DELAY: ${DIAGNOSTICS_MAX_TASK_DELAY} DIAGNOSTICS_SLOW_DURATION_SECS: ${DIAGNOSTICS_SLOW_DURATION_SECS} @@ -656,6 +696,8 @@ services: # WEBSERVER_REDIS REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} # WEBSERVER_REST @@ -695,9 +737,10 @@ services: STUDIES_DEFAULT_SERVICE_THUMBNAIL: ${STUDIES_DEFAULT_SERVICE_THUMBNAIL} WEBSERVER_TRACING: ${WEBSERVER_TRACING} - TRACING_ENABLED: ${TRACING_ENABLED} - TRACING_ZIPKIN_ENDPOINT: ${TRACING_ZIPKIN_ENDPOINT} - TRACING_THRIFT_COMPACT_ENDPOINT: ${TRACING_THRIFT_COMPACT_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} + TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE} # WEBSERVER_PROJECTS WEBSERVER_PROJECTS: ${WEBSERVER_PROJECTS} @@ -843,6 +886,8 @@ services: # WEBSERVER_REDIS REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} RESOURCE_MANAGER_RESOURCE_TTL_S: ${RESOURCE_MANAGER_RESOURCE_TTL_S} @@ -891,6 +936,8 @@ services: REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} SWARM_STACK_NAME: ${SWARM_STACK_NAME} @@ -962,6 +1009,12 @@ services: AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY: ${AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY} AGENT_VOLUMES_CLEANUP_S3_BUCKET: ${AGENT_VOLUMES_CLEANUP_S3_BUCKET} AGENT_VOLUMES_CLEANUP_S3_PROVIDER: ${AGENT_VOLUMES_CLEANUP_S3_PROVIDER} + AGENT_DOCKER_NODE_ID: "{{.Node.ID}}" + RABBIT_HOST: ${RABBIT_HOST} + RABBIT_PASSWORD: ${RABBIT_PASSWORD} + RABBIT_PORT: ${RABBIT_PORT} + RABBIT_USER: ${RABBIT_USER} + RABBIT_SECURE: ${RABBIT_SECURE} dask-sidecar: image: ${DOCKER_REGISTRY:-itisfoundation}/dask-sidecar:${DOCKER_IMAGE_TAG:-latest} @@ -1009,8 +1062,9 @@ services: networks: - storage_subnet environment: - TRACING_THRIFT_COMPACT_ENDPOINT: ${TRACING_THRIFT_COMPACT_ENDPOINT} DATCORE_ADAPTER_LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} storage: image: ${DOCKER_REGISTRY:-itisfoundation}/storage:${DOCKER_IMAGE_TAG:-latest} @@ -1029,6 +1083,8 @@ services: POSTGRES_USER: ${POSTGRES_USER} REDIS_HOST: ${REDIS_HOST} REDIS_PORT: ${REDIS_PORT} + REDIS_SECURE: ${REDIS_SECURE} + REDIS_USER: ${REDIS_USER} REDIS_PASSWORD: ${REDIS_PASSWORD} S3_ACCESS_KEY: ${S3_ACCESS_KEY} S3_BUCKET_NAME: ${S3_BUCKET_NAME} @@ -1037,8 +1093,9 @@ services: S3_SECRET_KEY: ${S3_SECRET_KEY} STORAGE_LOGLEVEL: ${STORAGE_LOGLEVEL} STORAGE_MONITORING_ENABLED: 1 - TRACING_ZIPKIN_ENDPOINT: 
${TRACING_ZIPKIN_ENDPOINT:-http://jaeger:9411} STORAGE_PROFILING: ${STORAGE_PROFILING} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} networks: - default - interactive_services_subnet @@ -1121,7 +1178,19 @@ services: # also aof (append only) is also enabled such that we get full durability at the expense # of backup size. The backup is written into /data. # https://redis.io/topics/persistence - [ "redis-server", "--save", "60 1", "--loglevel", "verbose", "--databases", "8", "--appendonly", "yes", "--requirepass", "${REDIS_PASSWORD}" ] + [ + "redis-server", + "--save", + "60 1", + "--loglevel", + "verbose", + "--databases", + "9", + "--appendonly", + "yes", + "--requirepass", + "${REDIS_PASSWORD}" + ] networks: - default - autoscaling_subnet @@ -1152,8 +1221,14 @@ services: - "--metrics.prometheus.entryPoint=metrics" - "--entryPoints.http.address=:80" - "--entryPoints.http.forwardedHeaders.insecure" + - "--entryPoints.http.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805 + - "--entryPoints.http.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805 + - "--entryPoints.http.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805 - "--entryPoints.simcore_api.address=:10081" - "--entryPoints.simcore_api.address=:10081" - "--entryPoints.simcore_api.forwardedHeaders.insecure" + - "--entryPoints.simcore_api.transport.respondingTimeouts.idleTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805 + - "--entryPoints.simcore_api.transport.respondingTimeouts.writeTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805 + - "--entryPoints.simcore_api.transport.respondingTimeouts.readTimeout=21600s" #6h, for https://github.com/traefik/traefik/issues/10805 - "--entryPoints.traefik_monitor.address=:8080" - "--entryPoints.traefik_monitor.forwardedHeaders.insecure" - "--providers.swarm.endpoint=unix:///var/run/docker.sock" @@ -1166,7 +1241,6 @@ services: - "--tracing.addinternals" - "--tracing.otlp=true" - "--tracing.otlp.http=true" - # - "--tracing.otlp.http.endpoint=0.0.0.0:4318/v1/traces" volumes: # So that Traefik can listen to the Docker events - /var/run/docker.sock:/var/run/docker.sock diff --git a/services/dynamic-scheduler/requirements/_base.in b/services/dynamic-scheduler/requirements/_base.in index 12ae0b98af03..ceb76bbb30f2 100644 --- a/services/dynamic-scheduler/requirements/_base.in +++ b/services/dynamic-scheduler/requirements/_base.in @@ -15,9 +15,10 @@ --requirement ../../../packages/service-library/requirements/_fastapi.in - +arrow fastapi httpx packaging +python-socketio typer[all] uvicorn[standard] diff --git a/services/dynamic-scheduler/requirements/_base.txt b/services/dynamic-scheduler/requirements/_base.txt index 6a29f3ea11a6..714f37a8b3e3 100644 --- a/services/dynamic-scheduler/requirements/_base.txt +++ b/services/dynamic-scheduler/requirements/_base.txt @@ -1,5 +1,7 @@ aio-pika==9.4.1 # via -r requirements/../../../packages/service-library/requirements/_base.in +aiocache==0.12.2 + # via -r requirements/../../../packages/service-library/requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.21.0 @@ -34,11 +36,11 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # 
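Note: the compose changes above replace the Zipkin/Thrift tracing variables with an OpenTelemetry collector endpoint and port. Below is a minimal sketch of how a service could turn those two variables into an OTLP/HTTP exporter; the actual wiring lives in servicelib.fastapi.tracing.setup_tracing, and the service name and the /v1/traces path used here are assumptions for illustration only.

import os

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Env vars as passed to the services in docker-compose above
collector = os.environ["TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT"]  # e.g. http://opentelemetry-collector
port = os.environ["TRACING_OPENTELEMETRY_COLLECTOR_PORT"]           # e.g. 4318

provider = TracerProvider(
    resource=Resource.create({"service.name": "dynamic-scheduler"})  # name is illustrative
)
provider.add_span_processor(
    # /v1/traces is the standard OTLP/HTTP traces path
    BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{collector}:{port}/v1/traces"))
)
trace.set_tracer_provider(provider)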
-r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi async-timeout==4.0.3 - # via - # aiohttp - # asyncpg - # redis + # via asyncpg asyncpg==0.29.0 # via sqlalchemy attrs==23.2.0 @@ -46,6 +48,8 @@ attrs==23.2.0 # aiohttp # jsonschema # referencing +bidict==0.23.1 + # via python-socketio certifi==2024.2.2 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -57,16 +61,23 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer # uvicorn +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via pydantic -exceptiongroup==1.2.0 - # via anyio fast-depends==2.4.2 # via faststream fastapi==0.99.1 @@ -87,12 +98,19 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http greenlet==3.0.3 # via sqlalchemy +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore # uvicorn + # wsproto httpcore==1.0.5 # via httpx httptools==0.6.1 @@ -113,7 +131,10 @@ idna==3.6 # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jsonschema==4.21.1 # via # -r requirements/../../../packages/models-library/requirements/_base.in @@ -140,6 +161,59 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # 
opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -161,6 +235,12 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.9 # via sqlalchemy pydantic==1.10.15 @@ -189,6 +269,10 @@ python-dateutil==2.9.0.post0 # via arrow python-dotenv==1.0.1 # via uvicorn +python-engineio==4.9.1 + # via python-socketio +python-socketio==5.11.2 + # via -r requirements/_base.in pyyaml==6.0.1 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -215,6 +299,10 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -224,8 +312,12 @@ rpds-py==0.18.0 # via # jsonschema # referencing +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer +simple-websocket==1.0.0 + # via python-engineio six==1.16.0 # via python-dateutil sniffio==1.3.1 @@ -272,12 +364,21 @@ typing-extensions==4.10.0 # aiodebug # aiodocker # alembic - # anyio # fastapi # faststream + # opentelemetry-sdk # pydantic # typer - # uvicorn +urllib3==2.2.2 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests uvicorn==0.29.0 # via # -r requirements/../../../packages/service-library/requirements/_fastapi.in @@ -288,9 +389,17 @@ watchfiles==0.21.0 # via uvicorn websockets==12.0 # via uvicorn +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation +wsproto==1.2.0 + # via simple-websocket yarl==1.9.4 # via # -r requirements/../../../packages/postgres-database/requirements/_base.in # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git 
a/services/dynamic-scheduler/requirements/_test.txt b/services/dynamic-scheduler/requirements/_test.txt index c8ed470df588..b48cff66d524 100644 --- a/services/dynamic-scheduler/requirements/_test.txt +++ b/services/dynamic-scheduler/requirements/_test.txt @@ -12,19 +12,16 @@ certifi==2024.2.2 # httpx # requests charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.0 - # via - # -c requirements/_base.txt - # anyio - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in h11==0.14.0 # via @@ -58,7 +55,7 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -89,7 +86,9 @@ python-dotenv==1.0.1 # -c requirements/_base.txt # -r requirements/_test.in requests==2.32.3 - # via docker + # via + # -c requirements/_base.txt + # docker respx==0.21.1 # via -r requirements/_test.in six==1.16.0 @@ -104,16 +103,9 @@ sniffio==1.3.1 # httpx termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # pytest -typing-extensions==4.10.0 - # via - # -c requirements/_base.txt - # anyio urllib3==2.2.2 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # docker # requests diff --git a/services/dynamic-scheduler/requirements/_tools.txt b/services/dynamic-scheduler/requirements/_tools.txt index c724e2ead52d..3f27c470fe35 100644 --- a/services/dynamic-scheduler/requirements/_tools.txt +++ b/services/dynamic-scheduler/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -47,14 +47,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -65,28 +65,19 @@ pyyaml==6.0.1 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools -tomli==2.0.1 +setuptools==74.0.0 # via - # -c requirements/_test.txt - # black - # build - # mypy + # -c requirements/_base.txt # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.10.0 # via # -c requirements/_base.txt - # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/_meta.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/_meta.py index 92c9713bdd42..4e33eee92267 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/_meta.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/_meta.py @@ -14,6 +14,7 @@ PROJECT_NAME: Final[str] = info.project_name VERSION: Final[Version] = info.version API_VERSION: Final[VersionStr] = info.__version__ +APP_NAME = PROJECT_NAME API_VTAG: Final[str] = info.api_prefix_path_tag SUMMARY: Final[str] = info.get_summary() diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py index 088745a07c30..ce43766f5a33 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py @@ -3,7 +3,8 @@ from servicelib.fastapi.dependencies import get_app, get_reverse_url_mapper from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient from servicelib.redis import RedisClientSDK -from simcore_service_dynamic_scheduler.services.redis import get_redis_client +from settings_library.redis import RedisDatabase +from simcore_service_dynamic_scheduler.services.redis import get_all_redis_clients from ...services.rabbitmq import get_rabbitmq_client, get_rabbitmq_rpc_server @@ -19,8 +20,10 @@ def get_rabbitmq_rpc_server_from_request(request: Request) -> RabbitMQRPCClient: return get_rabbitmq_rpc_server(request.app) -def get_redis_client_from_request(request: Request) -> RedisClientSDK: - return get_redis_client(request.app) +def get_redis_clients_from_request( + request: Request, +) -> dict[RedisDatabase, RedisClientSDK]: + return get_all_redis_clients(request.app) __all__: tuple[str, ...] = ( diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py index 515602aef7c2..7e87c57fd06e 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py @@ -9,11 +9,12 @@ ) from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient from servicelib.redis import RedisClientSDK +from settings_library.redis import RedisDatabase from ._dependencies import ( get_rabbitmq_client_from_request, get_rabbitmq_rpc_server_from_request, - get_redis_client_from_request, + get_redis_clients_from_request, ) router = APIRouter() @@ -29,12 +30,17 @@ async def healthcheck( rabbit_rpc_server: Annotated[ RabbitMQRPCClient, Depends(get_rabbitmq_rpc_server_from_request) ], - redis_client_sdk: Annotated[RedisClientSDK, Depends(get_redis_client_from_request)], + redis_client_sdks: Annotated[ + dict[RedisDatabase, RedisClientSDK], + Depends(get_redis_clients_from_request), + ], ): if not rabbit_client.healthy or not rabbit_rpc_server.healthy: raise HealthCheckError(RABBITMQ_CLIENT_UNHEALTHY_MSG) - if not redis_client_sdk.is_healthy: + if not all( + redis_client_sdk.is_healthy for redis_client_sdk in redis_client_sdks.values() + ): raise HealthCheckError(REDIS_CLIENT_UNHEALTHY_MSG) return f"{__name__}@{arrow.utcnow().isoformat()}" diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index 991aa004703b..65fc96dd6601 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -14,6 +14,7 @@ from ...core.settings import ApplicationSettings from ...services.director_v2 import DirectorV2Client +from ...services.service_tracker import set_request_as_running, set_request_as_stopped router = RPCRouter() @@ -37,6 +38,7 @@ async def run_dynamic_service( response: NodeGet | DynamicServiceGet = ( await director_v2_client.run_dynamic_service(dynamic_service_start) ) + await set_request_as_running(app, dynamic_service_start) return response @@ -48,15 +50,13 @@ async def run_dynamic_service( ) async def stop_dynamic_service( app: FastAPI, *, dynamic_service_stop: DynamicServiceStop -) -> NodeGet | DynamicServiceGet: +) -> None: director_v2_client = DirectorV2Client.get_from_app_state(app) settings: ApplicationSettings = app.state.settings - response: NodeGet | DynamicServiceGet = ( - await director_v2_client.stop_dynamic_service( - node_id=dynamic_service_stop.node_id, - simcore_user_agent=dynamic_service_stop.simcore_user_agent, - save_state=dynamic_service_stop.save_state, - timeout=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, - ) + await director_v2_client.stop_dynamic_service( + node_id=dynamic_service_stop.node_id, + simcore_user_agent=dynamic_service_stop.simcore_user_agent, + save_state=dynamic_service_stop.save_state, + timeout=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) - return response + await set_request_as_stopped(app, dynamic_service_stop) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py index 62f07ea31fce..e6ba2bbb53f7 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py @@ -4,20 +4,26 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from .._meta import ( API_VERSION, API_VTAG, APP_FINISHED_BANNER_MSG, + APP_NAME, APP_STARTED_BANNER_MSG, PROJECT_NAME, SUMMARY, ) from ..api.rest.routes import setup_rest_api from ..api.rpc.routes import setup_rpc_api_routes +from ..services.deferred_manager import setup_deferred_manager from ..services.director_v2 import setup_director_v2 +from ..services.notifier import setup_notifier from ..services.rabbitmq import setup_rabbitmq from ..services.redis import setup_redis +from ..services.service_tracker import setup_service_tracker +from ..services.status_monitor import setup_status_monitor from .settings import ApplicationSettings @@ -45,14 +51,28 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: if app.state.settings.DYNAMIC_SCHEDULER_PROFILING: app.add_middleware(ProfilerMiddleware) + if app.state.settings.DYNAMIC_SCHEDULER_TRACING: + setup_tracing( + app, + app.state.settings.DYNAMIC_SCHEDULER_TRACING, + APP_NAME, + ) # PLUGINS SETUP setup_director_v2(app) + setup_rabbitmq(app) setup_rpc_api_routes(app) + setup_redis(app) + setup_notifier(app) + + setup_service_tracker(app) + setup_deferred_manager(app) + setup_status_monitor(app) + setup_rest_api(app) # ERROR HANDLERS diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/settings.py 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/settings.py index 1a38cf336cb7..9605619d57d3 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/settings.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/settings.py @@ -7,6 +7,7 @@ from settings_library.director_v2 import DirectorV2Settings from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from .._meta import API_VERSION, API_VTAG, PROJECT_NAME @@ -78,3 +79,6 @@ class ApplicationSettings(_BaseApplicationSettings): DYNAMIC_SCHEDULER_PROMETHEUS_INSTRUMENTATION_ENABLED: bool = True DYNAMIC_SCHEDULER_PROFILING: bool = False + DYNAMIC_SCHEDULER_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py new file mode 100644 index 000000000000..8544c0f38e6f --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py @@ -0,0 +1,24 @@ +from fastapi import FastAPI +from servicelib.deferred_tasks import DeferredManager +from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisDatabase + +from .redis import get_redis_client + + +def setup_deferred_manager(app: FastAPI) -> None: + async def on_startup() -> None: + rabbit_settings: RabbitSettings = app.state.settings.DYNAMIC_SCHEDULER_RABBITMQ + + redis_client_sdk = get_redis_client(app, RedisDatabase.DEFERRED_TASKS) + app.state.deferred_manager = manager = DeferredManager( + rabbit_settings, redis_client_sdk, globals_context={"app": app} + ) + await manager.setup() + + async def on_shutdown() -> None: + manager: DeferredManager = app.state.deferred_manager + await manager.shutdown() + + app.add_event_handler("startup", on_startup) + app.add_event_handler("shutdown", on_shutdown) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py new file mode 100644 index 000000000000..8cd33e12808f --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py @@ -0,0 +1,7 @@ +from ._notifier import notify_service_status_change +from ._setup import setup_notifier + +__all__: tuple[str, ...] 
= ( + "setup_notifier", + "notify_service_status_change", +) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py similarity index 60% rename from services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_notifier.py rename to services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py index 9f97a889baca..0b8690a96766 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_notifier.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py @@ -1,20 +1,17 @@ import contextlib -from pathlib import Path import socketio # type: ignore[import-untyped] from fastapi import FastAPI from fastapi.encoders import jsonable_encoder -from models_library.api_schemas_dynamic_sidecar.socketio import ( - SOCKET_IO_SERVICE_DISK_USAGE_EVENT, -) -from models_library.api_schemas_dynamic_sidecar.telemetry import ( - DiskUsage, - ServiceDiskUsage, +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.socketio import ( + SOCKET_IO_SERVICE_STATUS_EVENT, ) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.api_schemas_webserver.socketio import SocketIORoomStr -from models_library.projects_nodes_io import NodeID from models_library.users import UserID from servicelib.fastapi.app_state import SingletonInAppStateMixin +from servicelib.services_utils import get_status_as_dict class Notifier(SingletonInAppStateMixin): @@ -23,26 +20,24 @@ class Notifier(SingletonInAppStateMixin): def __init__(self, sio_manager: socketio.AsyncAioPikaManager): self._sio_manager = sio_manager - async def notify_service_disk_usage( - self, user_id: UserID, node_id: NodeID, usage: dict[Path, DiskUsage] + async def notify_service_status( + self, user_id: UserID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> None: await self._sio_manager.emit( - SOCKET_IO_SERVICE_DISK_USAGE_EVENT, - data=jsonable_encoder(ServiceDiskUsage(node_id=node_id, usage=usage)), + SOCKET_IO_SERVICE_STATUS_EVENT, + data=jsonable_encoder(get_status_as_dict(status)), room=SocketIORoomStr.from_user_id(user_id), ) -async def publish_disk_usage( - app: FastAPI, *, user_id: UserID, node_id: NodeID, usage: dict[Path, DiskUsage] +async def notify_service_status_change( + app: FastAPI, user_id: UserID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> None: notifier: Notifier = Notifier.get_from_app_state(app) - await notifier.notify_service_disk_usage( - user_id=user_id, node_id=node_id, usage=usage - ) + await notifier.notify_service_status(user_id=user_id, status=status) -def setup_notifier(app: FastAPI): +def setup(app: FastAPI): async def _on_startup() -> None: assert app.state.external_socketio # nosec diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py new file mode 100644 index 000000000000..1542afa8a87d --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py @@ -0,0 +1,8 @@ +from fastapi import FastAPI + +from . 
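Note: notify_service_status_change above publishes the service status to the owning user's socket.io room via RabbitMQ. A hedged sketch of how a consumer could listen for that event with python-socketio follows; the connection URL and any authentication are deployment-specific assumptions, and the payload is whatever get_status_as_dict produced.

import socketio  # python-socketio, already a dependency of this service

from models_library.api_schemas_dynamic_scheduler.socketio import (
    SOCKET_IO_SERVICE_STATUS_EVENT,
)

sio = socketio.AsyncClient()


@sio.on(SOCKET_IO_SERVICE_STATUS_EVENT)
async def on_service_status(data: dict) -> None:
    # data is the jsonable-encoded dict built by get_status_as_dict(status)
    print("service status update:", data)


# await sio.connect("http://<frontend-facing-socketio-server>")  # placeholder URL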
import _notifier, _socketio + + +def setup_notifier(app: FastAPI): + _socketio.setup(app) + _notifier.setup(app) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py new file mode 100644 index 000000000000..2f0abfbd3af1 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py @@ -0,0 +1,32 @@ +import logging + +import socketio # type: ignore[import-untyped] +from fastapi import FastAPI +from servicelib.socketio_utils import cleanup_socketio_async_pubsub_manager + +from ...core.settings import ApplicationSettings + +_logger = logging.getLogger(__name__) + + +def setup(app: FastAPI): + settings: ApplicationSettings = app.state.settings + + async def _on_startup() -> None: + assert app.state.rabbitmq_client # nosec + + # Connect to the as an external process in write-only mode + # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes + assert settings.DYNAMIC_SCHEDULER_RABBITMQ # nosec + app.state.external_socketio = socketio.AsyncAioPikaManager( + url=settings.DYNAMIC_SCHEDULER_RABBITMQ.dsn, logger=_logger, write_only=True + ) + + async def _on_shutdown() -> None: + if external_socketio := getattr(app.state, "external_socketio"): # noqa: B009 + await cleanup_socketio_async_pubsub_manager( + server_manager=external_socketio + ) + + app.add_event_handler("startup", _on_startup) + app.add_event_handler("shutdown", _on_shutdown) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py index 7904d5e1a5df..84131eaf54bf 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py @@ -1,25 +1,46 @@ +from typing import Final + from fastapi import FastAPI -from servicelib.redis import RedisClientSDK +from servicelib.redis import RedisClientSDK, RedisClientsManager, RedisManagerDBConfig from settings_library.redis import RedisDatabase, RedisSettings +_DECODE_DBS: Final[set[RedisDatabase]] = { + RedisDatabase.LOCKS, +} + +_BINARY_DBS: Final[set[RedisDatabase]] = { + RedisDatabase.DEFERRED_TASKS, + RedisDatabase.DYNAMIC_SERVICES, +} + +_ALL_REDIS_DATABASES: Final[set[RedisDatabase]] = _DECODE_DBS | _BINARY_DBS + def setup_redis(app: FastAPI) -> None: settings: RedisSettings = app.state.settings.DYNAMIC_SCHEDULER_REDIS async def on_startup() -> None: - redis_locks_dsn = settings.build_redis_dsn(RedisDatabase.LOCKS) - app.state.redis_client_sdk = client = RedisClientSDK(redis_locks_dsn) - await client.setup() + app.state.redis_clients_manager = manager = RedisClientsManager( + {RedisManagerDBConfig(x, decode_responses=False) for x in _BINARY_DBS} + | {RedisManagerDBConfig(x, decode_responses=True) for x in _DECODE_DBS}, + settings, + ) + await manager.setup() async def on_shutdown() -> None: - redis_client_sdk: None | RedisClientSDK = app.state.redis_client_sdk - if redis_client_sdk: - await redis_client_sdk.shutdown() + manager: RedisClientsManager = app.state.redis_clients_manager + await manager.shutdown() app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) -def get_redis_client(app: FastAPI) -> RedisClientSDK: - redis_client_sdk: RedisClientSDK = 
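Note: the split between _DECODE_DBS and _BINARY_DBS above matters because the service tracker stores pickled payloads, which must come back as bytes. A rough illustration with a raw redis.asyncio client follows; the DSNs and database indices are made up, and the real clients come from RedisClientsManager.

import asyncio
import pickle

import redis.asyncio as aioredis


async def main() -> None:
    # Binary client (decode_responses=False): pickled payloads round-trip as bytes
    binary = aioredis.from_url("redis://redis:6379/5", decode_responses=False)  # db index assumed
    await binary.set("t::example", pickle.dumps({"state": "RUNNING"}))
    assert pickle.loads(await binary.get("t::example")) == {"state": "RUNNING"}

    # Decoded client (decode_responses=True): values come back as str, as expected for lock keys
    text = aioredis.from_url("redis://redis:6379/0", decode_responses=True)  # db index assumed
    await text.set("lock::example", "owner-a")
    assert await text.get("lock::example") == "owner-a"


asyncio.run(main())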
app.state.redis_client_sdk - return redis_client_sdk +def get_redis_client(app: FastAPI, database: RedisDatabase) -> RedisClientSDK: + manager: RedisClientsManager = app.state.redis_clients_manager + return manager.client(database) + + +def get_all_redis_clients( + app: FastAPI, +) -> dict[RedisDatabase, RedisClientSDK]: + return {d: get_redis_client(app, d) for d in _ALL_REDIS_DATABASES} diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py new file mode 100644 index 000000000000..abf543d1befa --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -0,0 +1,33 @@ +from ._api import ( + NORMAL_RATE_POLL_INTERVAL, + get_all_tracked_services, + get_tracked_service, + get_user_id_for_service, + remove_tracked_service, + set_frontned_notified_for_service, + set_if_status_changed_for_service, + set_request_as_running, + set_request_as_stopped, + set_service_scheduled_to_run, + set_service_status_task_uid, + should_notify_frontend_for_service, +) +from ._models import TrackedServiceModel +from ._setup import setup_service_tracker + +__all__: tuple[str, ...] = ( + "get_all_tracked_services", + "get_tracked_service", + "get_user_id_for_service", + "NORMAL_RATE_POLL_INTERVAL", + "remove_tracked_service", + "set_frontned_notified_for_service", + "set_if_status_changed_for_service", + "set_request_as_running", + "set_request_as_stopped", + "set_service_scheduled_to_run", + "set_service_status_task_uid", + "setup_service_tracker", + "should_notify_frontend_for_service", + "TrackedServiceModel", +) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py new file mode 100644 index 000000000000..1b1b4a0d9f8f --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -0,0 +1,248 @@ +import inspect +import logging +from datetime import timedelta +from typing import Final + +import arrow +from fastapi import FastAPI +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, + DynamicServiceStop, +) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects_nodes_io import NodeID +from models_library.services_enums import ServiceState +from models_library.users import UserID +from servicelib.deferred_tasks import TaskUID + +from ._models import SchedulerServiceState, TrackedServiceModel, UserRequestedState +from ._setup import get_tracker + +_logger = logging.getLogger(__name__) + + +_LOW_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=1) +NORMAL_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=5) +_MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES: Final[timedelta] = timedelta(seconds=60) + + +async def set_request_as_running( + app: FastAPI, + dynamic_service_start: DynamicServiceStart, +) -> None: + """Stores intention to `start` request""" + await get_tracker(app).save( + dynamic_service_start.node_uuid, + TrackedServiceModel( + dynamic_service_start=dynamic_service_start, + requested_state=UserRequestedState.RUNNING, + project_id=dynamic_service_start.project_id, + 
user_id=dynamic_service_start.user_id, + ), + ) + + +async def set_request_as_stopped( + app: FastAPI, dynamic_service_stop: DynamicServiceStop +) -> None: + """Stores intention to `stop` request""" + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(dynamic_service_stop.node_id) + + if model is None: + model = TrackedServiceModel( + dynamic_service_start=None, + user_id=dynamic_service_stop.user_id, + project_id=dynamic_service_stop.project_id, + requested_state=UserRequestedState.STOPPED, + ) + + model.requested_state = UserRequestedState.STOPPED + await tracker.save(dynamic_service_stop.node_id, model) + + +def _get_service_state( + status: NodeGet | DynamicServiceGet | NodeGetIdle, +) -> ServiceState: + # Attributes where to find the state + # NodeGet -> service_state + # DynamicServiceGet -> state + # NodeGetIdle -> service_state + state_key = "state" if isinstance(status, DynamicServiceGet) else "service_state" + + state: ServiceState | str = getattr(status, state_key) + result: str = state.value if isinstance(state, ServiceState) else state + return ServiceState(result) + + +def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: + if _get_service_state(status) != ServiceState.RUNNING: + return _LOW_RATE_POLL_INTERVAL + + return NORMAL_RATE_POLL_INTERVAL + + +def _get_current_scheduler_service_state( + requested_state: UserRequestedState, + status: NodeGet | DynamicServiceGet | NodeGetIdle, +) -> SchedulerServiceState: + """ + Computes the `SchedulerServiceState` used internally by the scheduler + to decide about a service's future. + """ + + if isinstance(status, NodeGetIdle): + return SchedulerServiceState.IDLE + + service_state: ServiceState = _get_service_state(status) + + if requested_state == UserRequestedState.RUNNING: + if service_state == ServiceState.RUNNING: + return SchedulerServiceState.RUNNING + + if ( + ServiceState.PENDING # type:ignore[operator] + <= service_state + <= ServiceState.STARTING + ): + return SchedulerServiceState.STARTING + + if service_state < ServiceState.PENDING or service_state > ServiceState.RUNNING: + return SchedulerServiceState.UNEXPECTED_OUTCOME + + if requested_state == UserRequestedState.STOPPED: + if service_state >= ServiceState.RUNNING: # type:ignore[operator] + return SchedulerServiceState.STOPPING + + if service_state < ServiceState.RUNNING: + return SchedulerServiceState.UNEXPECTED_OUTCOME + + msg = f"Could not determine current_state from: '{requested_state=}', '{status=}'" + raise TypeError(msg) + + +def _log_skipping_operation(node_id: NodeID) -> None: + # the caller is at index 1 (index 0 is the current function) + caller_name = inspect.stack()[1].function + + _logger.info( + "Could not find a %s entry for node_id %s: skipping %s", + TrackedServiceModel.__name__, + node_id, + caller_name, + ) + + +async def set_if_status_changed_for_service( + app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle +) -> bool: + """returns ``True`` if the tracker detected a status change""" + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _log_skipping_operation(node_id) + return False + + # set new polling interval in the future + model.set_check_status_after_to(_get_poll_interval(status)) + model.service_status_task_uid = None + model.scheduled_to_run = False + + # check if model changed + json_status = status.json() + if model.service_status != json_status: + model.service_status = 
json_status + model.current_state = _get_current_scheduler_service_state( + model.requested_state, status + ) + await tracker.save(node_id, model) + return True + + return False + + +async def should_notify_frontend_for_service( + app: FastAPI, node_id: NodeID, *, status_changed: bool +) -> bool: + """ + Checks if it's time to notify the frontend. + The frontend will be notified at regular intervals and on changes + Avoids sending too many updates. + """ + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + + if model is None: + return False + + # check if too much time has passed since the last time an update was sent + return ( + status_changed + or arrow.utcnow().timestamp() - model.last_status_notification + > _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES.total_seconds() + ) + + +async def set_frontned_notified_for_service(app: FastAPI, node_id: NodeID) -> None: + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _log_skipping_operation(node_id) + return + + model.set_last_status_notification_to_now() + await tracker.save(node_id, model) + + +async def set_service_scheduled_to_run( + app: FastAPI, node_id: NodeID, delay_from_now: timedelta +) -> None: + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _log_skipping_operation(node_id) + return + + model.scheduled_to_run = True + model.set_check_status_after_to(delay_from_now) + await tracker.save(node_id, model) + + +async def set_service_status_task_uid( + app: FastAPI, node_id: NodeID, task_uid: TaskUID +) -> None: + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _log_skipping_operation(node_id) + return + + model.service_status_task_uid = task_uid + await tracker.save(node_id, model) + + +async def remove_tracked_service(app: FastAPI, node_id: NodeID) -> None: + """ + Removes the service from tracking (usually after stop completes) + # NOTE: does not raise if node_id is not found + """ + await get_tracker(app).delete(node_id) + + +async def get_tracked_service( + app: FastAPI, node_id: NodeID +) -> TrackedServiceModel | None: + """Returns information about the tracked service""" + return await get_tracker(app).load(node_id) + + +async def get_all_tracked_services(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: + """Returns all tracked services""" + return await get_tracker(app).all() + + +async def get_user_id_for_service(app: FastAPI, node_id: NodeID) -> UserID | None: + """returns user_id for the service""" + model: TrackedServiceModel | None = await get_tracker(app).load(node_id) + return model.user_id if model else None diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py new file mode 100644 index 000000000000..985ca8feef5a --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -0,0 +1,123 @@ +import pickle +from dataclasses import dataclass, field +from datetime import timedelta +from enum import auto + +import arrow +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, +) +from models_library.projects import ProjectID +from models_library.users import UserID +from models_library.utils.enums import StrAutoEnum +from 
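Note: should_notify_frontend_for_service above throttles frontend updates: a notification goes out when the status actually changed, or when more than _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES (60 s) has passed since the last one. A tiny standalone restatement of that decision, for illustration only:

import arrow

_MAX_PERIOD_S = 60.0  # mirrors _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES


def should_notify(status_changed: bool, last_status_notification: float) -> bool:
    return (
        status_changed
        or arrow.utcnow().timestamp() - last_status_notification > _MAX_PERIOD_S
    )


now = arrow.utcnow().timestamp()
assert should_notify(True, now)          # status changed -> always notify
assert not should_notify(False, now)     # just notified -> wait
assert should_notify(False, now - 120)   # stale for 2 minutes -> notify again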
servicelib.deferred_tasks import TaskUID + + +class UserRequestedState(StrAutoEnum): + RUNNING = auto() + STOPPED = auto() + + +class SchedulerServiceState(StrAutoEnum): + # service was started and is running as expected + RUNNING = auto() + # service is not present + IDLE = auto() + # something went wrong while starting/stopping service + UNEXPECTED_OUTCOME = auto() + + # service is being started + STARTING = auto() + # service is being stopped + STOPPING = auto() + + # service status has not been determined + UNKNOWN = auto() + + +@dataclass +class TrackedServiceModel: # pylint:disable=too-many-instance-attributes + + dynamic_service_start: DynamicServiceStart | None = field( + metadata={ + "description": ( + "used to create the service in any given moment if the requested_state is RUNNING" + "can be set to None only when stopping the service" + ) + } + ) + + user_id: UserID | None = field( + metadata={ + "description": "required for propagating status changes to the frontend" + } + ) + project_id: ProjectID | None = field( + metadata={ + "description": "required for propagating status changes to the frontend" + } + ) + + requested_state: UserRequestedState = field( + metadata={ + "description": ( + "status of the service desidered by the user RUNNING or STOPPED" + ) + } + ) + + current_state: SchedulerServiceState = field( + default=SchedulerServiceState.UNKNOWN, + metadata={ + "description": "to set after parsing the incoming state via the API calls" + }, + ) + + ############################# + ### SERVICE STATUS UPDATE ### + ############################# + + scheduled_to_run: bool = field( + default=False, + metadata={"description": "set when a job will be immediately scheduled"}, + ) + + service_status: str = field( + default="", + metadata={ + "description": "stored for debug mainly this is used to compute ``current_state``" + }, + ) + service_status_task_uid: TaskUID | None = field( + default=None, + metadata={"description": "uid of the job currently fetching the status"}, + ) + + check_status_after: float = field( + default_factory=lambda: arrow.utcnow().timestamp(), + metadata={"description": "used to determine when to poll the status again"}, + ) + + last_status_notification: float = field( + default=0, + metadata={ + "description": "used to determine when was the last time the status was notified" + }, + ) + + def set_check_status_after_to(self, delay_from_now: timedelta) -> None: + self.check_status_after = (arrow.utcnow() + delay_from_now).timestamp() + + def set_last_status_notification_to_now(self) -> None: + self.last_status_notification = arrow.utcnow().timestamp() + + ##################### + ### SERIALIZATION ### + ##################### + + def to_bytes(self) -> bytes: + return pickle.dumps(self) + + @classmethod + def from_bytes(cls, data: bytes) -> "TrackedServiceModel": + return pickle.loads(data) # type: ignore # noqa: S301 diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py new file mode 100644 index 000000000000..40a47bb8becc --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py @@ -0,0 +1,19 @@ +from fastapi import FastAPI +from settings_library.redis import RedisDatabase + +from ..redis import get_redis_client +from ._tracker import Tracker + + +def setup_service_tracker(app: FastAPI) -> None: + async def on_startup() -> None: + 
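Note: TrackedServiceModel above is a plain dataclass serialized with pickle, so it round-trips through the binary Redis database byte-for-byte. A small sketch follows; the field values are illustrative only.

from simcore_service_dynamic_scheduler.services.service_tracker import (
    TrackedServiceModel,
)
from simcore_service_dynamic_scheduler.services.service_tracker._models import (
    UserRequestedState,
)

model = TrackedServiceModel(
    dynamic_service_start=None,  # allowed only when the request is a stop
    user_id=1,                   # illustrative value
    project_id=None,
    requested_state=UserRequestedState.STOPPED,
)

restored = TrackedServiceModel.from_bytes(model.to_bytes())
assert restored == model  # dataclass equality survives the pickle round-trip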
app.state.service_tracker = Tracker( + get_redis_client(app, RedisDatabase.DYNAMIC_SERVICES) + ) + + app.add_event_handler("startup", on_startup) + + +def get_tracker(app: FastAPI) -> Tracker: + tracker: Tracker = app.state.service_tracker + return tracker diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py new file mode 100644 index 000000000000..489cee153105 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from typing import Final + +from models_library.projects_nodes_io import NodeID +from servicelib.redis import RedisClientSDK + +from ._models import TrackedServiceModel + +_KEY_PREFIX: Final[str] = "t::" + + +def _get_key(node_id: NodeID) -> str: + return f"{_KEY_PREFIX}{node_id}" + + +@dataclass +class Tracker: + redis_client_sdk: RedisClientSDK + + async def save(self, node_id: NodeID, model: TrackedServiceModel) -> None: + await self.redis_client_sdk.redis.set(_get_key(node_id), model.to_bytes()) + + async def load(self, node_id: NodeID) -> TrackedServiceModel | None: + model_as_bytes: bytes | None = await self.redis_client_sdk.redis.get( + _get_key(node_id) + ) + return ( + None + if model_as_bytes is None + else TrackedServiceModel.from_bytes(model_as_bytes) + ) + + async def delete(self, node_id: NodeID) -> None: + await self.redis_client_sdk.redis.delete(_get_key(node_id)) + + async def all(self) -> dict[NodeID, TrackedServiceModel]: + found_keys = await self.redis_client_sdk.redis.keys(f"{_KEY_PREFIX}*") + found_values = await self.redis_client_sdk.redis.mget(found_keys) + + return { + NodeID(k.decode().lstrip(_KEY_PREFIX)): TrackedServiceModel.from_bytes(v) + for k, v in zip(found_keys, found_values, strict=True) + if v is not None + } diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py new file mode 100644 index 000000000000..263451243252 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py @@ -0,0 +1,3 @@ +from ._setup import setup_status_monitor + +__all__: tuple[str, ...] = ("setup_status_monitor",) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py new file mode 100644 index 000000000000..f710204504c2 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -0,0 +1,85 @@ +import logging +from datetime import timedelta + +from fastapi import FastAPI +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_directorv2.dynamic_services_service import ( + RunningDynamicServiceDetails, +) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects_nodes_io import NodeID +from models_library.users import UserID +from servicelib.deferred_tasks import BaseDeferredHandler, TaskUID +from servicelib.deferred_tasks._base_deferred_handler import DeferredContext + +from .. 
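Note: the Tracker above is a thin wrapper over one of the binary Redis clients, namespacing every entry under the "t::" prefix. A hedged usage sketch follows; node_id and model come from elsewhere, and the client must have been created with decode_responses=False.

from models_library.projects_nodes_io import NodeID
from servicelib.redis import RedisClientSDK

from simcore_service_dynamic_scheduler.services.service_tracker import (
    TrackedServiceModel,
)
from simcore_service_dynamic_scheduler.services.service_tracker._tracker import Tracker


async def demo(
    redis_client_sdk: RedisClientSDK, node_id: NodeID, model: TrackedServiceModel
) -> None:
    tracker = Tracker(redis_client_sdk)
    await tracker.save(node_id, model)     # SET t::<node_id> -> pickled model
    assert await tracker.load(node_id) == model
    assert node_id in await tracker.all()  # KEYS t::* followed by MGET
    await tracker.delete(node_id)
    assert await tracker.load(node_id) is None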
import service_tracker +from ..director_v2 import DirectorV2Client +from ..notifier import notify_service_status_change + +_logger = logging.getLogger(__name__) + + +class DeferredGetStatus(BaseDeferredHandler[NodeGet | DynamicServiceGet | NodeGetIdle]): + @classmethod + async def get_timeout(cls, context: DeferredContext) -> timedelta: + assert context # nosec + return timedelta(seconds=5) + + @classmethod + async def start( # type:ignore[override] # pylint:disable=arguments-differ + cls, node_id: NodeID + ) -> DeferredContext: + _logger.debug("Getting service status for %s", node_id) + return {"node_id": node_id} + + @classmethod + async def on_created(cls, task_uid: TaskUID, context: DeferredContext) -> None: + """called after deferred was scheduled to run""" + app: FastAPI = context["app"] + node_id: NodeID = context["node_id"] + + await service_tracker.set_service_status_task_uid(app, node_id, task_uid) + + @classmethod + async def run( + cls, context: DeferredContext + ) -> NodeGet | DynamicServiceGet | NodeGetIdle: + app: FastAPI = context["app"] + node_id: NodeID = context["node_id"] + + director_v2_client: DirectorV2Client = DirectorV2Client.get_from_app_state(app) + service_status: NodeGet | RunningDynamicServiceDetails | NodeGetIdle = ( + await director_v2_client.get_status(node_id) + ) + _logger.debug( + "Service status type=%s, %s", type(service_status), service_status + ) + return service_status + + @classmethod + async def on_result( + cls, result: NodeGet | DynamicServiceGet | NodeGetIdle, context: DeferredContext + ) -> None: + app: FastAPI = context["app"] + node_id: NodeID = context["node_id"] + + _logger.debug("Received status for service '%s': '%s'", node_id, result) + + status_changed: bool = await service_tracker.set_if_status_changed_for_service( + app, node_id, result + ) + if await service_tracker.should_notify_frontend_for_service( + app, node_id, status_changed=status_changed + ): + user_id: UserID | None = await service_tracker.get_user_id_for_service( + app, node_id + ) + if user_id: + await notify_service_status_change(app, user_id, result) + await service_tracker.set_frontned_notified_for_service(app, node_id) + else: + _logger.info( + "Did not find a user for '%s', skipping status delivery of: %s", + node_id, + result, + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py new file mode 100644 index 000000000000..0d8b5a2723f3 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -0,0 +1,121 @@ +import logging +from datetime import timedelta +from functools import cached_property +from typing import Final + +import arrow +from fastapi import FastAPI +from models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeFloat, NonNegativeInt +from servicelib.background_task import stop_periodic_task +from servicelib.redis_utils import start_exclusive_periodic_task +from servicelib.utils import limited_gather +from settings_library.redis import RedisDatabase + +from .. 
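Note: the handler above is driven entirely by the deferred-tasks machinery; within this PR its only entry point is the monitor. A compressed view of the lifecycle, restated for orientation rather than as new behaviour:

from models_library.projects_nodes_io import NodeID

from simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status import (
    DeferredGetStatus,
)


async def poll_once(node_id: NodeID) -> None:
    # Fire-and-forget, exactly as the monitor does it:
    # on_created() records the task UID on the tracked model,
    # run() queries director-v2 for the current status,
    # on_result() updates the tracker and notifies the frontend when due.
    await DeferredGetStatus.start(node_id=node_id)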
import service_tracker +from ..redis import get_redis_client +from ..service_tracker import NORMAL_RATE_POLL_INTERVAL, TrackedServiceModel +from ..service_tracker._models import SchedulerServiceState, UserRequestedState +from ._deferred_get_status import DeferredGetStatus + +_logger = logging.getLogger(__name__) + +_INTERVAL_BETWEEN_CHECKS: Final[timedelta] = timedelta(seconds=1) +_MAX_CONCURRENCY: Final[NonNegativeInt] = 10 + + +async def _start_get_status_deferred( + app: FastAPI, node_id: NodeID, *, next_check_delay: timedelta +) -> None: + await service_tracker.set_service_scheduled_to_run(app, node_id, next_check_delay) + await DeferredGetStatus.start(node_id=node_id) + + +class Monitor: + def __init__(self, app: FastAPI, status_worker_interval: timedelta) -> None: + self.app = app + self.status_worker_interval = status_worker_interval + + @cached_property + def status_worker_interval_seconds(self) -> NonNegativeFloat: + return self.status_worker_interval.total_seconds() + + async def _worker_start_get_status_requests(self) -> None: + """ + Check if a service requires it's status to be polled. + Note that the interval at which the status is polled can vary. + This is a relatively low resoruce check. + """ + + # NOTE: this worker runs on only once across all instances of the scheduler + + models: dict[ + NodeID, TrackedServiceModel + ] = await service_tracker.get_all_tracked_services(self.app) + + to_remove: list[NodeID] = [] + to_start: list[NodeID] = [] + + current_timestamp = arrow.utcnow().timestamp() + + for node_id, model in models.items(): + # check if service is idle and status polling should stop + if ( + model.current_state == SchedulerServiceState.IDLE + and model.requested_state == UserRequestedState.STOPPED + ): + to_remove.append(node_id) + continue + + job_not_running = not ( + model.scheduled_to_run + and model.service_status_task_uid is not None + and await DeferredGetStatus.is_present(model.service_status_task_uid) + ) + wait_period_finished = current_timestamp > model.check_status_after + if job_not_running and wait_period_finished: + to_start.append(node_id) + else: + _logger.info( + "Skipping status check for %s, because: %s or %s", + node_id, + f"{job_not_running=}", + ( + f"{wait_period_finished=}" + if wait_period_finished + else f"can_start_in={model.check_status_after - current_timestamp}" + ), + ) + + _logger.debug("Removing tracked services: '%s'", to_remove) + await limited_gather( + *( + service_tracker.remove_tracked_service(self.app, node_id) + for node_id in to_remove + ), + limit=_MAX_CONCURRENCY, + ) + + _logger.debug("Poll status for tracked services: '%s'", to_start) + await limited_gather( + *( + _start_get_status_deferred( + self.app, node_id, next_check_delay=NORMAL_RATE_POLL_INTERVAL + ) + for node_id in to_start + ), + limit=_MAX_CONCURRENCY, + ) + + async def setup(self) -> None: + self.app.state.status_monitor_background_task = start_exclusive_periodic_task( + get_redis_client(self.app, RedisDatabase.LOCKS), + self._worker_start_get_status_requests, + task_period=_INTERVAL_BETWEEN_CHECKS, + retry_after=_INTERVAL_BETWEEN_CHECKS, + task_name="periodic_service_status_update", + ) + + async def shutdown(self) -> None: + if getattr(self.app.state, "status_monitor_background_task", None): + await stop_periodic_task(self.app.state.status_monitor_background_task) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py 
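Note: Monitor.setup() above relies on start_exclusive_periodic_task so that, across all replicas of the scheduler, only one instance runs the status sweep at a time, coordinated through the LOCKS Redis database. A hedged sketch of the same pattern; locks_client stands in for the LOCKS RedisClientSDK obtained via get_redis_client.

from datetime import timedelta

from servicelib.background_task import stop_periodic_task
from servicelib.redis import RedisClientSDK
from servicelib.redis_utils import start_exclusive_periodic_task


async def _sweep() -> None:
    ...  # decide which services to stop tracking and which to poll, as in the worker above


def start_sweep(locks_client: RedisClientSDK):
    # Returns the background task; only one replica holds the lock and actually runs _sweep
    return start_exclusive_periodic_task(
        locks_client,
        _sweep,
        task_period=timedelta(seconds=1),
        retry_after=timedelta(seconds=1),
        task_name="periodic_service_status_update",
    )


# on shutdown: await stop_periodic_task(task)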
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py new file mode 100644 index 000000000000..8f9601464bcb --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py @@ -0,0 +1,28 @@ +from datetime import timedelta +from typing import Final + +from fastapi import FastAPI + +from ._monitor import Monitor + +_STATUS_WORKER_DEFAULT_INTERVAL: Final[timedelta] = timedelta(seconds=1) + + +def setup_status_monitor(app: FastAPI) -> None: + async def on_startup() -> None: + app.state.status_monitor = monitor = Monitor( + app, status_worker_interval=_STATUS_WORKER_DEFAULT_INTERVAL + ) + await monitor.setup() + + async def on_shutdown() -> None: + monitor: Monitor = app.state.status_monitor + await monitor.shutdown() + + app.add_event_handler("startup", on_startup) + app.add_event_handler("shutdown", on_shutdown) + + +def get_monitor(app: FastAPI) -> Monitor: + monitor: Monitor = app.state.status_monitor + return monitor diff --git a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index ff72140f5ee7..2cb14086b2a2 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ b/services/dynamic-scheduler/tests/conftest.py @@ -4,6 +4,7 @@ import string from collections.abc import AsyncIterator from pathlib import Path +from typing import Final import pytest import simcore_service_dynamic_scheduler @@ -13,6 +14,9 @@ from pytest_mock import MockerFixture from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.redis import RedisClientsManager, RedisManagerDBConfig +from servicelib.utils import logged_gather +from settings_library.redis import RedisDatabase, RedisSettings from simcore_service_dynamic_scheduler.core.application import create_app pytest_plugins = [ @@ -20,6 +24,7 @@ "pytest_simcore.docker_compose", "pytest_simcore.docker_swarm", "pytest_simcore.environment_configs", + "pytest_simcore.faker_projects_data", "pytest_simcore.rabbit_service", "pytest_simcore.redis_service", "pytest_simcore.repository_paths", @@ -73,17 +78,38 @@ def app_environment( ) +_PATH_APPLICATION: Final[str] = "simcore_service_dynamic_scheduler.core.application" + + @pytest.fixture def disable_rabbitmq_setup(mocker: MockerFixture) -> None: - base_path = "simcore_service_dynamic_scheduler.core.application" - mocker.patch(f"{base_path}.setup_rabbitmq") - mocker.patch(f"{base_path}.setup_rpc_api_routes") + mocker.patch(f"{_PATH_APPLICATION}.setup_rabbitmq") + mocker.patch(f"{_PATH_APPLICATION}.setup_rpc_api_routes") @pytest.fixture def disable_redis_setup(mocker: MockerFixture) -> None: - base_path = "simcore_service_dynamic_scheduler.core.application" - mocker.patch(f"{base_path}.setup_redis") + mocker.patch(f"{_PATH_APPLICATION}.setup_redis") + + +@pytest.fixture +def disable_service_tracker_setup(mocker: MockerFixture) -> None: + mocker.patch(f"{_PATH_APPLICATION}.setup_service_tracker") + + +@pytest.fixture +def disable_deferred_manager_setup(mocker: MockerFixture) -> None: + mocker.patch(f"{_PATH_APPLICATION}.setup_deferred_manager") + + +@pytest.fixture +def disable_notifier_setup(mocker: MockerFixture) -> None: + mocker.patch(f"{_PATH_APPLICATION}.setup_notifier") + + +@pytest.fixture +def disable_status_monitor_setup(mocker: MockerFixture) -> None: + mocker.patch(f"{_PATH_APPLICATION}.setup_status_monitor") MAX_TIME_FOR_APP_TO_STARTUP = 10 @@ -101,3 +127,13 @@ async def app( 
shutdown_timeout=None if is_pdb_enabled else MAX_TIME_FOR_APP_TO_SHUTDOWN, ): yield test_app + + +@pytest.fixture +async def remove_redis_data(redis_service: RedisSettings) -> None: + async with RedisClientsManager( + {RedisManagerDBConfig(x) for x in RedisDatabase}, redis_service + ) as manager: + await logged_gather( + *[manager.client(d).redis.flushall() for d in RedisDatabase] + ) diff --git a/services/dynamic-scheduler/tests/unit/api_rest/conftest.py b/services/dynamic-scheduler/tests/unit/api_rest/conftest.py index 987ed8c4d851..efef4241d981 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/conftest.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/conftest.py @@ -1,13 +1,31 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument from collections.abc import AsyncIterator import pytest from fastapi import FastAPI from httpx import AsyncClient from httpx._transports.asgi import ASGITransport +from pytest_simcore.helpers.typing_env import EnvVarsDict @pytest.fixture -async def client(app: FastAPI) -> AsyncIterator[AsyncClient]: +def app_environment( + disable_rabbitmq_setup: None, + disable_redis_setup: None, + disable_service_tracker_setup: None, + disable_deferred_manager_setup: None, + disable_notifier_setup: None, + disable_status_monitor_setup: None, + app_environment: EnvVarsDict, +) -> EnvVarsDict: + return app_environment + + +@pytest.fixture +async def client( + app_environment: EnvVarsDict, app: FastAPI +) -> AsyncIterator[AsyncClient]: # - Needed for app to trigger start/stop event handlers # - Prefer this client instead of fastapi.testclient.TestClient async with AsyncClient( diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py index 8cc1c3279efd..9b5648e12b4e 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py @@ -21,7 +21,6 @@ def __init__(self, is_ok: bool) -> None: @pytest.fixture def mock_rabbitmq_clients( - disable_rabbitmq_setup: None, mocker: MockerFixture, rabbit_client_ok: bool, rabbit_rpc_server_ok: bool, @@ -39,11 +38,13 @@ def mock_rabbitmq_clients( @pytest.fixture def mock_redis_client( - disable_redis_setup: None, mocker: MockerFixture, redis_client_ok: bool + mocker: MockerFixture, + redis_client_ok: bool, ) -> None: base_path = "simcore_service_dynamic_scheduler.api.rest._dependencies" mocker.patch( - f"{base_path}.get_redis_client", return_value=MockHealth(redis_client_ok) + f"{base_path}.get_all_redis_clients", + return_value={0: MockHealth(redis_client_ok)}, ) diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py index 6e68190bcee9..8d986dfe60ed 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py @@ -1,24 +1,11 @@ # pylint:disable=redefined-outer-name # pylint:disable=unused-argument - - -import pytest from fastapi import status from httpx import AsyncClient -from pytest_simcore.helpers.typing_env import EnvVarsDict from simcore_service_dynamic_scheduler._meta import API_VTAG from simcore_service_dynamic_scheduler.models.schemas.meta import Meta -@pytest.fixture -def app_environment( - disable_rabbitmq_setup: None, - disable_redis_setup: None, - app_environment: EnvVarsDict, -) -> EnvVarsDict: - return 
app_environment - - async def test_health(client: AsyncClient): response = await client.get(f"/{API_VTAG}/meta") assert response.status_code == status.HTTP_200_OK diff --git a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py index 7c8dada1e183..c484f722ff95 100644 --- a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py +++ b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py @@ -59,7 +59,7 @@ def service_status_new_style() -> DynamicServiceGet: @pytest.fixture def service_status_legacy() -> NodeGet: - return NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]) + return NodeGet.parse_obj(NodeGet.Config.schema_extra["examples"][1]) @pytest.fixture diff --git a/services/dynamic-scheduler/tests/unit/conftest.py b/services/dynamic-scheduler/tests/unit/conftest.py new file mode 100644 index 000000000000..642ed2170ce1 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/conftest.py @@ -0,0 +1,29 @@ +from collections.abc import Callable +from copy import deepcopy + +import pytest +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, + DynamicServiceStop, +) +from models_library.projects_nodes_io import NodeID + + +@pytest.fixture +def get_dynamic_service_start() -> Callable[[NodeID], DynamicServiceStart]: + def _(node_id: NodeID) -> DynamicServiceStart: + dict_data = deepcopy(DynamicServiceStart.Config.schema_extra["example"]) + dict_data["service_uuid"] = f"{node_id}" + return DynamicServiceStart.parse_obj(dict_data) + + return _ + + +@pytest.fixture +def get_dynamic_service_stop() -> Callable[[NodeID], DynamicServiceStop]: + def _(node_id: NodeID) -> DynamicServiceStop: + dict_data = deepcopy(DynamicServiceStop.Config.schema_extra["example"]) + dict_data["node_id"] = f"{node_id}" + return DynamicServiceStop.parse_obj(dict_data) + + return _ diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py new file mode 100644 index 000000000000..0755f7e5d786 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -0,0 +1,325 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument + +from collections.abc import Callable +from datetime import timedelta +from typing import Any, Final, NamedTuple +from uuid import uuid4 + +import pytest +from faker import Faker +from fastapi import FastAPI +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, + DynamicServiceStop, +) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects_nodes_io import NodeID +from models_library.services_enums import ServiceState +from pydantic import NonNegativeInt +from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.deferred_tasks import TaskUID +from servicelib.utils import limited_gather +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.services.service_tracker import ( + get_all_tracked_services, + get_tracked_service, + remove_tracked_service, + set_if_status_changed_for_service, + set_request_as_running, + set_request_as_stopped, + set_service_status_task_uid, +) +from simcore_service_dynamic_scheduler.services.service_tracker._api import ( + 
_LOW_RATE_POLL_INTERVAL, + NORMAL_RATE_POLL_INTERVAL, + _get_current_scheduler_service_state, + _get_poll_interval, +) +from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + SchedulerServiceState, + UserRequestedState, +) + +pytest_simcore_core_services_selection = [ + "redis", +] + + +@pytest.fixture +def app_environment( + disable_rabbitmq_setup: None, + disable_deferred_manager_setup: None, + disable_notifier_setup: None, + app_environment: EnvVarsDict, + redis_service: RedisSettings, + remove_redis_data: None, +) -> EnvVarsDict: + return app_environment + + +async def test_services_tracer_set_as_running_set_as_stopped( + app: FastAPI, + node_id: NodeID, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], + get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], +): + async def _remove_service() -> None: + await remove_tracked_service(app, node_id) + assert await get_tracked_service(app, node_id) is None + assert await get_all_tracked_services(app) == {} + + async def _set_as_running() -> None: + await set_request_as_running(app, get_dynamic_service_start(node_id)) + tracked_model = await get_tracked_service(app, node_id) + assert tracked_model + assert tracked_model.requested_state == UserRequestedState.RUNNING + + async def _set_as_stopped() -> None: + await set_request_as_stopped(app, get_dynamic_service_stop(node_id)) + tracked_model = await get_tracked_service(app, node_id) + assert tracked_model + assert tracked_model.requested_state == UserRequestedState.STOPPED + + # request as running then as stopped + await _remove_service() + await _set_as_running() + await _set_as_stopped() + + # request as stopped then as running + await _remove_service() + await _set_as_stopped() + await _set_as_running() + + +@pytest.mark.parametrize("item_count", [100]) +async def test_services_tracer_workflow( + app: FastAPI, + node_id: NodeID, + item_count: NonNegativeInt, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], + get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], +): + # ensure more than one service can be tracked + await limited_gather( + *[ + set_request_as_stopped(app, get_dynamic_service_stop(uuid4())) + for _ in range(item_count) + ], + limit=100, + ) + assert len(await get_all_tracked_services(app)) == item_count + + +@pytest.mark.parametrize( + "status", + [ + *[NodeGet.parse_obj(o) for o in NodeGet.Config.schema_extra["examples"]], + *[ + DynamicServiceGet.parse_obj(o) + for o in DynamicServiceGet.Config.schema_extra["examples"] + ], + NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + ], +) +async def test_set_if_status_changed( + app: FastAPI, + node_id: NodeID, + status: NodeGet | DynamicServiceGet | NodeGetIdle, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], +): + await set_request_as_running(app, get_dynamic_service_start(node_id)) + + assert await set_if_status_changed_for_service(app, node_id, status) is True + + assert await set_if_status_changed_for_service(app, node_id, status) is False + + model = await get_tracked_service(app, node_id) + assert model + + assert model.service_status == status.json() + + +async def test_set_service_status_task_uid( + app: FastAPI, + node_id: NodeID, + faker: Faker, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], +): + await set_request_as_running(app, get_dynamic_service_start(node_id)) + + task_uid = TaskUID(faker.uuid4()) + await set_service_status_task_uid(app, node_id, task_uid) + + 
model = await get_tracked_service(app, node_id) + assert model + + assert model.service_status_task_uid == task_uid + + +@pytest.mark.parametrize( + "status, expected_poll_interval", + [ + ( + NodeGet.parse_obj(NodeGet.Config.schema_extra["examples"][1]), + _LOW_RATE_POLL_INTERVAL, + ), + *[ + (DynamicServiceGet.parse_obj(o), NORMAL_RATE_POLL_INTERVAL) + for o in DynamicServiceGet.Config.schema_extra["examples"] + ], + ( + NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + _LOW_RATE_POLL_INTERVAL, + ), + ], +) +def test__get_poll_interval( + status: NodeGet | DynamicServiceGet | NodeGetIdle, expected_poll_interval: timedelta +): + assert _get_poll_interval(status) == expected_poll_interval + + +def _get_node_get_from(service_state: ServiceState) -> NodeGet: + dict_data = NodeGet.Config.schema_extra["examples"][1] + assert "service_state" in dict_data + dict_data["service_state"] = service_state + return NodeGet.parse_obj(dict_data) + + +def _get_dynamic_service_get_from( + service_state: ServiceState, +) -> DynamicServiceGet: + dict_data = DynamicServiceGet.Config.schema_extra["examples"][1] + assert "state" in dict_data + dict_data["state"] = service_state + return DynamicServiceGet.parse_obj(dict_data) + + +def _get_node_get_idle() -> NodeGetIdle: + return NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]) + + +def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: + return [item for sublist in nested_list for item in sublist] + + +class ServiceStatusToSchedulerState(NamedTuple): + requested: UserRequestedState + service_status: NodeGet | DynamicServiceGet | NodeGetIdle + expected: SchedulerServiceState + + +_EXPECTED_TEST_CASES: list[list[ServiceStatusToSchedulerState]] = [ + [ + # UserRequestedState.RUNNING + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.PENDING), + SchedulerServiceState.STARTING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.PULLING), + SchedulerServiceState.STARTING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.STARTING), + SchedulerServiceState.STARTING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.RUNNING), + SchedulerServiceState.RUNNING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.COMPLETE), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.FAILED), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + status_generator(ServiceState.STOPPING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.RUNNING, + _get_node_get_idle(), + SchedulerServiceState.IDLE, + ), + # UserRequestedState.STOPPED + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.PENDING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.PULLING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.STARTING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.RUNNING), + 
SchedulerServiceState.STOPPING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.COMPLETE), + SchedulerServiceState.STOPPING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.FAILED), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + status_generator(ServiceState.STOPPING), + SchedulerServiceState.STOPPING, + ), + ServiceStatusToSchedulerState( + UserRequestedState.STOPPED, + _get_node_get_idle(), + SchedulerServiceState.IDLE, + ), + ] + for status_generator in ( + _get_node_get_from, + _get_dynamic_service_get_from, + ) +] +_FLAT_EXPECTED_TEST_CASES: list[ServiceStatusToSchedulerState] = __get_flat_list( + _EXPECTED_TEST_CASES +) +# ensure enum changes do not break above rules +_NODE_STATUS_FORMATS_COUNT: Final[int] = 2 +assert ( + len(_FLAT_EXPECTED_TEST_CASES) + == len(ServiceState) * len(UserRequestedState) * _NODE_STATUS_FORMATS_COUNT +) + + +@pytest.mark.parametrize("service_status_to_scheduler_state", _FLAT_EXPECTED_TEST_CASES) +def test__get_current_scheduler_service_state( + service_status_to_scheduler_state: ServiceStatusToSchedulerState, +): + assert ( + _get_current_scheduler_service_state( + service_status_to_scheduler_state.requested, + service_status_to_scheduler_state.service_status, + ) + == service_status_to_scheduler_state.expected + ) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py new file mode 100644 index 000000000000..6b8e31321b38 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py @@ -0,0 +1,57 @@ +from datetime import timedelta + +import arrow +import pytest +from faker import Faker +from servicelib.deferred_tasks import TaskUID +from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + SchedulerServiceState, + TrackedServiceModel, + UserRequestedState, +) + + +@pytest.mark.parametrize("requested_state", UserRequestedState) +@pytest.mark.parametrize("current_state", SchedulerServiceState) +@pytest.mark.parametrize("check_status_after", [1, arrow.utcnow().timestamp()]) +@pytest.mark.parametrize("service_status_task_uid", [None, TaskUID("ok")]) +def test_serialization( + faker: Faker, + requested_state: UserRequestedState, + current_state: SchedulerServiceState, + check_status_after: float, + service_status_task_uid: TaskUID | None, +): + tracked_model = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=requested_state, + current_state=current_state, + service_status=faker.pystr(), + check_status_after=check_status_after, + service_status_task_uid=service_status_task_uid, + ) + + as_bytes = tracked_model.to_bytes() + assert as_bytes + assert TrackedServiceModel.from_bytes(as_bytes) == tracked_model + + +async def test_set_check_status_after_to(): + model = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=UserRequestedState.RUNNING, + ) + assert model.check_status_after < arrow.utcnow().timestamp() + + delay = timedelta(seconds=4) + + before = (arrow.utcnow() + delay).timestamp() + model.set_check_status_after_to(delay) + after = (arrow.utcnow() + delay).timestamp() + + assert model.check_status_after + assert before < model.check_status_after < after diff --git 
a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py new file mode 100644 index 000000000000..59739ddf8f60 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -0,0 +1,94 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument + +from uuid import uuid4 + +import pytest +from fastapi import FastAPI +from models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeInt +from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.utils import logged_gather +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + TrackedServiceModel, + UserRequestedState, +) +from simcore_service_dynamic_scheduler.services.service_tracker._setup import ( + get_tracker, +) +from simcore_service_dynamic_scheduler.services.service_tracker._tracker import Tracker + +pytest_simcore_core_services_selection = [ + "redis", +] + + +@pytest.fixture +def app_environment( + disable_rabbitmq_setup: None, + disable_deferred_manager_setup: None, + disable_notifier_setup: None, + app_environment: EnvVarsDict, + redis_service: RedisSettings, + remove_redis_data: None, +) -> EnvVarsDict: + return app_environment + + +@pytest.fixture +def tracker(app: FastAPI) -> Tracker: + return get_tracker(app) + + +async def test_tracker_workflow(tracker: Tracker): + node_id: NodeID = uuid4() + + # ensure does not already exist + result = await tracker.load(node_id) + assert result is None + + # node creation + model = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=UserRequestedState.RUNNING, + ) + await tracker.save(node_id, model) + + # check if exists + result = await tracker.load(node_id) + assert result == model + + # remove and check is missing + await tracker.delete(node_id) + result = await tracker.load(node_id) + assert result is None + + +@pytest.mark.parametrize("item_count", [100]) +async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> None: + assert await tracker.all() == {} + + model_to_insert = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=UserRequestedState.RUNNING, + ) + + data_to_insert = {uuid4(): model_to_insert for _ in range(item_count)} + + await logged_gather( + *[tracker.save(k, v) for k, v in data_to_insert.items()], max_concurrency=100 + ) + + response = await tracker.all() + for key in response: + assert isinstance(key, NodeID) + assert response == data_to_insert + + +async def test_remove_missing_key_does_not_raise_error(tracker: Tracker): + await tracker.delete(uuid4()) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py new file mode 100644 index 000000000000..2dd5270b627a --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -0,0 +1,416 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=too-many-positional-arguments +# pylint:disable=unused-argument + +import json +import re +from collections.abc import AsyncIterable, Callable +from copy import deepcopy +from typing import Any +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest +import respx +from fastapi import 
FastAPI, status +from fastapi.encoders import jsonable_encoder +from httpx import Request, Response +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, + DynamicServiceStop, +) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeInt +from pytest_mock import MockerFixture +from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.services.service_tracker import ( + get_all_tracked_services, + set_request_as_running, + set_request_as_stopped, +) +from simcore_service_dynamic_scheduler.services.status_monitor import _monitor +from simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status import ( + DeferredGetStatus, +) +from simcore_service_dynamic_scheduler.services.status_monitor._monitor import Monitor +from simcore_service_dynamic_scheduler.services.status_monitor._setup import get_monitor +from tenacity import AsyncRetrying +from tenacity.retry import retry_if_exception_type +from tenacity.stop import stop_after_delay +from tenacity.wait import wait_fixed + +pytest_simcore_core_services_selection = [ + "rabbit", + "redis", +] + + +@pytest.fixture +def app_environment( + app_environment: EnvVarsDict, + rabbit_service: RabbitSettings, + redis_service: RedisSettings, + remove_redis_data: None, +) -> EnvVarsDict: + return app_environment + + +_DEFAULT_NODE_ID: NodeID = uuid4() + + +def _add_to_dict(dict_data: dict, entries: list[tuple[str, Any]]) -> None: + for key, data in entries: + assert key in dict_data + dict_data[key] = data + + +def _get_node_get_with(state: str, node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGet: + dict_data = deepcopy(NodeGet.Config.schema_extra["examples"][1]) + _add_to_dict( + dict_data, + [ + ("service_state", state), + ("service_uuid", f"{node_id}"), + ], + ) + return NodeGet.parse_obj(dict_data) + + +def _get_dynamic_service_get_legacy_with( + state: str, node_id: NodeID = _DEFAULT_NODE_ID +) -> DynamicServiceGet: + dict_data = deepcopy(DynamicServiceGet.Config.schema_extra["examples"][0]) + _add_to_dict( + dict_data, + [ + ("state", state), + ("uuid", f"{node_id}"), + ("node_uuid", f"{node_id}"), + ], + ) + return DynamicServiceGet.parse_obj(dict_data) + + +def _get_dynamic_service_get_new_style_with( + state: str, node_id: NodeID = _DEFAULT_NODE_ID +) -> DynamicServiceGet: + dict_data = deepcopy(DynamicServiceGet.Config.schema_extra["examples"][1]) + _add_to_dict( + dict_data, + [ + ("state", state), + ("uuid", f"{node_id}"), + ("node_uuid", f"{node_id}"), + ], + ) + return DynamicServiceGet.parse_obj(dict_data) + + +def _get_node_get_idle(node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGetIdle: + dict_data = NodeGetIdle.Config.schema_extra["example"] + _add_to_dict( + dict_data, + [ + ("service_uuid", f"{node_id}"), + ], + ) + return NodeGetIdle.parse_obj(dict_data) + + +class _ResponseTimeline: + def __init__( + self, timeline: list[NodeGet | DynamicServiceGet | NodeGetIdle] + ) -> None: + self._timeline = timeline + + self._client_access_history: dict[NodeID, NonNegativeInt] = {} + + @property + def entries(self) -> list[NodeGet | DynamicServiceGet | NodeGetIdle]: + return self._timeline + + def __len__(self) -> int: + return len(self._timeline) + + def 
get_status(self, node_id: NodeID) -> NodeGet | DynamicServiceGet | NodeGetIdle: + if node_id not in self._client_access_history: + self._client_access_history[node_id] = 0 + + # always return node idle when timeline finished playing + if self._client_access_history[node_id] >= len(self._timeline): + return _get_node_get_idle() + + status = self._timeline[self._client_access_history[node_id]] + self._client_access_history[node_id] += 1 + return status + + +async def _assert_call_to( + deferred_status_spies: dict[str, AsyncMock], *, method: str, count: NonNegativeInt +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(1), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + call_count = deferred_status_spies[method].call_count + assert ( + call_count == count + ), f"Received calls {call_count} != {count} (expected) to '{method}'" + + +async def _assert_result( + deferred_status_spies: dict[str, AsyncMock], + *, + timeline: list[NodeGet | DynamicServiceGet | NodeGetIdle], +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(1), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + + assert deferred_status_spies["on_result"].call_count == len(timeline) + assert [ + x.args[0] for x in deferred_status_spies["on_result"].call_args_list + ] == timeline + + +async def _assert_notification_count( + mock: AsyncMock, expected_count: NonNegativeInt +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(1), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert mock.call_count == expected_count + + +@pytest.fixture +async def mock_director_v2_status( + app: FastAPI, response_timeline: _ResponseTimeline +) -> AsyncIterable[None]: + def _side_effect_node_status_response(request: Request) -> Response: + node_id = NodeID(f"{request.url}".split("/")[-1]) + + service_status = response_timeline.get_status(node_id) + + if isinstance(service_status, NodeGet): + return Response( + status.HTTP_200_OK, + text=json.dumps(jsonable_encoder({"data": service_status.dict()})), + ) + if isinstance(service_status, DynamicServiceGet): + return Response(status.HTTP_200_OK, text=service_status.json()) + if isinstance(service_status, NodeGetIdle): + return Response(status.HTTP_404_NOT_FOUND) + + raise TypeError + + with respx.mock( + base_url=app.state.settings.DYNAMIC_SCHEDULER_DIRECTOR_V2_SETTINGS.api_base_url, + assert_all_called=False, + assert_all_mocked=True, + ) as mock: + mock.get(re.compile(r"/dynamic_services/([\w-]+)")).mock( + side_effect=_side_effect_node_status_response + ) + yield + + +@pytest.fixture +def monitor(mock_director_v2_status: None, app: FastAPI) -> Monitor: + return get_monitor(app) + + +@pytest.fixture +def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: + results: dict[str, AsyncMock] = {} + for method_name in ( + "start", + "on_result", + "on_created", + "run", + "on_finished_with_error", + ): + mock_method = mocker.AsyncMock(wraps=getattr(DeferredGetStatus, method_name)) + mocker.patch.object(DeferredGetStatus, method_name, mock_method) + results[method_name] = mock_method + + return results + + +@pytest.fixture +def remove_tracked_spy(mocker: MockerFixture) -> AsyncMock: + mock_method = mocker.AsyncMock( + wraps=_monitor.service_tracker.remove_tracked_service + ) + return mocker.patch.object( + _monitor.service_tracker, + 
_monitor.service_tracker.remove_tracked_service.__name__, + mock_method, + ) + + +@pytest.fixture +def node_id() -> NodeID: + return _DEFAULT_NODE_ID + + +@pytest.fixture +def mocked_notify_frontend(mocker: MockerFixture) -> AsyncMock: + return mocker.patch( + "simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status.notify_service_status_change" + ) + + +@pytest.fixture +def disable_status_monitor_background_task(mocker: MockerFixture) -> None: + mocker.patch( + "simcore_service_dynamic_scheduler.services.status_monitor._monitor.Monitor.setup" + ) + + +@pytest.mark.parametrize( + "user_requests_running, response_timeline, expected_notification_count, remove_tracked_count", + [ + pytest.param( + True, + _ResponseTimeline([_get_node_get_with("running")]), + 1, + 0, + id="requested_running_state_changes_1_no_task_removal", + ), + pytest.param( + True, + _ResponseTimeline( + [_get_dynamic_service_get_legacy_with("running") for _ in range(10)] + ), + 1, + 0, + id="requested_running_state_changes_1_for_multiple_same_state_no_task_removal", + ), + pytest.param( + True, + _ResponseTimeline([_get_node_get_idle()]), + 1, + 0, + id="requested_running_state_idle_no_removal", + ), + pytest.param( + False, + _ResponseTimeline([_get_node_get_idle()]), + 1, + 1, + id="requested_stopped_state_idle_is_removed", + ), + pytest.param( + True, + _ResponseTimeline( + [ + *[_get_node_get_idle() for _ in range(10)], + _get_dynamic_service_get_new_style_with("pending"), + _get_dynamic_service_get_new_style_with("pulling"), + *[ + _get_dynamic_service_get_new_style_with("starting") + for _ in range(10) + ], + _get_dynamic_service_get_new_style_with("running"), + _get_dynamic_service_get_new_style_with("stopping"), + _get_dynamic_service_get_new_style_with("complete"), + _get_node_get_idle(), + ] + ), + 8, + 0, + id="requested_running_state_changes_8_no_removal", + ), + pytest.param( + False, + _ResponseTimeline( + [ + _get_dynamic_service_get_new_style_with("pending"), + _get_dynamic_service_get_new_style_with("pulling"), + *[ + _get_dynamic_service_get_new_style_with("starting") + for _ in range(10) + ], + _get_dynamic_service_get_new_style_with("running"), + _get_dynamic_service_get_new_style_with("stopping"), + _get_dynamic_service_get_new_style_with("complete"), + _get_node_get_idle(), + ] + ), + 7, + 1, + id="requested_stopped_state_changes_7_is_removed", + ), + ], +) +async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arguments + disable_status_monitor_background_task: None, + mocked_notify_frontend: AsyncMock, + deferred_status_spies: dict[str, AsyncMock], + remove_tracked_spy: AsyncMock, + app: FastAPI, + monitor: Monitor, + node_id: NodeID, + user_requests_running: bool, + response_timeline: _ResponseTimeline, + expected_notification_count: NonNegativeInt, + remove_tracked_count: NonNegativeInt, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], + get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], +): + assert await get_all_tracked_services(app) == {} + + if user_requests_running: + await set_request_as_running(app, get_dynamic_service_start(node_id)) + else: + await set_request_as_stopped(app, get_dynamic_service_stop(node_id)) + + entries_in_timeline = len(response_timeline) + + for i in range(entries_in_timeline): + async for attempt in AsyncRetrying( + reraise=True, stop=stop_after_delay(10), wait=wait_fixed(0.1) + ): + with attempt: + # pylint:disable=protected-access + await monitor._worker_start_get_status_requests() # 
noqa: SLF001 + for method in ("start", "on_created", "on_result"): + await _assert_call_to( + deferred_status_spies, method=method, count=i + 1 + ) + + await _assert_call_to( + deferred_status_spies, method="run", count=entries_in_timeline + ) + await _assert_call_to( + deferred_status_spies, method="on_finished_with_error", count=0 + ) + + await _assert_result(deferred_status_spies, timeline=response_timeline.entries) + + await _assert_notification_count( + mocked_notify_frontend, expected_notification_count + ) + + async for attempt in AsyncRetrying( + reraise=True, stop=stop_after_delay(1), wait=wait_fixed(0.1) + ): + with attempt: + # pylint:disable=protected-access + await monitor._worker_start_get_status_requests() # noqa: SLF001 + assert remove_tracked_spy.call_count == remove_tracked_count diff --git a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py index feefc0c1aa4a..eadb7c9ee038 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py +++ b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py @@ -21,6 +21,10 @@ @pytest.fixture def app_environment( disable_redis_setup: None, + disable_service_tracker_setup: None, + disable_deferred_manager_setup: None, + disable_notifier_setup: None, + disable_status_monitor_setup: None, app_environment: EnvVarsDict, rabbit_service: RabbitSettings, ) -> EnvVarsDict: diff --git a/services/dynamic-scheduler/tests/unit/test_services_redis.py b/services/dynamic-scheduler/tests/unit/test_services_redis.py index 7a7d90063851..059a17aeb0fc 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_redis.py +++ b/services/dynamic-scheduler/tests/unit/test_services_redis.py @@ -6,7 +6,7 @@ from fastapi import FastAPI from pytest_simcore.helpers.typing_env import EnvVarsDict from settings_library.redis import RedisSettings -from simcore_service_dynamic_scheduler.services.redis import get_redis_client +from simcore_service_dynamic_scheduler.services.redis import get_all_redis_clients pytest_simcore_core_services_selection = [ "redis", @@ -16,6 +16,9 @@ @pytest.fixture def app_environment( disable_rabbitmq_setup: None, + disable_deferred_manager_setup: None, + disable_notifier_setup: None, + disable_status_monitor_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, ) -> EnvVarsDict: @@ -23,5 +26,6 @@ def app_environment( async def test_health(app: FastAPI): - redis_client = get_redis_client(app) - assert await redis_client.ping() is True + redis_clients = get_all_redis_clients(app) + for redis_client in redis_clients.values(): + assert await redis_client.ping() is True diff --git a/services/dynamic-sidecar/requirements/_base.txt b/services/dynamic-sidecar/requirements/_base.txt index 0416a0dc9f01..35cd6843f2b3 100644 --- a/services/dynamic-sidecar/requirements/_base.txt +++ b/services/dynamic-sidecar/requirements/_base.txt @@ -4,7 +4,10 @@ aio-pika==9.4.1 # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/_base.in aiocache==0.12.2 - # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in aiodebug==2.3.0 # via # -r 
requirements/../../../packages/service-library/requirements/_base.in @@ -65,12 +68,12 @@ arrow==1.3.0 # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi async-timeout==4.0.3 # via - # aiohttp # aiopg # asyncpg - # redis asyncpg==0.29.0 # via sqlalchemy attrs==23.2.0 @@ -98,16 +101,23 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer # uvicorn +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via pydantic -exceptiongroup==1.2.0 - # via anyio fast-depends==2.4.2 # via faststream fastapi==0.99.1 @@ -137,8 +147,14 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http greenlet==3.0.3 # via sqlalchemy +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -169,7 +185,10 @@ idna==3.6 # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jsonschema==4.21.1 # via # -r requirements/../../../packages/models-library/requirements/_base.in @@ -205,6 +224,65 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # 
opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -237,8 +315,15 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in -psutil==5.9.8 - # via -r requirements/_base.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/_base.in psycopg2-binary==2.9.9 # via # aiopg @@ -333,6 +418,12 @@ referencing==0.29.3 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -344,6 +435,8 @@ rpds-py==0.18.0 # via # jsonschema # referencing +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer simple-websocket==1.0.0 @@ -419,21 +512,41 @@ typing-extensions==4.11.0 # aiodebug # aiodocker # alembic - # anyio # fastapi # faststream + # opentelemetry-sdk # pint # pydantic # typer - # uvicorn u-msgpack-python==2.8.0 # via -r requirements/_base.in +urllib3==2.2.2 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests uvicorn==0.29.0 # via # -r requirements/../../../packages/service-library/requirements/_fastapi.in # -r requirements/_base.in watchdog==4.0.0 # via -r requirements/_base.in +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation wsproto==1.2.0 # via simple-websocket yarl==1.9.4 @@ -443,3 +556,5 @@ yarl==1.9.4 # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git a/services/dynamic-sidecar/requirements/_test.txt b/services/dynamic-sidecar/requirements/_test.txt index ba822234e960..3b248a300cdc 100644 --- a/services/dynamic-sidecar/requirements/_test.txt +++ b/services/dynamic-sidecar/requirements/_test.txt @@ -11,7 +11,7 @@ aiohttp==3.9.3 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # aiobotocore -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore aiosignal==1.3.1 # via @@ -21,10 +21,6 @@ asgi-lifespan==2.1.0 # via -r requirements/_test.in async-asgi-testclient==1.4.11 # via -r requirements/_test.in -async-timeout==4.0.3 - # via - # -c requirements/_base.txt - # aiohttp attrs==23.2.0 # via # -c requirements/_base.txt @@ -42,16 +38,14 @@ certifi==2024.2.2 # -c requirements/_base.txt # requests charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests coverage==7.6.1 # via pytest-cov docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.0 - # via - # -c requirements/_base.txt - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in flaky==3.8.1 # via -r requirements/_test.in @@ -81,7 +75,7 @@ multidict==6.0.5 # aiohttp # async-asgi-testclient # yarl -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -91,7 +85,7 @@ packaging==24.0 # pytest pluggy==1.5.0 # via pytest -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -114,6 +108,7 @@ python-dotenv==1.0.1 # via -r requirements/_test.in requests==2.32.3 # via + # -c requirements/_base.txt # async-asgi-testclient # docker s3transfer==0.10.2 @@ -133,18 +128,13 @@ sqlalchemy==1.4.52 # -r requirements/_test.in sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy -tomli==2.0.1 - # via - # coverage - # mypy - # pytest -types-aiobotocore-s3==2.13.2 +types-aiobotocore-s3==2.15.1 # via -r requirements/_test.in types-aiofiles==24.1.0.20240626 # via -r requirements/_test.in -types-psutil==6.0.0.20240621 +types-psutil==6.0.0.20240901 # via -r requirements/_test.in -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in 
typing-extensions==4.11.0 # via @@ -155,11 +145,14 @@ typing-extensions==4.11.0 urllib3==2.2.2 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # botocore # docker # requests wrapt==1.16.0 - # via aiobotocore + # via + # -c requirements/_base.txt + # aiobotocore yarl==1.9.4 # via # -c requirements/_base.txt diff --git a/services/dynamic-sidecar/requirements/_tools.txt b/services/dynamic-sidecar/requirements/_tools.txt index 32ff78658779..4eed4827cf84 100644 --- a/services/dynamic-sidecar/requirements/_tools.txt +++ b/services/dynamic-sidecar/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -50,14 +50,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,28 +68,20 @@ pyyaml==6.0.1 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools -tomli==2.0.1 +setuptools==74.0.0 # via - # -c requirements/_test.txt - # black - # build - # mypy + # -c requirements/_base.txt # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.11.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_long_running_tasks.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_long_running_tasks.py index ae04a620c8a2..52b0e2e7ad64 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_long_running_tasks.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_long_running_tasks.py @@ -209,6 +209,7 @@ async def ports_inputs_pull_task( request: Request, tasks_manager: Annotated[TasksManager, Depends(get_tasks_manager)], app: Annotated[FastAPI, Depends(get_application)], + settings: Annotated[ApplicationSettings, Depends(get_settings)], mounted_volumes: Annotated[MountedVolumes, Depends(get_mounted_volumes)], inputs_state: Annotated[InputsState, Depends(get_inputs_state)], port_keys: list[str] | None = None, @@ -223,6 +224,7 @@ async def ports_inputs_pull_task( port_keys=port_keys, mounted_volumes=mounted_volumes, app=app, + settings=settings, inputs_pulling_enabled=inputs_state.inputs_pulling_enabled, ) except TaskAlreadyRunningError as e: diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py 
b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py index f5910ffbffee..20029cac7fcb 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py @@ -19,6 +19,7 @@ from ..modules.attribute_monitor import setup_attribute_monitor from ..modules.inputs import setup_inputs from ..modules.mounted_fs import MountedVolumes, setup_mounted_fs +from ..modules.notifications import setup_notifications from ..modules.outputs import setup_outputs from ..modules.prometheus_metrics import setup_prometheus_metrics from ..modules.resource_tracking import setup_resource_tracking @@ -172,6 +173,7 @@ def create_app(): setup_rabbitmq(app) setup_background_log_fetcher(app) setup_resource_tracking(app) + setup_notifications(app) setup_system_monitor(app) setup_mounted_fs(app) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/attribute_monitor/_watchdog_extensions.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/attribute_monitor/_watchdog_extensions.py index 5925e7d7fe20..83389547c776 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/attribute_monitor/_watchdog_extensions.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/attribute_monitor/_watchdog_extensions.py @@ -20,7 +20,9 @@ def __init__(self, path, recursive=False): # pylint:disable=super-init-not-call # overwrite the `InotifyBuffer.__init__` method BaseThread.__init__(self) # pylint:disable=non-parent-init-called self._queue = DelayedQueue(self.delay) - self._inotify = Inotify(path, recursive, InotifyConstants.IN_ATTRIB) + self._inotify = Inotify( # pylint:disable=too-many-function-args + path, recursive, InotifyConstants.IN_ATTRIB + ) self.start() diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py index 9c13cfc54170..0558011770af 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py @@ -5,12 +5,12 @@ from pathlib import Path from typing import Final +from common_library.pydantic_basic_types import IDStr from fastapi import FastAPI from models_library.api_schemas_long_running_tasks.base import ( ProgressPercent, TaskProgress, ) -from models_library.basic_types import IDStr from models_library.generated_models.docker_rest_api import ContainerState from models_library.rabbitmq_messages import ProgressType, SimcorePlatformStatus from pydantic import PositiveInt @@ -52,6 +52,7 @@ from ..models.shared_store import SharedStore from ..modules import nodeports, user_services_preferences from ..modules.mounted_fs import MountedVolumes +from ..modules.notifications._notifications_ports import PortNotifier from ..modules.outputs import OutputsManager, event_propagation_disabled from .long_running_tasksutils import run_before_shutdown_actions from .resource_tracking import send_service_started, send_service_stopped @@ -168,24 +169,27 @@ async def task_create_service_containers( assert shared_store.compose_spec # nosec - async with event_propagation_disabled(app), _reset_on_error(shared_store): + async with event_propagation_disabled(app), _reset_on_error( + shared_store + ), ProgressBarData( + num_steps=4, + 
progress_report_cb=functools.partial( + post_progress_message, + app, + ProgressType.SERVICE_CONTAINERS_STARTING, + ), + description=IDStr("starting software"), + ) as progress_bar: with log_context(_logger, logging.INFO, "load user services preferences"): if user_services_preferences.is_feature_enabled(app): await user_services_preferences.load_user_services_preferences(app) + await progress_bar.update() # removes previous pending containers progress.update(message="cleanup previous used resources") result = await docker_compose_rm(shared_store.compose_spec, settings) _raise_for_errors(result, "rm") - - progress.update(message="pulling images", percent=ProgressPercent(0.01)) - await post_sidecar_log_message( - app, "pulling service images", log_level=logging.INFO - ) - await docker_compose_pull(app, shared_store.compose_spec) - await post_sidecar_log_message( - app, "service images ready", log_level=logging.INFO - ) + await progress_bar.update() progress.update( message="creating and starting containers", percent=ProgressPercent(0.90) @@ -194,6 +198,7 @@ async def task_create_service_containers( app, "starting service containers", log_level=logging.INFO ) await _retry_docker_compose_create(shared_store.compose_spec, settings) + await progress_bar.update() progress.update( message="ensure containers are started", percent=ProgressPercent(0.95) @@ -317,6 +322,15 @@ async def _send_resource_tracking_stop(platform_status: SimcorePlatformStatus): progress.update(message="done", percent=ProgressPercent(0.99)) +def _get_satate_folders_size(paths: list[Path]) -> int: + total_size: int = 0 + for path in paths: + for file in path.rglob("*"): + if file.is_file(): + total_size += file.stat().st_size + return total_size + + async def _restore_state_folder( app: FastAPI, *, @@ -343,7 +357,7 @@ async def task_restore_state( settings: ApplicationSettings, mounted_volumes: MountedVolumes, app: FastAPI, -) -> None: +) -> int: # NOTE: the legacy data format was a zip file # this method will maintain retro compatibility. # The legacy archive is always downloaded and decompressed @@ -386,6 +400,8 @@ async def task_restore_state( ) progress.update(message="state restored", percent=ProgressPercent(0.99)) + return _get_satate_folders_size(state_paths) + async def _save_state_folder( app: FastAPI, @@ -415,7 +431,7 @@ async def task_save_state( settings: ApplicationSettings, mounted_volumes: MountedVolumes, app: FastAPI, -) -> None: +) -> int: """ Saves the states of the service. 
If a legacy archive is detected, it will be removed after @@ -449,12 +465,15 @@ async def task_save_state( await post_sidecar_log_message(app, "Finished state saving", log_level=logging.INFO) progress.update(message="finished state saving", percent=ProgressPercent(0.99)) + return _get_satate_folders_size(state_paths) + async def task_ports_inputs_pull( progress: TaskProgress, port_keys: list[str] | None, mounted_volumes: MountedVolumes, app: FastAPI, + settings: ApplicationSettings, *, inputs_pulling_enabled: bool, ) -> int: @@ -488,6 +507,12 @@ async def task_ports_inputs_pull( post_sidecar_log_message, app, log_level=logging.INFO ), progress_bar=root_progress, + port_notifier=PortNotifier( + app, + settings.DY_SIDECAR_USER_ID, + settings.DY_SIDECAR_PROJECT_ID, + settings.DY_SIDECAR_NODE_ID, + ), ) await post_sidecar_log_message( app, "Finished pulling inputs", log_level=logging.INFO @@ -524,6 +549,7 @@ async def task_ports_outputs_pull( post_sidecar_log_message, app, log_level=logging.INFO ), progress_bar=root_progress, + port_notifier=None, ) await post_sidecar_log_message( app, "Finished pulling outputs", log_level=logging.INFO diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 2213dd1d4ac9..274367d57c3e 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -4,6 +4,7 @@ import shutil import sys import time +from asyncio import CancelledError from collections import deque from collections.abc import Coroutine from contextlib import AsyncExitStack @@ -14,7 +15,7 @@ import aiofiles.os import magic from aiofiles.tempfile import TemporaryDirectory as AioTemporaryDirectory -from models_library.basic_types import IDStr +from common_library.pydantic_basic_types import IDStr from models_library.projects import ProjectIDStr from models_library.projects_nodes_io import NodeIDStr from models_library.services_types import ServicePortKey @@ -24,16 +25,17 @@ from servicelib.file_utils import remove_directory from servicelib.logging_utils import log_context from servicelib.progress_bar import ProgressBarData -from servicelib.utils import logged_gather +from servicelib.utils import limited_gather from simcore_sdk import node_ports_v2 from simcore_sdk.node_ports_common.file_io_utils import LogRedirectCB from simcore_sdk.node_ports_v2 import Port from simcore_sdk.node_ports_v2.links import ItemConcreteValue -from simcore_sdk.node_ports_v2.nodeports_v2 import Nodeports +from simcore_sdk.node_ports_v2.nodeports_v2 import Nodeports, OutputsCallbacks from simcore_sdk.node_ports_v2.port import SetKWargs from simcore_sdk.node_ports_v2.port_utils import is_file_type from ..core.settings import ApplicationSettings, get_settings +from ..modules.notifications import PortNotifier class PortTypeName(str, Enum): @@ -70,13 +72,27 @@ def _get_size_of_value(value: tuple[ItemConcreteValue | None, SetKWargs | None]) ) -# NOTE: outputs_manager guarantees that no parallel calls -# to this function occur -async def upload_outputs( +class OutputCallbacksWrapper(OutputsCallbacks): + def __init__(self, port_notifier: PortNotifier) -> None: + self.port_notifier = port_notifier + + async def aborted(self, key: ServicePortKey) -> None: + await self.port_notifier.send_output_port_upload_was_aborted(key) + + async def finished_succesfully(self, key: ServicePortKey) -> 
None: + await self.port_notifier.send_output_port_upload_finished_successfully(key) + + async def finished_with_error(self, key: ServicePortKey) -> None: + await self.port_notifier.send_output_port_upload_finished_with_error(key) + + +# NOTE: outputs_manager guarantees that no parallel calls to this function occur +async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915, C901 outputs_path: Path, port_keys: list[str], io_log_redirect_cb: LogRedirectCB | None, progress_bar: ProgressBarData, + port_notifier: PortNotifier, ) -> None: # pylint: disable=too-many-branches logger.debug("uploading data to simcore...") @@ -97,12 +113,17 @@ async def upload_outputs( ServicePortKey, tuple[ItemConcreteValue | None, SetKWargs | None] ] = {} archiving_tasks: deque[Coroutine[None, None, None]] = deque() - ports_to_set = [ + ports_to_set: list[Port] = [ port_value for port_value in (await PORTS.outputs).values() if (not port_keys) or (port_value.key in port_keys) ] + await limited_gather( + *(port_notifier.send_output_port_upload_sarted(p.key) for p in ports_to_set), + limit=4, + ) + async with AsyncExitStack() as stack: sub_progress = await stack.enter_async_context( progress_bar.sub_progress( @@ -147,13 +168,34 @@ async def upload_outputs( # when having multiple directories it is important to # run the compression in parallel to guarantee better performance + async def _archive_dir_notified( + dir_to_compress: Path, destination: Path, port_key: ServicePortKey + ) -> None: + # Errors and cancellation can also be triggered from archving as well + try: + await archive_dir( + dir_to_compress=dir_to_compress, + destination=destination, + compress=False, + store_relative_path=True, + progress_bar=sub_progress, + ) + except CancelledError: + await port_notifier.send_output_port_upload_was_aborted( + port_key + ) + raise + except Exception: + await port_notifier.send_output_port_upload_finished_with_error( + port_key + ) + raise + archiving_tasks.append( - archive_dir( + _archive_dir_notified( dir_to_compress=src_folder, destination=tmp_file, - compress=False, - store_relative_path=True, - progress_bar=sub_progress, + port_key=port.key, ) ) ports_values[port.key] = ( @@ -176,9 +218,13 @@ async def upload_outputs( logger.debug("No file %s to fetch port values from", data_file) if archiving_tasks: - await logged_gather(*archiving_tasks) + await limited_gather(*archiving_tasks, limit=4) - await PORTS.set_multiple(ports_values, progress_bar=sub_progress) + await PORTS.set_multiple( + ports_values, + progress_bar=sub_progress, + outputs_callbacks=OutputCallbacksWrapper(port_notifier), + ) elapsed_time = time.perf_counter() - start_time total_bytes = sum(_get_size_of_value(x) for x in ports_values.values()) @@ -264,6 +310,7 @@ async def download_target_ports( port_keys: list[str], io_log_redirect_cb: LogRedirectCB, progress_bar: ProgressBarData, + port_notifier: PortNotifier | None, ) -> ByteSize: logger.debug("retrieving data from simcore...") start_time = time.perf_counter() @@ -279,22 +326,46 @@ async def download_target_ports( ) # let's gather all the data - ports_to_get = [ + ports_to_get: list[Port] = [ port_value for port_value in (await getattr(PORTS, port_type_name.value)).values() if (not port_keys) or (port_value.key in port_keys) ] + + async def _get_date_from_port_notified( + port: Port, progress_bar: ProgressBarData + ) -> tuple[Port, ItemConcreteValue | None, ByteSize]: + assert port_notifier is not None + await port_notifier.send_input_port_download_started(port.key) + try: + 
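+            # the actual download happens here; success, cancellation and
+            # failure each emit the matching input-port status notification
+            # before returning or re-raising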
result = await _get_data_from_port( + port, target_dir=target_dir, progress_bar=progress_bar + ) + await port_notifier.send_input_port_download_finished_succesfully(port.key) + return result + + except CancelledError: + await port_notifier.send_input_port_download_was_aborted(port.key) + raise + except Exception: + await port_notifier.send_input_port_download_finished_with_error(port.key) + raise + async with progress_bar.sub_progress( steps=len(ports_to_get), description=IDStr("downloading") ) as sub_progress: - results = await logged_gather( + results = await limited_gather( *[ - _get_data_from_port( - port, target_dir=target_dir, progress_bar=sub_progress + ( + _get_data_from_port( + port, target_dir=target_dir, progress_bar=sub_progress + ) + if port_type_name == PortTypeName.OUTPUTS + else _get_date_from_port_notified(port, progress_bar=sub_progress) ) for port in ports_to_get ], - max_concurrency=2, + limit=2, ) # parse results data = { diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/__init__.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/__init__.py new file mode 100644 index 000000000000..18254b1d23c1 --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/__init__.py @@ -0,0 +1,9 @@ +from ._notifications_ports import PortNotifier +from ._notifications_system_monitor import publish_disk_usage +from ._setup import setup_notifications + +__all__: tuple[str, ...] = ( + "PortNotifier", + "publish_disk_usage", + "setup_notifications", +) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifications_ports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifications_ports.py new file mode 100644 index 000000000000..ae48f19a973f --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifications_ports.py @@ -0,0 +1,78 @@ +from dataclasses import dataclass + +from fastapi import FastAPI +from models_library.api_schemas_dynamic_sidecar.ports import InputStatus, OutputStatus +from models_library.projects import ProjectID +from models_library.projects_nodes_io import NodeID +from models_library.services_types import ServicePortKey +from models_library.users import UserID + +from ._notifier import Notifier + + +@dataclass +class PortNotifier: + app: FastAPI + user_id: UserID + project_id: ProjectID + node_id: NodeID + + async def _send_output_port_status( + self, port_key: ServicePortKey, status: OutputStatus + ) -> None: + notifier: Notifier = Notifier.get_from_app_state(self.app) + await notifier.notify_output_port_status( + self.user_id, self.project_id, self.node_id, port_key, status + ) + + async def _send_input_port_status( + self, port_key: ServicePortKey, status: InputStatus + ) -> None: + notifier: Notifier = Notifier.get_from_app_state(self.app) + await notifier.notify_input_port_status( + self.user_id, self.project_id, self.node_id, port_key, status + ) + + async def send_output_port_upload_sarted(self, port_key: ServicePortKey) -> None: + await self._send_output_port_status(port_key, OutputStatus.UPLOAD_STARTED) + + async def send_output_port_upload_was_aborted( + self, port_key: ServicePortKey + ) -> None: + await self._send_output_port_status(port_key, OutputStatus.UPLOAD_WAS_ABORTED) + + async def send_output_port_upload_finished_successfully( + self, port_key: ServicePortKey + ) -> None: + await 
self._send_output_port_status( + port_key, OutputStatus.UPLOAD_FINISHED_SUCCESSFULLY + ) + + async def send_output_port_upload_finished_with_error( + self, port_key: ServicePortKey + ) -> None: + await self._send_output_port_status( + port_key, OutputStatus.UPLOAD_FINISHED_WITH_ERRROR + ) + + async def send_input_port_download_started(self, port_key: ServicePortKey) -> None: + await self._send_input_port_status(port_key, InputStatus.DOWNLOAD_STARTED) + + async def send_input_port_download_was_aborted( + self, port_key: ServicePortKey + ) -> None: + await self._send_input_port_status(port_key, InputStatus.DOWNLOAD_WAS_ABORTED) + + async def send_input_port_download_finished_succesfully( + self, port_key: ServicePortKey + ) -> None: + await self._send_input_port_status( + port_key, InputStatus.DOWNLOAD_FINISHED_SUCCESSFULLY + ) + + async def send_input_port_download_finished_with_error( + self, port_key: ServicePortKey + ) -> None: + await self._send_input_port_status( + port_key, InputStatus.DOWNLOAD_FINISHED_WITH_ERRROR + ) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifications_system_monitor.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifications_system_monitor.py new file mode 100644 index 000000000000..840c47d729ef --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifications_system_monitor.py @@ -0,0 +1,17 @@ +from pathlib import Path + +from fastapi import FastAPI +from models_library.api_schemas_dynamic_sidecar.telemetry import DiskUsage +from models_library.projects_nodes_io import NodeID +from models_library.users import UserID + +from ._notifier import Notifier + + +async def publish_disk_usage( + app: FastAPI, *, user_id: UserID, node_id: NodeID, usage: dict[Path, DiskUsage] +) -> None: + notifier: Notifier = Notifier.get_from_app_state(app) + await notifier.notify_service_disk_usage( + user_id=user_id, node_id=node_id, usage=usage + ) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifier.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifier.py new file mode 100644 index 000000000000..0d61e1b388ba --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_notifier.py @@ -0,0 +1,103 @@ +import contextlib +from pathlib import Path + +import socketio # type: ignore[import-untyped] +from fastapi import FastAPI +from fastapi.encoders import jsonable_encoder +from models_library.api_schemas_dynamic_sidecar.ports import ( + InputPortSatus, + InputStatus, + OutputPortStatus, + OutputStatus, +) +from models_library.api_schemas_dynamic_sidecar.socketio import ( + SOCKET_IO_SERVICE_DISK_USAGE_EVENT, + SOCKET_IO_STATE_INPUT_PORTS_EVENT, + SOCKET_IO_STATE_OUTPUT_PORTS_EVENT, +) +from models_library.api_schemas_dynamic_sidecar.telemetry import ( + DiskUsage, + ServiceDiskUsage, +) +from models_library.api_schemas_webserver.socketio import SocketIORoomStr +from models_library.projects import ProjectID +from models_library.projects_nodes_io import NodeID +from models_library.services_types import ServicePortKey +from models_library.users import UserID +from servicelib.fastapi.app_state import SingletonInAppStateMixin + + +class Notifier(SingletonInAppStateMixin): + app_state_name: str = "notifier" + + def __init__(self, sio_manager: socketio.AsyncAioPikaManager): + self._sio_manager = sio_manager + + 
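For context on the class below: the sidecar does not run its own socket.io server. `Notifier` publishes events through the `AsyncAioPikaManager` created at startup and stored in `app.state.external_socketio`, and the webserver's socket.io server relays them to the user's room. The following is a minimal, hypothetical sketch of that emit path, assuming the python-socketio manager API; the AMQP URL, the `write_only` choice and the function name are illustrative only and not part of this change:

```python
# Hypothetical sketch: how an event published via an AsyncAioPikaManager
# reaches the frontend room of a given user.
import socketio  # python-socketio

from fastapi.encoders import jsonable_encoder
from models_library.api_schemas_dynamic_sidecar.socketio import (
    SOCKET_IO_SERVICE_DISK_USAGE_EVENT,
)
from models_library.api_schemas_webserver.socketio import SocketIORoomStr
from models_library.users import UserID


async def emit_disk_usage_sketch(user_id: UserID, payload: dict) -> None:
    # write_only=True (assumption): this process only publishes; the webserver
    # instance consuming the RabbitMQ-backed queue forwards the event to the room
    manager = socketio.AsyncAioPikaManager("amqp://rabbit", write_only=True)
    await manager.emit(
        SOCKET_IO_SERVICE_DISK_USAGE_EVENT,
        data=jsonable_encoder(payload),
        room=SocketIORoomStr.from_user_id(user_id),
    )
```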
async def notify_service_disk_usage( + self, user_id: UserID, node_id: NodeID, usage: dict[Path, DiskUsage] + ) -> None: + await self._sio_manager.emit( + SOCKET_IO_SERVICE_DISK_USAGE_EVENT, + data=jsonable_encoder(ServiceDiskUsage(node_id=node_id, usage=usage)), + room=SocketIORoomStr.from_user_id(user_id), + ) + + async def notify_output_port_status( + self, + user_id: UserID, + project_id: ProjectID, + node_id: NodeID, + port_key: ServicePortKey, + output_status: OutputStatus, + ) -> None: + await self._sio_manager.emit( + SOCKET_IO_STATE_OUTPUT_PORTS_EVENT, + data=jsonable_encoder( + OutputPortStatus( + project_id=project_id, + node_id=node_id, + port_key=port_key, + status=output_status, + ) + ), + room=SocketIORoomStr.from_user_id(user_id), + ) + + async def notify_input_port_status( + self, + user_id: UserID, + project_id: ProjectID, + node_id: NodeID, + port_key: ServicePortKey, + input_status: InputStatus, + ) -> None: + await self._sio_manager.emit( + SOCKET_IO_STATE_INPUT_PORTS_EVENT, + data=jsonable_encoder( + InputPortSatus( + project_id=project_id, + node_id=node_id, + port_key=port_key, + status=input_status, + ) + ), + room=SocketIORoomStr.from_user_id(user_id), + ) + + +def setup_notifier(app: FastAPI): + async def _on_startup() -> None: + assert app.state.external_socketio # nosec + + notifier = Notifier( + sio_manager=app.state.external_socketio, + ) + notifier.set_to_app_state(app) + assert Notifier.get_from_app_state(app) == notifier # nosec + + async def _on_shutdown() -> None: + with contextlib.suppress(AttributeError): + Notifier.pop_from_app_state(app) + + app.add_event_handler("startup", _on_startup) + app.add_event_handler("shutdown", _on_shutdown) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_setup.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_setup.py new file mode 100644 index 000000000000..6de0fae307f1 --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_setup.py @@ -0,0 +1,15 @@ +import logging + +from fastapi import FastAPI +from servicelib.logging_utils import log_context + +from ..notifications._notifier import setup_notifier +from ..notifications._socketio import setup_socketio + +_logger = logging.getLogger(__name__) + + +def setup_notifications(app: FastAPI) -> None: + with log_context(_logger, logging.INFO, "setup notifications"): + setup_socketio(app) + setup_notifier(app) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_socketio.py similarity index 100% rename from services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py rename to services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/notifications/_socketio.py diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_manager.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_manager.py index 307f8b3d9337..c9e6d528a60d 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_manager.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_manager.py @@ -6,8 +6,8 @@ from datetime import timedelta from functools import partial +from common_library.pydantic_basic_types import IDStr from fastapi import FastAPI -from 
models_library.basic_types import IDStr from models_library.rabbitmq_messages import ProgressType from pydantic import PositiveFloat from pydantic.errors import PydanticErrorMixin @@ -18,6 +18,7 @@ from ...core.rabbitmq import post_log_message, post_progress_message from ...core.settings import ApplicationSettings +from ...modules.notifications._notifications_ports import PortNotifier from ..nodeports import upload_outputs from ._context import OutputsContext @@ -100,6 +101,7 @@ class OutputsManager: # pylint: disable=too-many-instance-attributes def __init__( self, outputs_context: OutputsContext, + port_notifier: PortNotifier, io_log_redirect_cb: LogRedirectCB | None, progress_cb: progress_bar.AsyncReportCB | None, *, @@ -108,6 +110,7 @@ def __init__( task_monitor_interval_s: PositiveFloat = 1.0, ): self.outputs_context = outputs_context + self.port_notifier = port_notifier self.io_log_redirect_cb = io_log_redirect_cb self.upload_upon_api_request = upload_upon_api_request self.task_cancellation_timeout_s = task_cancellation_timeout_s @@ -138,6 +141,7 @@ async def _upload_ports() -> None: port_keys=port_keys, io_log_redirect_cb=self.io_log_redirect_cb, progress_bar=root_progress, + port_notifier=self.port_notifier, ) task_name = f"outputs_manager_port_keys-{'_'.join(port_keys)}" @@ -271,6 +275,12 @@ async def on_startup() -> None: progress_cb=partial( post_progress_message, app, ProgressType.SERVICE_OUTPUTS_PUSHING ), + port_notifier=PortNotifier( + app, + settings.DY_SIDECAR_USER_ID, + settings.DY_SIDECAR_PROJECT_ID, + settings.DY_SIDECAR_NODE_ID, + ), ) await outputs_manager.start() diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_watchdog_extensions.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_watchdog_extensions.py index 6d6917d4e15f..c95813e939f2 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_watchdog_extensions.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/outputs/_watchdog_extensions.py @@ -36,7 +36,9 @@ def __init__(self, path, recursive=False): # pylint:disable=super-init-not-call # overwrite the `InotifyBuffer.__init__` method BaseThread.__init__(self) # pylint:disable=non-parent-init-called self._queue = DelayedQueue(self.delay) - self._inotify = Inotify(path, recursive, _EVENTS_TO_WATCH) + self._inotify = Inotify( # pylint:disable=too-many-function-args + path, recursive, _EVENTS_TO_WATCH + ) self.start() diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_disk_usage.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_disk_usage.py index 1ecc04fdaea0..90b06450e6f0 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_disk_usage.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_disk_usage.py @@ -15,7 +15,7 @@ from ...core.settings import ApplicationSettings from ..mounted_fs import MountedVolumes -from ._notifier import publish_disk_usage +from ..notifications import publish_disk_usage _logger = logging.getLogger(__name__) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_setup.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_setup.py index e460f7a9ee30..aa0d36a72b9b 100644 --- 
a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_setup.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_setup.py @@ -5,8 +5,6 @@ from ...core.settings import SystemMonitorSettings from ._disk_usage import setup_disk_usage -from ._notifier import setup_notifier -from ._socketio import setup_socketio _logger = logging.getLogger(__name__) @@ -19,6 +17,4 @@ def setup_system_monitor(app: FastAPI) -> None: _logger.warning("system monitor disabled") return - setup_socketio(app) # required by notifier - setup_notifier(app) setup_disk_usage(app) diff --git a/services/dynamic-sidecar/tests/conftest.py b/services/dynamic-sidecar/tests/conftest.py index 397666815fb4..53b88ac13592 100644 --- a/services/dynamic-sidecar/tests/conftest.py +++ b/services/dynamic-sidecar/tests/conftest.py @@ -1,5 +1,6 @@ # pylint: disable=redefined-outer-name # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable diff --git a/services/dynamic-sidecar/tests/unit/conftest.py b/services/dynamic-sidecar/tests/unit/conftest.py index b6e590f71ebb..ee2c106bb695 100644 --- a/services/dynamic-sidecar/tests/unit/conftest.py +++ b/services/dynamic-sidecar/tests/unit/conftest.py @@ -17,6 +17,10 @@ docker_compose_down, ) from simcore_service_dynamic_sidecar.core.docker_utils import docker_client +from simcore_service_dynamic_sidecar.core.settings import ApplicationSettings +from simcore_service_dynamic_sidecar.modules.notifications._notifications_ports import ( + PortNotifier, +) from tenacity import retry from tenacity.after import after_log from tenacity.stop import stop_after_delay @@ -142,3 +146,14 @@ def mock_rabbitmq_envs( }, ) return mock_environment + + +@pytest.fixture +def port_notifier(app: FastAPI) -> PortNotifier: + settings: ApplicationSettings = app.state.settings + return PortNotifier( + app, + settings.DY_SIDECAR_USER_ID, + settings.DY_SIDECAR_PROJECT_ID, + settings.DY_SIDECAR_NODE_ID, + ) diff --git a/services/dynamic-sidecar/tests/unit/test_api_containers_long_running_tasks.py b/services/dynamic-sidecar/tests/unit/test_api_containers_long_running_tasks.py index 1adde00e2065..7493c6a1b7b3 100644 --- a/services/dynamic-sidecar/tests/unit/test_api_containers_long_running_tasks.py +++ b/services/dynamic-sidecar/tests/unit/test_api_containers_long_running_tasks.py @@ -577,7 +577,7 @@ async def test_container_restore_state( status_poll_interval=FAST_STATUS_POLL, progress_callback=_debug_progress, ) as result: - assert result is None + assert isinstance(result, int) async def test_container_save_state( @@ -590,7 +590,7 @@ async def test_container_save_state( status_poll_interval=FAST_STATUS_POLL, progress_callback=_debug_progress, ) as result: - assert result is None + assert isinstance(result, int) @pytest.mark.parametrize("inputs_pulling_enabled", [True, False]) diff --git a/services/dynamic-sidecar/tests/unit/test_modules_notifier.py b/services/dynamic-sidecar/tests/unit/test_modules_notifier.py new file mode 100644 index 000000000000..654d2bb16191 --- /dev/null +++ b/services/dynamic-sidecar/tests/unit/test_modules_notifier.py @@ -0,0 +1,400 @@ +# pylint:disable=unused-argument +# pylint:disable=redefined-outer-name + +from collections.abc import AsyncIterable, Callable +from contextlib import AsyncExitStack, _AsyncGeneratorContextManager +from pathlib import Path +from typing import Final +from unittest.mock import AsyncMock + +import pytest 
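+# These tests exercise the notifications module end-to-end against a real
+# RabbitMQ broker: disk-usage, input-port and output-port status events are
+# published by the sidecar side and received by emulated frontend
+# socket.io clients.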
+import socketio +from asgi_lifespan import LifespanManager +from fastapi import FastAPI +from fastapi.encoders import jsonable_encoder +from models_library.api_schemas_dynamic_sidecar.ports import ( + InputPortSatus, + InputStatus, + OutputPortStatus, + OutputStatus, +) +from models_library.api_schemas_dynamic_sidecar.socketio import ( + SOCKET_IO_SERVICE_DISK_USAGE_EVENT, + SOCKET_IO_STATE_INPUT_PORTS_EVENT, + SOCKET_IO_STATE_OUTPUT_PORTS_EVENT, +) +from models_library.api_schemas_dynamic_sidecar.telemetry import ( + DiskUsage, + ServiceDiskUsage, +) +from models_library.api_schemas_webserver.socketio import SocketIORoomStr +from models_library.projects import ProjectID +from models_library.projects_nodes_io import NodeID +from models_library.services_types import ServicePortKey +from models_library.users import UserID +from pydantic import ByteSize, NonNegativeInt, parse_obj_as +from pytest_mock import MockerFixture +from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict +from servicelib.utils import logged_gather +from settings_library.rabbit import RabbitSettings +from simcore_service_dynamic_sidecar.core.application import create_app +from simcore_service_dynamic_sidecar.core.settings import ApplicationSettings +from simcore_service_dynamic_sidecar.modules.notifications import ( + PortNotifier, + publish_disk_usage, +) +from simcore_service_dynamic_sidecar.modules.system_monitor._disk_usage import ( + DiskUsageMonitor, +) +from socketio import AsyncServer +from tenacity import AsyncRetrying +from tenacity.stop import stop_after_delay +from tenacity.wait import wait_fixed + +pytest_simcore_core_services_selection = [ + "rabbit", +] + +_NUMBER_OF_CLIENTS: Final[NonNegativeInt] = 10 + + +@pytest.fixture +def mock_environment( + monkeypatch: pytest.MonkeyPatch, + rabbit_service: RabbitSettings, + mock_environment: EnvVarsDict, +) -> EnvVarsDict: + return setenvs_from_dict( + monkeypatch, + { + "DY_SIDECAR_SYSTEM_MONITOR_TELEMETRY_ENABLE": "true", + "RABBIT_HOST": rabbit_service.RABBIT_HOST, + "RABBIT_PASSWORD": rabbit_service.RABBIT_PASSWORD.get_secret_value(), + "RABBIT_PORT": f"{rabbit_service.RABBIT_PORT}", + "RABBIT_SECURE": f"{rabbit_service.RABBIT_SECURE}", + "RABBIT_USER": rabbit_service.RABBIT_USER, + }, + ) + + +@pytest.fixture +async def app( + mock_environment: EnvVarsDict, + mock_registry_service: AsyncMock, + mock_storage_check: None, + mock_postgres_check: None, + mocker: MockerFixture, +) -> AsyncIterable[FastAPI]: + mocker.patch( + "simcore_service_dynamic_sidecar.modules.system_monitor._disk_usage._get_monitored_paths", + return_value=[], + ) + + app: FastAPI = create_app() + async with LifespanManager(app): + yield app + + +@pytest.fixture +async def disk_usage_monitor(app: FastAPI) -> DiskUsageMonitor: + return app.state.disk_usage_monitor + + +@pytest.fixture +async def socketio_server( + app: FastAPI, + socketio_server_factory: Callable[ + [RabbitSettings], _AsyncGeneratorContextManager[AsyncServer] + ], +) -> AsyncIterable[AsyncServer]: + # Same configuration as simcore_service_webserver/socketio/server.py + settings: ApplicationSettings = app.state.settings + assert settings.RABBIT_SETTINGS + + async with socketio_server_factory(settings.RABBIT_SETTINGS) as server: + yield server + + +@pytest.fixture +def room_name(user_id: UserID) -> SocketIORoomStr: + return SocketIORoomStr.from_user_id(user_id) + + +async def _assert_call_count(mock: AsyncMock, *, call_count: int) -> None: + async for attempt in AsyncRetrying( + wait=wait_fixed(0.1), 
stop=stop_after_delay(5), reraise=True + ): + with attempt: + assert mock.call_count == call_count + + +def _get_mocked_disk_usage(byte_size_str: str) -> DiskUsage: + return DiskUsage( + total=ByteSize(0), + used=ByteSize(0), + free=ByteSize.validate(byte_size_str), + used_percent=0, + ) + + +def _get_on_service_disk_usage_spy( + socketio_client: socketio.AsyncClient, +) -> AsyncMock: + # emulates front-end receiving message + + async def on_service_status(data): + assert parse_obj_as(ServiceDiskUsage, data) is not None + + on_event_spy = AsyncMock(wraps=on_service_status) + socketio_client.on(SOCKET_IO_SERVICE_DISK_USAGE_EVENT, on_event_spy) + + return on_event_spy + + +@pytest.mark.parametrize( + "usage", + [ + pytest.param({}, id="empty"), + pytest.param({Path("/"): _get_mocked_disk_usage("1kb")}, id="one_entry"), + pytest.param( + { + Path("/"): _get_mocked_disk_usage("1kb"), + Path("/tmp"): _get_mocked_disk_usage("2kb"), # noqa: S108 + }, + id="two_entries", + ), + ], +) +async def test_notifier_publish_disk_usage( + disk_usage_monitor: DiskUsageMonitor, + socketio_server_events: dict[str, AsyncMock], + app: FastAPI, + user_id: UserID, + usage: dict[Path, DiskUsage], + node_id: NodeID, + socketio_client_factory: Callable[ + [], _AsyncGeneratorContextManager[socketio.AsyncClient] + ], +): + # web server spy events + server_connect = socketio_server_events["connect"] + server_disconnect = socketio_server_events["disconnect"] + server_on_check = socketio_server_events["on_check"] + + async with AsyncExitStack() as socketio_frontend_clients: + frontend_clients: list[socketio.AsyncClient] = await logged_gather( + *[ + socketio_frontend_clients.enter_async_context(socketio_client_factory()) + for _ in range(_NUMBER_OF_CLIENTS) + ] + ) + await _assert_call_count(server_connect, call_count=_NUMBER_OF_CLIENTS) + + # client emits and check it was received + await logged_gather( + *[ + frontend_client.emit("check", data="an_event") + for frontend_client in frontend_clients + ] + ) + await _assert_call_count(server_on_check, call_count=_NUMBER_OF_CLIENTS) + + # attach spy to client + on_service_disk_usage_events: list[AsyncMock] = [ + _get_on_service_disk_usage_spy(c) for c in frontend_clients + ] + + # server publishes a message + await publish_disk_usage(app, user_id=user_id, node_id=node_id, usage=usage) + + # check that all clients received it + for on_service_disk_usage_event in on_service_disk_usage_events: + await _assert_call_count(on_service_disk_usage_event, call_count=1) + on_service_disk_usage_event.assert_awaited_once_with( + jsonable_encoder(ServiceDiskUsage(node_id=node_id, usage=usage)) + ) + + await _assert_call_count(server_disconnect, call_count=_NUMBER_OF_CLIENTS) + + +@pytest.fixture +def port_key() -> ServicePortKey: + return ServicePortKey("test_port") + + +def _get_on_input_port_spy( + socketio_client: socketio.AsyncClient, +) -> AsyncMock: + # emulates front-end receiving message + + async def on_service_status(data): + assert parse_obj_as(ServiceDiskUsage, data) is not None + + on_event_spy = AsyncMock(wraps=on_service_status) + socketio_client.on(SOCKET_IO_STATE_INPUT_PORTS_EVENT, on_event_spy) + + return on_event_spy + + +@pytest.mark.parametrize("input_status", InputStatus) +async def test_notifier_send_input_port_status( + socketio_server_events: dict[str, AsyncMock], + app: FastAPI, + user_id: UserID, + project_id: ProjectID, + node_id: NodeID, + port_key: ServicePortKey, + socketio_client_factory: Callable[ + [], _AsyncGeneratorContextManager[socketio.AsyncClient] 
+ ], + input_status: InputStatus, +): + # web server spy events + server_connect = socketio_server_events["connect"] + server_disconnect = socketio_server_events["disconnect"] + server_on_check = socketio_server_events["on_check"] + + async with AsyncExitStack() as socketio_frontend_clients: + frontend_clients: list[socketio.AsyncClient] = await logged_gather( + *[ + socketio_frontend_clients.enter_async_context(socketio_client_factory()) + for _ in range(_NUMBER_OF_CLIENTS) + ] + ) + await _assert_call_count(server_connect, call_count=_NUMBER_OF_CLIENTS) + + # client emits and check it was received + await logged_gather( + *[ + frontend_client.emit("check", data="an_event") + for frontend_client in frontend_clients + ] + ) + await _assert_call_count(server_on_check, call_count=_NUMBER_OF_CLIENTS) + + # attach spy to client + on_input_port_events: list[AsyncMock] = [ + _get_on_input_port_spy(c) for c in frontend_clients + ] + + port_notifier = PortNotifier(app, user_id, project_id, node_id) + + # server publishes a message + match input_status: + case InputStatus.DOWNLOAD_STARTED: + await port_notifier.send_input_port_download_started(port_key) + case InputStatus.DOWNLOAD_WAS_ABORTED: + await port_notifier.send_input_port_download_was_aborted(port_key) + case InputStatus.DOWNLOAD_FINISHED_SUCCESSFULLY: + await port_notifier.send_input_port_download_finished_succesfully( + port_key + ) + case InputStatus.DOWNLOAD_FINISHED_WITH_ERRROR: + await port_notifier.send_input_port_download_finished_with_error( + port_key + ) + + # check that all clients received it + for on_input_port_event in on_input_port_events: + await _assert_call_count(on_input_port_event, call_count=1) + on_input_port_event.assert_awaited_once_with( + jsonable_encoder( + InputPortSatus( + project_id=project_id, + node_id=node_id, + port_key=port_key, + status=input_status, + ) + ) + ) + + await _assert_call_count(server_disconnect, call_count=_NUMBER_OF_CLIENTS) + + +def _get_on_output_port_spy( + socketio_client: socketio.AsyncClient, +) -> AsyncMock: + # emulates front-end receiving message + + async def on_service_status(data): + assert parse_obj_as(ServiceDiskUsage, data) is not None + + on_event_spy = AsyncMock(wraps=on_service_status) + socketio_client.on(SOCKET_IO_STATE_OUTPUT_PORTS_EVENT, on_event_spy) + + return on_event_spy + + +@pytest.mark.parametrize("output_status", OutputStatus) +async def test_notifier_send_output_port_status( + socketio_server_events: dict[str, AsyncMock], + app: FastAPI, + user_id: UserID, + project_id: ProjectID, + node_id: NodeID, + port_key: ServicePortKey, + socketio_client_factory: Callable[ + [], _AsyncGeneratorContextManager[socketio.AsyncClient] + ], + output_status: OutputStatus, +): + # web server spy events + server_connect = socketio_server_events["connect"] + server_disconnect = socketio_server_events["disconnect"] + server_on_check = socketio_server_events["on_check"] + + async with AsyncExitStack() as socketio_frontend_clients: + frontend_clients: list[socketio.AsyncClient] = await logged_gather( + *[ + socketio_frontend_clients.enter_async_context(socketio_client_factory()) + for _ in range(_NUMBER_OF_CLIENTS) + ] + ) + await _assert_call_count(server_connect, call_count=_NUMBER_OF_CLIENTS) + + # client emits and check it was received + await logged_gather( + *[ + frontend_client.emit("check", data="an_event") + for frontend_client in frontend_clients + ] + ) + await _assert_call_count(server_on_check, call_count=_NUMBER_OF_CLIENTS) + + # attach spy to client + 
on_output_port_events: list[AsyncMock] = [ + _get_on_output_port_spy(c) for c in frontend_clients + ] + + port_notifier = PortNotifier(app, user_id, project_id, node_id) + + # server publishes a message + match output_status: + case OutputStatus.UPLOAD_STARTED: + await port_notifier.send_output_port_upload_sarted(port_key) + case OutputStatus.UPLOAD_WAS_ABORTED: + await port_notifier.send_output_port_upload_was_aborted(port_key) + case OutputStatus.UPLOAD_FINISHED_SUCCESSFULLY: + await port_notifier.send_output_port_upload_finished_successfully( + port_key + ) + case OutputStatus.UPLOAD_FINISHED_WITH_ERRROR: + await port_notifier.send_output_port_upload_finished_with_error( + port_key + ) + + # check that all clients received it + for on_output_port_event in on_output_port_events: + await _assert_call_count(on_output_port_event, call_count=1) + on_output_port_event.assert_awaited_once_with( + jsonable_encoder( + OutputPortStatus( + project_id=project_id, + node_id=node_id, + port_key=port_key, + status=output_status, + ) + ) + ) + + await _assert_call_count(server_disconnect, call_count=_NUMBER_OF_CLIENTS) diff --git a/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_filter.py b/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_filter.py index 024d966e424b..38b217bab8f5 100644 --- a/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_filter.py +++ b/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_filter.py @@ -9,6 +9,9 @@ import pytest from pydantic import ByteSize, NonNegativeFloat, NonNegativeInt, parse_obj_as from pytest_mock.plugin import MockerFixture +from simcore_service_dynamic_sidecar.modules.notifications._notifications_ports import ( + PortNotifier, +) from simcore_service_dynamic_sidecar.modules.outputs._context import OutputsContext from simcore_service_dynamic_sidecar.modules.outputs._event_filter import ( BaseDelayPolicy, @@ -56,10 +59,13 @@ async def outputs_context(outputs_path: Path, port_keys: list[str]) -> OutputsCo @pytest.fixture async def outputs_manager( - outputs_context: OutputsContext, + outputs_context: OutputsContext, port_notifier: PortNotifier ) -> AsyncIterator[OutputsManager]: outputs_manager = OutputsManager( - outputs_context=outputs_context, io_log_redirect_cb=None, progress_cb=None + outputs_context=outputs_context, + port_notifier=port_notifier, + io_log_redirect_cb=None, + progress_cb=None, ) await outputs_manager.start() yield outputs_manager diff --git a/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_handler.py b/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_handler.py index 5f02a500a4da..35ccc7d72df7 100644 --- a/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_handler.py +++ b/services/dynamic-sidecar/tests/unit/test_modules_outputs_event_handler.py @@ -10,6 +10,9 @@ import pytest from aioprocessing.queues import AioQueue from pydantic import PositiveFloat +from simcore_service_dynamic_sidecar.modules.notifications._notifications_ports import ( + PortNotifier, +) from simcore_service_dynamic_sidecar.modules.outputs._context import OutputsContext from simcore_service_dynamic_sidecar.modules.outputs._event_handler import ( EventHandlerObserver, @@ -39,10 +42,13 @@ async def outputs_context( @pytest.fixture async def outputs_manager( - outputs_context: OutputsContext, + outputs_context: OutputsContext, port_notifier: PortNotifier ) -> AsyncIterable[OutputsManager]: outputs_manager = OutputsManager( - outputs_context, io_log_redirect_cb=None, 
progress_cb=None + outputs_context, + port_notifier=port_notifier, + io_log_redirect_cb=None, + progress_cb=None, ) await outputs_manager.start() diff --git a/services/dynamic-sidecar/tests/unit/test_modules_outputs_manager.py b/services/dynamic-sidecar/tests/unit/test_modules_outputs_manager.py index 40a3db6d3f94..3bf17d09f925 100644 --- a/services/dynamic-sidecar/tests/unit/test_modules_outputs_manager.py +++ b/services/dynamic-sidecar/tests/unit/test_modules_outputs_manager.py @@ -22,6 +22,9 @@ from simcore_sdk.node_ports_common.file_io_utils import LogRedirectCB from simcore_service_dynamic_sidecar.core.settings import ApplicationSettings from simcore_service_dynamic_sidecar.modules.mounted_fs import MountedVolumes +from simcore_service_dynamic_sidecar.modules.notifications._notifications_ports import ( + PortNotifier, +) from simcore_service_dynamic_sidecar.modules.outputs._context import ( OutputsContext, setup_outputs_context, @@ -165,10 +168,11 @@ async def outputs_context( @pytest.fixture async def outputs_manager( - outputs_context: OutputsContext, + outputs_context: OutputsContext, port_notifier: PortNotifier ) -> AsyncIterator[OutputsManager]: outputs_manager = OutputsManager( outputs_context=outputs_context, + port_notifier=port_notifier, io_log_redirect_cb=None, task_monitor_interval_s=0.01, progress_cb=None, diff --git a/services/dynamic-sidecar/tests/unit/test_modules_outputs_watcher.py b/services/dynamic-sidecar/tests/unit/test_modules_outputs_watcher.py index f209e4877a75..7f9b81587c25 100644 --- a/services/dynamic-sidecar/tests/unit/test_modules_outputs_watcher.py +++ b/services/dynamic-sidecar/tests/unit/test_modules_outputs_watcher.py @@ -26,6 +26,9 @@ ) from pytest_mock import MockerFixture from simcore_service_dynamic_sidecar.modules.mounted_fs import MountedVolumes +from simcore_service_dynamic_sidecar.modules.notifications._notifications_ports import ( + PortNotifier, +) from simcore_service_dynamic_sidecar.modules.outputs import ( _watcher as outputs_watcher_core, ) @@ -90,10 +93,11 @@ async def outputs_context( @pytest.fixture async def outputs_manager( - outputs_context: OutputsContext, + outputs_context: OutputsContext, port_notifier: PortNotifier ) -> AsyncIterable[OutputsManager]: outputs_manager = OutputsManager( outputs_context=outputs_context, + port_notifier=port_notifier, io_log_redirect_cb=None, task_monitor_interval_s=TICK_INTERVAL, progress_cb=None, diff --git a/services/dynamic-sidecar/tests/unit/test_modules_system_monitor__notifier.py b/services/dynamic-sidecar/tests/unit/test_modules_system_monitor__notifier.py deleted file mode 100644 index 73184a1b3cba..000000000000 --- a/services/dynamic-sidecar/tests/unit/test_modules_system_monitor__notifier.py +++ /dev/null @@ -1,204 +0,0 @@ -# pylint:disable=unused-argument -# pylint:disable=redefined-outer-name - -from collections.abc import AsyncIterable, Callable -from contextlib import AsyncExitStack, _AsyncGeneratorContextManager -from pathlib import Path -from unittest.mock import AsyncMock - -import pytest -import socketio -from asgi_lifespan import LifespanManager -from fastapi import FastAPI -from fastapi.encoders import jsonable_encoder -from models_library.api_schemas_dynamic_sidecar.socketio import ( - SOCKET_IO_SERVICE_DISK_USAGE_EVENT, -) -from models_library.api_schemas_dynamic_sidecar.telemetry import ( - DiskUsage, - ServiceDiskUsage, -) -from models_library.api_schemas_webserver.socketio import SocketIORoomStr -from models_library.projects_nodes_io import NodeID -from models_library.users 
import UserID -from pydantic import ByteSize, NonNegativeInt, parse_obj_as -from pytest_mock import MockerFixture -from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict -from servicelib.utils import logged_gather -from settings_library.rabbit import RabbitSettings -from simcore_service_dynamic_sidecar.core.application import create_app -from simcore_service_dynamic_sidecar.core.settings import ApplicationSettings -from simcore_service_dynamic_sidecar.modules.system_monitor._disk_usage import ( - DiskUsageMonitor, -) -from simcore_service_dynamic_sidecar.modules.system_monitor._notifier import ( - publish_disk_usage, -) -from socketio import AsyncServer -from tenacity import AsyncRetrying -from tenacity.stop import stop_after_delay -from tenacity.wait import wait_fixed - -pytest_simcore_core_services_selection = [ - "rabbit", -] - - -@pytest.fixture -def mock_environment( - monkeypatch: pytest.MonkeyPatch, - rabbit_service: RabbitSettings, - mock_environment: EnvVarsDict, -) -> EnvVarsDict: - return setenvs_from_dict( - monkeypatch, - { - "DY_SIDECAR_SYSTEM_MONITOR_TELEMETRY_ENABLE": "true", - "RABBIT_HOST": rabbit_service.RABBIT_HOST, - "RABBIT_PASSWORD": rabbit_service.RABBIT_PASSWORD.get_secret_value(), - "RABBIT_PORT": f"{rabbit_service.RABBIT_PORT}", - "RABBIT_SECURE": f"{rabbit_service.RABBIT_SECURE}", - "RABBIT_USER": rabbit_service.RABBIT_USER, - }, - ) - - -@pytest.fixture -async def app( - mock_environment: EnvVarsDict, - mock_registry_service: AsyncMock, - mock_storage_check: None, - mock_postgres_check: None, - mocker: MockerFixture, -) -> AsyncIterable[FastAPI]: - mocker.patch( - "simcore_service_dynamic_sidecar.modules.system_monitor._disk_usage._get_monitored_paths", - return_value=[], - ) - - app: FastAPI = create_app() - async with LifespanManager(app): - yield app - - -@pytest.fixture -async def disk_usage_monitor(app: FastAPI) -> DiskUsageMonitor: - return app.state.disk_usage_monitor - - -@pytest.fixture -async def socketio_server( - app: FastAPI, - socketio_server_factory: Callable[ - [RabbitSettings], _AsyncGeneratorContextManager[AsyncServer] - ], -) -> AsyncIterable[AsyncServer]: - # Same configuration as simcore_service_webserver/socketio/server.py - settings: ApplicationSettings = app.state.settings - assert settings.RABBIT_SETTINGS - - async with socketio_server_factory(settings.RABBIT_SETTINGS) as server: - yield server - - -@pytest.fixture -def room_name(user_id: UserID) -> SocketIORoomStr: - return SocketIORoomStr.from_user_id(user_id) - - -def _get_on_service_disk_usage_event( - socketio_client: socketio.AsyncClient, -) -> AsyncMock: - # emulates front-end receiving message - - async def on_service_status(data): - assert parse_obj_as(ServiceDiskUsage, data) is not None - - on_event_spy = AsyncMock(wraps=on_service_status) - socketio_client.on(SOCKET_IO_SERVICE_DISK_USAGE_EVENT, on_event_spy) - - return on_event_spy - - -async def _assert_call_count(mock: AsyncMock, *, call_count: int) -> None: - async for attempt in AsyncRetrying( - wait=wait_fixed(0.1), stop=stop_after_delay(5), reraise=True - ): - with attempt: - assert mock.call_count == call_count - - -def _get_mocked_disk_usage(byte_size_str: str) -> DiskUsage: - return DiskUsage( - total=ByteSize(0), - used=ByteSize(0), - free=ByteSize.validate(byte_size_str), - used_percent=0, - ) - - -@pytest.mark.parametrize( - "usage", - [ - pytest.param({}, id="empty"), - pytest.param({Path("/"): _get_mocked_disk_usage("1kb")}, id="one_entry"), - pytest.param( - { - Path("/"): 
_get_mocked_disk_usage("1kb"), - Path("/tmp"): _get_mocked_disk_usage("2kb"), # noqa: S108 - }, - id="two_entries", - ), - ], -) -async def test_notifier_publish_message( - disk_usage_monitor: DiskUsageMonitor, - socketio_server_events: dict[str, AsyncMock], - app: FastAPI, - user_id: UserID, - usage: dict[Path, DiskUsage], - node_id: NodeID, - socketio_client_factory: Callable[ - [], _AsyncGeneratorContextManager[socketio.AsyncClient] - ], -): - # web server spy events - server_connect = socketio_server_events["connect"] - server_disconnect = socketio_server_events["disconnect"] - server_on_check = socketio_server_events["on_check"] - - number_of_clients: NonNegativeInt = 10 - async with AsyncExitStack() as socketio_frontend_clients: - frontend_clients: list[socketio.AsyncClient] = await logged_gather( - *[ - socketio_frontend_clients.enter_async_context(socketio_client_factory()) - for _ in range(number_of_clients) - ] - ) - await _assert_call_count(server_connect, call_count=number_of_clients) - - # client emits and check it was received - await logged_gather( - *[ - frontend_client.emit("check", data="an_event") - for frontend_client in frontend_clients - ] - ) - await _assert_call_count(server_on_check, call_count=number_of_clients) - - # attach spy to client - on_service_disk_usage_events: list[AsyncMock] = [ - _get_on_service_disk_usage_event(c) for c in frontend_clients - ] - - # server publishes a message - await publish_disk_usage(app, user_id=user_id, node_id=node_id, usage=usage) - - # check that all clients received it - for on_service_disk_usage_event in on_service_disk_usage_events: - await _assert_call_count(on_service_disk_usage_event, call_count=1) - on_service_disk_usage_event.assert_awaited_once_with( - jsonable_encoder(ServiceDiskUsage(node_id=node_id, usage=usage)) - ) - - await _assert_call_count(server_disconnect, call_count=number_of_clients) diff --git a/services/efs-guardian/Dockerfile b/services/efs-guardian/Dockerfile index 8d75f1ac0566..5d470ee4af40 100644 --- a/services/efs-guardian/Dockerfile +++ b/services/efs-guardian/Dockerfile @@ -168,14 +168,12 @@ ENV SC_BUILD_TARGET=production \ ENV PYTHONOPTIMIZE=TRUE WORKDIR /home/efs -# ensure home folder is read/writable for user efs -RUN chown -R efs /home/efs # Starting from clean base image, copies pre-installed virtualenv from prod-only-deps -COPY --chown=efs:efs --from=prod-only-deps ${VIRTUAL_ENV} ${VIRTUAL_ENV} +COPY --from=prod-only-deps ${VIRTUAL_ENV} ${VIRTUAL_ENV} # Copies booting scripts -COPY --chown=efs:efs services/efs-guardian/docker services/efs-guardian/docker +COPY services/efs-guardian/docker services/efs-guardian/docker RUN chmod +x services/efs-guardian/docker/*.sh @@ -205,7 +203,7 @@ ENV SC_BUILD_TARGET=development \ WORKDIR /devel -RUN chown -R efs:efs "${VIRTUAL_ENV}" +RUN chown -R root:root "${VIRTUAL_ENV}" ENTRYPOINT ["/bin/sh", "services/efs-guardian/docker/entrypoint.sh"] CMD ["/bin/sh", "services/efs-guardian/docker/boot.sh"] diff --git a/services/efs-guardian/docker/entrypoint.sh b/services/efs-guardian/docker/entrypoint.sh index 5e58e8e87c86..d8ddf1c826ad 100755 --- a/services/efs-guardian/docker/entrypoint.sh +++ b/services/efs-guardian/docker/entrypoint.sh @@ -88,7 +88,7 @@ if stat $DOCKER_MOUNT >/dev/null 2>&1; then fi echo "$INFO Starting $* ..." 
-echo " $EFS_USER_NAME rights : $(id "$EFS_USER_NAME")" +echo " $(whoami) rights : $(id $whoami))" echo " local dir : $(ls -al)" -exec gosu "$EFS_USER_NAME" "$@" +exec "$@" diff --git a/services/efs-guardian/requirements/_base.txt b/services/efs-guardian/requirements/_base.txt index 551a43f5c07a..63a02e9f4f81 100644 --- a/services/efs-guardian/requirements/_base.txt +++ b/services/efs-guardian/requirements/_base.txt @@ -7,7 +7,10 @@ aioboto3==13.1.0 aiobotocore==2.13.1 # via aioboto3 aiocache==0.12.2 - # via -r requirements/../../../packages/aws-library/requirements/_base.in + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in aiodebug==2.3.0 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in @@ -60,6 +63,8 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi attrs==23.2.0 # via # aiohttp @@ -90,10 +95,19 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer # uvicorn +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.1.1 @@ -113,6 +127,12 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -139,7 +159,10 @@ idna==3.7 # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jmespath==1.0.1 # via # boto3 @@ -160,6 +183,65 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # 
opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.3 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -188,6 +270,14 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in pydantic==2.9.2 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -285,6 +375,12 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -298,6 +394,8 @@ rpds-py==0.18.1 # referencing s3transfer==0.10.1 # via boto3 +setuptools==74.0.0 + # via opentelemetry-instrumentation sh==2.0.6 # via -r requirements/../../../packages/aws-library/requirements/_base.in shellingham==1.5.4 @@ -360,6 +458,7 @@ typing-extensions==4.11.0 # aiodocker # fastapi # faststream + # opentelemetry-sdk # pydantic # pydantic-core # typer @@ -382,12 +481,18 @@ urllib3==2.2.1 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # botocore + # requests uvicorn==0.30.0 # via -r 
requirements/../../../packages/service-library/requirements/_fastapi.in wrapt==1.16.0 - # via aiobotocore + # via + # aiobotocore + # deprecated + # opentelemetry-instrumentation yarl==1.9.4 # via # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git a/services/efs-guardian/requirements/_test.txt b/services/efs-guardian/requirements/_test.txt index e2086e769ae3..17c2eb221936 100644 --- a/services/efs-guardian/requirements/_test.txt +++ b/services/efs-guardian/requirements/_test.txt @@ -54,12 +54,14 @@ certifi==2024.2.2 # httpcore # httpx # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography cfn-lint==1.10.3 # via moto charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests click==8.1.7 # via # -c requirements/_base.txt @@ -68,35 +70,35 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # joserfc # moto debugpy==1.8.5 # via -r requirements/_test.in -deepdiff==7.0.1 +deepdiff==8.0.1 # via -r requirements/_test.in docker==7.1.0 # via # -r requirements/_test.in # moto -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in -fakeredis==2.23.5 +fakeredis==2.24.1 # via -r requirements/_test.in flask==3.0.3 # via # flask-cors # moto -flask-cors==4.0.1 +flask-cors==5.0.0 # via moto frozenlist==1.4.1 # via # -c requirements/_base.txt # aiohttp # aiosignal -graphql-core==3.2.3 +graphql-core==3.2.4 # via moto h11==0.14.0 # via @@ -135,7 +137,7 @@ jmespath==1.0.1 # botocore joserfc==1.0.0 # via moto -jsondiff==2.2.0 +jsondiff==2.2.1 # via moto jsonpatch==1.33 # via cfn-lint @@ -164,7 +166,7 @@ markupsafe==2.1.5 # via # jinja2 # werkzeug -moto==5.0.13 +moto==5.0.15 # via -r requirements/_test.in mpmath==1.3.0 # via sympy @@ -179,7 +181,7 @@ openapi-schema-validator==0.6.2 # via openapi-spec-validator openapi-spec-validator==0.7.1 # via moto -ordered-set==4.1.0 +orderly-set==5.2.2 # via deepdiff packaging==24.0 # via @@ -194,8 +196,10 @@ pluggy==1.5.0 ply==3.11 # via jsonpath-ng psutil==6.0.0 - # via -r requirements/_test.in -py-partiql-parser==0.5.5 + # via + # -c requirements/_base.txt + # -r requirements/_test.in +py-partiql-parser==0.5.6 # via moto pycparser==2.22 # via cffi @@ -208,9 +212,9 @@ pydantic-core==2.23.4 # via # -c requirements/_base.txt # pydantic -pyparsing==3.1.2 +pyparsing==3.1.4 # via moto -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -256,10 +260,11 @@ referencing==0.29.3 # jsonschema # jsonschema-path # jsonschema-specifications -regex==2024.7.24 +regex==2024.9.11 # via cfn-lint requests==2.32.3 # via + # -c requirements/_base.txt # docker # jsonschema-path # moto @@ -279,8 +284,10 @@ s3transfer==0.10.1 # via # -c requirements/_base.txt # boto3 -setuptools==73.0.1 - # via moto +setuptools==74.0.0 + # via + # -c requirements/_base.txt + # moto six==1.16.0 # via # -c requirements/_base.txt @@ -294,7 +301,7 @@ sniffio==1.3.1 # httpx sortedcontainers==2.4.0 # via fakeredis -sympy==1.13.2 +sympy==1.13.3 # via cfn-lint typing-extensions==4.11.0 # via @@ -312,7 +319,7 @@ urllib3==2.2.1 # docker # requests # responses -werkzeug==3.0.3 +werkzeug==3.0.4 # via # flask # moto diff --git a/services/efs-guardian/requirements/_tools.txt b/services/efs-guardian/requirements/_tools.txt index 7a2c3f9d91b9..97a49efc2ebd 100644 --- a/services/efs-guardian/requirements/_tools.txt +++ b/services/efs-guardian/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 
+astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -48,14 +48,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,10 +68,11 @@ pyyaml==6.0.1 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==74.0.0 # via + # -c requirements/_base.txt # -c requirements/_test.txt # pip-tools tomlkit==0.13.2 @@ -81,9 +82,9 @@ typing-extensions==4.11.0 # -c requirements/_base.txt # -c requirements/_test.txt # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/efs-guardian/src/simcore_service_efs_guardian/core/application.py b/services/efs-guardian/src/simcore_service_efs_guardian/core/application.py index e93b071761c0..ae439b01898d 100644 --- a/services/efs-guardian/src/simcore_service_efs_guardian/core/application.py +++ b/services/efs-guardian/src/simcore_service_efs_guardian/core/application.py @@ -1,6 +1,7 @@ import logging from fastapi import FastAPI +from servicelib.fastapi.tracing import setup_tracing from .._meta import ( API_VERSION, @@ -34,6 +35,8 @@ def create_app(settings: ApplicationSettings) -> FastAPI: # STATE app.state.settings = settings assert app.state.settings.API_VERSION == API_VERSION # nosec + if app.state.settings.EFS_GUARDIAN_TRACING: + setup_tracing(app, app.state.settings.EFS_GUARDIAN_TRACING, APP_NAME) # PLUGINS SETUP setup_rabbitmq(app) diff --git a/services/efs-guardian/src/simcore_service_efs_guardian/core/settings.py b/services/efs-guardian/src/simcore_service_efs_guardian/core/settings.py index 2b62ca16931e..0e9b766bbe8c 100644 --- a/services/efs-guardian/src/simcore_service_efs_guardian/core/settings.py +++ b/services/efs-guardian/src/simcore_service_efs_guardian/core/settings.py @@ -12,6 +12,7 @@ from settings_library.base import BaseCustomSettings from settings_library.efs import AwsEfsSettings from settings_library.rabbit import RabbitSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from .._meta import API_VERSION, API_VTAG, APP_NAME @@ -76,12 +77,15 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): description="Enables local development log format. 
WARNING: make sure it is disabled if you want to have structured logs!", ) + + EFS_GUARDIAN_AWS_EFS_SETTINGS: AwsEfsSettings = Field(json_schema_extra={"auto_default_from_env": True}) + EFS_GUARDIAN_RABBITMQ: RabbitSettings = Field(json_schema_extra={"auto_default_from_env": True}) + EFS_GUARDIAN_TRACING: TracingSettings | None = Field( + json_schema_extra={"auto_default_from_env": True}, description="settings for opentelemetry tracing" + ) - EFS_GUARDIAN_AWS_EFS_SETTINGS: AwsEfsSettings = Field( - json_schema_extra={"auto_default_from_env": True} - ) - EFS_GUARDIAN_RABBITMQ: RabbitSettings = Field( - json_schema_extra={"auto_default_from_env": True} - ) @cached_property def LOG_LEVEL(self) -> LogLevel: # noqa: N802 diff --git a/services/invitations/requirements/_base.txt b/services/invitations/requirements/_base.txt index 413ac0b5d357..dc7fd7288e10 100644 --- a/services/invitations/requirements/_base.txt +++ b/services/invitations/requirements/_base.txt @@ -1,5 +1,7 @@ aio-pika==9.4.1 # via -r requirements/../../../packages/service-library/requirements/_base.in +aiocache==0.12.2 + # via -r requirements/../../../packages/service-library/requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.21.0 @@ -33,6 +35,8 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi attrs==23.2.0 # via # aiohttp @@ -48,8 +52,11 @@ certifi==2024.2.2 # -c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests cffi==1.16.0 # via cryptography +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer @@ -63,6 +70,12 @@ cryptography==42.0.5 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/_base.in +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.1.1 @@ -80,6 +93,12 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -102,7 +121,10 @@ idna==3.6 # via # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jsonschema==4.21.1 # via # -r requirements/../../../packages/models-library/requirements/_base.in @@ -117,6 +139,59 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # 
opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.0 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -137,6 +212,12 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in pycparser==2.21 # via cffi pydantic==2.9.2 @@ -202,6 +283,10 @@ referencing==0.29.3 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 # via # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in @@ -211,6 +296,8 @@ rpds-py==0.18.0 # via # jsonschema # referencing +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer six==1.16.0 @@ -248,9 +335,19 @@ typing-extensions==4.10.0 # aiodocker # fastapi # faststream + # opentelemetry-sdk # pydantic # pydantic-core # typer +urllib3==2.2.2 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests uvicorn==0.29.0 
# via # -r requirements/../../../packages/service-library/requirements/_fastapi.in @@ -261,8 +358,14 @@ watchfiles==0.21.0 # via uvicorn websockets==12.0 # via uvicorn +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation yarl==1.9.4 # via # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git a/services/invitations/requirements/_test.txt b/services/invitations/requirements/_test.txt index 7b029ccb0de5..6a73c31809dd 100644 --- a/services/invitations/requirements/_test.txt +++ b/services/invitations/requirements/_test.txt @@ -16,7 +16,7 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in h11==0.14.0 # via @@ -31,7 +31,7 @@ httpx==0.27.0 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # -r requirements/_test.in -hypothesis==6.111.1 +hypothesis==6.112.1 # via -r requirements/_test.in idna==3.6 # via @@ -47,7 +47,7 @@ packaging==24.0 # pytest-sugar pluggy==1.5.0 # via pytest -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio diff --git a/services/invitations/requirements/_tools.txt b/services/invitations/requirements/_tools.txt index f3932792cef2..d6bba29eee2d 100644 --- a/services/invitations/requirements/_tools.txt +++ b/services/invitations/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -47,14 +47,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -66,19 +66,21 @@ pyyaml==6.0.1 # -c requirements/_base.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools +setuptools==74.0.0 + # via + # -c requirements/_base.txt + # pip-tools tomlkit==0.13.2 # via pylint typing-extensions==4.10.0 # via # -c requirements/_base.txt # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/invitations/src/simcore_service_invitations/_meta.py b/services/invitations/src/simcore_service_invitations/_meta.py index d0501d9c92c0..c7b955a4db6d 100644 --- a/services/invitations/src/simcore_service_invitations/_meta.py +++ b/services/invitations/src/simcore_service_invitations/_meta.py @@ -16,6 +16,7 @@ PROJECT_NAME: Final[str] = info.project_name VERSION: Final[Version] = info.version API_VERSION: Final[VersionStr] = info.__version__ +APP_NAME = PROJECT_NAME API_VTAG: Final[VersionTag] = VersionTag(info.api_prefix_path_tag) SUMMARY: Final[str] = info.get_summary() diff --git 
a/services/invitations/src/simcore_service_invitations/core/application.py b/services/invitations/src/simcore_service_invitations/core/application.py index 81d7638b1e30..a08dad3580d2 100644 --- a/services/invitations/src/simcore_service_invitations/core/application.py +++ b/services/invitations/src/simcore_service_invitations/core/application.py @@ -3,11 +3,13 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from .._meta import ( API_VERSION, API_VTAG, APP_FINISHED_BANNER_MSG, + APP_NAME, APP_STARTED_BANNER_MSG, PROJECT_NAME, SUMMARY, @@ -37,6 +39,8 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: if app.state.settings.INVITATIONS_PROMETHEUS_INSTRUMENTATION_ENABLED: setup_prometheus_instrumentation(app) + if app.state.settings.INVITATIONS_TRACING: + setup_tracing(app, app.state.settings.INVITATIONS_TRACING, APP_NAME) # ERROR HANDLERS # ... add here ... diff --git a/services/invitations/src/simcore_service_invitations/core/settings.py b/services/invitations/src/simcore_service_invitations/core/settings.py index 29d286f10204..7658ad86adc5 100644 --- a/services/invitations/src/simcore_service_invitations/core/settings.py +++ b/services/invitations/src/simcore_service_invitations/core/settings.py @@ -11,6 +11,7 @@ ) from settings_library.base import BaseCustomSettings from settings_library.basic_types import BuildTargetEnum, LogLevel, VersionTag +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from .._meta import API_VERSION, API_VTAG, PROJECT_NAME @@ -110,3 +111,6 @@ class ApplicationSettings(MinimalApplicationSettings): min_length=10, ) INVITATIONS_PROMETHEUS_INSTRUMENTATION_ENABLED: bool = True + INVITATIONS_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) diff --git a/services/migration/requirements/_test.txt b/services/migration/requirements/_test.txt index 1e306e696495..0c989c238a4e 100644 --- a/services/migration/requirements/_test.txt +++ b/services/migration/requirements/_test.txt @@ -3,7 +3,7 @@ attrs==24.2.0 # jsonschema # pytest-docker # referencing -certifi==2024.7.4 +certifi==2024.8.30 # via # -c requirements/../../../requirements/constraints.txt # requests @@ -13,11 +13,9 @@ coverage==7.6.1 # via pytest-cov docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.2 - # via pytest -greenlet==3.0.3 +greenlet==3.1.1 # via sqlalchemy -idna==3.7 +idna==3.10 # via requests iniconfig==2.0.0 # via pytest @@ -25,7 +23,7 @@ jsonschema==4.23.0 # via -r requirements/_test.in jsonschema-specifications==2023.12.1 # via jsonschema -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -33,7 +31,7 @@ packaging==24.1 # via pytest pluggy==1.5.0 # via pytest -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -68,7 +66,7 @@ rpds-py==0.20.0 # via # jsonschema # referencing -sqlalchemy==1.4.53 +sqlalchemy==1.4.54 # via # -c requirements/../../../requirements/constraints.txt # -r requirements/_test.in @@ -76,16 +74,11 @@ sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy tenacity==9.0.0 # via -r requirements/_test.in -tomli==2.0.1 - # via - # coverage - # mypy - # pytest typing-extensions==4.12.2 # via # mypy # sqlalchemy2-stubs -urllib3==2.2.2 +urllib3==2.2.3 # via # -c requirements/../../../requirements/constraints.txt # docker diff --git 
a/services/migration/requirements/_tools.txt b/services/migration/requirements/_tools.txt index 717169d7f46c..e775221e68b0 100644 --- a/services/migration/requirements/_tools.txt +++ b/services/migration/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -16,9 +16,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -26,7 +26,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -48,14 +48,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -67,29 +67,19 @@ pyyaml==6.0.2 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==75.1.0 # via pip-tools -tomli==2.0.1 - # via - # -c requirements/_test.txt - # black - # build - # mypy - # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.12.2 # via # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/opentelemetry-collector-config.yaml b/services/opentelemetry-collector-config.yaml new file mode 100644 index 000000000000..d3a1d09f605e --- /dev/null +++ b/services/opentelemetry-collector-config.yaml @@ -0,0 +1,26 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 # Default endpoint for OTLP over gRPC + http: + endpoint: 0.0.0.0:4318 # Default endpoint for OTLP over HTTP +exporters: + otlphttp: + endpoint: ${TRACING_OPENTELEMETRY_COLLECTOR_EXPORTER_ENDPOINT} # Adjust to your Jaeger endpoint + debug: + verbosity: detailed +service: + pipelines: + traces: + receivers: [otlp] + exporters: [otlphttp,debug] + telemetry: + logs: + level: "debug" +processors: + batch: + timeout: 5s + send_batch_size: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE} + probabilistic_sampler: + sampling_percentage: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE} diff --git a/services/osparc-gateway-server/requirements/_base.in b/services/osparc-gateway-server/requirements/_base.in index 156e38d9de7b..e41303cf13a2 100644 --- a/services/osparc-gateway-server/requirements/_base.in +++ b/services/osparc-gateway-server/requirements/_base.in @@ -5,5 +5,6 @@ --constraint constraints.txt aiodocker +async-timeout dask-gateway-server[local] pydantic[email,dotenv] diff --git a/services/osparc-gateway-server/requirements/_base.txt b/services/osparc-gateway-server/requirements/_base.txt index 120db872e493..8a734704a813 100644 --- a/services/osparc-gateway-server/requirements/_base.txt +++ b/services/osparc-gateway-server/requirements/_base.txt @@ -8,7 +8,7 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp async-timeout==4.0.3 - # via aiohttp + # via -r requirements/_base.in 
attrs==23.2.0 # via aiohttp cffi==1.16.0 diff --git a/services/osparc-gateway-server/requirements/_test.txt b/services/osparc-gateway-server/requirements/_test.txt index e02757bd6aa4..908dca5582ba 100644 --- a/services/osparc-gateway-server/requirements/_test.txt +++ b/services/osparc-gateway-server/requirements/_test.txt @@ -7,15 +7,11 @@ aiosignal==1.3.1 # via # -c requirements/_base.txt # aiohttp -async-timeout==4.0.3 - # via - # -c requirements/_base.txt - # aiohttp attrs==23.2.0 # via # -c requirements/_base.txt # aiohttp -certifi==2024.7.4 +certifi==2024.8.30 # via # -c requirements/../../../requirements/constraints.txt # requests @@ -51,9 +47,7 @@ distributed==2024.5.1 # dask-gateway docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.2 - # via pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in frozenlist==1.4.1 # via @@ -104,7 +98,7 @@ multidict==6.0.5 # -c requirements/_base.txt # aiohttp # yarl -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -123,11 +117,11 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -psutil==5.9.8 +psutil==6.0.0 # via # -c requirements/../../dask-sidecar/requirements/_dask-distributed.txt # distributed -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -182,11 +176,6 @@ tenacity==9.0.0 # via -r requirements/_test.in termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # mypy - # pytest toolz==0.12.1 # via # -c requirements/../../dask-sidecar/requirements/_dask-distributed.txt diff --git a/services/osparc-gateway-server/requirements/_tools.txt b/services/osparc-gateway-server/requirements/_tools.txt index 7a36f37bae9b..985945c7b0ef 100644 --- a/services/osparc-gateway-server/requirements/_tools.txt +++ b/services/osparc-gateway-server/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -49,14 +49,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,30 +68,20 @@ pyyaml==6.0.1 # -c requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==75.1.0 # via pip-tools -tomli==2.0.1 - # via - # -c requirements/_test.txt - # black - # build - # mypy - # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.12.2 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git 
a/services/osparc-gateway-server/tests/system/requirements/_test.txt b/services/osparc-gateway-server/tests/system/requirements/_test.txt index 3a4a800a5ec0..410339df3c69 100644 --- a/services/osparc-gateway-server/tests/system/requirements/_test.txt +++ b/services/osparc-gateway-server/tests/system/requirements/_test.txt @@ -1,4 +1,4 @@ -aiodocker==0.22.2 +aiodocker==0.23.0 # via -r requirements/_test.in aiohappyeyeballs==2.4.0 # via aiohttp @@ -9,11 +9,9 @@ aiohttp==3.10.5 # dask-gateway aiosignal==1.3.1 # via aiohttp -async-timeout==4.0.3 - # via aiohttp attrs==24.2.0 # via aiohttp -certifi==2024.7.4 +certifi==2024.8.30 # via # -c requirements/../../../../../requirements/constraints.txt # requests @@ -45,9 +43,7 @@ distributed==2024.5.1 # dask-gateway docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.2 - # via pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in frozenlist==1.4.1 # via @@ -59,7 +55,7 @@ fsspec==2024.5.0 # dask icdiff==2.0.7 # via pytest-icdiff -idna==3.7 +idna==3.10 # via # requests # yarl @@ -91,7 +87,7 @@ msgpack==1.0.8 # via # -c requirements/../../../../dask-sidecar/requirements/_dask-distributed.txt # distributed -multidict==6.0.5 +multidict==6.1.0 # via # aiohttp # yarl @@ -114,11 +110,11 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -psutil==5.9.8 +psutil==6.0.0 # via # -c requirements/../../../../dask-sidecar/requirements/_dask-distributed.txt # distributed -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -168,10 +164,6 @@ tenacity==9.0.0 # via -r requirements/_test.in termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # pytest toolz==0.12.1 # via # -c requirements/../../../../dask-sidecar/requirements/_dask-distributed.txt @@ -190,7 +182,7 @@ urllib3==2.2.1 # distributed # docker # requests -yarl==1.9.4 +yarl==1.12.1 # via aiohttp zict==3.0.0 # via diff --git a/services/osparc-gateway-server/tests/system/requirements/_tools.txt b/services/osparc-gateway-server/tests/system/requirements/_tools.txt index 78930b23b724..ce5d53160d5b 100644 --- a/services/osparc-gateway-server/tests/system/requirements/_tools.txt +++ b/services/osparc-gateway-server/tests/system/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via -r requirements/../../../../../requirements/devenv.txt mypy-extensions==1.0.0 # via @@ -46,14 +46,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -64,26 +64,15 @@ pyyaml==6.0.1 # -c requirements/../../../../../requirements/constraints.txt # -c requirements/_test.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../../../requirements/devenv.txt -setuptools==73.0.1 
+setuptools==75.1.0 # via pip-tools -tomli==2.0.1 - # via - # -c requirements/_test.txt - # black - # build - # mypy - # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.12.2 - # via - # astroid - # black - # mypy -virtualenv==20.26.3 + # via mypy +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/services/payments/requirements/_base.txt b/services/payments/requirements/_base.txt index e14e1e9e1711..d4c08c97591c 100644 --- a/services/payments/requirements/_base.txt +++ b/services/payments/requirements/_base.txt @@ -1,17 +1,33 @@ aio-pika==9.4.2 + # via -r requirements/../../../packages/service-library/requirements/_base.in +aiocache==0.12.2 + # via -r requirements/../../../packages/service-library/requirements/_base.in aiodebug==2.3.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.22.2 + # via -r requirements/../../../packages/service-library/requirements/_base.in aiofiles==24.1.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in aiohappyeyeballs==2.3.4 # via aiohttp aiohttp==3.10.0 - # via aiodocker + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # aiodocker aiormq==6.8.0 # via aio-pika aiosignal==1.3.1 # via aiohttp aiosmtplib==3.0.2 + # via -r requirements/_base.in alembic==1.13.2 + # via -r requirements/../../../packages/postgres-database/requirements/_base.in anyio==4.4.0 # via # fast-depends @@ -20,11 +36,14 @@ anyio==4.4.0 # starlette # watchfiles arrow==1.3.0 -async-timeout==4.0.3 # via - # aiohttp - # asyncpg - # redis + # -r requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi +async-timeout==4.0.3 + # via asyncpg asyncpg==0.29.0 # via sqlalchemy attrs==23.2.0 @@ -36,34 +55,74 @@ bidict==0.23.1 # via python-socketio certifi==2024.7.4 # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # 
-c requirements/../../../requirements/constraints.txt # httpcore # httpx + # requests cffi==1.16.0 # via cryptography +charset-normalizer==3.3.2 + # via requests click==8.1.7 # via # typer # uvicorn cryptography==43.0.0 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/_base.in +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator ecdsa==0.19.0 # via python-jose email-validator==2.2.0 # via pydantic -exceptiongroup==1.2.2 - # via anyio fast-depends==2.4.7 # via faststream fastapi==0.99.1 - # via prometheus-fastapi-instrumentator + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/service-library/requirements/_fastapi.in + # -r requirements/_base.in + # prometheus-fastapi-instrumentator faststream==0.5.15 + # via -r requirements/../../../packages/service-library/requirements/_base.in frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http greenlet==3.0.3 # via sqlalchemy +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -74,18 +133,51 @@ httpcore==1.0.5 httptools==0.6.1 # via uvicorn httpx==0.27.0 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c 
requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/service-library/requirements/_fastapi.in + # -r requirements/_base.in idna==3.7 # via # anyio # email-validator # httpx + # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jinja2==3.1.4 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/_base.in jsonschema==4.23.0 + # via + # -r requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in jsonschema-specifications==2023.7.1 # via jsonschema mako==1.3.5 - # via alembic + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # alembic markdown-it-py==3.0.0 # via rich markupsafe==2.1.5 @@ -98,13 +190,86 @@ multidict==6.0.5 # via # aiohttp # yarl +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi 
+opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.6 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in packaging==24.1 + # via -r requirements/_base.in pamqp==3.3.0 # via aiormq prometheus-client==0.20.0 - # via prometheus-fastapi-instrumentator + # via + # -r requirements/../../../packages/service-library/requirements/_fastapi.in + # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.9 # via sqlalchemy pyasn1==0.6.0 @@ -115,11 +280,26 @@ pycparser==2.22 # via cffi pydantic==1.10.17 # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c 
requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/postgres-database/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/settings-library/requirements/_base.in # fast-depends # fastapi pygments==2.18.0 # via rich pyinstrument==4.6.2 + # via -r requirements/../../../packages/service-library/requirements/_base.in python-dateutil==2.9.0.post0 # via arrow python-dotenv==1.0.1 @@ -127,23 +307,62 @@ python-dotenv==1.0.1 python-engineio==4.9.1 # via python-socketio python-jose==3.3.0 + # via -r requirements/_base.in python-multipart==0.0.9 + # via -r requirements/_base.in python-socketio==5.11.3 + # via -r requirements/_base.in pyyaml==6.0.1 - # via uvicorn + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/service-library/requirements/_base.in + # uvicorn redis==5.0.8 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/service-library/requirements/_base.in referencing==0.29.3 # via + # -c requirements/../../../packages/service-library/requirements/./constraints.txt # jsonschema # jsonschema-specifications +repro-zipfile==0.3.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in +requests==2.32.3 + # via opentelemetry-exporter-otlp-proto-http rich==13.7.1 - # via typer + # via + # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/settings-library/requirements/_base.in + # typer rpds-py==0.19.1 # via # jsonschema # referencing rsa==4.9 - # via python-jose + # via + # -c 
requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # python-jose +setuptools==74.0.0 + # via opentelemetry-instrumentation shellingham==1.5.4 # via typer simple-websocket==1.0.0 @@ -157,37 +376,80 @@ sniffio==1.3.1 # anyio # httpx sqlalchemy==1.4.53 - # via alembic + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # -r requirements/../../../packages/postgres-database/requirements/_base.in + # alembic starlette==0.27.0 - # via fastapi + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # fastapi tenacity==9.0.0 + # via -r requirements/../../../packages/service-library/requirements/_base.in toolz==0.12.1 + # via -r requirements/../../../packages/service-library/requirements/_base.in tqdm==4.66.4 + # via -r requirements/../../../packages/service-library/requirements/_base.in typer==0.12.3 - # via faststream + # via + # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/_base.in + # faststream types-python-dateutil==2.9.0.20240316 # via arrow typing-extensions==4.12.2 # via # aiodebug # alembic - # anyio # fastapi # faststream + # opentelemetry-sdk # pydantic # typer - # uvicorn +urllib3==2.2.2 + # via + # -c 
requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # requests uvicorn==0.30.4 + # via + # -r requirements/../../../packages/service-library/requirements/_fastapi.in + # -r requirements/_base.in uvloop==0.19.0 # via uvicorn watchfiles==0.22.0 # via uvicorn websockets==12.0 # via uvicorn +wrapt==1.16.0 + # via + # deprecated + # opentelemetry-instrumentation wsproto==1.2.0 # via simple-websocket yarl==1.9.4 # via + # -r requirements/../../../packages/postgres-database/requirements/_base.in # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git a/services/payments/requirements/_test.txt b/services/payments/requirements/_test.txt index ad73ea53868c..4a42c0d9ccf8 100644 --- a/services/payments/requirements/_test.txt +++ b/services/payments/requirements/_test.txt @@ -17,10 +17,6 @@ anyio==4.4.0 # httpx asgi-lifespan==2.1.0 # via -r requirements/_test.in -async-timeout==4.0.3 - # via - # -c requirements/_base.txt - # aiohttp attrs==23.2.0 # via # -c requirements/_base.txt @@ -37,19 +33,16 @@ certifi==2024.7.4 # httpx # requests charset-normalizer==3.3.2 - # via requests + # via + # -c requirements/_base.txt + # requests coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov docker==7.1.0 # via -r requirements/_test.in -exceptiongroup==1.2.2 - # via - # -c requirements/_base.txt - # anyio - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in frozenlist==1.4.1 # via @@ -92,7 +85,7 @@ multidict==6.0.5 # -c requirements/_base.txt # aiohttp # yarl -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -105,7 +98,7 @@ pluggy==1.5.0 # via pytest pprintpp==0.4.0 # via pytest-icdiff -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -144,7 +137,9 @@ python-socketio==5.11.3 # -c requirements/_base.txt # -r requirements/_test.in requests==2.32.3 - # via docker + # via + # -c requirements/_base.txt + # docker respx==0.21.1 # via -r requirements/_test.in simple-websocket==1.0.0 @@ -170,28 +165,23 @@ sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # mypy - # pytest types-aiofiles==24.1.0.20240626 # via -r requirements/_test.in -types-pyasn1==0.6.0.20240402 +types-pyasn1==0.6.0.20240913 # via types-python-jose types-python-jose==3.3.4.20240106 # via -r requirements/_test.in -types-pyyaml==6.0.12.20240808 +types-pyyaml==6.0.12.20240917 # via -r requirements/_test.in typing-extensions==4.12.2 # via # -c requirements/_base.txt - # anyio # mypy # sqlalchemy2-stubs urllib3==2.2.2 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # docker # requests wsproto==1.2.0 diff --git a/services/payments/requirements/_tools.txt b/services/payments/requirements/_tools.txt index 
cee6887ecd00..5ac982ce79cc 100644 --- a/services/payments/requirements/_tools.txt +++ b/services/payments/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -17,9 +17,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -27,7 +27,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -50,14 +50,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -68,28 +68,20 @@ pyyaml==6.0.1 # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt # pre-commit -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 - # via pip-tools -tomli==2.0.1 +setuptools==74.0.0 # via - # -c requirements/_test.txt - # black - # build - # mypy + # -c requirements/_base.txt # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.12.2 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/services/payments/src/simcore_service_payments/_meta.py b/services/payments/src/simcore_service_payments/_meta.py index 13ba8146553e..f011e70ea6e9 100644 --- a/services/payments/src/simcore_service_payments/_meta.py +++ b/services/payments/src/simcore_service_payments/_meta.py @@ -14,6 +14,7 @@ PROJECT_NAME: Final[str] = info.project_name VERSION: Final[Version] = info.version API_VERSION: Final[VersionStr] = info.__version__ +APP_NAME: Final[str] = PROJECT_NAME API_VTAG: Final[str] = info.api_prefix_path_tag SUMMARY: Final[str] = info.get_summary() diff --git a/services/payments/src/simcore_service_payments/api/rest/_dependencies.py b/services/payments/src/simcore_service_payments/api/rest/_dependencies.py index 913122a7854b..cc0ead808945 100644 --- a/services/payments/src/simcore_service_payments/api/rest/_dependencies.py +++ b/services/payments/src/simcore_service_payments/api/rest/_dependencies.py @@ -1,7 +1,7 @@ # mypy: disable-error-code=truthy-function import logging from collections.abc import AsyncGenerator, Callable -from typing import Annotated, cast +from typing import Annotated from fastapi import Depends, FastAPI, Request from fastapi.security import OAuth2PasswordBearer @@ -40,9 +40,7 @@ def get_settings(request: Request) -> ApplicationSettings: def get_rut_api(request: Request) -> ResourceUsageTrackerApi: - return cast( - ResourceUsageTrackerApi, ResourceUsageTrackerApi.get_from_app_state(request.app) - ) + return ResourceUsageTrackerApi.get_from_app_state(request.app) def get_from_app_state( diff --git a/services/payments/src/simcore_service_payments/api/rpc/_payments_methods.py b/services/payments/src/simcore_service_payments/api/rpc/_payments_methods.py index 360dcf962c07..aac234601e88 100644 --- 
a/services/payments/src/simcore_service_payments/api/rpc/_payments_methods.py +++ b/services/payments/src/simcore_service_payments/api/rpc/_payments_methods.py @@ -1,6 +1,7 @@ import logging from decimal import Decimal +from common_library.pydantic_basic_types import IDStr from fastapi import FastAPI from models_library.api_schemas_payments.errors import ( PaymentsError, @@ -12,7 +13,6 @@ PaymentMethodID, PaymentMethodInitiated, ) -from models_library.basic_types import IDStr from models_library.payments import UserInvoiceAddress from models_library.products import StripePriceID, StripeTaxRateID from models_library.users import UserID diff --git a/services/payments/src/simcore_service_payments/core/application.py b/services/payments/src/simcore_service_payments/core/application.py index 3b43570f06cf..c85c7f91420d 100644 --- a/services/payments/src/simcore_service_payments/core/application.py +++ b/services/payments/src/simcore_service_payments/core/application.py @@ -3,11 +3,13 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from .._meta import ( API_VERSION, API_VTAG, APP_FINISHED_BANNER_MSG, + APP_NAME, APP_STARTED_BANNER_MSG, PROJECT_NAME, SUMMARY, @@ -68,6 +70,8 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: if app.state.settings.PAYMENTS_PROMETHEUS_INSTRUMENTATION_ENABLED: setup_prometheus_instrumentation(app) + if app.state.settings.PAYMENTS_TRACING: + setup_tracing(app, app.state.settings.PAYMENTS_TRACING, APP_NAME) # ERROR HANDLERS # ... add here ... diff --git a/services/payments/src/simcore_service_payments/core/settings.py b/services/payments/src/simcore_service_payments/core/settings.py index 3e8b2d44d76b..20193c81d655 100644 --- a/services/payments/src/simcore_service_payments/core/settings.py +++ b/services/payments/src/simcore_service_payments/core/settings.py @@ -16,6 +16,7 @@ from settings_library.postgres import PostgresSettings from settings_library.rabbit import RabbitSettings from settings_library.resource_usage_tracker import ResourceUsageTrackerSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from .._meta import API_VERSION, API_VTAG, PROJECT_NAME @@ -113,6 +114,10 @@ class ApplicationSettings(_BaseApplicationSettings): auto_default_from_env=True, description="settings for service/rabbitmq" ) + PAYMENTS_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) + PAYMENTS_POSTGRES: PostgresSettings = Field( auto_default_from_env=True, description="settings for postgres service" ) diff --git a/services/payments/src/simcore_service_payments/models/payments_gateway.py b/services/payments/src/simcore_service_payments/models/payments_gateway.py index e0d7481df585..39ba1a4b1d34 100644 --- a/services/payments/src/simcore_service_payments/models/payments_gateway.py +++ b/services/payments/src/simcore_service_payments/models/payments_gateway.py @@ -3,8 +3,9 @@ from pathlib import Path from typing import Literal +from common_library.pydantic_basic_types import IDStr from models_library.api_schemas_webserver.wallets import PaymentID, PaymentMethodID -from models_library.basic_types import AmountDecimal, IDStr +from models_library.basic_types import AmountDecimal from models_library.payments import UserInvoiceAddress from models_library.products import StripePriceID, StripeTaxRateID from pydantic import 
BaseModel, EmailStr, Extra, Field diff --git a/services/payments/src/simcore_service_payments/models/schemas/acknowledgements.py b/services/payments/src/simcore_service_payments/models/schemas/acknowledgements.py index 5b73282cc3c7..cf97be5a0b5c 100644 --- a/services/payments/src/simcore_service_payments/models/schemas/acknowledgements.py +++ b/services/payments/src/simcore_service_payments/models/schemas/acknowledgements.py @@ -1,8 +1,8 @@ # mypy: disable-error-code=truthy-function from typing import Any, ClassVar +from common_library.pydantic_basic_types import IDStr from models_library.api_schemas_webserver.wallets import PaymentID, PaymentMethodID -from models_library.basic_types import IDStr from pydantic import BaseModel, Field, HttpUrl, validator diff --git a/services/payments/src/simcore_service_payments/services/healthchecks.py b/services/payments/src/simcore_service_payments/services/healthchecks.py index 98774700f44b..be6344c00ef7 100644 --- a/services/payments/src/simcore_service_payments/services/healthchecks.py +++ b/services/payments/src/simcore_service_payments/services/healthchecks.py @@ -2,10 +2,10 @@ import logging from models_library.healthchecks import LivenessResult +from servicelib.db_asyncpg_utils import check_postgres_liveness from sqlalchemy.ext.asyncio import AsyncEngine from .payments_gateway import PaymentsGatewayApi -from .postgres import check_postgres_liveness from .resource_usage_tracker import ResourceUsageTrackerApi _logger = logging.getLogger(__name__) diff --git a/services/payments/src/simcore_service_payments/services/payments.py b/services/payments/src/simcore_service_payments/services/payments.py index 6e73bb66089e..3b394de4701b 100644 --- a/services/payments/src/simcore_service_payments/services/payments.py +++ b/services/payments/src/simcore_service_payments/services/payments.py @@ -11,6 +11,7 @@ from typing import cast import arrow +from common_library.pydantic_basic_types import IDStr from models_library.api_schemas_payments.errors import ( PaymentAlreadyAckedError, PaymentAlreadyExistsError, @@ -22,7 +23,7 @@ PaymentTransaction, WalletPaymentInitiated, ) -from models_library.basic_types import AmountDecimal, IDStr +from models_library.basic_types import AmountDecimal from models_library.payments import UserInvoiceAddress from models_library.products import ProductName, StripePriceID, StripeTaxRateID from models_library.users import UserID diff --git a/services/payments/src/simcore_service_payments/services/payments_methods.py b/services/payments/src/simcore_service_payments/services/payments_methods.py index f20189d78592..d65c508133e0 100644 --- a/services/payments/src/simcore_service_payments/services/payments_methods.py +++ b/services/payments/src/simcore_service_payments/services/payments_methods.py @@ -19,12 +19,12 @@ from typing import cast import arrow +from common_library.pydantic_basic_types import IDStr from models_library.api_schemas_webserver.wallets import ( PaymentMethodGet, PaymentMethodID, PaymentMethodInitiated, ) -from models_library.basic_types import IDStr from models_library.users import UserID from models_library.wallets import WalletID from pydantic import EmailStr, HttpUrl diff --git a/services/payments/src/simcore_service_payments/services/postgres.py b/services/payments/src/simcore_service_payments/services/postgres.py index ba68eae0fac4..fd84fba45ce7 100644 --- a/services/payments/src/simcore_service_payments/services/postgres.py +++ b/services/payments/src/simcore_service_payments/services/postgres.py @@ -1,10 +1,5 @@ 
-import time -from datetime import timedelta - from fastapi import FastAPI -from models_library.healthchecks import IsNonResponsive, IsResponsive, LivenessResult -from servicelib.db_async_engine import close_db_connection, connect_to_db -from sqlalchemy.exc import SQLAlchemyError +from servicelib.fastapi.db_asyncpg_engine import close_db_connection, connect_to_db from sqlalchemy.ext.asyncio import AsyncEngine from ..core.settings import ApplicationSettings @@ -16,18 +11,6 @@ def get_engine(app: FastAPI) -> AsyncEngine: return engine -async def check_postgres_liveness(engine: AsyncEngine) -> LivenessResult: - try: - tic = time.time() - # test - async with engine.connect(): - ... - elapsed_time = time.time() - tic - return IsResponsive(elapsed=timedelta(seconds=elapsed_time)) - except SQLAlchemyError as err: - return IsNonResponsive(reason=f"{err}") - - def setup_postgres(app: FastAPI): app.state.engine = None diff --git a/services/payments/tests/unit/api/test__one_time_payment_workflows.py b/services/payments/tests/unit/api/test__one_time_payment_workflows.py index 2052dee31f60..3876e34e9337 100644 --- a/services/payments/tests/unit/api/test__one_time_payment_workflows.py +++ b/services/payments/tests/unit/api/test__one_time_payment_workflows.py @@ -1,15 +1,16 @@ # pylint: disable=redefined-outer-name +# pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable -# pylint: disable=too-many-arguments import httpx import pytest +from common_library.pydantic_basic_types import IDStr from faker import Faker from fastapi import FastAPI, status from models_library.api_schemas_webserver.wallets import WalletPaymentInitiated -from models_library.basic_types import IDStr from models_library.payments import UserInvoiceAddress from models_library.products import StripePriceID, StripeTaxRateID from models_library.rabbitmq_basic_types import RPCMethodName diff --git a/services/payments/tests/unit/api/test__payment_method_workflows.py b/services/payments/tests/unit/api/test__payment_method_workflows.py index 5b92bee8b173..136f4235a307 100644 --- a/services/payments/tests/unit/api/test__payment_method_workflows.py +++ b/services/payments/tests/unit/api/test__payment_method_workflows.py @@ -1,18 +1,19 @@ # pylint: disable=redefined-outer-name +# pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable -# pylint: disable=too-many-arguments import httpx import pytest +from common_library.pydantic_basic_types import IDStr from faker import Faker from fastapi import FastAPI, status from models_library.api_schemas_webserver.wallets import ( PaymentMethodGet, PaymentMethodInitiated, ) -from models_library.basic_types import IDStr from models_library.rabbitmq_basic_types import RPCMethodName from models_library.users import UserID from models_library.wallets import WalletID diff --git a/services/payments/tests/unit/test_db_payments_users_repo.py b/services/payments/tests/unit/test_db_payments_users_repo.py index a695af251c41..51d5f540c6ba 100644 --- a/services/payments/tests/unit/test_db_payments_users_repo.py +++ b/services/payments/tests/unit/test_db_payments_users_repo.py @@ -59,7 +59,7 @@ async def user( injects a user in db """ assert user_id == user["id"] - async with insert_and_get_row_lifespan( + async with insert_and_get_row_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup get_engine(app), 
table=users, values=user, @@ -84,7 +84,7 @@ async def product( """ # NOTE: this fixture ignores products' group-id but it is fine for this test context assert product["group_id"] is None - async with insert_and_get_row_lifespan( + async with insert_and_get_row_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup get_engine(app), table=products, values=product, @@ -101,7 +101,7 @@ async def successful_transaction( """ injects transaction in db """ - async with insert_and_get_row_lifespan( + async with insert_and_get_row_lifespan( # pylint:disable=contextmanager-generator-missing-cleanup get_engine(app), table=payments_transactions, values=successful_transaction, diff --git a/services/payments/tests/unit/test_rpc_payments_methods.py b/services/payments/tests/unit/test_rpc_payments_methods.py index 9ecb10d9976f..3706123b3654 100644 --- a/services/payments/tests/unit/test_rpc_payments_methods.py +++ b/services/payments/tests/unit/test_rpc_payments_methods.py @@ -1,18 +1,19 @@ # pylint: disable=protected-access # pylint: disable=redefined-outer-name # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable import pytest +from common_library.pydantic_basic_types import IDStr from faker import Faker from fastapi import FastAPI from models_library.api_schemas_webserver.wallets import ( PaymentMethodInitiated, PaymentTransaction, ) -from models_library.basic_types import IDStr from models_library.payments import UserInvoiceAddress from models_library.products import ProductName, StripePriceID, StripeTaxRateID from models_library.rabbitmq_basic_types import RPCMethodName diff --git a/services/payments/tests/unit/test_services_payments.py b/services/payments/tests/unit/test_services_payments.py index 94452d9c7726..5caa087ac36d 100644 --- a/services/payments/tests/unit/test_services_payments.py +++ b/services/payments/tests/unit/test_services_payments.py @@ -1,6 +1,7 @@ # pylint: disable=protected-access # pylint: disable=redefined-outer-name # pylint: disable=too-many-arguments +# pylint: disable=too-many-positional-arguments # pylint: disable=unused-argument # pylint: disable=unused-variable @@ -10,9 +11,9 @@ from unittest.mock import MagicMock import pytest +from common_library.pydantic_basic_types import IDStr from fastapi import FastAPI from models_library.api_schemas_webserver.wallets import PaymentMethodID -from models_library.basic_types import IDStr from models_library.payments import UserInvoiceAddress from models_library.users import UserID from models_library.wallets import WalletID diff --git a/services/postgres/Makefile b/services/postgres/Makefile new file mode 100644 index 000000000000..f962ffec66cc --- /dev/null +++ b/services/postgres/Makefile @@ -0,0 +1,14 @@ +include ../../scripts/common.Makefile + + +ifneq (,$(wildcard $(DOT_ENV_FILE))) + include $(DOT_ENV_FILE) + export $(shell sed 's/=.*//' $(DOT_ENV_FILE)) +endif + + + +scripts/%.sql: scripts/%.sql.template + @echo "Generating SQL script from '$<'..." 
+ @envsubst < $< > $@ + @echo "SQL script generated as '$@'" diff --git a/services/postgres/scripts/.gitignore b/services/postgres/scripts/.gitignore new file mode 100644 index 000000000000..9072771094f3 --- /dev/null +++ b/services/postgres/scripts/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore +!*.template.* diff --git a/services/postgres/scripts/create-readonly-user.sql.template b/services/postgres/scripts/create-readonly-user.sql.template new file mode 100644 index 000000000000..28b14f53d4fb --- /dev/null +++ b/services/postgres/scripts/create-readonly-user.sql.template @@ -0,0 +1,22 @@ +-- SQL script to create a read-only user and grant privileges + + +--Create the read-only user with a password +CREATE USER ${POSTGRES_READONLY_USER} WITH PASSWORD '${POSTGRES_READONLY_PASSWORD}'; + +--Grant CONNECT privilege to the database (e.g., 'foo' is the database name) +GRANT CONNECT ON DATABASE ${POSTGRES_DB} TO ${POSTGRES_READONLY_USER}; + +--Grant USAGE privilege on the **public** schema +GRANT USAGE ON SCHEMA public TO ${POSTGRES_READONLY_USER}; + +--Grant SELECT privilege on all existing tables and sequencies in the **public** schema +GRANT SELECT ON ALL TABLES IN SCHEMA public TO ${POSTGRES_READONLY_USER}; +GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO ${POSTGRES_READONLY_USER}; + +--Ensure that future tables created in the public schema and sequencies will have SELECT privilege for the read-only user +ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO ${POSTGRES_READONLY_USER}; +ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON SEQUENCES TO ${POSTGRES_READONLY_USER}; + +-- Listing all users +SELECT * FROM pg_roles; diff --git a/services/postgres/scripts/remove-readonly-user.sql.template b/services/postgres/scripts/remove-readonly-user.sql.template new file mode 100644 index 000000000000..5a1435ed9783 --- /dev/null +++ b/services/postgres/scripts/remove-readonly-user.sql.template @@ -0,0 +1,16 @@ +-- Revoke all privileges the user has on the public schema +REVOKE ALL PRIVILEGES ON SCHEMA public FROM ${POSTGRES_READONLY_USER}; + +-- Revoke all privileges the user has on tables and sequences in the public schema +REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM ${POSTGRES_READONLY_USER}; +REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM ${POSTGRES_READONLY_USER}; + +-- Revoke any future privileges set via ALTER DEFAULT PRIVILEGES +ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE ALL ON TABLES FROM ${POSTGRES_READONLY_USER}; +ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE ALL ON SEQUENCES FROM ${POSTGRES_READONLY_USER}; + +-- Drop the user +DROP USER ${POSTGRES_READONLY_USER}; + +-- Listing all users +SELECT * FROM pg_roles; diff --git a/services/resource-usage-tracker/requirements/_base.txt b/services/resource-usage-tracker/requirements/_base.txt index 64c4ae399ade..d31e353e4c39 100644 --- a/services/resource-usage-tracker/requirements/_base.txt +++ b/services/resource-usage-tracker/requirements/_base.txt @@ -8,7 +8,9 @@ aiobotocore==2.13.1 # via aioboto3 aiocache==0.12.2 # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/aws-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in # -r requirements/_base.in aiodebug==2.3.0 # via @@ -60,14 +62,14 @@ arrow==1.3.0 # -r requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/_base.in # -r 
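Aside on the two `.sql.template` files above: they are turned into runnable SQL by the new `services/postgres/Makefile` pattern rule, which pipes each template through `envsubst` using the variables exported from the `.env` file, so `make scripts/create-readonly-user.sql` should produce the concrete script. As a rough, stand-alone illustration of that substitution step (not part of the patch; paths and variable names are taken from the templates above, everything else is illustrative):

```python
# Rough Python equivalent of the Makefile's `envsubst < $< > $@` step (sketch only;
# the repository uses envsubst itself, not this script).
import os
from string import Template

TEMPLATE = "scripts/create-readonly-user.sql.template"  # file added in this PR
OUTPUT = "scripts/create-readonly-user.sql"

with open(TEMPLATE) as fh:
    sql_template = Template(fh.read())  # string.Template uses the same ${VAR} syntax

# POSTGRES_DB, POSTGRES_READONLY_USER and POSTGRES_READONLY_PASSWORD are assumed to be
# exported into the environment from the .env file (see the Makefile's `export` line);
# substitute() raises KeyError if any placeholder is missing.
generated_sql = sql_template.substitute(os.environ)

with open(OUTPUT, "w") as fh:
    fh.write(generated_sql)
```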
requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/aws-library/requirements/_base.in # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in +asgiref==3.8.1 + # via opentelemetry-instrumentation-asgi async-timeout==4.0.3 - # via - # aiohttp - # asyncpg - # redis + # via asyncpg asyncpg==0.29.0 # via sqlalchemy attrs==23.2.0 @@ -114,12 +116,16 @@ cycler==0.12.1 # via matplotlib dateparser==1.2.0 # via prometheus-api-client +deprecated==1.2.14 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions dnspython==2.6.1 # via email-validator email-validator==2.1.1 # via pydantic -exceptiongroup==1.2.0 - # via anyio fast-depends==2.4.2 # via faststream fastapi==0.99.1 @@ -150,8 +156,14 @@ frozenlist==1.4.1 # via # aiohttp # aiosignal +googleapis-common-protos==1.65.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http greenlet==3.0.3 # via sqlalchemy +grpcio==1.66.0 + # via opentelemetry-exporter-otlp-proto-grpc h11==0.14.0 # via # httpcore @@ -185,6 +197,8 @@ idna==3.6 # httpx # requests # yarl +importlib-metadata==8.0.0 + # via opentelemetry-api jmespath==1.0.1 # via # boto3 @@ -233,6 +247,65 @@ numpy==1.26.4 # matplotlib # pandas # prometheus-api-client +opentelemetry-api==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-instrumentation + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-exporter-otlp-proto-common==1.26.0 + # via + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-grpc==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-exporter-otlp-proto-http==1.26.0 + # via opentelemetry-exporter-otlp +opentelemetry-instrumentation==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests +opentelemetry-instrumentation-asgi==0.47b0 + # via opentelemetry-instrumentation-fastapi +opentelemetry-instrumentation-fastapi==0.47b0 + # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +opentelemetry-instrumentation-requests==0.47b0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in +opentelemetry-proto==1.26.0 + # via + # 
opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.26.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in + # opentelemetry-exporter-otlp-proto-grpc + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests + # opentelemetry-sdk +opentelemetry-util-http==0.47b0 + # via + # opentelemetry-instrumentation-asgi + # opentelemetry-instrumentation-fastapi + # opentelemetry-instrumentation-requests orjson==3.10.0 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -270,6 +343,14 @@ prometheus-client==0.20.0 # prometheus-fastapi-instrumentator prometheus-fastapi-instrumentator==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_fastapi.in +protobuf==4.25.4 + # via + # googleapis-common-protos + # opentelemetry-proto +psutil==6.0.0 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.9 # via sqlalchemy pydantic==1.10.14 @@ -366,9 +447,14 @@ referencing==0.29.3 # jsonschema-specifications regex==2023.12.25 # via dateparser +repro-zipfile==0.3.1 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in requests==2.32.2 # via # httmock + # opentelemetry-exporter-otlp-proto-http # prometheus-api-client rich==13.7.1 # via @@ -383,6 +469,8 @@ rpds-py==0.18.0 # referencing s3transfer==0.10.1 # via boto3 +setuptools==74.0.0 + # via opentelemetry-instrumentation sh==2.0.6 # via -r requirements/../../../packages/aws-library/requirements/_base.in shellingham==1.5.4 @@ -465,16 +553,15 @@ typing-extensions==4.10.0 # aiodebug # aiodocker # alembic - # anyio # fastapi # faststream + # opentelemetry-sdk # pydantic # typer # types-aiobotocore # types-aiobotocore-ec2 # types-aiobotocore-s3 # types-aiobotocore-ssm - # uvicorn tzdata==2024.1 # via pandas tzlocal==5.2 @@ -507,10 +594,15 @@ watchfiles==0.21.0 websockets==12.0 # via uvicorn wrapt==1.16.0 - # via aiobotocore + # via + # aiobotocore + # deprecated + # opentelemetry-instrumentation yarl==1.9.4 # via # -r requirements/../../../packages/postgres-database/requirements/_base.in # aio-pika # aiohttp # aiormq +zipp==3.20.1 + # via importlib-metadata diff --git a/services/resource-usage-tracker/requirements/_test.txt b/services/resource-usage-tracker/requirements/_test.txt index de56ac958c72..d1c36a7f469f 100644 --- a/services/resource-usage-tracker/requirements/_test.txt +++ b/services/resource-usage-tracker/requirements/_test.txt @@ -10,10 +10,6 @@ anyio==4.3.0 # httpx asgi-lifespan==2.1.0 # via -r requirements/_test.in -async-timeout==4.0.3 - # via - # -c requirements/_base.txt - # redis attrs==23.2.0 # via # -c requirements/_base.txt @@ -44,9 +40,9 @@ certifi==2024.2.2 # httpcore # httpx # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography -cfn-lint==1.10.3 +cfn-lint==1.15.0 # via moto charset-normalizer==3.3.2 # via 
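The `opentelemetry-*` pins appearing above back the new `setup_tracing(app, tracing_settings, APP_NAME)` calls this PR adds to the payments and resource-usage-tracker apps. The actual wiring lives in `servicelib.fastapi.tracing`, which is not shown in this diff; purely for orientation, an OTLP-over-HTTP tracing setup for a FastAPI app with these packages typically looks roughly like the sketch below (the collector endpoint and service name are placeholders, and the real helper may differ in detail):

```python
# Hedged sketch only -- servicelib.fastapi.tracing.setup_tracing is the authoritative code.
from fastapi import FastAPI
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor


def setup_tracing_sketch(app: FastAPI, collector_endpoint: str, service_name: str) -> None:
    # tag every span with the emitting service (e.g. the service's APP_NAME)
    provider = TracerProvider(resource=Resource.create({"service.name": service_name}))
    # export spans in batches to the OTLP/HTTP collector (4318 is the conventional port)
    provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(endpoint=f"{collector_endpoint}/v1/traces"))
    )
    trace.set_tracer_provider(provider)
    # auto-instrument incoming FastAPI/ASGI requests
    FastAPIInstrumentor.instrument_app(app, tracer_provider=provider)
```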
@@ -60,7 +56,7 @@ coverage==7.6.1 # via # -r requirements/_test.in # pytest-cov -cryptography==43.0.0 +cryptography==43.0.1 # via # -c requirements/../../../requirements/constraints.txt # joserfc @@ -69,22 +65,17 @@ docker==7.1.0 # via # -r requirements/_test.in # moto -exceptiongroup==1.2.0 - # via - # -c requirements/_base.txt - # anyio - # pytest -faker==27.0.0 +faker==29.0.0 # via -r requirements/_test.in -fakeredis==2.23.5 +fakeredis==2.24.1 # via -r requirements/_test.in flask==3.0.3 # via # flask-cors # moto -flask-cors==4.0.1 +flask-cors==5.0.0 # via moto -graphql-core==3.2.3 +graphql-core==3.2.4 # via moto greenlet==3.0.3 # via @@ -126,7 +117,7 @@ jmespath==1.0.1 # botocore joserfc==1.0.0 # via moto -jsondiff==2.2.0 +jsondiff==2.2.1 # via moto jsonpatch==1.33 # via cfn-lint @@ -162,11 +153,11 @@ markupsafe==2.1.5 # jinja2 # mako # werkzeug -moto==5.0.13 +moto==5.0.15 # via -r requirements/_test.in mpmath==1.3.0 # via sympy -mypy==1.11.1 +mypy==1.11.2 # via sqlalchemy mypy-extensions==1.0.0 # via mypy @@ -187,7 +178,7 @@ pluggy==1.5.0 # via pytest ply==3.11 # via jsonpath-ng -py-partiql-parser==0.5.5 +py-partiql-parser==0.5.6 # via moto pycparser==2.22 # via cffi @@ -200,7 +191,7 @@ pyparsing==3.1.2 # via # -c requirements/_base.txt # moto -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements/_test.in # pytest-asyncio @@ -278,8 +269,10 @@ s3transfer==0.10.1 # via # -c requirements/_base.txt # boto3 -setuptools==73.0.1 - # via moto +setuptools==74.0.0 + # via + # -c requirements/_base.txt + # moto six==1.16.0 # via # -c requirements/_base.txt @@ -301,25 +294,18 @@ sqlalchemy==1.4.52 # alembic sqlalchemy2-stubs==0.0.2a38 # via sqlalchemy -sympy==1.13.2 +sympy==1.13.3 # via cfn-lint termcolor==2.4.0 # via pytest-sugar -tomli==2.0.1 - # via - # coverage - # mypy - # pytest -types-requests==2.32.0.20240712 +types-requests==2.32.0.20240914 # via -r requirements/_test.in typing-extensions==4.10.0 # via # -c requirements/_base.txt # alembic - # anyio # aws-sam-translator # cfn-lint - # fakeredis # mypy # pydantic # sqlalchemy2-stubs @@ -332,7 +318,7 @@ urllib3==2.0.7 # requests # responses # types-requests -werkzeug==3.0.3 +werkzeug==3.0.4 # via # flask # moto diff --git a/services/resource-usage-tracker/requirements/_tools.txt b/services/resource-usage-tracker/requirements/_tools.txt index b84cbbeb9fbc..44759acdfd41 100644 --- a/services/resource-usage-tracker/requirements/_tools.txt +++ b/services/resource-usage-tracker/requirements/_tools.txt @@ -1,8 +1,8 @@ -astroid==3.2.4 +astroid==3.3.4 # via pylint black==24.8.0 # via -r requirements/../../../requirements/devenv.txt -build==1.2.1 +build==1.2.2 # via pip-tools bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt @@ -18,9 +18,9 @@ dill==0.3.8 # via pylint distlib==0.3.8 # via virtualenv -filelock==3.15.4 +filelock==3.16.1 # via virtualenv -identify==2.6.0 +identify==2.6.1 # via pre-commit isort==5.13.2 # via @@ -28,7 +28,7 @@ isort==5.13.2 # pylint mccabe==0.7.0 # via pylint -mypy==1.11.1 +mypy==1.11.2 # via # -c requirements/_test.txt # -r requirements/../../../requirements/devenv.txt @@ -51,14 +51,14 @@ pip==24.2 # via pip-tools pip-tools==7.4.1 # via -r requirements/../../../requirements/devenv.txt -platformdirs==4.2.2 +platformdirs==4.3.6 # via # black # pylint # virtualenv pre-commit==3.8.0 # via -r requirements/../../../requirements/devenv.txt -pylint==3.2.6 +pylint==3.3.0 # via -r requirements/../../../requirements/devenv.txt pyproject-hooks==1.1.0 # via @@ -71,32 +71,23 @@ pyyaml==6.0.1 # -c 
requirements/_test.txt # pre-commit # watchdog -ruff==0.6.1 +ruff==0.6.7 # via -r requirements/../../../requirements/devenv.txt -setuptools==73.0.1 +setuptools==74.0.0 # via + # -c requirements/_base.txt # -c requirements/_test.txt # pip-tools -tomli==2.0.1 - # via - # -c requirements/_test.txt - # black - # build - # mypy - # pip-tools - # pylint tomlkit==0.13.2 # via pylint typing-extensions==4.10.0 # via # -c requirements/_base.txt # -c requirements/_test.txt - # astroid - # black # mypy -virtualenv==20.26.3 +virtualenv==20.26.5 # via pre-commit -watchdog==4.0.2 +watchdog==5.0.2 # via -r requirements/_tools.in wheel==0.44.0 # via pip-tools diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/application.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/application.py index d8a696e087ee..a97db0170ae4 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/application.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/application.py @@ -5,6 +5,7 @@ from servicelib.fastapi.prometheus_instrumentation import ( setup_prometheus_instrumentation, ) +from servicelib.fastapi.tracing import setup_tracing from servicelib.rabbitmq.rpc_interfaces.resource_usage_tracker.errors import ( CustomResourceUsageTrackerError, ) @@ -53,6 +54,12 @@ def create_app(settings: ApplicationSettings) -> FastAPI: if app.state.settings.RESOURCE_USAGE_TRACKER_PROMETHEUS_INSTRUMENTATION_ENABLED: setup_prometheus_instrumentation(app) + if app.state.settings.RESOURCE_USAGE_TRACKER_TRACING: + setup_tracing( + app, + app.state.settings.RESOURCE_USAGE_TRACKER_TRACING, + app.state.settings.APP_NAME, + ) # ERROR HANDLERS app.add_exception_handler(CustomResourceUsageTrackerError, http404_error_handler) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/settings.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/settings.py index 7ac7599ed58b..c5d3b7b81c01 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/settings.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/core/settings.py @@ -10,6 +10,7 @@ from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings from settings_library.s3 import S3Settings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from .._meta import API_VERSION, API_VTAG, PROJECT_NAME @@ -110,3 +111,6 @@ class ApplicationSettings(MinimalApplicationSettings): ) RESOURCE_USAGE_TRACKER_PROMETHEUS_INSTRUMENTATION_ENABLED: bool = True RESOURCE_USAGE_TRACKER_S3: S3Settings | None = Field(auto_default_from_env=True) + RESOURCE_USAGE_TRACKER_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/models/resource_tracker_service_runs.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/models/resource_tracker_service_runs.py index 45cddca3057f..6bceaab4f8c3 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/models/resource_tracker_service_runs.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/models/resource_tracker_service_runs.py @@ -109,6 +109,7 @@ class Config: 
class OsparcCreditsAggregatedByServiceKeyDB(BaseModel): osparc_credits: Decimal service_key: ServiceKey + running_time_in_hours: Decimal class Config: orm_mode = True diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/__init__.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/__init__.py index bca3083383ce..42062cb30ba6 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/__init__.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/__init__.py @@ -1,5 +1,5 @@ from fastapi import FastAPI -from servicelib.db_async_engine import close_db_connection, connect_to_db +from servicelib.fastapi.db_asyncpg_engine import close_db_connection, connect_to_db def setup(app: FastAPI): diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/repositories/resource_tracker.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/repositories/resource_tracker.py index a341bf35e6a8..231c97502fbc 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/repositories/resource_tracker.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/modules/db/repositories/resource_tracker.py @@ -329,6 +329,22 @@ async def get_osparc_credits_aggregated_by_service( sa.func.SUM( resource_tracker_credit_transactions.c.osparc_credits ).label("osparc_credits"), + sa.func.SUM( + sa.func.round( + ( + sa.func.extract( + "epoch", + resource_tracker_service_runs.c.stopped_at, + ) + - sa.func.extract( + "epoch", + resource_tracker_service_runs.c.started_at, + ) + ) + / 3600, + 2, + ) + ).label("running_time_in_hours"), ) .select_from( resource_tracker_service_runs.join( diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/resource_tracker_service_runs.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/resource_tracker_service_runs.py index 8977d6fa5a08..d3ac6f743b8e 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/resource_tracker_service_runs.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/resource_tracker_service_runs.py @@ -219,6 +219,7 @@ async def get_osparc_credits_aggregated_usages_page( OsparcCreditsAggregatedByServiceGet.construct( osparc_credits=item.osparc_credits, service_key=item.service_key, + running_time_in_hours=item.running_time_in_hours, ) ) diff --git a/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans.py b/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans.py index 5e241e607674..609b0ebd54f0 100644 --- a/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans.py +++ b/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans.py @@ -27,6 +27,7 @@ from simcore_postgres_database.models.resource_tracker_pricing_units import ( resource_tracker_pricing_units, ) +from simcore_postgres_database.models.services import services_meta_data from starlette import status from yarl import URL @@ -184,6 +185,15 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato modified=datetime.now(tz=timezone.utc), ) ) + + con.execute( + services_meta_data.insert().values( + 
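The new `running_time_in_hours` field computed above sums, per service key, `round((EXTRACT(epoch FROM stopped_at) - EXTRACT(epoch FROM started_at)) / 3600, 2)` over all matching service runs, and the aggregated-usage API now passes it through. A tiny self-contained illustration of the same arithmetic (the sample runs are made up; only the service keys are taken from the tests below):

```python
from collections import defaultdict
from datetime import datetime, timedelta, timezone

now = datetime.now(tz=timezone.utc)
# made-up sample runs: (service_key, started_at, stopped_at)
runs = [
    ("simcore/services/dynamic/jupyter-smash", now - timedelta(hours=1), now),
    ("simcore/services/dynamic/jupyter-smash", now - timedelta(minutes=90), now),
    ("simcore/services/dynamic/sim4life", now - timedelta(hours=2), now),
]

hours_per_service: dict[str, float] = defaultdict(float)
for service_key, started_at, stopped_at in runs:
    # same formula as the SQL above: epoch difference in seconds / 3600, rounded to 2 decimals per run
    hours_per_service[service_key] += round((stopped_at - started_at).total_seconds() / 3600, 2)

print(dict(hours_per_service))
# {'simcore/services/dynamic/jupyter-smash': 2.5, 'simcore/services/dynamic/sim4life': 2.0}
```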
key=_SERVICE_KEY, + version=_SERVICE_VERSION, + name="name", + description="description", + ) + ) con.execute( resource_tracker_pricing_plan_to_service.insert().values( pricing_plan_id=_PRICING_PLAN_ID, @@ -192,6 +202,15 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato service_default_plan=True, ) ) + + con.execute( + services_meta_data.insert().values( + key=_SERVICE_KEY_2, + version=_SERVICE_VERSION_2, + name="name", + description="description", + ) + ) con.execute( resource_tracker_pricing_plan_to_service.insert().values( pricing_plan_id=_PRICING_PLAN_ID_2, @@ -207,6 +226,7 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato con.execute(resource_tracker_pricing_units.delete()) con.execute(resource_tracker_pricing_plans.delete()) con.execute(resource_tracker_pricing_unit_costs.delete()) + con.execute(services_meta_data.delete()) async def test_get_default_pricing_plan_for_service( diff --git a/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans_rpc.py b/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans_rpc.py index 5a12fd24dbe0..4ec8d45bb72d 100644 --- a/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans_rpc.py +++ b/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_pricing_plans_rpc.py @@ -37,6 +37,7 @@ from simcore_postgres_database.models.resource_tracker_pricing_units import ( resource_tracker_pricing_units, ) +from simcore_postgres_database.models.services import services_meta_data pytest_simcore_core_services_selection = ["postgres", "rabbit"] pytest_simcore_ops_services_selection = [ @@ -44,18 +45,52 @@ ] +_SERVICE_KEY = "simcore/services/comp/itis/sleeper" +_SERVICE_VERSION_1 = "2.0.2" +_SERVICE_VERSION_2 = "3.0.0" + +_SERVICE_KEY_3 = "simcore/services/comp/itis/different-service" +_SERVICE_VERSION_3 = "1.0.1" + + @pytest.fixture() def resource_tracker_setup_db( postgres_db: sa.engine.Engine, ) -> Iterator[None]: with postgres_db.connect() as con: + con.execute( + services_meta_data.insert().values( + key=_SERVICE_KEY, + version=_SERVICE_VERSION_1, + name="name", + description="description", + ) + ) + con.execute( + services_meta_data.insert().values( + key=_SERVICE_KEY, + version=_SERVICE_VERSION_2, + name="name", + description="description", + ) + ) + con.execute( + services_meta_data.insert().values( + key=_SERVICE_KEY_3, + version=_SERVICE_VERSION_3, + name="name", + description="description", + ) + ) + yield con.execute(resource_tracker_pricing_unit_costs.delete()) con.execute(resource_tracker_pricing_units.delete()) con.execute(resource_tracker_pricing_plan_to_service.delete()) con.execute(resource_tracker_pricing_plans.delete()) + con.execute(services_meta_data.delete()) async def test_rpc_pricing_plans_workflow( @@ -68,7 +103,7 @@ async def test_rpc_pricing_plans_workflow( result = await pricing_plans.create_pricing_plan( rpc_client, data=PricingPlanCreate( - product_name="s4l", + product_name="osparc", display_name=_display_name, description=faker.sentence(), classification=PricingPlanClassification.TIER, @@ -84,7 +119,7 @@ async def test_rpc_pricing_plans_workflow( _update_description = "description name updated" result = await pricing_plans.update_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", data=PricingPlanUpdate( pricing_plan_id=_pricing_plan_id, display_name=_update_display_name, @@ -99,7 +134,7 @@ async def 
test_rpc_pricing_plans_workflow( result = await pricing_plans.get_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) assert isinstance(result, PricingPlanGet) @@ -110,7 +145,7 @@ async def test_rpc_pricing_plans_workflow( result = await pricing_plans.list_pricing_plans( rpc_client, - product_name="s4l", + product_name="osparc", ) assert isinstance(result, list) assert len(result) == 1 @@ -120,7 +155,7 @@ async def test_rpc_pricing_plans_workflow( # Now I will deactivate the pricing plan result = await pricing_plans.update_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", data=PricingPlanUpdate( pricing_plan_id=_pricing_plan_id, display_name=faker.word(), @@ -142,7 +177,7 @@ async def test_rpc_pricing_plans_with_units_workflow( result = await pricing_plans.create_pricing_plan( rpc_client, data=PricingPlanCreate( - product_name="s4l", + product_name="osparc", display_name=_display_name, description=faker.sentence(), classification=PricingPlanClassification.TIER, @@ -156,7 +191,7 @@ async def test_rpc_pricing_plans_with_units_workflow( result = await pricing_units.create_pricing_unit( rpc_client, - product_name="s4l", + product_name="osparc", data=PricingUnitWithCostCreate( pricing_plan_id=_pricing_plan_id, unit_name="SMALL", @@ -175,7 +210,7 @@ async def test_rpc_pricing_plans_with_units_workflow( # Get pricing plan result = await pricing_plans.get_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) assert isinstance(result, PricingPlanGet) @@ -187,7 +222,7 @@ async def test_rpc_pricing_plans_with_units_workflow( _unit_name = "VERY SMALL" result = await pricing_units.update_pricing_unit( rpc_client, - product_name="s4l", + product_name="osparc", data=PricingUnitWithCostUpdate( pricing_plan_id=_pricing_plan_id, pricing_unit_id=_first_pricing_unit_id, @@ -206,7 +241,7 @@ async def test_rpc_pricing_plans_with_units_workflow( # Update pricing unit with COST update! 
result = await pricing_units.update_pricing_unit( rpc_client, - product_name="s4l", + product_name="osparc", data=PricingUnitWithCostUpdate( pricing_plan_id=_pricing_plan_id, pricing_unit_id=_first_pricing_unit_id, @@ -228,7 +263,7 @@ async def test_rpc_pricing_plans_with_units_workflow( # Test get pricing unit result = await pricing_units.get_pricing_unit( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, pricing_unit_id=_first_pricing_unit_id, ) @@ -238,7 +273,7 @@ async def test_rpc_pricing_plans_with_units_workflow( # Create one more unit result = await pricing_units.create_pricing_unit( rpc_client, - product_name="s4l", + product_name="osparc", data=PricingUnitWithCostCreate( pricing_plan_id=_pricing_plan_id, unit_name="LARGE", @@ -256,7 +291,7 @@ async def test_rpc_pricing_plans_with_units_workflow( # Get pricing plan with units result = await pricing_plans.get_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) assert isinstance(result, PricingPlanGet) @@ -275,7 +310,7 @@ async def test_rpc_pricing_plans_to_service_workflow( result = await pricing_plans.create_pricing_plan( rpc_client, data=PricingPlanCreate( - product_name="s4l", + product_name="osparc", display_name=faker.word(), description=faker.sentence(), classification=PricingPlanClassification.TIER, @@ -288,19 +323,19 @@ async def test_rpc_pricing_plans_to_service_workflow( result = ( await pricing_plans.list_connected_services_to_pricing_plan_by_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) ) assert isinstance(result, list) assert result == [] - _first_service_version = ServiceVersion("2.0.2") + _first_service_version = ServiceVersion(_SERVICE_VERSION_1) result = await pricing_plans.connect_service_to_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, - service_key=ServiceKey("simcore/services/comp/itis/sleeper"), + service_key=ServiceKey(_SERVICE_KEY), service_version=_first_service_version, ) assert isinstance(result, PricingPlanToServiceGet) @@ -310,7 +345,7 @@ async def test_rpc_pricing_plans_to_service_workflow( result = ( await pricing_plans.list_connected_services_to_pricing_plan_by_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) ) @@ -318,12 +353,12 @@ async def test_rpc_pricing_plans_to_service_workflow( assert len(result) == 1 # Connect different version - _second_service_version = ServiceVersion("3.0.0") + _second_service_version = ServiceVersion(_SERVICE_VERSION_2) result = await pricing_plans.connect_service_to_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, - service_key=ServiceKey("simcore/services/comp/itis/sleeper"), + service_key=ServiceKey(_SERVICE_KEY), service_version=_second_service_version, ) assert isinstance(result, PricingPlanToServiceGet) @@ -333,7 +368,7 @@ async def test_rpc_pricing_plans_to_service_workflow( result = ( await pricing_plans.list_connected_services_to_pricing_plan_by_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) ) @@ -341,13 +376,13 @@ async def test_rpc_pricing_plans_to_service_workflow( assert len(result) == 2 # Connect different service - _different_service_key = ServiceKey("simcore/services/comp/itis/different-service") + _different_service_key = ServiceKey(_SERVICE_KEY_3) result = 
await pricing_plans.connect_service_to_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, service_key=_different_service_key, - service_version=ServiceVersion("1.0.0"), + service_version=ServiceVersion(_SERVICE_VERSION_3), ) assert isinstance(result, PricingPlanToServiceGet) assert result.pricing_plan_id == _pricing_plan_id @@ -356,7 +391,7 @@ async def test_rpc_pricing_plans_to_service_workflow( result = ( await pricing_plans.list_connected_services_to_pricing_plan_by_pricing_plan( rpc_client, - product_name="s4l", + product_name="osparc", pricing_plan_id=_pricing_plan_id, ) ) diff --git a/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_service_runs__list_aggregated_usages.py b/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_service_runs__list_aggregated_usages.py index 36c9ef3fecaf..eea94827a440 100644 --- a/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_service_runs__list_aggregated_usages.py +++ b/services/resource-usage-tracker/tests/unit/with_dbs/test_api_resource_tracker_service_runs__list_aggregated_usages.py @@ -51,37 +51,49 @@ def resource_tracker_setup_db( random_resource_tracker_service_run( user_id=_USER_ID_1, service_run_id=_SERVICE_RUN_ID_1, - started_at=datetime.now(tz=timezone.utc), + started_at=datetime.now(tz=timezone.utc) - timedelta(hours=1), + stopped_at=datetime.now(tz=timezone.utc), + service_run_status="SUCCESS", service_key="simcore/services/dynamic/jupyter-smash", ), random_resource_tracker_service_run( user_id=_USER_ID_2, service_run_id=_SERVICE_RUN_ID_2, - started_at=datetime.now(tz=timezone.utc), + started_at=datetime.now(tz=timezone.utc) - timedelta(hours=1), + stopped_at=datetime.now(tz=timezone.utc), + service_run_status="SUCCESS", service_key="simcore/services/dynamic/jupyter-smash", ), random_resource_tracker_service_run( user_id=_USER_ID_1, service_run_id=_SERVICE_RUN_ID_3, - started_at=datetime.now(tz=timezone.utc), + started_at=datetime.now(tz=timezone.utc) - timedelta(hours=1), + stopped_at=datetime.now(tz=timezone.utc), + service_run_status="SUCCESS", service_key="simcore/services/dynamic/jupyter-smash", ), random_resource_tracker_service_run( user_id=_USER_ID_1, service_run_id=_SERVICE_RUN_ID_4, - started_at=datetime.now(tz=timezone.utc), + started_at=datetime.now(tz=timezone.utc) - timedelta(hours=1), + stopped_at=datetime.now(tz=timezone.utc), + service_run_status="SUCCESS", service_key="simcore/services/dynamic/jupyter-smash", ), random_resource_tracker_service_run( user_id=_USER_ID_1, service_run_id=_SERVICE_RUN_ID_5, started_at=datetime.now(tz=timezone.utc) - timedelta(days=3), + stopped_at=datetime.now(tz=timezone.utc), + service_run_status="SUCCESS", service_key="simcore/services/dynamic/jupyter-smash", ), random_resource_tracker_service_run( user_id=_USER_ID_1, service_run_id=_SERVICE_RUN_ID_6, started_at=datetime.now(tz=timezone.utc) - timedelta(days=10), + stopped_at=datetime.now(tz=timezone.utc), + service_run_status="SUCCESS", service_key="simcore/services/dynamic/sim4life", ), ] diff --git a/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_triggered_by_listening_with_billing.py b/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_triggered_by_listening_with_billing.py index 8d95ae78d75b..7a5e2114c1d8 100644 --- 
a/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_triggered_by_listening_with_billing.py +++ b/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_triggered_by_listening_with_billing.py @@ -28,6 +28,7 @@ from simcore_postgres_database.models.resource_tracker_pricing_units import ( resource_tracker_pricing_units, ) +from simcore_postgres_database.models.services import services_meta_data from .conftest import assert_service_runs_db_row @@ -128,6 +129,14 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato modified=datetime.now(tz=timezone.utc), ) ) + con.execute( + services_meta_data.insert().values( + key="simcore/services/comp/itis/sleeper", + version="1.0.16", + name="name", + description="description", + ) + ) con.execute( resource_tracker_pricing_plan_to_service.insert().values( pricing_plan_id=1, @@ -144,6 +153,7 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato con.execute(resource_tracker_pricing_plans.delete()) con.execute(resource_tracker_pricing_unit_costs.delete()) con.execute(resource_tracker_credit_transactions.delete()) + con.execute(services_meta_data.delete()) @pytest.mark.flaky(max_runs=3) diff --git a/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing.py b/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing.py index 92946509e912..4b6c1a0dfac7 100644 --- a/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing.py +++ b/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing.py @@ -30,6 +30,7 @@ from simcore_postgres_database.models.resource_tracker_pricing_units import ( resource_tracker_pricing_units, ) +from simcore_postgres_database.models.services import services_meta_data from simcore_service_resource_usage_tracker.modules.db.repositories.resource_tracker import ( ResourceTrackerRepository, ) @@ -142,6 +143,14 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato modified=datetime.now(tz=timezone.utc), ) ) + con.execute( + services_meta_data.insert().values( + key="simcore/services/comp/itis/sleeper", + version="1.0.16", + name="name", + description="description", + ) + ) con.execute( resource_tracker_pricing_plan_to_service.insert().values( pricing_plan_id=1, @@ -158,6 +167,7 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato con.execute(resource_tracker_pricing_plans.delete()) con.execute(resource_tracker_pricing_unit_costs.delete()) con.execute(resource_tracker_credit_transactions.delete()) + con.execute(services_meta_data.delete()) @pytest.fixture diff --git a/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing_cost_0.py b/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing_cost_0.py index d5bc497fb0f3..c1d62af5b23f 100644 --- a/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing_cost_0.py +++ b/services/resource-usage-tracker/tests/unit/with_dbs/test_process_rabbitmq_message_with_billing_cost_0.py @@ -30,6 +30,7 @@ from simcore_postgres_database.models.resource_tracker_pricing_units import ( resource_tracker_pricing_units, ) +from simcore_postgres_database.models.services import services_meta_data from simcore_service_resource_usage_tracker.modules.db.repositories.resource_tracker 
import ( ResourceTrackerRepository, ) @@ -88,6 +89,14 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato modified=datetime.now(tz=timezone.utc), ) ) + con.execute( + services_meta_data.insert().values( + key="simcore/services/comp/itis/sleeper", + version="1.0.16", + name="name", + description="description", + ) + ) con.execute( resource_tracker_pricing_plan_to_service.insert().values( pricing_plan_id=1, @@ -104,6 +113,7 @@ def resource_tracker_pricing_tables_db(postgres_db: sa.engine.Engine) -> Iterato con.execute(resource_tracker_pricing_plans.delete()) con.execute(resource_tracker_pricing_unit_costs.delete()) con.execute(resource_tracker_credit_transactions.delete()) + con.execute(services_meta_data.delete()) @pytest.fixture diff --git a/services/static-webserver/README.md b/services/static-webserver/README.md index 8ea7520df8d7..5b9d3691fb58 100644 --- a/services/static-webserver/README.md +++ b/services/static-webserver/README.md @@ -6,5 +6,5 @@ Used for static content serving. In the future will fully serve all static content. Currently the `webserver` is still serving the following routes: -- `/` resolves to one of the three index.html pages inside the 4 products (osparc, tis, s4l, s4llite, s4lacad, s4lengine, s4ldesktop, s4ldesktopacad) +- `/` resolves to one of the many index.html pages inside the products (osparc, tis, s4l...) - `/static-frontend-data.json` contains information required by the fronted diff --git a/services/static-webserver/client/compile.json b/services/static-webserver/client/compile.json index e7b1ff1cdfdd..780532e3cf01 100644 --- a/services/static-webserver/client/compile.json +++ b/services/static-webserver/client/compile.json @@ -136,7 +136,7 @@ "class": "osparc.Application", "theme": "osparc.theme.products.tis.ThemeDark", "name": "tis", - "title": "TI Plan - IT'IS", + "title": "TIP V3.0 - IT'IS", "include": [ "iconfont.material.Load", "iconfont.fontawesome5.Load", @@ -147,6 +147,22 @@ }, "addTimestampsToUrls": true, "bootPath": "source/boot" + }, + { + "class": "osparc.Application", + "theme": "osparc.theme.products.tis.ThemeDark", + "name": "tiplite", + "title": "TIP.lite - IT'IS", + "include": [ + "iconfont.material.Load", + "iconfont.fontawesome5.Load", + "osparc.theme.products.tis.ThemeLight" + ], + "environment": { + "product.name": "tiplite" + }, + "addTimestampsToUrls": true, + "bootPath": "source/boot" } ], "eslintConfig": { diff --git a/services/static-webserver/client/scripts/apps_metadata.json b/services/static-webserver/client/scripts/apps_metadata.json index 2c89e597e9a0..38959e460904 100644 --- a/services/static-webserver/client/scripts/apps_metadata.json +++ b/services/static-webserver/client/scripts/apps_metadata.json @@ -3,6 +3,9 @@ { "application": "osparc", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/osparc/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/osparc/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/osparc/browserconfig.xml", "replace_me_og_title": "oSPARC", "replace_me_og_description": "open online simulations for Stimulating Peripheral Activity to Relieve Conditions", "replace_me_og_image": "https://raw.githubusercontent.com/ITISFoundation/osparc-simcore/master/services/static-webserver/client/source/resource/osparc/favicon-osparc.png" @@ -10,6 +13,9 @@ }, { "application": "s4l", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/s4l/icons/favicon-32x32.png", + "replace_me_manifest_uri": 
"/resource/osparc/s4l/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/s4l/browserconfig.xml", "replace_me_og_title": "Sim4Life", "replace_me_og_description": "Computational life sciences platform that combines computable human phantoms, powerful physics solvers and advanced tissue models.", "replace_me_og_image": "https://raw.githubusercontent.com/ZurichMedTech/s4l-assets/main/app/full/background-images/S4L/Sim4Life-head-default.png" @@ -17,6 +23,9 @@ }, { "application": "s4lacad", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/s4l/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/s4l/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/s4l/browserconfig.xml", "replace_me_og_title": "Sim4Life Science", "replace_me_og_description": "Sim4Life for Science - Computational life sciences platform that combines computable human phantoms, powerful physics solvers and advanced tissue models.", "replace_me_og_image": "https://raw.githubusercontent.com/ZurichMedTech/s4l-assets/main/app/full/background-images/S4L/Sim4Life-head-academy.png" @@ -24,6 +33,9 @@ }, { "application": "s4lengine", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/s4l/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/s4l/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/s4l/browserconfig.xml", "replace_me_og_title": "Sim4Life Engineering", "replace_me_og_description": "Sim4Life for Engineers - Computational life sciences platform that combines computable human phantoms, powerful physics solvers and advanced tissue models.", "replace_me_og_image": "https://raw.githubusercontent.com/ZurichMedTech/s4l-assets/main/app/full/background-images/S4L/Sim4Life-head-default.png" @@ -31,6 +43,9 @@ }, { "application": "s4ldesktop", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/s4l/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/s4l/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/s4l/browserconfig.xml", "replace_me_og_title": "Sim4Life (Desktop)", "replace_me_og_description": "Computational life sciences platform that combines computable human phantoms, powerful physics solvers and advanced tissue models.", "replace_me_og_image": "https://raw.githubusercontent.com/ZurichMedTech/s4l-assets/main/app/full/background-images/S4L/Sim4Life-head-default.png" @@ -38,6 +53,9 @@ }, { "application": "s4ldesktopacad", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/s4l/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/s4l/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/s4l/browserconfig.xml", "replace_me_og_title": "Sim4Life Science (Desktop)", "replace_me_og_description": "Sim4Life for Science - Computational life sciences platform that combines computable human phantoms, powerful physics solvers and advanced tissue models.", "replace_me_og_image": "https://raw.githubusercontent.com/ZurichMedTech/s4l-assets/main/app/full/background-images/S4L/Sim4Life-head-academy.png" @@ -45,6 +63,9 @@ }, { "application": "s4llite", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/s4l/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/s4l/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/s4l/browserconfig.xml", "replace_me_og_title": "S4L Lite", "replace_me_og_description": "Sim4Life for Students - Computational life sciences platform that combines computable human 
phantoms, powerful physics solvers and advanced tissue models.", "replace_me_og_image": "https://raw.githubusercontent.com/ZurichMedTech/s4l-assets/main/app/full/background-images/S4L/Sim4Life-head-lite.png" @@ -52,10 +73,23 @@ }, { "application": "tis", "replacements": { + "replace_me_favicon_uri": "/resource/osparc/tis/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/tis/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/tis/browserconfig.xml", "replace_me_og_title": "TI Plan - IT'IS", "replace_me_og_description": "A tool powered by o²S²PARC technology that reduces optimization of targeted neurostimulation protocols.", "replace_me_og_image": "https://raw.githubusercontent.com/ITISFoundation/osparc-simcore/master/services/static-webserver/client/source/resource/osparc/tip_splitimage.png" } + }, { + "application": "tiplite", + "replacements": { + "replace_me_favicon_uri": "/resource/osparc/tis/icons/favicon-32x32.png", + "replace_me_manifest_uri": "/resource/osparc/tis/manifest.json", + "replace_me_browserconfig_uri": "/resource/osparc/tis/browserconfig.xml", + "replace_me_og_title": "TI Plan lite - IT'IS", + "replace_me_og_description": "A tool powered by o²S²PARC technology that reduces optimization of targeted neurostimulation protocols.", + "replace_me_og_image": "https://raw.githubusercontent.com/ITISFoundation/osparc-simcore/master/services/static-webserver/client/source/resource/osparc/tip_splitimage.png" + } } ] } diff --git a/services/static-webserver/client/scripts/post-compile.py b/services/static-webserver/client/scripts/post-compile.py index 6c532c83ceec..afc1cd5033a4 100644 --- a/services/static-webserver/client/scripts/post-compile.py +++ b/services/static-webserver/client/scripts/post-compile.py @@ -31,7 +31,7 @@ def update_apps_metadata(): replacements = i.get("replacements") for key in replacements: replace_text = replacements[key] - data = data.replace(key, replace_text) + data = data.replace("${"+key+"}", replace_text) with open(filename, "w") as file: print(f"Updating app metadata: {filename}") file.write(data) diff --git a/services/static-webserver/client/source/boot/index.html b/services/static-webserver/client/source/boot/index.html index 48b35c48db9e..c12e128897c8 100644 --- a/services/static-webserver/client/source/boot/index.html +++ b/services/static-webserver/client/source/boot/index.html @@ -24,10 +24,10 @@ - + - + @@ -35,15 +35,15 @@ - + - - - + + + ${appTitle} - +
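A closing note on the frontend changes above: the boot `index.html` now carries `${...}`-style placeholders (e.g. `${appTitle}`), and `scripts/post-compile.py` was adjusted so each key from the per-application `replacements` dict in `apps_metadata.json` is matched as `${key}` rather than as the bare key. In rough, self-contained form the updated substitution behaves like this (the HTML snippet is illustrative, the replacement values are an excerpt from the `tis` entry above):

```python
# Minimal sketch of the updated replacement loop in post-compile.py.
replacements = {
    "replace_me_favicon_uri": "/resource/osparc/tis/icons/favicon-32x32.png",
    "replace_me_og_title": "TI Plan - IT'IS",
}

# illustrative stand-in for the compiled index.html content
html = '<link rel="icon" href="${replace_me_favicon_uri}"><title>${replace_me_og_title}</title>'

for key, value in replacements.items():
    html = html.replace("${" + key + "}", value)  # same expression as in post-compile.py

print(html)
```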