From 2e2cc2144dd137e18a3770bf7fef8b45fb04883d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 29 May 2024 08:44:32 +0200 Subject: [PATCH 001/122] using health checked only --- packages/service-library/src/servicelib/redis.py | 11 +++++++---- .../settings-library/src/settings_library/redis.py | 1 + .../services/expectations_tracker.py | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py diff --git a/packages/service-library/src/servicelib/redis.py b/packages/service-library/src/servicelib/redis.py index 5570d554c24b..24e4d3f14519 100644 --- a/packages/service-library/src/servicelib/redis.py +++ b/packages/service-library/src/servicelib/redis.py @@ -223,17 +223,20 @@ class RedisClientsManager: databases: set[RedisDatabase] settings: RedisSettings - _client_sdks: dict[RedisDatabase, RedisClientSDK] = field(default_factory=dict) + _client_sdks: dict[RedisDatabase, RedisClientSDKHealthChecked] = field( + default_factory=dict + ) async def setup(self) -> None: for db in self.databases: - self._client_sdks[db] = client_sdk = RedisClientSDK( + self._client_sdks[db] = RedisClientSDKHealthChecked( redis_dsn=self.settings.build_redis_dsn(db) ) - await client_sdk.setup() + + await logged_gather(*(c.setup() for c in self._client_sdks.values())) async def shutdown(self) -> None: await logged_gather(*(c.shutdown() for c in self._client_sdks.values())) - def client(self, database: RedisDatabase) -> RedisClientSDK: + def client(self, database: RedisDatabase) -> RedisClientSDKHealthChecked: return self._client_sdks[database] diff --git a/packages/settings-library/src/settings_library/redis.py b/packages/settings-library/src/settings_library/redis.py index ecccad69c100..4f8bf5d133de 100644 --- a/packages/settings-library/src/settings_library/redis.py +++ b/packages/settings-library/src/settings_library/redis.py @@ -17,6 +17,7 @@ class RedisDatabase(int, Enum): 
ANNOUNCEMENTS = 5 DISTRIBUTED_IDENTIFIERS = 6 DEFERRED_TASKS = 7 + DYNAMIC_SERVICES = 8 class RedisSettings(BaseCustomSettings): diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py new file mode 100644 index 000000000000..1a14490731d6 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py @@ -0,0 +1 @@ +# keeps track of what the user is supposed to do From b6d7c37ec14be2829db60a68b213215800614d61 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 29 May 2024 08:46:25 +0200 Subject: [PATCH 002/122] revert unused --- packages/settings-library/src/settings_library/redis.py | 1 - .../services/expectations_tracker.py | 1 - 2 files changed, 2 deletions(-) delete mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py diff --git a/packages/settings-library/src/settings_library/redis.py b/packages/settings-library/src/settings_library/redis.py index 4f8bf5d133de..ecccad69c100 100644 --- a/packages/settings-library/src/settings_library/redis.py +++ b/packages/settings-library/src/settings_library/redis.py @@ -17,7 +17,6 @@ class RedisDatabase(int, Enum): ANNOUNCEMENTS = 5 DISTRIBUTED_IDENTIFIERS = 6 DEFERRED_TASKS = 7 - DYNAMIC_SERVICES = 8 class RedisSettings(BaseCustomSettings): diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py deleted file mode 100644 index 1a14490731d6..000000000000 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/expectations_tracker.py +++ /dev/null @@ -1 +0,0 @@ -# keeps track of what the user is supposed to do From 14a0f1f2b2a32a8a2bdf989bd0918c71dd80fad4 Mon Sep 17 00:00:00 2001 From: 
Andrei Neagu Date: Wed, 29 May 2024 09:18:24 +0200 Subject: [PATCH 003/122] refactor redis database setup --- .../service-library/src/servicelib/redis.py | 11 ++++-- .../src/settings_library/redis.py | 1 + services/docker-compose-ops.yml | 3 +- services/docker-compose.yml | 2 +- .../api/rest/_dependencies.py | 9 +++-- .../api/rest/_health.py | 12 ++++-- .../services/redis.py | 37 ++++++++++++------- .../services/service_tracker/__init__.py | 0 .../services/service_tracker/_core.py | 2 + .../unit/api_rest/test_api_rest__health.py | 3 +- .../tests/unit/test_services_redis.py | 7 ++-- 11 files changed, 57 insertions(+), 30 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py diff --git a/packages/service-library/src/servicelib/redis.py b/packages/service-library/src/servicelib/redis.py index 5570d554c24b..24e4d3f14519 100644 --- a/packages/service-library/src/servicelib/redis.py +++ b/packages/service-library/src/servicelib/redis.py @@ -223,17 +223,20 @@ class RedisClientsManager: databases: set[RedisDatabase] settings: RedisSettings - _client_sdks: dict[RedisDatabase, RedisClientSDK] = field(default_factory=dict) + _client_sdks: dict[RedisDatabase, RedisClientSDKHealthChecked] = field( + default_factory=dict + ) async def setup(self) -> None: for db in self.databases: - self._client_sdks[db] = client_sdk = RedisClientSDK( + self._client_sdks[db] = RedisClientSDKHealthChecked( redis_dsn=self.settings.build_redis_dsn(db) ) - await client_sdk.setup() + + await logged_gather(*(c.setup() for c in self._client_sdks.values())) async def shutdown(self) -> None: await logged_gather(*(c.shutdown() for c in self._client_sdks.values())) - def client(self, database: RedisDatabase) -> RedisClientSDK: + def client(self, database: RedisDatabase) -> RedisClientSDKHealthChecked: return 
self._client_sdks[database] diff --git a/packages/settings-library/src/settings_library/redis.py b/packages/settings-library/src/settings_library/redis.py index ecccad69c100..4f8bf5d133de 100644 --- a/packages/settings-library/src/settings_library/redis.py +++ b/packages/settings-library/src/settings_library/redis.py @@ -17,6 +17,7 @@ class RedisDatabase(int, Enum): ANNOUNCEMENTS = 5 DISTRIBUTED_IDENTIFIERS = 6 DEFERRED_TASKS = 7 + DYNAMIC_SERVICES = 8 class RedisSettings(BaseCustomSettings): diff --git a/services/docker-compose-ops.yml b/services/docker-compose-ops.yml index 4c6398e454a3..adc4e9f066bf 100644 --- a/services/docker-compose-ops.yml +++ b/services/docker-compose-ops.yml @@ -83,7 +83,8 @@ services: user_notifications:${REDIS_HOST}:${REDIS_PORT}:4, announcements:${REDIS_HOST}:${REDIS_PORT}:5, distributed_identifiers:${REDIS_HOST}:${REDIS_PORT}:6, - deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7 + deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7, + dynamic_services:${REDIS_HOST}:${REDIS_PORT}:8 # If you add/remove a db, do not forget to update the --databases entry in the docker-compose.yml ports: - "18081:8081" diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 3ef810a2a3dd..f22d047bd4a3 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -1100,7 +1100,7 @@ services: "--loglevel", "verbose", "--databases", - "8", + "9", "--appendonly", "yes" ] diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py index 286e66ef2f78..4ad69770c894 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py @@ -2,7 +2,8 @@ from servicelib.fastapi.dependencies import get_app, get_reverse_url_mapper from servicelib.rabbitmq import RabbitMQClient, 
RabbitMQRPCClient from servicelib.redis import RedisClientSDKHealthChecked -from simcore_service_dynamic_scheduler.services.redis import get_redis_client +from settings_library.redis import RedisDatabase +from simcore_service_dynamic_scheduler.services.redis import get_all_redis_clients from ...services.rabbitmq import get_rabbitmq_client, get_rabbitmq_rpc_server @@ -18,8 +19,10 @@ def get_rabbitmq_rpc_server_from_request(request: Request) -> RabbitMQRPCClient: return get_rabbitmq_rpc_server(request.app) -def get_redis_client_from_request(request: Request) -> RedisClientSDKHealthChecked: - return get_redis_client(request.app) +def get_redis_clients_from_request( + request: Request, +) -> dict[RedisDatabase, RedisClientSDKHealthChecked]: + return get_all_redis_clients(request.app) __all__: tuple[str, ...] = ( diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py index 93a220cb68aa..d9fae5682c13 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_health.py @@ -9,11 +9,12 @@ ) from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient from servicelib.redis import RedisClientSDKHealthChecked +from settings_library.redis import RedisDatabase from ._dependencies import ( get_rabbitmq_client_from_request, get_rabbitmq_rpc_server_from_request, - get_redis_client_from_request, + get_redis_clients_from_request, ) router = APIRouter() @@ -29,14 +30,17 @@ async def healthcheck( rabbit_rpc_server: Annotated[ RabbitMQRPCClient, Depends(get_rabbitmq_rpc_server_from_request) ], - redis_client_sdk: Annotated[ - RedisClientSDKHealthChecked, Depends(get_redis_client_from_request) + redis_client_sdks: Annotated[ + dict[RedisDatabase, RedisClientSDKHealthChecked], + Depends(get_redis_clients_from_request), ], ): if not 
rabbit_client.healthy or not rabbit_rpc_server.healthy: raise HealthCheckError(RABBITMQ_CLIENT_UNHEALTHY_MSG) - if not redis_client_sdk.is_healthy: + if not all( + redis_client_sdk.is_healthy for redis_client_sdk in redis_client_sdks.values() + ): raise HealthCheckError(REDIS_CLIENT_UNHEALTHY_MSG) return f"{__name__}@{arrow.utcnow().isoformat()}" diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py index 30538d4a25f7..147a14607de7 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py @@ -1,29 +1,40 @@ +from typing import Final + from fastapi import FastAPI -from servicelib.redis import RedisClientSDKHealthChecked +from servicelib.redis import RedisClientSDKHealthChecked, RedisClientsManager from settings_library.redis import RedisDatabase, RedisSettings +_REDIS_DATABASES: Final[set[RedisDatabase]] = { + RedisDatabase.DEFERRED_TASKS, + RedisDatabase.DYNAMIC_SERVICES, +} + def setup_redis(app: FastAPI) -> None: settings: RedisSettings = app.state.settings.DYNAMIC_SCHEDULER_REDIS async def on_startup() -> None: - redis_locks_dsn = settings.build_redis_dsn(RedisDatabase.LOCKS) - app.state.redis_client_sdk = client = RedisClientSDKHealthChecked( - redis_locks_dsn + app.state.redis_clients_manager = manager = RedisClientsManager( + _REDIS_DATABASES, settings ) - await client.setup() + await manager.setup() async def on_shutdown() -> None: - redis_client_sdk: None | RedisClientSDKHealthChecked = ( - app.state.redis_client_sdk - ) - if redis_client_sdk: - await redis_client_sdk.shutdown() + manager: RedisClientsManager = app.state.redis_clients_manager + await manager.shutdown() app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) -def get_redis_client(app: FastAPI) -> 
RedisClientSDKHealthChecked: - redis_client_sdk: RedisClientSDKHealthChecked = app.state.redis_client_sdk - return redis_client_sdk +def get_redis_client( + app: FastAPI, database: RedisDatabase +) -> RedisClientSDKHealthChecked: + manager: RedisClientsManager = app.state.redis_clients_manager + return manager.client(database) + + +def get_all_redis_clients( + app: FastAPI, +) -> dict[RedisDatabase, RedisClientSDKHealthChecked]: + return {d: get_redis_client(app, d) for d in _REDIS_DATABASES} diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py new file mode 100644 index 000000000000..ff4a82a6588d --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py @@ -0,0 +1,2 @@ +# We need an interface to REDIS +# once connected we need to unfuck the shit out of it diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py index 8cc1c3279efd..6b892e774f1f 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py @@ -43,7 +43,8 @@ def mock_redis_client( ) -> None: base_path = "simcore_service_dynamic_scheduler.api.rest._dependencies" mocker.patch( - f"{base_path}.get_redis_client", return_value=MockHealth(redis_client_ok) + f"{base_path}.get_all_redis_clients", + return_value={0: MockHealth(redis_client_ok)}, ) diff --git a/services/dynamic-scheduler/tests/unit/test_services_redis.py 
b/services/dynamic-scheduler/tests/unit/test_services_redis.py index 7a7d90063851..896134c2278a 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_redis.py +++ b/services/dynamic-scheduler/tests/unit/test_services_redis.py @@ -6,7 +6,7 @@ from fastapi import FastAPI from pytest_simcore.helpers.typing_env import EnvVarsDict from settings_library.redis import RedisSettings -from simcore_service_dynamic_scheduler.services.redis import get_redis_client +from simcore_service_dynamic_scheduler.services.redis import get_all_redis_clients pytest_simcore_core_services_selection = [ "redis", @@ -23,5 +23,6 @@ def app_environment( async def test_health(app: FastAPI): - redis_client = get_redis_client(app) - assert await redis_client.ping() is True + redis_clients = get_all_redis_clients(app) + for redis_client in redis_clients.values(): + assert await redis_client.ping() is True From bddfa8ec9e7eea2a69fe48725a4d121714e049c4 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 29 May 2024 14:49:48 +0200 Subject: [PATCH 004/122] partial service_tracker core --- .../core/application.py | 3 + .../services/service_tracker/__init__.py | 15 +++++ .../services/service_tracker/_api.py | 44 ++++++++++++++ .../services/service_tracker/_core.py | 2 - .../services/service_tracker/_models.py | 39 +++++++++++++ .../services/service_tracker/_setup.py | 19 +++++++ .../services/service_tracker/_tracker.py | 31 ++++++++++ .../services_service_tracker/test__api.py | 1 + .../services_service_tracker/test__models.py | 23 ++++++++ .../services_service_tracker/test__tracker.py | 57 +++++++++++++++++++ 10 files changed, 232 insertions(+), 2 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py delete mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py create mode 100644 
services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py create mode 100644 services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py create mode 100644 services/dynamic-scheduler/tests/unit/services_service_tracker/test__models.py create mode 100644 services/dynamic-scheduler/tests/unit/services_service_tracker/test__tracker.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py index 62f07ea31fce..f51f0b754370 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py @@ -18,6 +18,7 @@ from ..services.director_v2 import setup_director_v2 from ..services.rabbitmq import setup_rabbitmq from ..services.redis import setup_redis +from ..services.service_tracker import setup_service_tracker from .settings import ApplicationSettings @@ -53,6 +54,8 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: setup_rpc_api_routes(app) setup_redis(app) + setup_service_tracker(app) + setup_rest_api(app) # ERROR HANDLERS diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index e69de29bb2d1..80e1d19af5a9 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -0,0 +1,15 @@ 
+from ._api import ( + get_tracked, + remove_tracked, + set_tracked_as_running, + set_tracked_as_stopped, +) +from ._setup import setup_service_tracker + +__all__: tuple[str, ...] = ( + "get_tracked", + "remove_tracked", + "set_tracked_as_running", + "set_tracked_as_stopped", + "setup_service_tracker", +) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py new file mode 100644 index 000000000000..6182dcf31c28 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -0,0 +1,44 @@ +from fastapi import FastAPI +from models_library.projects_nodes_io import NodeID + +from ._models import TrackedServiceModel, UserRequestedState +from ._setup import get_tracker +from ._tracker import Tracker + + +async def _set_requested_state( + app: FastAPI, node_id: NodeID, requested_state: UserRequestedState +) -> None: + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + model = TrackedServiceModel(requested_sate=requested_state) + else: + model.requested_sate = requested_state + await tracker.save(node_id, model) + + +async def set_tracked_as_running(app: FastAPI, node_id: NodeID) -> None: + """Stores the intention fo the user: ``start`` requested""" + await _set_requested_state(app, node_id, UserRequestedState.RUNNING) + + +async def set_tracked_as_stopped(app: FastAPI, node_id: NodeID) -> None: + """Stores the intention of the user: ``stop`` requested""" + await _set_requested_state(app, node_id, UserRequestedState.STOPPED) + + +# TODO: call this when can no longer find the service +async def remove_tracked(app: FastAPI, node_id: NodeID) -> None: + """Removes the service from tracking (usually after stop completes)""" + tracker: Tracker = get_tracker(app) + await tracker.delete(node_id) + + 
+async def get_tracked(app: FastAPI, node_id: NodeID) -> TrackedServiceModel | None: + """Returns information about the tracked service""" + tracker: Tracker = get_tracker(app) + return await tracker.load(node_id) + + +# TODO: figure out if we want to emit events for this in order for other parts to react properly and how to handle all these events diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py deleted file mode 100644 index ff4a82a6588d..000000000000 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_core.py +++ /dev/null @@ -1,2 +0,0 @@ -# We need an interface to REDIS -# once connected we need to unfuck the shit out of it diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py new file mode 100644 index 000000000000..2717c706a11f --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -0,0 +1,39 @@ +from dataclasses import dataclass +from enum import auto + +import orjson +from models_library.utils.enums import StrAutoEnum + + +class UserRequestedState(StrAutoEnum): + RUNNING = auto() + STOPPED = auto() + + +class ServiceStates(StrAutoEnum): + RUNNING = auto() + STOPPED = auto() + + STARTING = auto() + STOPPING = auto() + + UNKNOWN = auto() + + +@dataclass +class TrackedServiceModel: + # what the user desires (RUNNING or STOPPED) + requested_sate: UserRequestedState + + # set this after parsing the incoming state via the API calls + current_state: ServiceStates = ServiceStates.UNKNOWN + + # stored for debug mainly this is used to compute ``current_state`` + service_status: str = "" + + def to_bytes(self) -> bytes: + return 
orjson.dumps(self) + + @classmethod + def from_bytes(cls, json: bytes) -> "TrackedServiceModel": + return cls(**orjson.loads(json)) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py new file mode 100644 index 000000000000..40a47bb8becc --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_setup.py @@ -0,0 +1,19 @@ +from fastapi import FastAPI +from settings_library.redis import RedisDatabase + +from ..redis import get_redis_client +from ._tracker import Tracker + + +def setup_service_tracker(app: FastAPI) -> None: + async def on_startup() -> None: + app.state.service_tracker = Tracker( + get_redis_client(app, RedisDatabase.DYNAMIC_SERVICES) + ) + + app.add_event_handler("startup", on_startup) + + +def get_tracker(app: FastAPI) -> Tracker: + tracker: Tracker = app.state.service_tracker + return tracker diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py new file mode 100644 index 000000000000..d974d8d21c79 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py @@ -0,0 +1,31 @@ +from dataclasses import dataclass + +from models_library.projects_nodes_io import NodeID +from servicelib.redis import RedisClientSDKHealthChecked + +from ._models import TrackedServiceModel + + +def _get_key(node_id: NodeID) -> str: + return f"t::{node_id}" + + +@dataclass +class Tracker: + redis_client_sdk: RedisClientSDKHealthChecked + + async def save(self, node_id: NodeID, model: TrackedServiceModel) -> None: + await self.redis_client_sdk.redis.set(_get_key(node_id), model.to_bytes()) + + async def load(self, node_id: NodeID) -> 
TrackedServiceModel | None: + model_as_bytes: bytes | None = await self.redis_client_sdk.redis.get( + _get_key(node_id) + ) + return ( + None + if model_as_bytes is None + else TrackedServiceModel.from_bytes(model_as_bytes) + ) + + async def delete(self, node_id: NodeID) -> None: + await self.redis_client_sdk.redis.delete(_get_key(node_id)) diff --git a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py new file mode 100644 index 000000000000..9ee548b489ce --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py @@ -0,0 +1 @@ +# TODO: check if it is even readable in redis, or if we should store as bytes or if they can be decoded somehow from redis commander diff --git a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__models.py new file mode 100644 index 000000000000..f39be1b07892 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__models.py @@ -0,0 +1,23 @@ +import pytest +from faker import Faker +from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + ServiceStates, + TrackedServiceModel, + UserRequestedState, +) + + +@pytest.mark.parametrize("requested_state", UserRequestedState) +@pytest.mark.parametrize("current_state", ServiceStates) +def test_serialization( + faker: Faker, requested_state: UserRequestedState, current_state: ServiceStates +) -> None: + tracked_model = TrackedServiceModel( + service_status=faker.pystr(), + requested_sate=requested_state, + current_state=current_state, + ) + + as_bytes = tracked_model.to_bytes() + assert as_bytes + assert TrackedServiceModel.from_bytes(as_bytes) == tracked_model diff --git a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__tracker.py 
b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__tracker.py new file mode 100644 index 000000000000..7249b7f76e38 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__tracker.py @@ -0,0 +1,57 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument + +from uuid import uuid4 + +import pytest +from fastapi import FastAPI +from models_library.projects_nodes_io import NodeID +from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + TrackedServiceModel, + UserRequestedState, +) +from simcore_service_dynamic_scheduler.services.service_tracker._setup import ( + get_tracker, +) +from simcore_service_dynamic_scheduler.services.service_tracker._tracker import Tracker + +pytest_simcore_core_services_selection = [ + "redis", +] + + +@pytest.fixture +def app_environment( + disable_rabbitmq_setup: None, + app_environment: EnvVarsDict, + redis_service: RedisSettings, +) -> EnvVarsDict: + return app_environment + + +@pytest.fixture +def tracker(app: FastAPI) -> Tracker: + return get_tracker(app) + + +async def test_tracker_workflow(tracker: Tracker): + node_id: NodeID = uuid4() + + # ensure does not already exist + result = await tracker.load(node_id) + assert result is None + + # node creation + model = TrackedServiceModel(requested_sate=UserRequestedState.RUNNING) + await tracker.save(node_id, model) + + # check if exists + result = await tracker.load(node_id) + assert result == model + + # remove and check is missing + await tracker.delete(node_id) + result = await tracker.load(node_id) + assert result is None From acbcaee972b2f7a32666e648ddd904f2731a9d37 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 29 May 2024 15:27:18 +0200 Subject: [PATCH 005/122] refactor and added API tests --- .../services/service_tracker/_api.py | 2 + 
.../tests/unit/service_tracker/test__api.py | 55 +++++++++++++++++++ .../test__models.py | 0 .../test__tracker.py | 0 .../services_service_tracker/test__api.py | 1 - 5 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 services/dynamic-scheduler/tests/unit/service_tracker/test__api.py rename services/dynamic-scheduler/tests/unit/{services_service_tracker => service_tracker}/test__models.py (100%) rename services/dynamic-scheduler/tests/unit/{services_service_tracker => service_tracker}/test__tracker.py (100%) delete mode 100644 services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 6182dcf31c28..ac8ceb852029 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -18,11 +18,13 @@ async def _set_requested_state( await tracker.save(node_id, model) +# TODO: call this when user requests the start of a service async def set_tracked_as_running(app: FastAPI, node_id: NodeID) -> None: """Stores the intention fo the user: ``start`` requested""" await _set_requested_state(app, node_id, UserRequestedState.RUNNING) +# TODO: call this when user requests the stop of a service async def set_tracked_as_stopped(app: FastAPI, node_id: NodeID) -> None: """Stores the intention of the user: ``stop`` requested""" await _set_requested_state(app, node_id, UserRequestedState.STOPPED) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py new file mode 100644 index 000000000000..a698b3d6349c --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -0,0 +1,55 
@@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument + +from uuid import uuid4 + +import pytest +from fastapi import FastAPI +from models_library.projects_nodes_io import NodeID +from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.services.service_tracker import ( + get_tracked, + remove_tracked, + set_tracked_as_running, + set_tracked_as_stopped, +) +from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + UserRequestedState, +) + +pytest_simcore_core_services_selection = [ + "redis", +] + + +@pytest.fixture +def app_environment( + disable_rabbitmq_setup: None, + app_environment: EnvVarsDict, + redis_service: RedisSettings, +) -> EnvVarsDict: + return app_environment + + +async def test_services_tracer_workflow(app: FastAPI): + node_id: NodeID = uuid4() + + # service does not exist + assert await get_tracked(app, node_id) is None + + # service requested as to be in RUNNING + await set_tracked_as_running(app, node_id) + tracked_model = await get_tracked(app, node_id) + assert tracked_model + assert tracked_model.requested_sate == UserRequestedState.RUNNING + + # service requested as to be in STOPPED + await set_tracked_as_stopped(app, node_id) + tracked_model = await get_tracked(app, node_id) + assert tracked_model + assert tracked_model.requested_sate == UserRequestedState.STOPPED + + # remove service + await remove_tracked(app, node_id) + assert await get_tracked(app, node_id) is None diff --git a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py similarity index 100% rename from services/dynamic-scheduler/tests/unit/services_service_tracker/test__models.py rename to services/dynamic-scheduler/tests/unit/service_tracker/test__models.py diff --git 
a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py similarity index 100% rename from services/dynamic-scheduler/tests/unit/services_service_tracker/test__tracker.py rename to services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py diff --git a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py deleted file mode 100644 index 9ee548b489ce..000000000000 --- a/services/dynamic-scheduler/tests/unit/services_service_tracker/test__api.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: check if it is even readable in redis, or if we should store as bytes or if they can be decoded somehow from redis commander From 6c72d620b224a83aea32e43ec5fea9c780e0795b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 29 May 2024 15:34:34 +0200 Subject: [PATCH 006/122] connected to start and stop --- .../api/rpc/_services.py | 5 +++++ .../services/service_tracker/__init__.py | 8 ++++---- .../services/service_tracker/_api.py | 9 ++------- .../tests/unit/service_tracker/test__api.py | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index 416775b9d252..5a5404449a8f 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -13,6 +13,7 @@ from ...core.settings import ApplicationSettings from ...services.director_v2 import DirectorV2Client +from ...services.service_tracker import set_request_as_running, set_request_as_stopped router = RPCRouter() @@ -29,6 +30,8 @@ async def get_service_status( async def run_dynamic_service( app: FastAPI, *, 
rpc_dynamic_service_create: RPCDynamicServiceCreate ) -> NodeGet | DynamicServiceGet: + await set_request_as_running(app, rpc_dynamic_service_create.node_uuid) + director_v2_client = DirectorV2Client.get_from_app_state(app) return await director_v2_client.run_dynamic_service(rpc_dynamic_service_create) @@ -42,6 +45,8 @@ async def run_dynamic_service( async def stop_dynamic_service( app: FastAPI, *, node_id: NodeID, simcore_user_agent: str, save_state: bool ) -> NodeGet | DynamicServiceGet: + await set_request_as_stopped(app, node_id) + director_v2_client = DirectorV2Client.get_from_app_state(app) settings: ApplicationSettings = app.state.settings return await director_v2_client.stop_dynamic_service( diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index 80e1d19af5a9..f065530e3000 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -1,15 +1,15 @@ from ._api import ( get_tracked, remove_tracked, - set_tracked_as_running, - set_tracked_as_stopped, + set_request_as_running, + set_request_as_stopped, ) from ._setup import setup_service_tracker __all__: tuple[str, ...] 
= ( "get_tracked", "remove_tracked", - "set_tracked_as_running", - "set_tracked_as_stopped", + "set_request_as_running", + "set_request_as_stopped", "setup_service_tracker", ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index ac8ceb852029..896ad76458ca 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -18,14 +18,12 @@ async def _set_requested_state( await tracker.save(node_id, model) -# TODO: call this when user requests the start of a service -async def set_tracked_as_running(app: FastAPI, node_id: NodeID) -> None: +async def set_request_as_running(app: FastAPI, node_id: NodeID) -> None: """Stores the intention fo the user: ``start`` requested""" await _set_requested_state(app, node_id, UserRequestedState.RUNNING) -# TODO: call this when user requests the stop of a service -async def set_tracked_as_stopped(app: FastAPI, node_id: NodeID) -> None: +async def set_request_as_stopped(app: FastAPI, node_id: NodeID) -> None: """Stores the intention of the user: ``stop`` requested""" await _set_requested_state(app, node_id, UserRequestedState.STOPPED) @@ -41,6 +39,3 @@ async def get_tracked(app: FastAPI, node_id: NodeID) -> TrackedServiceModel | No """Returns information about the tracked service""" tracker: Tracker = get_tracker(app) return await tracker.load(node_id) - - -# TODO: figure out if we want to emit events for this in order for other parts to react properly and how to handle all these events diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index a698b3d6349c..059e415fd19b 100644 --- 
a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -11,8 +11,8 @@ from simcore_service_dynamic_scheduler.services.service_tracker import ( get_tracked, remove_tracked, - set_tracked_as_running, - set_tracked_as_stopped, + set_request_as_running, + set_request_as_stopped, ) from simcore_service_dynamic_scheduler.services.service_tracker._models import ( UserRequestedState, @@ -39,13 +39,13 @@ async def test_services_tracer_workflow(app: FastAPI): assert await get_tracked(app, node_id) is None # service requested as to be in RUNNING - await set_tracked_as_running(app, node_id) + await set_request_as_running(app, node_id) tracked_model = await get_tracked(app, node_id) assert tracked_model assert tracked_model.requested_sate == UserRequestedState.RUNNING # service requested as to be in STOPPED - await set_tracked_as_stopped(app, node_id) + await set_request_as_stopped(app, node_id) tracked_model = await get_tracked(app, node_id) assert tracked_model assert tracked_model.requested_sate == UserRequestedState.STOPPED From 1c914bd1823fdc395afd9e5fb58c3f3364f2e06f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 29 May 2024 15:44:37 +0200 Subject: [PATCH 007/122] refactor --- .../src/simcore_service_dynamic_scheduler/core/application.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py index f51f0b754370..1a1b868fe5d5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py @@ -50,10 +50,11 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: # PLUGINS SETUP setup_director_v2(app) + setup_rabbitmq(app) setup_rpc_api_routes(app) - 
setup_redis(app) + setup_redis(app) setup_service_tracker(app) setup_rest_api(app) From 1de0248493f08a9f07d8f09c0f9583473057a45e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 10:47:34 +0200 Subject: [PATCH 008/122] fixed tests --- .../tests/fastapi/test_application.py | 27 +++++++++++++++++++ services/dynamic-scheduler/tests/conftest.py | 6 +++++ .../unit/api_rest/test_api_rest__health.py | 5 +++- .../unit/api_rest/test_api_rest__meta.py | 1 + .../unit/api_rpc/test_api_rpc__services.py | 4 ++- .../tests/unit/test_services_rabbitmq.py | 1 + 6 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 packages/service-library/tests/fastapi/test_application.py diff --git a/packages/service-library/tests/fastapi/test_application.py b/packages/service-library/tests/fastapi/test_application.py new file mode 100644 index 000000000000..7def9fa95359 --- /dev/null +++ b/packages/service-library/tests/fastapi/test_application.py @@ -0,0 +1,27 @@ +import asyncio + +from asgi_lifespan import LifespanManager +from fastapi import FastAPI + + +def _setup(app: FastAPI, index: int) -> None: + async def startup() -> None: + print(f"[{index}] startup") + + async def shutdown() -> None: + print(f"[{index}] shutdown") + + app.add_event_handler("startup", startup) + app.add_event_handler("startup", shutdown) + + +async def test_application_lifespan() -> None: + app = FastAPI() + + for i in range(5): + _setup(app, i) + + async with LifespanManager(app): + print("DONE startup") + await asyncio.sleep(1) + print("DONE shutdown") diff --git a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index 641e26559b5a..4a588168ddb9 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ b/services/dynamic-scheduler/tests/conftest.py @@ -87,6 +87,12 @@ def disable_redis_setup(mocker: MockerFixture) -> None: mocker.patch(f"{base_path}.setup_redis") +@pytest.fixture +def disable_service_tracker_setup(mocker: MockerFixture) -> 
None: + base_path = "simcore_service_dynamic_scheduler.core.application" + mocker.patch(f"{base_path}.setup_service_tracker") + + MAX_TIME_FOR_APP_TO_STARTUP = 10 MAX_TIME_FOR_APP_TO_SHUTDOWN = 10 diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py index 6b892e774f1f..301fd2f76e65 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py @@ -39,7 +39,10 @@ def mock_rabbitmq_clients( @pytest.fixture def mock_redis_client( - disable_redis_setup: None, mocker: MockerFixture, redis_client_ok: bool + disable_redis_setup: None, + disable_service_tracker_setup: None, + mocker: MockerFixture, + redis_client_ok: bool, ) -> None: base_path = "simcore_service_dynamic_scheduler.api.rest._dependencies" mocker.patch( diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py index 6e68190bcee9..431be1647dd3 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py @@ -14,6 +14,7 @@ def app_environment( disable_rabbitmq_setup: None, disable_redis_setup: None, + disable_service_tracker_setup: None, app_environment: EnvVarsDict, ) -> EnvVarsDict: return app_environment diff --git a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py index 34387e93d7ff..67db3e4d85f2 100644 --- a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py +++ b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py @@ -24,8 +24,10 @@ ServiceWasNotFoundError, ) from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisSettings 
pytest_simcore_core_services_selection = [ + "redis", "rabbit", ] @@ -125,9 +127,9 @@ def mock_director_v2_service_state( @pytest.fixture def app_environment( - disable_redis_setup: None, app_environment: EnvVarsDict, rabbit_service: RabbitSettings, + redis_service: RedisSettings, ) -> EnvVarsDict: return app_environment diff --git a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py index feefc0c1aa4a..8df0a3f8a075 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py +++ b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py @@ -21,6 +21,7 @@ @pytest.fixture def app_environment( disable_redis_setup: None, + disable_service_tracker_setup: None, app_environment: EnvVarsDict, rabbit_service: RabbitSettings, ) -> EnvVarsDict: From 98f5fa9a5b0cdc26e86e604516908f3d6682ad87 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 10:48:11 +0200 Subject: [PATCH 009/122] reverted --- .../tests/fastapi/test_application.py | 27 ------------------- 1 file changed, 27 deletions(-) delete mode 100644 packages/service-library/tests/fastapi/test_application.py diff --git a/packages/service-library/tests/fastapi/test_application.py b/packages/service-library/tests/fastapi/test_application.py deleted file mode 100644 index 7def9fa95359..000000000000 --- a/packages/service-library/tests/fastapi/test_application.py +++ /dev/null @@ -1,27 +0,0 @@ -import asyncio - -from asgi_lifespan import LifespanManager -from fastapi import FastAPI - - -def _setup(app: FastAPI, index: int) -> None: - async def startup() -> None: - print(f"[{index}] startup") - - async def shutdown() -> None: - print(f"[{index}] shutdown") - - app.add_event_handler("startup", startup) - app.add_event_handler("startup", shutdown) - - -async def test_application_lifespan() -> None: - app = FastAPI() - - for i in range(5): - _setup(app, i) - - async with LifespanManager(app): - print("DONE 
startup") - await asyncio.sleep(1) - print("DONE shutdown") From 90cd4b4db2307b125017f48f19091ea821a3c225 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 11:57:08 +0200 Subject: [PATCH 010/122] tracker can now list all tracked --- .../services/service_tracker/__init__.py | 2 ++ .../services/service_tracker/_api.py | 6 ++++++ .../services/service_tracker/_tracker.py | 13 +++++++++++- .../services/status_monitor/__init__.py | 0 services/dynamic-scheduler/tests/conftest.py | 11 ++++++++++ .../tests/unit/service_tracker/test__api.py | 20 ++++++++++++++++++- .../unit/service_tracker/test__tracker.py | 17 ++++++++++++++++ 7 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index f065530e3000..84e142d8bbb3 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -1,4 +1,5 @@ from ._api import ( + get_all_tracked, get_tracked, remove_tracked, set_request_as_running, @@ -7,6 +8,7 @@ from ._setup import setup_service_tracker __all__: tuple[str, ...] 
= ( + "get_all_tracked", "get_tracked", "remove_tracked", "set_request_as_running", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 896ad76458ca..de05ed69928d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -39,3 +39,9 @@ async def get_tracked(app: FastAPI, node_id: NodeID) -> TrackedServiceModel | No """Returns information about the tracked service""" tracker: Tracker = get_tracker(app) return await tracker.load(node_id) + + +async def get_all_tracked(app: FastAPI) -> list[TrackedServiceModel]: + """Returns all tracked services""" + tracker: Tracker = get_tracker(app) + return await tracker.all() diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py index d974d8d21c79..9cc61271c930 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py @@ -1,13 +1,16 @@ from dataclasses import dataclass +from typing import Final from models_library.projects_nodes_io import NodeID from servicelib.redis import RedisClientSDKHealthChecked from ._models import TrackedServiceModel +_KEY_PREFIX: Final[str] = "t::" + def _get_key(node_id: NodeID) -> str: - return f"t::{node_id}" + return f"{_KEY_PREFIX}{node_id}" @dataclass @@ -29,3 +32,11 @@ async def load(self, node_id: NodeID) -> TrackedServiceModel | None: async def delete(self, node_id: NodeID) -> None: await self.redis_client_sdk.redis.delete(_get_key(node_id)) + + async 
def all(self) -> list[TrackedServiceModel]: + found_keys = await self.redis_client_sdk.redis.keys(f"{_KEY_PREFIX}*") + return [ + TrackedServiceModel.from_bytes(v) + for v in await self.redis_client_sdk.redis.mget(found_keys) + if v is not None + ] diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index 4a588168ddb9..8f651396ba73 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ b/services/dynamic-scheduler/tests/conftest.py @@ -13,6 +13,9 @@ from pytest_mock import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict from pytest_simcore.helpers.utils_envs import setenvs_from_dict +from servicelib.redis import RedisClientsManager +from servicelib.utils import logged_gather +from settings_library.redis import RedisDatabase, RedisSettings from simcore_service_dynamic_scheduler.core.application import create_app pytest_plugins = [ @@ -108,3 +111,11 @@ async def app( shutdown_timeout=None if is_pdb_enabled else MAX_TIME_FOR_APP_TO_SHUTDOWN, ): yield test_app + + +@pytest.fixture +async def remove_redis_data(redis_service: RedisSettings) -> None: + async with RedisClientsManager(set(RedisDatabase), redis_service) as manager: + await logged_gather( + *[manager.client(d).redis.flushall() for d in RedisDatabase] + ) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 059e415fd19b..fe57461f5b7c 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -6,9 +6,12 @@ import pytest from fastapi import FastAPI from 
models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeInt from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.utils import logged_gather from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( + get_all_tracked, get_tracked, remove_tracked, set_request_as_running, @@ -28,11 +31,13 @@ def app_environment( disable_rabbitmq_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, + remove_redis_data: None, ) -> EnvVarsDict: return app_environment -async def test_services_tracer_workflow(app: FastAPI): +@pytest.mark.parametrize("item_count", [100]) +async def test_services_tracer_workflow(app: FastAPI, item_count: NonNegativeInt): node_id: NodeID = uuid4() # service does not exist @@ -53,3 +58,16 @@ async def test_services_tracer_workflow(app: FastAPI): # remove service await remove_tracked(app, node_id) assert await get_tracked(app, node_id) is None + + # check listing services + assert await get_all_tracked(app) == [] + + await logged_gather( + *[set_request_as_stopped(app, uuid4()) for _ in range(item_count)], + max_concurrency=100 + ) + await logged_gather( + *[set_request_as_running(app, uuid4()) for _ in range(item_count)], + max_concurrency=100 + ) + assert len(await get_all_tracked(app)) == item_count * 2 diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py index 7249b7f76e38..34cb3e285e78 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -6,7 +6,9 @@ import pytest from fastapi import FastAPI from models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeInt from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.utils import logged_gather from settings_library.redis import 
RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker._models import ( TrackedServiceModel, @@ -27,6 +29,7 @@ def app_environment( disable_rabbitmq_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, + remove_redis_data: None, ) -> EnvVarsDict: return app_environment @@ -55,3 +58,17 @@ async def test_tracker_workflow(tracker: Tracker): await tracker.delete(node_id) result = await tracker.load(node_id) assert result is None + + +@pytest.mark.parametrize("item_count", [100]) +async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> None: + assert await tracker.all() == [] + + model_to_insert = TrackedServiceModel(requested_sate=UserRequestedState.RUNNING) + + await logged_gather( + *[tracker.save(uuid4(), model_to_insert) for _ in range(item_count)], + max_concurrency=100 + ) + + assert await tracker.all() == [model_to_insert for _ in range(item_count)] From 0858387795f6587f101c47566bc3792e2db49318 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 13:13:48 +0200 Subject: [PATCH 011/122] added context manager --- packages/service-library/src/servicelib/redis.py | 7 +++++++ .../services/status_monitor/_setup.py | 5 +++++ 2 files changed, 12 insertions(+) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py diff --git a/packages/service-library/src/servicelib/redis.py b/packages/service-library/src/servicelib/redis.py index 24e4d3f14519..3327e6fee2d7 100644 --- a/packages/service-library/src/servicelib/redis.py +++ b/packages/service-library/src/servicelib/redis.py @@ -240,3 +240,10 @@ async def shutdown(self) -> None: def client(self, database: RedisDatabase) -> RedisClientSDKHealthChecked: return self._client_sdks[database] + + async def __aenter__(self): + await self.setup() + return self + + async def __aexit__(self, *args): + await self.shutdown() diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py new file mode 100644 index 000000000000..51319491e1e1 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py @@ -0,0 +1,5 @@ +from fastapi import FastAPI + + +def setup(app: FastAPI) -> None: + pass From 061abc1070f4943997df22f4b3218db642bcb9ec Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 13:55:12 +0200 Subject: [PATCH 012/122] added last checked to the TrackedServiceModel --- services/dynamic-scheduler/requirements/_base.in | 2 +- .../dynamic-scheduler/requirements/_base.txt | 1 + .../services/service_tracker/_models.py | 16 ++++++++++++++++ .../services/status_monitor/_monitor.py | 11 +++++++++++ .../services/status_monitor/_setup.py | 11 +++++++++-- 5 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py diff --git a/services/dynamic-scheduler/requirements/_base.in b/services/dynamic-scheduler/requirements/_base.in index 74bc0519c820..8bbd3daa1d3d 100644 --- a/services/dynamic-scheduler/requirements/_base.in +++ b/services/dynamic-scheduler/requirements/_base.in @@ -14,7 +14,7 @@ --requirement ../../../packages/service-library/requirements/_fastapi.in - +arrow fastapi httpx packaging diff --git a/services/dynamic-scheduler/requirements/_base.txt b/services/dynamic-scheduler/requirements/_base.txt index f0ded2ee17a9..fdc1be2e7392 100644 --- a/services/dynamic-scheduler/requirements/_base.txt +++ b/services/dynamic-scheduler/requirements/_base.txt @@ -44,6 +44,7 @@ arrow==1.3.0 # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/models-library/requirements/_base.in # 
-r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/_base.in async-timeout==4.0.3 # via # aiohttp diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 2717c706a11f..484a94cd10db 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -1,9 +1,13 @@ from dataclasses import dataclass from enum import auto +from typing import Final +import arrow import orjson from models_library.utils.enums import StrAutoEnum +_SECONDS_TO_TRIGGER_SERVICE_CHECKING: Final[float] = 1e6 + class UserRequestedState(StrAutoEnum): RUNNING = auto() @@ -31,6 +35,18 @@ class TrackedServiceModel: # stored for debug mainly this is used to compute ``current_state`` service_status: str = "" + last_checked: float | None = None + + def set_last_checked_to_now(self) -> None: + self.last_checked = arrow.utcnow().timestamp() + + def seconds_since_last_check(self) -> float: + return ( + arrow.utcnow().timestamp() - self.last_checked + if self.last_checked + else _SECONDS_TO_TRIGGER_SERVICE_CHECKING + ) + def to_bytes(self) -> bytes: return orjson.dumps(self) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py new file mode 100644 index 000000000000..c5e9adfb3298 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -0,0 +1,11 @@ +from fastapi import FastAPI + + +class Monitor: + def __init__(self, app: FastAPI) -> None: + self.app = app + + # NOTE: THIS needs to be distributed only 1 at a time + + async 
def _check_status(self) -> None: + pass diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py index 51319491e1e1..4f599fc4f801 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py @@ -1,5 +1,12 @@ from fastapi import FastAPI -def setup(app: FastAPI) -> None: - pass +def setup_status_monitor(app: FastAPI) -> None: + async def on_startup() -> None: + pass + + async def on_shutdown() -> None: + pass + + app.add_event_handler("startup", on_startup) + app.add_event_handler("shutdown", on_shutdown) From 0c62ac3d8c2c35c80c6367c78c898c4d2c95762e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 14:20:08 +0200 Subject: [PATCH 013/122] tracker.all returns nodes and models now --- .../services/service_tracker/__init__.py | 2 ++ .../services/service_tracker/_api.py | 2 +- .../services/service_tracker/_tracker.py | 12 ++++---- .../tests/unit/service_tracker/test__api.py | 2 +- .../unit/service_tracker/test__models.py | 28 +++++++++++++++++-- .../unit/service_tracker/test__tracker.py | 14 ++++++---- 6 files changed, 46 insertions(+), 14 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index 84e142d8bbb3..a24de4819fe5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -5,6 +5,7 @@ set_request_as_running, set_request_as_stopped, ) +from ._models import TrackedServiceModel from ._setup import 
setup_service_tracker __all__: tuple[str, ...] = ( @@ -14,4 +15,5 @@ "set_request_as_running", "set_request_as_stopped", "setup_service_tracker", + "TrackedServiceModel", ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index de05ed69928d..21d45b0521a9 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -41,7 +41,7 @@ async def get_tracked(app: FastAPI, node_id: NodeID) -> TrackedServiceModel | No return await tracker.load(node_id) -async def get_all_tracked(app: FastAPI) -> list[TrackedServiceModel]: +async def get_all_tracked(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: """Returns all tracked services""" tracker: Tracker = get_tracker(app) return await tracker.all() diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py index 9cc61271c930..363b25f9296c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py @@ -33,10 +33,12 @@ async def load(self, node_id: NodeID) -> TrackedServiceModel | None: async def delete(self, node_id: NodeID) -> None: await self.redis_client_sdk.redis.delete(_get_key(node_id)) - async def all(self) -> list[TrackedServiceModel]: + async def all(self) -> dict[NodeID, TrackedServiceModel]: found_keys = await self.redis_client_sdk.redis.keys(f"{_KEY_PREFIX}*") - return [ - TrackedServiceModel.from_bytes(v) - for v in await self.redis_client_sdk.redis.mget(found_keys) + found_values = 
await self.redis_client_sdk.redis.mget(found_keys) + + return { + k: TrackedServiceModel.from_bytes(v) + for k, v in zip(found_keys, found_values, strict=True) if v is not None - ] + } diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index fe57461f5b7c..aa578476b6c6 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -60,7 +60,7 @@ async def test_services_tracer_workflow(app: FastAPI, item_count: NonNegativeInt assert await get_tracked(app, node_id) is None # check listing services - assert await get_all_tracked(app) == [] + assert await get_all_tracked(app) == {} await logged_gather( *[set_request_as_stopped(app, uuid4()) for _ in range(item_count)], diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py index f39be1b07892..35c52ce61abf 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py @@ -1,6 +1,10 @@ +import asyncio + +import arrow import pytest from faker import Faker from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + _SECONDS_TO_TRIGGER_SERVICE_CHECKING, ServiceStates, TrackedServiceModel, UserRequestedState, @@ -9,15 +13,35 @@ @pytest.mark.parametrize("requested_state", UserRequestedState) @pytest.mark.parametrize("current_state", ServiceStates) +@pytest.mark.parametrize("last_checked", [None, 1, arrow.utcnow().timestamp()]) def test_serialization( - faker: Faker, requested_state: UserRequestedState, current_state: ServiceStates -) -> None: + faker: Faker, + requested_state: UserRequestedState, + current_state: ServiceStates, + last_checked: float, +): tracked_model = TrackedServiceModel( service_status=faker.pystr(), 
requested_sate=requested_state, current_state=current_state, + last_checked=last_checked, ) as_bytes = tracked_model.to_bytes() assert as_bytes assert TrackedServiceModel.from_bytes(as_bytes) == tracked_model + + +async def test_last_checked(): + model = TrackedServiceModel(UserRequestedState.RUNNING) + + # when last_checked is None + assert model.seconds_since_last_check() == _SECONDS_TO_TRIGGER_SERVICE_CHECKING + + model.set_last_checked_to_now() + + assert model.seconds_since_last_check() < 0.1 + + await asyncio.sleep(0.1) + + assert model.seconds_since_last_check() > 0.1 diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py index 34cb3e285e78..06374a16a29c 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -17,7 +17,10 @@ from simcore_service_dynamic_scheduler.services.service_tracker._setup import ( get_tracker, ) -from simcore_service_dynamic_scheduler.services.service_tracker._tracker import Tracker +from simcore_service_dynamic_scheduler.services.service_tracker._tracker import ( + Tracker, + _get_key, +) pytest_simcore_core_services_selection = [ "redis", @@ -62,13 +65,14 @@ async def test_tracker_workflow(tracker: Tracker): @pytest.mark.parametrize("item_count", [100]) async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> None: - assert await tracker.all() == [] + assert await tracker.all() == {} model_to_insert = TrackedServiceModel(requested_sate=UserRequestedState.RUNNING) + data_to_insert = {uuid4(): model_to_insert for _ in range(item_count)} + await logged_gather( - *[tracker.save(uuid4(), model_to_insert) for _ in range(item_count)], - max_concurrency=100 + *[tracker.save(k, v) for k, v in data_to_insert.items()], max_concurrency=100 ) - assert await tracker.all() == [model_to_insert for _ in range(item_count)] 
+ assert await tracker.all() == {_get_key(k): v for k, v in data_to_insert.items()} From 2a00f59a4ebadeffb905f68820746ca0941ae24c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 16:08:08 +0200 Subject: [PATCH 014/122] upgrade faststream --- .../dynamic-scheduler/requirements/_base.txt | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/services/dynamic-scheduler/requirements/_base.txt b/services/dynamic-scheduler/requirements/_base.txt index fdc1be2e7392..d19eef44cf79 100644 --- a/services/dynamic-scheduler/requirements/_base.txt +++ b/services/dynamic-scheduler/requirements/_base.txt @@ -70,7 +70,7 @@ certifi==2024.2.2 # httpx click==8.1.7 # via - # typer-slim + # typer # uvicorn dnspython==2.6.1 # via email-validator @@ -92,7 +92,7 @@ fastapi==0.99.1 # -r requirements/../../../packages/service-library/requirements/_fastapi.in # -r requirements/_base.in # prometheus-fastapi-instrumentator -faststream==0.4.7 +faststream==0.5.10 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -r requirements/../../../packages/service-library/requirements/_base.in @@ -241,13 +241,13 @@ rich==13.7.1 # -c requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in - # typer-slim + # typer rpds-py==0.18.0 # via # jsonschema # referencing shellingham==1.5.4 - # via typer-slim + # via typer six==1.16.0 # via python-dateutil sniffio==1.3.1 @@ -287,19 +287,13 @@ tqdm==4.66.2 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -r requirements/../../../packages/service-library/requirements/_base.in -typer==0.12.0 +typer==0.12.3 # via # -c 
requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/_base.in # faststream -typer-cli==0.12.0 - # via typer -typer-slim==0.12.0 - # via - # typer - # typer-cli types-python-dateutil==2.9.0.20240316 # via arrow typing-extensions==4.10.0 @@ -311,7 +305,7 @@ typing-extensions==4.10.0 # fastapi # faststream # pydantic - # typer-slim + # typer # uvicorn uvicorn==0.29.0 # via From ccb767f20f9b9ed87141b9e3c6758f8c2beeb895 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 16:12:19 +0200 Subject: [PATCH 015/122] added test --- .../tests/unit/service_tracker/test__tracker.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py index 06374a16a29c..42483975d85e 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -30,6 +30,7 @@ @pytest.fixture def app_environment( disable_rabbitmq_setup: None, + disable_deferred_manager_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, remove_redis_data: None, @@ -76,3 +77,7 @@ async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> ) assert await tracker.all() == {_get_key(k): v for k, v in data_to_insert.items()} + + +async def test_remove_missing_key_does_not_raise_error(tracker: Tracker): + await tracker.delete(uuid4()) From 9c25cd73e400791f472df307c1e8c690c0fe599f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 16:12:36 +0200 Subject: [PATCH 016/122] fixed tests --- services/dynamic-scheduler/tests/conftest.py | 6 ++++++ 
.../tests/unit/service_tracker/test__api.py | 1 + 2 files changed, 7 insertions(+) diff --git a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index 8f651396ba73..f32c3ba74195 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ b/services/dynamic-scheduler/tests/conftest.py @@ -96,6 +96,12 @@ def disable_service_tracker_setup(mocker: MockerFixture) -> None: mocker.patch(f"{base_path}.setup_service_tracker") +@pytest.fixture +def disable_deferred_manager_setup(mocker: MockerFixture) -> None: + base_path = "simcore_service_dynamic_scheduler.core.application" + mocker.patch(f"{base_path}.setup_deferred_manager") + + MAX_TIME_FOR_APP_TO_STARTUP = 10 MAX_TIME_FOR_APP_TO_SHUTDOWN = 10 diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index aa578476b6c6..096fa456be85 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -29,6 +29,7 @@ @pytest.fixture def app_environment( disable_rabbitmq_setup: None, + disable_deferred_manager_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, remove_redis_data: None, From 59403877cfc25316fe24227b9262758437f6e3e1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 16:24:24 +0200 Subject: [PATCH 017/122] fixed tests --- .../tests/unit/api_rest/test_api_rest__health.py | 1 + .../dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py | 1 + services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py | 1 + services/dynamic-scheduler/tests/unit/test_services_redis.py | 1 + 4 files changed, 4 insertions(+) diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py index 301fd2f76e65..8365a72670e9 100644 --- 
a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py @@ -55,6 +55,7 @@ def mock_redis_client( def app_environment( mock_rabbitmq_clients: None, mock_redis_client: None, + disable_deferred_manager_setup: None, app_environment: EnvVarsDict, ) -> EnvVarsDict: return app_environment diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py index 431be1647dd3..79b59ab204df 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py @@ -15,6 +15,7 @@ def app_environment( disable_rabbitmq_setup: None, disable_redis_setup: None, disable_service_tracker_setup: None, + disable_deferred_manager_setup: None, app_environment: EnvVarsDict, ) -> EnvVarsDict: return app_environment diff --git a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py index 8df0a3f8a075..4600f18a0927 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py +++ b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py @@ -22,6 +22,7 @@ def app_environment( disable_redis_setup: None, disable_service_tracker_setup: None, + disable_deferred_manager_setup: None, app_environment: EnvVarsDict, rabbit_service: RabbitSettings, ) -> EnvVarsDict: diff --git a/services/dynamic-scheduler/tests/unit/test_services_redis.py b/services/dynamic-scheduler/tests/unit/test_services_redis.py index 896134c2278a..090f27ff5b38 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_redis.py +++ b/services/dynamic-scheduler/tests/unit/test_services_redis.py @@ -16,6 +16,7 @@ @pytest.fixture def app_environment( disable_rabbitmq_setup: None, + disable_deferred_manager_setup: None, app_environment: EnvVarsDict, redis_service: 
RedisSettings, ) -> EnvVarsDict: From 2e08854208eaa8a02063c6bf6a44f9f1b89eed02 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 16:34:48 +0200 Subject: [PATCH 018/122] enhances service_tracker --- .../services/service_tracker/__init__.py | 4 ++ .../services/service_tracker/_api.py | 45 +++++++++++++++- .../services/service_tracker/_models.py | 2 + .../tests/unit/service_tracker/test__api.py | 52 ++++++++++++++++++- 4 files changed, 100 insertions(+), 3 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index a24de4819fe5..469cbc1f668c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -2,8 +2,10 @@ get_all_tracked, get_tracked, remove_tracked, + set_new_status, set_request_as_running, set_request_as_stopped, + set_service_status_task_uid, ) from ._models import TrackedServiceModel from ._setup import setup_service_tracker @@ -12,8 +14,10 @@ "get_all_tracked", "get_tracked", "remove_tracked", + "set_new_status", "set_request_as_running", "set_request_as_stopped", + "set_service_status_task_uid", "setup_service_tracker", "TrackedServiceModel", ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 21d45b0521a9..65ff3f801217 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -1,10 +1,17 @@ +import logging + from fastapi import FastAPI +from 
models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID +from servicelib.deferred_tasks import TaskUID from ._models import TrackedServiceModel, UserRequestedState from ._setup import get_tracker from ._tracker import Tracker +_logger = logging.getLogger(__name__) + async def _set_requested_state( app: FastAPI, node_id: NodeID, requested_state: UserRequestedState @@ -28,9 +35,45 @@ async def set_request_as_stopped(app: FastAPI, node_id: NodeID) -> None: await _set_requested_state(app, node_id, UserRequestedState.STOPPED) -# TODO: call this when can no longer find the service +async def set_new_status( + app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle +) -> None: + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _logger.info( + "Could not find a %s entry for node_id %s: skipping set_new_status", + TrackedServiceModel.__name__, + node_id, + ) + return + + model.service_status = status.json() + model.set_last_checked_to_now() + model.service_status_task_uid = None + await tracker.save(node_id, model) + + +async def set_service_status_task_uid( + app: FastAPI, node_id: NodeID, task_uid: TaskUID +) -> None: + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _logger.info( + "Could not find a %s entry for node_id %s: skipping set_service_status_task_uid", + TrackedServiceModel.__name__, + node_id, + ) + return + + model.service_status_task_uid = task_uid + await tracker.save(node_id, model) + + async def remove_tracked(app: FastAPI, node_id: NodeID) -> None: """Removes the service from tracking (usually after stop completes)""" + # NOTE: does not raise if node_id is not found tracker: Tracker = get_tracker(app) await 
tracker.delete(node_id) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 484a94cd10db..8cab58b789c1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -5,6 +5,7 @@ import arrow import orjson from models_library.utils.enums import StrAutoEnum +from servicelib.deferred_tasks import TaskUID _SECONDS_TO_TRIGGER_SERVICE_CHECKING: Final[float] = 1e6 @@ -34,6 +35,7 @@ class TrackedServiceModel: # stored for debug mainly this is used to compute ``current_state`` service_status: str = "" + service_status_task_uid: TaskUID | None = None last_checked: float | None = None diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 096fa456be85..6b3d630f6b5e 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -4,18 +4,24 @@ from uuid import uuid4 import pytest +from faker import Faker from fastapi import FastAPI +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID from pydantic import NonNegativeInt from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.deferred_tasks import TaskUID from servicelib.utils import logged_gather from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( get_all_tracked, get_tracked, remove_tracked, + set_new_status, set_request_as_running, set_request_as_stopped, 
+ set_service_status_task_uid, ) from simcore_service_dynamic_scheduler.services.service_tracker._models import ( UserRequestedState, @@ -37,9 +43,15 @@ def app_environment( return app_environment +@pytest.fixture +def node_id() -> NodeID: + return uuid4() + + @pytest.mark.parametrize("item_count", [100]) -async def test_services_tracer_workflow(app: FastAPI, item_count: NonNegativeInt): - node_id: NodeID = uuid4() +async def test_services_tracer_workflow( + app: FastAPI, node_id: NodeID, item_count: NonNegativeInt +): # service does not exist assert await get_tracked(app, node_id) is None @@ -72,3 +84,39 @@ async def test_services_tracer_workflow(app: FastAPI, item_count: NonNegativeInt max_concurrency=100 ) assert len(await get_all_tracked(app)) == item_count * 2 + + +@pytest.mark.parametrize( + "status", + [ + NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + *[ + DynamicServiceGet.parse_obj(x) + for x in DynamicServiceGet.Config.schema_extra["examples"] + ], + NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + ], +) +async def test_set_new_status( + app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle +): + await set_request_as_running(app, node_id) + + await set_new_status(app, node_id, status) + + model = await get_tracked(app, node_id) + assert model + + assert model.service_status == status.json() + + +async def test_set_service_status_task_uid(app: FastAPI, node_id: NodeID, faker: Faker): + await set_request_as_running(app, node_id) + + task_uid = TaskUID(faker.uuid4()) + await set_service_status_task_uid(app, node_id, task_uid) + + model = await get_tracked(app, node_id) + assert model + + assert model.service_status_task_uid == task_uid From 374e84a5dd11843bfd51819fa778774967c37f24 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 30 May 2024 16:35:04 +0200 Subject: [PATCH 019/122] wired setup functions --- .../simcore_service_dynamic_scheduler/core/application.py | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py index 1a1b868fe5d5..d4b5f357ede1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py @@ -15,10 +15,12 @@ ) from ..api.rest.routes import setup_rest_api from ..api.rpc.routes import setup_rpc_api_routes +from ..services.deferred_manager import setup_deferred_manager from ..services.director_v2 import setup_director_v2 from ..services.rabbitmq import setup_rabbitmq from ..services.redis import setup_redis from ..services.service_tracker import setup_service_tracker +from ..services.status_monitor._setup import setup_status_monitor from .settings import ApplicationSettings @@ -55,7 +57,10 @@ def create_app(settings: ApplicationSettings | None = None) -> FastAPI: setup_rpc_api_routes(app) setup_redis(app) + setup_service_tracker(app) + setup_deferred_manager(app) + setup_status_monitor(app) setup_rest_api(app) From a20929c54f89b39fa456a033f103e4639e58726e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 31 May 2024 10:15:13 +0200 Subject: [PATCH 020/122] extended services tracker and fixed tests --- .../services/service_tracker/__init__.py | 2 + .../services/service_tracker/_api.py | 42 ++++++++++++++++- .../services/service_tracker/_models.py | 17 ++----- .../tests/unit/service_tracker/test__api.py | 47 +++++++++++++++++++ .../unit/service_tracker/test__models.py | 32 +++++++------ 5 files changed, 111 insertions(+), 29 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index 469cbc1f668c..cb17648f1e66 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -2,6 +2,7 @@ get_all_tracked, get_tracked, remove_tracked, + set_check_status_after_to, set_new_status, set_request_as_running, set_request_as_stopped, @@ -14,6 +15,7 @@ "get_all_tracked", "get_tracked", "remove_tracked", + "set_check_status_after_to", "set_new_status", "set_request_as_running", "set_request_as_stopped", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 65ff3f801217..5a7c1c15ac0c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -1,9 +1,12 @@ import logging +from datetime import timedelta +from typing import Final from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID +from models_library.services_enums import ServiceState from servicelib.deferred_tasks import TaskUID from ._models import TrackedServiceModel, UserRequestedState @@ -13,6 +16,10 @@ _logger = logging.getLogger(__name__) +_LOW_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=1) +_NORMAL_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=5) + + async def _set_requested_state( app: FastAPI, node_id: NodeID, requested_state: UserRequestedState ) -> None: @@ -35,6 +42,22 @@ async def set_request_as_stopped(app: FastAPI, node_id: NodeID) -> None: await _set_requested_state(app, node_id, UserRequestedState.STOPPED) +def _get_poll_interval(status: 
NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: + # Attributes where to find the state + # NodeGet -> service_state + # DynamicServiceGet -> state + # NodeGetIdle -> service_state + state_key = "state" if isinstance(status, DynamicServiceGet) else "service_state" + + state: ServiceState | str = getattr(status, state_key) + state_str: str = state.value if isinstance(state, ServiceState) else state + + if state_str != "running": + return _LOW_RATE_POLL_INTERVAL + + return _NORMAL_RATE_POLL_INTERVAL + + async def set_new_status( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> None: @@ -49,11 +72,28 @@ async def set_new_status( return model.service_status = status.json() - model.set_last_checked_to_now() + model.set_check_status_after_to(_get_poll_interval(status)) model.service_status_task_uid = None await tracker.save(node_id, model) +async def set_check_status_after_to( + app: FastAPI, node_id: NodeID, delay: timedelta +) -> None: + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _logger.info( + "Could not find a %s entry for node_id %s: skipping set_new_status", + TrackedServiceModel.__name__, + node_id, + ) + return + + model.set_check_status_after_to(delay) + await tracker.save(node_id, model) + + async def set_service_status_task_uid( app: FastAPI, node_id: NodeID, task_uid: TaskUID ) -> None: diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 8cab58b789c1..d3de3a82cc11 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -1,14 +1,12 @@ from dataclasses import dataclass +from datetime import timedelta 
from enum import auto -from typing import Final import arrow import orjson from models_library.utils.enums import StrAutoEnum from servicelib.deferred_tasks import TaskUID -_SECONDS_TO_TRIGGER_SERVICE_CHECKING: Final[float] = 1e6 - class UserRequestedState(StrAutoEnum): RUNNING = auto() @@ -37,17 +35,10 @@ class TrackedServiceModel: service_status: str = "" service_status_task_uid: TaskUID | None = None - last_checked: float | None = None - - def set_last_checked_to_now(self) -> None: - self.last_checked = arrow.utcnow().timestamp() + check_status_after: float | None = None - def seconds_since_last_check(self) -> float: - return ( - arrow.utcnow().timestamp() - self.last_checked - if self.last_checked - else _SECONDS_TO_TRIGGER_SERVICE_CHECKING - ) + def set_check_status_after_to(self, delay: timedelta) -> None: + self.check_status_after = (arrow.utcnow() + delay).timestamp() def to_bytes(self) -> bytes: return orjson.dumps(self) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 6b3d630f6b5e..d348e51ec68c 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -1,8 +1,10 @@ # pylint:disable=redefined-outer-name # pylint:disable=unused-argument +from datetime import timedelta from uuid import uuid4 +import arrow import pytest from faker import Faker from fastapi import FastAPI @@ -18,11 +20,17 @@ get_all_tracked, get_tracked, remove_tracked, + set_check_status_after_to, set_new_status, set_request_as_running, set_request_as_stopped, set_service_status_task_uid, ) +from simcore_service_dynamic_scheduler.services.service_tracker._api import ( + _LOW_RATE_POLL_INTERVAL, + _NORMAL_RATE_POLL_INTERVAL, + _get_poll_interval, +) from simcore_service_dynamic_scheduler.services.service_tracker._models import ( UserRequestedState, ) @@ -120,3 +128,42 @@ async def 
test_set_service_status_task_uid(app: FastAPI, node_id: NodeID, faker: assert model assert model.service_status_task_uid == task_uid + + +async def test_set_check_status_after_to(app: FastAPI, node_id: NodeID): + await set_request_as_running(app, node_id) + + delay = timedelta(seconds=6) + + benfore = (arrow.utcnow() + delay).timestamp() + await set_check_status_after_to(app, node_id, delay) + after = (arrow.utcnow() + delay).timestamp() + + model = await get_tracked(app, node_id) + assert model + assert model.check_status_after + + assert benfore < model.check_status_after < after + + +@pytest.mark.parametrize( + "status, expected_poll_interval", + [ + ( + NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + _LOW_RATE_POLL_INTERVAL, + ), + *[ + (DynamicServiceGet.parse_obj(x), _NORMAL_RATE_POLL_INTERVAL) + for x in DynamicServiceGet.Config.schema_extra["examples"] + ], + ( + NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + _LOW_RATE_POLL_INTERVAL, + ), + ], +) +def test__get_poll_interval( + status: NodeGet | DynamicServiceGet | NodeGetIdle, expected_poll_interval: timedelta +): + assert _get_poll_interval(status) == expected_poll_interval diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py index 35c52ce61abf..836a7cba1545 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py @@ -1,10 +1,10 @@ -import asyncio +from datetime import timedelta import arrow import pytest from faker import Faker +from servicelib.deferred_tasks import TaskUID from simcore_service_dynamic_scheduler.services.service_tracker._models import ( - _SECONDS_TO_TRIGGER_SERVICE_CHECKING, ServiceStates, TrackedServiceModel, UserRequestedState, @@ -13,18 +13,21 @@ @pytest.mark.parametrize("requested_state", UserRequestedState) 
@pytest.mark.parametrize("current_state", ServiceStates) -@pytest.mark.parametrize("last_checked", [None, 1, arrow.utcnow().timestamp()]) +@pytest.mark.parametrize("check_status_after", [None, 1, arrow.utcnow().timestamp()]) +@pytest.mark.parametrize("service_status_task_uid", [None, TaskUID("ok")]) def test_serialization( faker: Faker, requested_state: UserRequestedState, current_state: ServiceStates, - last_checked: float, + check_status_after: float | None, + service_status_task_uid: TaskUID | None, ): tracked_model = TrackedServiceModel( - service_status=faker.pystr(), requested_sate=requested_state, current_state=current_state, - last_checked=last_checked, + service_status=faker.pystr(), + check_status_after=check_status_after, + service_status_task_uid=service_status_task_uid, ) as_bytes = tracked_model.to_bytes() @@ -32,16 +35,15 @@ def test_serialization( assert TrackedServiceModel.from_bytes(as_bytes) == tracked_model -async def test_last_checked(): +async def test_set_check_status_after_to(): model = TrackedServiceModel(UserRequestedState.RUNNING) + assert model.check_status_after is None - # when last_checked is None - assert model.seconds_since_last_check() == _SECONDS_TO_TRIGGER_SERVICE_CHECKING - - model.set_last_checked_to_now() - - assert model.seconds_since_last_check() < 0.1 + delay = timedelta(seconds=4) - await asyncio.sleep(0.1) + before = (arrow.utcnow() + delay).timestamp() + model.set_check_status_after_to(delay) + after = (arrow.utcnow() + delay).timestamp() - assert model.seconds_since_last_check() > 0.1 + assert model.check_status_after + assert before < model.check_status_after < after From 0b9bcb1a6e94e0191e785f41f8bdd2b5e34d2824 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 31 May 2024 10:19:53 +0200 Subject: [PATCH 021/122] mypy --- .../services/service_tracker/_api.py | 4 ++-- .../services/service_tracker/_models.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 5a7c1c15ac0c..b7be3608e54b 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -34,12 +34,12 @@ async def _set_requested_state( async def set_request_as_running(app: FastAPI, node_id: NodeID) -> None: """Stores the intention fo the user: ``start`` requested""" - await _set_requested_state(app, node_id, UserRequestedState.RUNNING) + await _set_requested_state(app, node_id, UserRequestedState.RUNNING) # type: ignore async def set_request_as_stopped(app: FastAPI, node_id: NodeID) -> None: """Stores the intention of the user: ``stop`` requested""" - await _set_requested_state(app, node_id, UserRequestedState.STOPPED) + await _set_requested_state(app, node_id, UserRequestedState.STOPPED) # type: ignore def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index d3de3a82cc11..aef2c293d19f 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -29,7 +29,7 @@ class TrackedServiceModel: requested_sate: UserRequestedState # set this after parsing the incoming state via the API calls - current_state: ServiceStates = ServiceStates.UNKNOWN + current_state: ServiceStates = ServiceStates.UNKNOWN # type: ignore # stored for debug mainly this is used to compute ``current_state`` service_status: str = "" 
@@ -41,7 +41,8 @@ def set_check_status_after_to(self, delay: timedelta) -> None: self.check_status_after = (arrow.utcnow() + delay).timestamp() def to_bytes(self) -> bytes: - return orjson.dumps(self) + result: bytes = orjson.dumps(self) + return result @classmethod def from_bytes(cls, json: bytes) -> "TrackedServiceModel": From 0cd023f5bb0f47d83ccca35c9a305e22da0c5e8a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 31 May 2024 10:20:48 +0200 Subject: [PATCH 022/122] added deferred manager and setup for status_monitor --- .../services/deferred_manager.py | 24 +++++++++++++++++++ .../services/status_monitor/_setup.py | 12 ++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py new file mode 100644 index 000000000000..8544c0f38e6f --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/deferred_manager.py @@ -0,0 +1,24 @@ +from fastapi import FastAPI +from servicelib.deferred_tasks import DeferredManager +from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisDatabase + +from .redis import get_redis_client + + +def setup_deferred_manager(app: FastAPI) -> None: + async def on_startup() -> None: + rabbit_settings: RabbitSettings = app.state.settings.DYNAMIC_SCHEDULER_RABBITMQ + + redis_client_sdk = get_redis_client(app, RedisDatabase.DEFERRED_TASKS) + app.state.deferred_manager = manager = DeferredManager( + rabbit_settings, redis_client_sdk, globals_context={"app": app} + ) + await manager.setup() + + async def on_shutdown() -> None: + manager: DeferredManager = app.state.deferred_manager + await manager.shutdown() + + app.add_event_handler("startup", on_startup) + 
app.add_event_handler("shutdown", on_shutdown) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py index 4f599fc4f801..803b5c77e632 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py @@ -1,12 +1,20 @@ +from datetime import timedelta + from fastapi import FastAPI +from ._monitor import Monitor + def setup_status_monitor(app: FastAPI) -> None: async def on_startup() -> None: - pass + app.state.status_monitor = monitor = Monitor( + app, check_threshold=timedelta(seconds=1) + ) + await monitor.setup() async def on_shutdown() -> None: - pass + monitor: Monitor = app.state.status_monitor + await monitor.shutdown() app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) From 4598b9b7d9789fb67a6ad7016c7d6b2b1c4c9088 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 11:13:41 +0200 Subject: [PATCH 023/122] refactor RedisClientsManager --- .../service-library/src/servicelib/redis.py | 36 ++++++++++++++----- packages/service-library/tests/test_redis.py | 11 +++--- .../modules/redis.py | 11 +++--- .../src/simcore_service_webserver/redis.py | 19 +++++----- 4 files changed, 52 insertions(+), 25 deletions(-) diff --git a/packages/service-library/src/servicelib/redis.py b/packages/service-library/src/servicelib/redis.py index 24e4d3f14519..fbad667293e6 100644 --- a/packages/service-library/src/servicelib/redis.py +++ b/packages/service-library/src/servicelib/redis.py @@ -26,6 +26,12 @@ _DEFAULT_SOCKET_TIMEOUT: Final[datetime.timedelta] = datetime.timedelta(seconds=30) +_DEFAULT_DECODE_RESPONSES: Final[bool] = True +_DEFAULT_HEALTH_CHECK_INTERVAL: Final[datetime.timedelta] = datetime.timedelta( + seconds=5 +) + 
+ _logger = logging.getLogger(__name__) @@ -44,6 +50,7 @@ class CouldNotConnectToRedisError(BaseRedisError): @dataclass class RedisClientSDK: redis_dsn: str + decode_responses: bool = _DEFAULT_DECODE_RESPONSES _client: aioredis.Redis = field(init=False) @property @@ -63,7 +70,7 @@ def __post_init__(self): socket_timeout=_DEFAULT_SOCKET_TIMEOUT.total_seconds(), socket_connect_timeout=_DEFAULT_SOCKET_TIMEOUT.total_seconds(), encoding="utf-8", - decode_responses=True, + decode_responses=self.decode_responses, ) @retry(**RedisRetryPolicyUponInitialization(_logger).kwargs) @@ -178,9 +185,11 @@ class RedisClientSDKHealthChecked(RedisClientSDK): def __init__( self, redis_dsn: str, - health_check_interval: datetime.timedelta = datetime.timedelta(seconds=5), + *, + decode_responses: bool = _DEFAULT_DECODE_RESPONSES, + health_check_interval: datetime.timedelta = _DEFAULT_HEALTH_CHECK_INTERVAL, ) -> None: - super().__init__(redis_dsn) + super().__init__(redis_dsn, decode_responses) self.health_check_interval: datetime.timedelta = health_check_interval self._health_check_task: Task | None = None self._is_healthy: bool = True @@ -205,22 +214,29 @@ async def setup(self) -> None: self._health_check_task = start_periodic_task( self._check_health, interval=self.health_check_interval, - task_name="redis_service_health_check", + task_name=f"redis_service_health_check_{self.redis_dsn}", ) async def shutdown(self) -> None: if self._health_check_task: - await stop_periodic_task(self._health_check_task) + await stop_periodic_task(self._health_check_task, timeout=1) await super().shutdown() +@dataclass(frozen=True) +class RedisManagerDBConfig: + database: RedisDatabase + decode_responses: bool = _DEFAULT_DECODE_RESPONSES + health_check_interval: datetime.timedelta = _DEFAULT_HEALTH_CHECK_INTERVAL + + @dataclass class RedisClientsManager: """ Manages the lifetime of redis client sdk connections """ - databases: set[RedisDatabase] + db_configs: set[RedisManagerDBConfig] settings: 
RedisSettings _client_sdks: dict[RedisDatabase, RedisClientSDKHealthChecked] = field( @@ -228,9 +244,11 @@ class RedisClientsManager: ) async def setup(self) -> None: - for db in self.databases: - self._client_sdks[db] = RedisClientSDKHealthChecked( - redis_dsn=self.settings.build_redis_dsn(db) + for config in self.db_configs: + self._client_sdks[config.database] = RedisClientSDKHealthChecked( + redis_dsn=self.settings.build_redis_dsn(config.database), + decode_responses=config.decode_responses, + health_check_interval=config.health_check_interval, ) await logged_gather(*(c.setup() for c in self._client_sdks.values())) diff --git a/packages/service-library/tests/test_redis.py b/packages/service-library/tests/test_redis.py index 29b039f59755..b2bf5515d2c0 100644 --- a/packages/service-library/tests/test_redis.py +++ b/packages/service-library/tests/test_redis.py @@ -20,6 +20,7 @@ RedisClientSDK, RedisClientSDKHealthChecked, RedisClientsManager, + RedisManagerDBConfig, ) from settings_library.redis import RedisDatabase, RedisSettings @@ -249,13 +250,15 @@ async def _inc_counter() -> None: async def test_redis_client_sdks_manager(redis_service: RedisSettings): - all_redis_databases: set[RedisDatabase] = set(RedisDatabase) - manager = RedisClientsManager(databases=all_redis_databases, settings=redis_service) + all_redis_configs: set[RedisManagerDBConfig] = { + RedisManagerDBConfig(x) for x in RedisDatabase + } + manager = RedisClientsManager(db_configs=all_redis_configs, settings=redis_service) await manager.setup() - for database in all_redis_databases: - assert manager.client(database) + for config in all_redis_configs: + assert manager.client(config.database) await manager.shutdown() diff --git a/services/director-v2/src/simcore_service_director_v2/modules/redis.py b/services/director-v2/src/simcore_service_director_v2/modules/redis.py index b111b8792ed0..dfd66c1d715e 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/redis.py +++ 
b/services/director-v2/src/simcore_service_director_v2/modules/redis.py @@ -1,5 +1,5 @@ from fastapi import FastAPI -from servicelib.redis import RedisClientsManager +from servicelib.redis import RedisClientsManager, RedisManagerDBConfig from settings_library.redis import RedisDatabase from ..core.settings import AppSettings @@ -10,9 +10,12 @@ async def on_startup() -> None: settings: AppSettings = app.state.settings app.state.redis_clients_manager = redis_clients_manager = RedisClientsManager( - databases={ - RedisDatabase.LOCKS, - RedisDatabase.DISTRIBUTED_IDENTIFIERS, + db_configs={ + RedisManagerDBConfig(db) + for db in ( + RedisDatabase.LOCKS, + RedisDatabase.DISTRIBUTED_IDENTIFIERS, + ) }, settings=settings.REDIS, ) diff --git a/services/web/server/src/simcore_service_webserver/redis.py b/services/web/server/src/simcore_service_webserver/redis.py index fd1f891187f4..cf870d53f35f 100644 --- a/services/web/server/src/simcore_service_webserver/redis.py +++ b/services/web/server/src/simcore_service_webserver/redis.py @@ -3,7 +3,7 @@ import redis.asyncio as aioredis from aiohttp import web from servicelib.aiohttp.application_setup import ModuleCategory, app_module_setup -from servicelib.redis import RedisClientSDK, RedisClientsManager +from servicelib.redis import RedisClientSDK, RedisClientsManager, RedisManagerDBConfig from settings_library.redis import RedisDatabase, RedisSettings from ._constants import APP_SETTINGS_KEY @@ -32,13 +32,16 @@ async def setup_redis_client(app: web.Application): """ redis_settings: RedisSettings = get_plugin_settings(app) app[_APP_REDIS_CLIENTS_MANAGER] = manager = RedisClientsManager( - databases={ - RedisDatabase.RESOURCES, - RedisDatabase.LOCKS, - RedisDatabase.VALIDATION_CODES, - RedisDatabase.SCHEDULED_MAINTENANCE, - RedisDatabase.USER_NOTIFICATIONS, - RedisDatabase.ANNOUNCEMENTS, + db_configs={ + RedisManagerDBConfig(x) + for x in ( + RedisDatabase.RESOURCES, + RedisDatabase.LOCKS, + RedisDatabase.VALIDATION_CODES, + 
RedisDatabase.SCHEDULED_MAINTENANCE, + RedisDatabase.USER_NOTIFICATIONS, + RedisDatabase.ANNOUNCEMENTS, + ) }, settings=redis_settings, ) From ae2e220d274df1fa0b8817c59b21a3335da27fd5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 11:44:27 +0200 Subject: [PATCH 024/122] using bytes instead of json --- .../src/servicelib/deferred_tasks/_redis_task_tracker.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py b/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py index 528f7cc971c1..f4b673c20d55 100644 --- a/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py +++ b/packages/service-library/src/servicelib/deferred_tasks/_redis_task_tracker.py @@ -1,3 +1,4 @@ +import pickle from typing import Final from uuid import uuid4 @@ -32,13 +33,13 @@ async def get_new_unique_identifier(self) -> TaskUID: async def _get_raw(self, redis_key: str) -> TaskScheduleModel | None: found_data = await self.redis_sdk.redis.get(redis_key) - return None if found_data is None else TaskScheduleModel.parse_raw(found_data) + return None if found_data is None else pickle.loads(found_data) # noqa: S301 async def get(self, task_uid: TaskUID) -> TaskScheduleModel | None: return await self._get_raw(_get_key(task_uid)) async def save(self, task_uid: TaskUID, task_schedule: TaskScheduleModel) -> None: - await self.redis_sdk.redis.set(_get_key(task_uid), task_schedule.json()) + await self.redis_sdk.redis.set(_get_key(task_uid), pickle.dumps(task_schedule)) async def remove(self, task_uid: TaskUID) -> None: await self.redis_sdk.redis.delete(_get_key(task_uid)) From a47a0effda761f2a0ef13a8a4aa8c0ce5b559636 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 11:45:02 +0200 Subject: [PATCH 025/122] name of service --- .../src/models_library/api_schemas_webserver/projects_nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py b/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py index 7003784269f8..b450aaa8cf1e 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py @@ -92,7 +92,7 @@ class Config: "published_port": 30000, "entrypoint": "/the/entry/point/is/here", "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "service_key": "simcore/services/comp/itis/sleeper", + "service_key": "simcore/services/dynamic/some-dynamic-service", "service_version": "1.2.3", "service_host": "jupyter_E1O2E-LAH", "service_port": 8081, From 38fc9f8d2a23797345d59826bd0c0615c3791348 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 15:26:07 +0200 Subject: [PATCH 026/122] using pickle to serialize to json --- .../services/service_tracker/_models.py | 9 ++++----- .../services/service_tracker/_tracker.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index aef2c293d19f..4dae3eaca94b 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -1,9 +1,9 @@ +import pickle from dataclasses import dataclass from datetime import timedelta from enum import auto import arrow -import orjson from models_library.utils.enums import StrAutoEnum from servicelib.deferred_tasks import TaskUID @@ -41,9 +41,8 @@ def set_check_status_after_to(self, delay: timedelta) -> None: self.check_status_after = (arrow.utcnow() + delay).timestamp() def to_bytes(self) -> 
bytes: - result: bytes = orjson.dumps(self) - return result + return pickle.dumps(self) @classmethod - def from_bytes(cls, json: bytes) -> "TrackedServiceModel": - return cls(**orjson.loads(json)) + def from_bytes(cls, data: bytes) -> "TrackedServiceModel": + return pickle.loads(data) # noqa: S301 diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py index 363b25f9296c..fa67a9c488dc 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py @@ -38,7 +38,7 @@ async def all(self) -> dict[NodeID, TrackedServiceModel]: found_values = await self.redis_client_sdk.redis.mget(found_keys) return { - k: TrackedServiceModel.from_bytes(v) + NodeID(k.decode().lstrip(_KEY_PREFIX)): TrackedServiceModel.from_bytes(v) for k, v in zip(found_keys, found_values, strict=True) if v is not None } From 9a394007ff44683f685e7d4ec608f8dbdaa842b1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 15:27:15 +0200 Subject: [PATCH 027/122] using raw binary redis --- .../simcore_service_dynamic_scheduler/services/redis.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py index 147a14607de7..db8f51e653c3 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py @@ -1,7 +1,11 @@ from typing import Final from fastapi import FastAPI -from servicelib.redis import RedisClientSDKHealthChecked, RedisClientsManager +from servicelib.redis import ( + 
RedisClientSDKHealthChecked, + RedisClientsManager, + RedisManagerDBConfig, +) from settings_library.redis import RedisDatabase, RedisSettings _REDIS_DATABASES: Final[set[RedisDatabase]] = { @@ -15,7 +19,8 @@ def setup_redis(app: FastAPI) -> None: async def on_startup() -> None: app.state.redis_clients_manager = manager = RedisClientsManager( - _REDIS_DATABASES, settings + {RedisManagerDBConfig(x, decode_responses=False) for x in _REDIS_DATABASES}, + settings, ) await manager.setup() From ff65e2bb077ed4cd0030b97d747132a291839712 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 15:28:10 +0200 Subject: [PATCH 028/122] refactor test --- .../tests/unit/service_tracker/test__tracker.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py index 42483975d85e..953d71243651 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -17,10 +17,7 @@ from simcore_service_dynamic_scheduler.services.service_tracker._setup import ( get_tracker, ) -from simcore_service_dynamic_scheduler.services.service_tracker._tracker import ( - Tracker, - _get_key, -) +from simcore_service_dynamic_scheduler.services.service_tracker._tracker import Tracker pytest_simcore_core_services_selection = [ "redis", @@ -76,7 +73,10 @@ async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> *[tracker.save(k, v) for k, v in data_to_insert.items()], max_concurrency=100 ) - assert await tracker.all() == {_get_key(k): v for k, v in data_to_insert.items()} + response = await tracker.all() + for key in response.keys(): + assert isinstance(key, NodeID) + assert response == data_to_insert async def test_remove_missing_key_does_not_raise_error(tracker: Tracker): From 
3a9c8a80601f24df527c9e166b29fbce1c89be78 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 15:28:32 +0200 Subject: [PATCH 029/122] refactor --- .../unit/test_services_status_monitor.py | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 services/dynamic-scheduler/tests/unit/test_services_status_monitor.py diff --git a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py new file mode 100644 index 000000000000..198fa9f02d0b --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py @@ -0,0 +1,155 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument + +import json +import re +from collections.abc import AsyncIterable +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest +import respx +from fastapi import FastAPI, status +from fastapi.encoders import jsonable_encoder +from httpx import Request, Response +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeInt +from pytest_mock import MockerFixture +from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisSettings +from simcore_service_dynamic_scheduler.services.service_tracker import ( + set_request_as_running, +) +from simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status import ( + DeferredGetStatus, +) +from simcore_service_dynamic_scheduler.services.status_monitor._monitor import Monitor +from simcore_service_dynamic_scheduler.services.status_monitor._setup import get_monitor +from tenacity import AsyncRetrying +from tenacity.retry import retry_if_exception_type +from tenacity.stop import 
stop_after_delay +from tenacity.wait import wait_fixed + +pytest_simcore_core_services_selection = [ + "rabbit", + "redis", +] + + +@pytest.fixture +def app_environment( + app_environment: EnvVarsDict, + rabbit_service: RabbitSettings, + redis_service: RedisSettings, + remove_redis_data: None, +) -> EnvVarsDict: + return app_environment + + +# create service pattern for start & stop with the appropriate type of message types +# including idle and the ones for legacy services + + +class _StatusResponseTimeline: + # TODO: use to generate a future timeline of responses in order to properly test + # how the status wil behave with time + pass + + +@pytest.fixture +async def mock_director_v2_status( + app: FastAPI, + service_status: NodeGet | DynamicServiceGet | NodeGetIdle, +) -> AsyncIterable[None]: + def _side_effect_node_status_response(request: Request) -> Response: + node_id = NodeID(f"{request.url}".split("/")[-1]) + print("<<<<<<<<", node_id, request.url) + + # fetch `node_id` from request and then compose sequence of events which which it should respond + if isinstance(service_status, NodeGet): + return Response( + status.HTTP_200_OK, + text=json.dumps(jsonable_encoder({"data": service_status.dict()})), + ) + if isinstance(service_status, DynamicServiceGet): + return Response(status.HTTP_200_OK, text=service_status.json()) + if isinstance(service_status, NodeGetIdle): + return Response(status.HTTP_404_NOT_FOUND) + + # Not moced http://director-v2:8000/v2/dynamic_services/87a2a7c6-7166-4ffe-8a03-e4e947753ed3 + + with respx.mock( + base_url=app.state.settings.DYNAMIC_SCHEDULER_DIRECTOR_V2_SETTINGS.api_base_url, + assert_all_called=False, + assert_all_mocked=True, + ) as mock: + mock.get(re.compile(r"/dynamic_services/([\w-]+)")).mock( + side_effect=_side_effect_node_status_response + ) + yield + + +@pytest.fixture +def monitor(mock_director_v2_status: None, app: FastAPI) -> Monitor: + return get_monitor(app) + + +@pytest.fixture +def deferred_status_spies(mocker: 
MockerFixture) -> dict[str, AsyncMock]: + results: dict[str, AsyncMock] = {} + for method_name in ( + "on_result", + "run", + "on_finished_with_error", + ): + mock_method = mocker.AsyncMock(wraps=getattr(DeferredGetStatus, method_name)) + mocker.patch.object(DeferredGetStatus, method_name, mock_method) + results[method_name] = mock_method + + return results + + +async def _wait_for_result( + deferred_status_spies: dict[str, AsyncMock], *, key: str, count: NonNegativeInt +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(5), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert deferred_status_spies[key].call_count == count + + +@pytest.mark.parametrize( + "service_status", + [ + NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + *( + DynamicServiceGet.parse_obj(x) + for x in DynamicServiceGet.Config.schema_extra["examples"] + ), + NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + ], +) +async def test_basic_examples( + deferred_status_spies: dict[str, AsyncMock], + app: FastAPI, + monitor: Monitor, + service_status: NodeGet | DynamicServiceGet | NodeGetIdle, +): + mode_id = uuid4() + await set_request_as_running(app, mode_id) + + # ADD some service to monitor, then mock the API to director-v2 to returns different + # statuses based on the times when it is called + + await monitor._worker_start_get_status_requests() + + await _wait_for_result(deferred_status_spies, key="run", count=1) + await _wait_for_result(deferred_status_spies, key="on_result", count=1) + await _wait_for_result(deferred_status_spies, key="on_finished_with_error", count=0) From ab4ec3f45ab826050811276a75145fb9fefccca1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 15:28:47 +0200 Subject: [PATCH 030/122] refactor --- services/dynamic-scheduler/tests/conftest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index f32c3ba74195..5427367c19c1 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ b/services/dynamic-scheduler/tests/conftest.py @@ -13,7 +13,7 @@ from pytest_mock import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict from pytest_simcore.helpers.utils_envs import setenvs_from_dict -from servicelib.redis import RedisClientsManager +from servicelib.redis import RedisClientsManager, RedisManagerDBConfig from servicelib.utils import logged_gather from settings_library.redis import RedisDatabase, RedisSettings from simcore_service_dynamic_scheduler.core.application import create_app @@ -121,7 +121,9 @@ async def app( @pytest.fixture async def remove_redis_data(redis_service: RedisSettings) -> None: - async with RedisClientsManager(set(RedisDatabase), redis_service) as manager: + async with RedisClientsManager( + {RedisManagerDBConfig(x) for x in RedisDatabase}, redis_service + ) as manager: await logged_gather( *[manager.client(d).redis.flushall() for d in RedisDatabase] ) From 1317ff09132669f298f2d2c94de6f697ecc91201 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 16:22:49 +0200 Subject: [PATCH 031/122] added base test to validate --- .../unit/test_services_status_monitor.py | 170 +++++++++++++++--- 1 file changed, 145 insertions(+), 25 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py index 198fa9f02d0b..fd1814c3c675 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py +++ b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py @@ -4,6 +4,8 @@ import json import re from collections.abc import AsyncIterable +from copy import deepcopy +from typing import Any from unittest.mock import AsyncMock from uuid import uuid4 @@ -52,23 +54,105 @@ def app_environment( # 
create service pattern for start & stop with the appropriate type of message types # including idle and the ones for legacy services +_DEFAULT_NODE_ID: NodeID = uuid4() -class _StatusResponseTimeline: - # TODO: use to generate a future timeline of responses in order to properly test - # how the status wil behave with time - pass + +def _add_to_dict(dict_data: dict, entries: list[tuple[str, Any]]) -> None: + for key, data in entries: + assert key in dict_data + dict_data[key] = data + + +def _get_node_get_with(state: str, node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGet: + dict_data = deepcopy(NodeGet.Config.schema_extra["example"]) + _add_to_dict( + dict_data, + [ + ("service_state", state), + ("service_uuid", f"{node_id}"), + ], + ) + return NodeGet.parse_obj(dict_data) + + +def __get_dynamic_service_get_legacy_with( + state: str, node_id: NodeID = _DEFAULT_NODE_ID +) -> DynamicServiceGet: + dict_data = deepcopy(DynamicServiceGet.Config.schema_extra["examples"][0]) + _add_to_dict( + dict_data, + [ + ("state", state), + ("uuid", f"{node_id}"), + ("node_uuid", f"{node_id}"), + ], + ) + return DynamicServiceGet.parse_obj(dict_data) + + +def __get_dynamic_service_get_new_style_with( + state: str, node_id: NodeID = _DEFAULT_NODE_ID +) -> DynamicServiceGet: + dict_data = deepcopy(DynamicServiceGet.Config.schema_extra["examples"][1]) + _add_to_dict( + dict_data, + [ + ("state", state), + ("uuid", f"{node_id}"), + ("node_uuid", f"{node_id}"), + ], + ) + return DynamicServiceGet.parse_obj(dict_data) + + +def __get_node_get_idle(node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGetIdle: + dict_data = NodeGetIdle.Config.schema_extra["example"] + _add_to_dict( + dict_data, + [ + ("service_uuid", f"{node_id}"), + ], + ) + return NodeGetIdle.parse_obj(dict_data) + + +class _ResponseTimeline: + def __init__( + self, timeline: list[NodeGet | DynamicServiceGet | NodeGetIdle] + ) -> None: + self._timeline = timeline + + self._client_access_history: dict[NodeID, NonNegativeInt] = {} + + 
@property + def entries(self) -> list[NodeGet | DynamicServiceGet | NodeGetIdle]: + return self._timeline + + def __len__(self) -> int: + return len(self._timeline) + + def get_status(self, node_id: NodeID) -> NodeGet | DynamicServiceGet | NodeGetIdle: + if node_id not in self._client_access_history: + self._client_access_history[node_id] = 0 + + # always return node idle when timeline finished playing + if self._client_access_history[node_id] >= len(self._timeline): + return __get_node_get_idle() + + status = self._timeline[self._client_access_history[node_id]] + self._client_access_history[node_id] += 1 + return status @pytest.fixture async def mock_director_v2_status( - app: FastAPI, - service_status: NodeGet | DynamicServiceGet | NodeGetIdle, + app: FastAPI, response_timeline: _ResponseTimeline ) -> AsyncIterable[None]: def _side_effect_node_status_response(request: Request) -> Response: node_id = NodeID(f"{request.url}".split("/")[-1]) - print("<<<<<<<<", node_id, request.url) - # fetch `node_id` from request and then compose sequence of events which which it should respond + service_status = response_timeline.get_status(node_id) + if isinstance(service_status, NodeGet): return Response( status.HTTP_200_OK, @@ -79,7 +163,7 @@ def _side_effect_node_status_response(request: Request) -> Response: if isinstance(service_status, NodeGetIdle): return Response(status.HTTP_404_NOT_FOUND) - # Not moced http://director-v2:8000/v2/dynamic_services/87a2a7c6-7166-4ffe-8a03-e4e947753ed3 + raise TypeError() with respx.mock( base_url=app.state.settings.DYNAMIC_SCHEDULER_DIRECTOR_V2_SETTINGS.api_base_url, @@ -112,8 +196,23 @@ def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: return results -async def _wait_for_result( - deferred_status_spies: dict[str, AsyncMock], *, key: str, count: NonNegativeInt +async def _assert_call_to( + deferred_status_spies: dict[str, AsyncMock], *, method: str, count: NonNegativeInt +) -> None: + async for attempt in 
AsyncRetrying( + reraise=True, + stop=stop_after_delay(5), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert deferred_status_spies[method].call_count == count + + +async def _assert_result( + deferred_status_spies: dict[str, AsyncMock], + *, + timeline: list[NodeGet | DynamicServiceGet | NodeGetIdle], ) -> None: async for attempt in AsyncRetrying( reraise=True, @@ -122,34 +221,55 @@ async def _wait_for_result( retry=retry_if_exception_type(AssertionError), ): with attempt: - assert deferred_status_spies[key].call_count == count + + assert deferred_status_spies["on_result"].call_count == len(timeline) + assert [ + x.args[0] for x in deferred_status_spies["on_result"].call_args_list + ] == timeline + + +@pytest.fixture +def node_id() -> NodeID: + return _DEFAULT_NODE_ID @pytest.mark.parametrize( - "service_status", + "response_timeline", [ - NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), - *( - DynamicServiceGet.parse_obj(x) - for x in DynamicServiceGet.Config.schema_extra["examples"] + _ResponseTimeline([_get_node_get_with("running")]), + _ResponseTimeline( + [ + __get_dynamic_service_get_legacy_with("running"), + __get_dynamic_service_get_legacy_with("running"), + ] ), - NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + _ResponseTimeline([__get_dynamic_service_get_new_style_with("running")]), + _ResponseTimeline([__get_node_get_idle()]), ], ) async def test_basic_examples( deferred_status_spies: dict[str, AsyncMock], app: FastAPI, monitor: Monitor, - service_status: NodeGet | DynamicServiceGet | NodeGetIdle, + response_timeline: _ResponseTimeline, + node_id: NodeID, ): - mode_id = uuid4() - await set_request_as_running(app, mode_id) + await set_request_as_running(app, node_id) # ADD some service to monitor, then mock the API to director-v2 to returns different # statuses based on the times when it is called - await monitor._worker_start_get_status_requests() + entries_in_timeline = 
len(response_timeline) + + for i in range(entries_in_timeline): + await monitor._worker_start_get_status_requests() + await _assert_call_to(deferred_status_spies, method="on_result", count=i + 1) + + await _assert_call_to( + deferred_status_spies, method="run", count=entries_in_timeline + ) + await _assert_call_to( + deferred_status_spies, method="on_finished_with_error", count=0 + ) - await _wait_for_result(deferred_status_spies, key="run", count=1) - await _wait_for_result(deferred_status_spies, key="on_result", count=1) - await _wait_for_result(deferred_status_spies, key="on_finished_with_error", count=0) + await _assert_result(deferred_status_spies, timeline=response_timeline.entries) From c644563fe51c4fdeb1f34c9b6156976eed2e6639 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 16:23:40 +0200 Subject: [PATCH 032/122] setup monitor --- .../services/status_monitor/_setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py index 803b5c77e632..c3ee9d64d6df 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py @@ -18,3 +18,8 @@ async def on_shutdown() -> None: app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) + + +def get_monitor(app: FastAPI) -> Monitor: + monitor: Monitor = app.state.status_monitor + return monitor From b8eb97cc4659d5b0a057559e2fb325716c024b3a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 4 Jun 2024 16:23:50 +0200 Subject: [PATCH 033/122] wip --- .../status_monitor/_deferred_get_status.py | 76 +++++++++++++++++++ .../services/status_monitor/_monitor.py | 72 +++++++++++++++++- 2 files changed, 146 insertions(+), 2 
deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py new file mode 100644 index 000000000000..abb772aea21d --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -0,0 +1,76 @@ +import logging +from datetime import timedelta + +from fastapi import FastAPI +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects_nodes_io import NodeID +from servicelib.deferred_tasks import BaseDeferredHandler, TaskUID +from servicelib.deferred_tasks._base_deferred_handler import DeferredContext + +from ..director_v2 import DirectorV2Client +from ..service_tracker import ( + remove_tracked, + set_new_status, + set_service_status_task_uid, +) + +_logger = logging.getLogger(__name__) + + +class DeferredGetStatus(BaseDeferredHandler[NodeGet | DynamicServiceGet | NodeGetIdle]): + @classmethod + async def get_timeout(cls, context: DeferredContext) -> timedelta: + assert context # nosec + return timedelta(seconds=5) + + @classmethod + async def start( # pylint:disable=arguments-differ + cls, node_id: NodeID + ) -> DeferredContext: + _logger.debug("Getting service status for %s", node_id) + return {"node_id": node_id} + + @classmethod + async def on_created(cls, task_uid: TaskUID, context: DeferredContext) -> None: + """called after deferred was scheduled to run""" + app: FastAPI = context["app"] + node_id: NodeID = context["node_id"] + + await set_service_status_task_uid(app, node_id, task_uid) + + @classmethod + async def run( + cls, 
context: DeferredContext + ) -> NodeGet | DynamicServiceGet | NodeGetIdle: + app: FastAPI = context["app"] + node_id: NodeID = context["node_id"] + + director_v2_client = DirectorV2Client.get_from_app_state(app) + service_status = await director_v2_client.get_status(node_id) + _logger.debug( + "Service status type=%s, %s", type(service_status), service_status + ) + return service_status + + @classmethod + async def on_result( + cls, result: NodeGet | DynamicServiceGet | NodeGetIdle, context: DeferredContext + ) -> None: + app: FastAPI = context["app"] + node_id: NodeID = context["node_id"] + + _logger.debug("CALLED ON RESULT %s %s", node_id, result) + + # TOOD: maybe move all this logic tot the service_tracker + + # TODO: this should be transformed in set_new_status_if_changed + # also this should return a "bool" if the status changed form the previous + # this allows us to figure out when to send to the FE notifications + # TODO: from here we need to add an integration with the module sending via webseocket + # the status to the fronted + await set_new_status(app, node_id, result) + + # remove service if no longer running + if isinstance(result, NodeGetIdle): + await remove_tracked(app, node_id) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index c5e9adfb3298..c2acefad7a93 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -1,11 +1,79 @@ +import logging +from datetime import timedelta +from functools import cached_property +from typing import Final + +import arrow from fastapi import FastAPI +from models_library.projects_nodes_io import NodeID +from pydantic import NonNegativeFloat, NonNegativeInt +from servicelib.utils import 
logged_gather + +from ..service_tracker import ( + TrackedServiceModel, + get_all_tracked, + set_check_status_after_to, +) +from ._deferred_get_status import DeferredGetStatus + +_logger = logging.getLogger(__name__) + +_MAX_CONCURRENCY: Final[NonNegativeInt] = 10 class Monitor: - def __init__(self, app: FastAPI) -> None: + def __init__(self, app: FastAPI, check_threshold: timedelta) -> None: self.app = app + self.check_threshold = check_threshold + + @cached_property + def check_threshold_seconds(self) -> NonNegativeFloat: + return self.check_threshold.total_seconds() + + async def _worker_start_get_status_requests(self) -> None: + # NOTE: this worker runs on only once across all instances of the scheduler + + models: dict[NodeID, TrackedServiceModel] = await get_all_tracked(self.app) + to_start: list[NodeID] = [] + to_set_check_status_after: list[NodeID] = [] + + current_timestamp = arrow.utcnow().timestamp() + + for node_id, model in models.items(): + if ( + model.check_status_after is None + or model.check_status_after > current_timestamp + ): + # status fetching is required + if model.service_status_task_uid is None: + to_start.append(node_id) + else: + _logger.info( + "Skipping status check for %s, since already running. 
Will check later", + node_id, + ) + if model.check_status_after is None: + to_set_check_status_after.append(node_id) + + # for services where the check never ran, make sure we are nto able to start the check while it's running + await logged_gather( + *( + set_check_status_after_to(self.app, node_id, timedelta(seconds=5)) + for node_id in to_set_check_status_after + ), + max_concurrency=_MAX_CONCURRENCY, + ) + + await logged_gather( + *(DeferredGetStatus.start(node_id=node_id) for node_id in to_start), + max_concurrency=_MAX_CONCURRENCY, + ) + + async def setup(self) -> None: + # TODO: start uniquely running task # NOTE: THIS needs to be distributed only 1 at a time + pass - async def _check_status(self) -> None: + async def shutdown(self) -> None: pass From 356076fd955928dec5c139ad0fe6c4d35c4f7ba0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 5 Jun 2024 11:10:46 +0200 Subject: [PATCH 034/122] extended _api --- .../services/service_tracker/_api.py | 107 ++++++++++- .../tests/unit/service_tracker/test__api.py | 170 +++++++++++++++++- 2 files changed, 271 insertions(+), 6 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index b7be3608e54b..91f5c2ed8f3d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -2,6 +2,7 @@ from datetime import timedelta from typing import Final +import arrow from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle @@ -9,7 +10,7 @@ from models_library.services_enums import ServiceState from servicelib.deferred_tasks import TaskUID -from ._models import 
TrackedServiceModel, UserRequestedState +from ._models import SchedulerServiceState, TrackedServiceModel, UserRequestedState from ._setup import get_tracker from ._tracker import Tracker @@ -18,6 +19,7 @@ _LOW_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=1) _NORMAL_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=5) +_MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES: Final[timedelta] = timedelta(seconds=60) async def _set_requested_state( @@ -42,7 +44,7 @@ async def set_request_as_stopped(app: FastAPI, node_id: NodeID) -> None: await _set_requested_state(app, node_id, UserRequestedState.STOPPED) # type: ignore -def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: +def __get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: # Attributes where to find the state # NodeGet -> service_state # DynamicServiceGet -> state @@ -50,14 +52,52 @@ def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> tim state_key = "state" if isinstance(status, DynamicServiceGet) else "service_state" state: ServiceState | str = getattr(status, state_key) - state_str: str = state.value if isinstance(state, ServiceState) else state + return state.value if isinstance(state, ServiceState) else state + - if state_str != "running": +def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: + if __get_state_str(status) != "running": return _LOW_RATE_POLL_INTERVAL return _NORMAL_RATE_POLL_INTERVAL +def _get_current_state( + requested_sate: UserRequestedState, + status: NodeGet | DynamicServiceGet | NodeGetIdle, +) -> SchedulerServiceState: + """ + Computes the `SchedulerServiceState` used internally by the scheduler + to decide about a service's future. 
+ """ + + if isinstance(status, NodeGetIdle): + return SchedulerServiceState.IDLE + + service_state: ServiceState = ServiceState(__get_state_str(status)) + + if requested_sate == UserRequestedState.RUNNING: + if service_state == ServiceState.RUNNING: + return SchedulerServiceState.RUNNING + + if ServiceState.PENDING <= service_state <= ServiceState.STARTING: + return SchedulerServiceState.STARTING + + if service_state < ServiceState.PENDING or service_state > ServiceState.RUNNING: + return SchedulerServiceState.UNEXPECTED_OUTCOME + + if requested_sate == UserRequestedState.STOPPED: + if service_state >= ServiceState.RUNNING: + return SchedulerServiceState.STOPPING + + if service_state < ServiceState.RUNNING: + return SchedulerServiceState.UNEXPECTED_OUTCOME + + msg = f"Could not determine current_state from: '{requested_sate=}', '{status=}'" + raise TypeError(msg) + + +# TODO: remove below, not used async def set_new_status( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> None: @@ -77,6 +117,65 @@ async def set_new_status( await tracker.save(node_id, model) +async def set_if_status_changed( + app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle +) -> bool: + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _logger.info( + "Could not find a %s entry for node_id %s: skipping set_new_status", + TrackedServiceModel.__name__, + node_id, + ) + return False + + # set new polling interval in the future + model.set_check_status_after_to(_get_poll_interval(status)) + model.service_status_task_uid = None + + # check if model changed + json_status = status.json() + if model.service_status != json_status: + model.service_status = json_status + model.current_state = _get_current_state(model.requested_sate, status) + await tracker.save(node_id, model) + return True + + return False + + +async def can_notify_frontend( + app: FastAPI, node_id: 
NodeID, *, status_changed: bool +) -> bool: + """ + Checks if it's time to notify the frontend. + The frontend will be notified at regular intervals and on changes + Avoids sending too many updates. + """ + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _logger.info( + "Could not find a %s entry for node_id %s: skipping set_new_status", + TrackedServiceModel.__name__, + node_id, + ) + return False + + if status_changed: + return True + + # check if too much time has passed since the last time an update was sent + current_timestamp = arrow.utcnow().timestamp() + if ( + current_timestamp - model.last_status_notification + ) > _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES.total_seconds(): + return True + + return False + + async def set_check_status_after_to( app: FastAPI, node_id: NodeID, delay: timedelta ) -> None: diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index d348e51ec68c..a79bfef41c1c 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -2,6 +2,7 @@ # pylint:disable=unused-argument from datetime import timedelta +from typing import Any, Final from uuid import uuid4 import arrow @@ -11,6 +12,7 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID +from models_library.services_enums import ServiceState from pydantic import NonNegativeInt from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.deferred_tasks import TaskUID @@ -21,6 +23,7 @@ get_tracked, remove_tracked, set_check_status_after_to, + set_if_status_changed, set_new_status, set_request_as_running, set_request_as_stopped, @@ -29,9 +32,11 @@ 
from simcore_service_dynamic_scheduler.services.service_tracker._api import ( _LOW_RATE_POLL_INTERVAL, _NORMAL_RATE_POLL_INTERVAL, + _get_current_state, _get_poll_interval, ) from simcore_service_dynamic_scheduler.services.service_tracker._models import ( + SchedulerServiceState, UserRequestedState, ) @@ -85,11 +90,11 @@ async def test_services_tracer_workflow( await logged_gather( *[set_request_as_stopped(app, uuid4()) for _ in range(item_count)], - max_concurrency=100 + max_concurrency=100, ) await logged_gather( *[set_request_as_running(app, uuid4()) for _ in range(item_count)], - max_concurrency=100 + max_concurrency=100, ) assert len(await get_all_tracked(app)) == item_count * 2 @@ -118,6 +123,32 @@ async def test_set_new_status( assert model.service_status == status.json() +@pytest.mark.parametrize( + "status", + [ + NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + *[ + DynamicServiceGet.parse_obj(x) + for x in DynamicServiceGet.Config.schema_extra["examples"] + ], + NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), + ], +) +async def test_set_if_status_changed( + app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle +): + await set_request_as_running(app, node_id) + + assert await set_if_status_changed(app, node_id, status) is True + + assert await set_if_status_changed(app, node_id, status) is False + + model = await get_tracked(app, node_id) + assert model + + assert model.service_status == status.json() + + async def test_set_service_status_task_uid(app: FastAPI, node_id: NodeID, faker: Faker): await set_request_as_running(app, node_id) @@ -167,3 +198,138 @@ def test__get_poll_interval( status: NodeGet | DynamicServiceGet | NodeGetIdle, expected_poll_interval: timedelta ): assert _get_poll_interval(status) == expected_poll_interval + + +def _get_node_get_from(service_state: ServiceState) -> NodeGet: + dict_data = NodeGet.Config.schema_extra["example"] + assert "service_state" in dict_data + 
dict_data["service_state"] = service_state + return NodeGet.parse_obj(dict_data) + + +def _get_dynamic_service_get_from( + service_state: DynamicServiceGet, +) -> DynamicServiceGet: + dict_data = DynamicServiceGet.Config.schema_extra["examples"][1] + assert "state" in dict_data + dict_data["state"] = service_state + return DynamicServiceGet.parse_obj(dict_data) + + +def _get_node_get_idle() -> NodeGetIdle: + return NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]) + + +def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: + return [item for sublist in nested_list for item in sublist] + + +_EXPECTED_TEST_CASES: list[list[tuple]] = [ + [ + # UserRequestedState.RUNNING + ( + UserRequestedState.RUNNING, + get_status(ServiceState.PENDING), + SchedulerServiceState.STARTING, + ), + ( + UserRequestedState.RUNNING, + get_status(ServiceState.PULLING), + SchedulerServiceState.STARTING, + ), + ( + UserRequestedState.RUNNING, + get_status(ServiceState.STARTING), + SchedulerServiceState.STARTING, + ), + ( + UserRequestedState.RUNNING, + get_status(ServiceState.RUNNING), + SchedulerServiceState.RUNNING, + ), + ( + UserRequestedState.RUNNING, + get_status(ServiceState.COMPLETE), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + UserRequestedState.RUNNING, + get_status(ServiceState.FAILED), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + UserRequestedState.RUNNING, + get_status(ServiceState.STOPPING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + UserRequestedState.RUNNING, + _get_node_get_idle(), + SchedulerServiceState.IDLE, + ), + # UserRequestedState.STOPPED + ( + UserRequestedState.STOPPED, + get_status(ServiceState.PENDING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + UserRequestedState.STOPPED, + get_status(ServiceState.PULLING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + UserRequestedState.STOPPED, + get_status(ServiceState.STARTING), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + 
UserRequestedState.STOPPED, + get_status(ServiceState.RUNNING), + SchedulerServiceState.STOPPING, + ), + ( + UserRequestedState.STOPPED, + get_status(ServiceState.COMPLETE), + SchedulerServiceState.STOPPING, + ), + ( + UserRequestedState.STOPPED, + get_status(ServiceState.FAILED), + SchedulerServiceState.UNEXPECTED_OUTCOME, + ), + ( + UserRequestedState.STOPPED, + get_status(ServiceState.STOPPING), + SchedulerServiceState.STOPPING, + ), + ( + UserRequestedState.STOPPED, + _get_node_get_idle(), + SchedulerServiceState.IDLE, + ), + ] + for get_status in ( + _get_node_get_from, + _get_dynamic_service_get_from, + ) +] +_FLAT_EXPECTED_TEST_CASES = __get_flat_list(_EXPECTED_TEST_CASES) +# ensure enum changes do not break above rules +_IDLE_ITEM_COUNT: Final[int] = 1 +_NODE_STATUS_FORMATS_COUNT: Final[int] = 2 +assert ( + len(_FLAT_EXPECTED_TEST_CASES) + == (len(ServiceState) + _IDLE_ITEM_COUNT) + * len(UserRequestedState) + * _NODE_STATUS_FORMATS_COUNT +) + + +@pytest.mark.parametrize("requested_state, status, expected", _FLAT_EXPECTED_TEST_CASES) +def test__get_current_state( + requested_state: UserRequestedState, + status: NodeGet | DynamicServiceGet | NodeGetIdle, + expected: SchedulerServiceState, +): + assert _get_current_state(requested_state, status) == expected From a5345cfecda3b0fc64ce90c363632fbd4a653528 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 5 Jun 2024 11:11:31 +0200 Subject: [PATCH 035/122] removed unused --- .../services/service_tracker/_api.py | 20 --------------- .../tests/unit/service_tracker/test__api.py | 25 ------------------- 2 files changed, 45 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 91f5c2ed8f3d..6f4667cb525c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -97,26 +97,6 @@ def _get_current_state( raise TypeError(msg) -# TODO: remove below, not used -async def set_new_status( - app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle -) -> None: - tracker: Tracker = get_tracker(app) - model: TrackedServiceModel | None = await tracker.load(node_id) - if model is None: - _logger.info( - "Could not find a %s entry for node_id %s: skipping set_new_status", - TrackedServiceModel.__name__, - node_id, - ) - return - - model.service_status = status.json() - model.set_check_status_after_to(_get_poll_interval(status)) - model.service_status_task_uid = None - await tracker.save(node_id, model) - - async def set_if_status_changed( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> bool: diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index a79bfef41c1c..686c622ba0cc 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -24,7 +24,6 @@ remove_tracked, set_check_status_after_to, set_if_status_changed, - set_new_status, set_request_as_running, set_request_as_stopped, set_service_status_task_uid, @@ -99,30 +98,6 @@ async def test_services_tracer_workflow( assert len(await get_all_tracked(app)) == item_count * 2 -@pytest.mark.parametrize( - "status", - [ - NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), - *[ - DynamicServiceGet.parse_obj(x) - for x in DynamicServiceGet.Config.schema_extra["examples"] - ], - NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), - ], -) -async def test_set_new_status( - app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle -): - await set_request_as_running(app, node_id) - - await 
set_new_status(app, node_id, status) - - model = await get_tracked(app, node_id) - assert model - - assert model.service_status == status.json() - - @pytest.mark.parametrize( "status", [ From 9e39d8c788f9b7523bc120e7c31aadab7762a027 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 5 Jun 2024 11:18:19 +0200 Subject: [PATCH 036/122] listing in comparison order --- .../models-library/src/models_library/services_enums.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/models-library/src/models_library/services_enums.py b/packages/models-library/src/models_library/services_enums.py index 8a55c0a960b7..90dfef039e37 100644 --- a/packages/models-library/src/models_library/services_enums.py +++ b/packages/models-library/src/models_library/services_enums.py @@ -11,14 +11,17 @@ class ServiceBootType(str, Enum): @functools.total_ordering @unique class ServiceState(Enum): + FAILED = "failed" + PENDING = "pending" PULLING = "pulling" STARTING = "starting" RUNNING = "running" - COMPLETE = "complete" - FAILED = "failed" + STOPPING = "stopping" + COMPLETE = "complete" + def __lt__(self, other): if self.__class__ is other.__class__: comparison_order = ServiceState.comparison_order() From ebf75f9859f3ebcfee823e664b97baaafd826129 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 5 Jun 2024 11:18:39 +0200 Subject: [PATCH 037/122] refactor --- .../services/service_tracker/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index cb17648f1e66..a01c366f8f0c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -1,9 +1,10 @@ from ._api import 
( + can_notify_frontend, get_all_tracked, get_tracked, remove_tracked, set_check_status_after_to, - set_new_status, + set_if_status_changed, set_request_as_running, set_request_as_stopped, set_service_status_task_uid, @@ -12,11 +13,12 @@ from ._setup import setup_service_tracker __all__: tuple[str, ...] = ( + "can_notify_frontend", "get_all_tracked", "get_tracked", "remove_tracked", "set_check_status_after_to", - "set_new_status", + "set_if_status_changed", "set_request_as_running", "set_request_as_stopped", "set_service_status_task_uid", From 8cb1b4f570fd97873b1011658d995615d55c4979 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 5 Jun 2024 11:32:21 +0200 Subject: [PATCH 038/122] wrapping up status_monitor --- .../status_monitor/_deferred_get_status.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index abb772aea21d..585dee3f5cae 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -9,9 +9,11 @@ from servicelib.deferred_tasks._base_deferred_handler import DeferredContext from ..director_v2 import DirectorV2Client +from ..notifier import notify_frontend from ..service_tracker import ( + can_notify_frontend, remove_tracked, - set_new_status, + set_if_status_changed, set_service_status_task_uid, ) @@ -60,16 +62,13 @@ async def on_result( app: FastAPI = context["app"] node_id: NodeID = context["node_id"] - _logger.debug("CALLED ON RESULT %s %s", node_id, result) + _logger.debug("Received status for service '%s': '%s'", node_id, result) - # TOOD: maybe move all this logic tot the service_tracker - - # TODO: 
this should be transformed in set_new_status_if_changed - # also this should return a "bool" if the status changed form the previous - # this allows us to figure out when to send to the FE notifications - # TODO: from here we need to add an integration with the module sending via webseocket - # the status to the fronted - await set_new_status(app, node_id, result) + # TODO: figure out if this needs to be an atomic change in the Redis DB + # set & notify + status_changed: bool = await set_if_status_changed(app, node_id, result) + if await can_notify_frontend(app, node_id, status_changed=status_changed): + await notify_frontend(app, node_id, result) # remove service if no longer running if isinstance(result, NodeGetIdle): From 88c8926dd163c0f65e79c259a6f2e8b847238640 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 5 Jun 2024 11:51:18 +0200 Subject: [PATCH 039/122] refactor --- .../services/service_tracker/_api.py | 10 ++----- .../services/service_tracker/_models.py | 29 +++++++++++++++++-- .../status_monitor/_deferred_get_status.py | 2 -- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 6f4667cb525c..c467dafffaf5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -100,11 +100,12 @@ def _get_current_state( async def set_if_status_changed( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> bool: + """returns ``True`` if the tracker detected a status change""" tracker: Tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: _logger.info( - "Could not find a %s entry for node_id %s: skipping 
set_new_status", + "Could not find a %s entry for node_id %s: skipping set_if_status_changed", TrackedServiceModel.__name__, node_id, ) @@ -136,11 +137,6 @@ async def can_notify_frontend( tracker: Tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: - _logger.info( - "Could not find a %s entry for node_id %s: skipping set_new_status", - TrackedServiceModel.__name__, - node_id, - ) return False if status_changed: @@ -163,7 +159,7 @@ async def set_check_status_after_to( model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: _logger.info( - "Could not find a %s entry for node_id %s: skipping set_new_status", + "Could not find a %s entry for node_id %s: skipping set_check_status_after_to", TrackedServiceModel.__name__, node_id, ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 4dae3eaca94b..9ba1a7585d0f 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -13,13 +13,20 @@ class UserRequestedState(StrAutoEnum): STOPPED = auto() -class ServiceStates(StrAutoEnum): +class SchedulerServiceState(StrAutoEnum): + # service was started and is running as expected RUNNING = auto() - STOPPED = auto() + # service is not present + IDLE = auto() + # something went wrong while starting/stopping service + UNEXPECTED_OUTCOME = auto() + # service is being started STARTING = auto() + # service is being stopped STOPPING = auto() + # service status has not been determined UNKNOWN = auto() @@ -29,17 +36,33 @@ class TrackedServiceModel: requested_sate: UserRequestedState # set this after parsing the incoming state via the API calls - current_state: ServiceStates = 
ServiceStates.UNKNOWN # type: ignore + current_state: SchedulerServiceState = SchedulerServiceState.UNKNOWN # type: ignore + + ############################# + ### SERVICE STATSU UPDATE ### + ############################# # stored for debug mainly this is used to compute ``current_state`` service_status: str = "" + # uid of the job currently fetching the status service_status_task_uid: TaskUID | None = None + # used to determine when to poll the status again check_status_after: float | None = None def set_check_status_after_to(self, delay: timedelta) -> None: self.check_status_after = (arrow.utcnow() + delay).timestamp() + # used to determine when was the last time the status was notified + last_status_notification: float = 0 + + def set_last_status_notification_to_now(self) -> None: + self.last_status_notification = arrow.utcnow().timestamp() + + ##################### + ### SERIALIZATION ### + ##################### + def to_bytes(self) -> bytes: return pickle.dumps(self) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index 585dee3f5cae..4e31ff3d4e89 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -64,8 +64,6 @@ async def on_result( _logger.debug("Received status for service '%s': '%s'", node_id, result) - # TODO: figure out if this needs to be an atomic change in the Redis DB - # set & notify status_changed: bool = await set_if_status_changed(app, node_id, result) if await can_notify_frontend(app, node_id, status_changed=status_changed): await notify_frontend(app, node_id, result) From c3782c93b11791ec1b1a7536e39190098766d76e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: 
Thu, 6 Jun 2024 10:42:58 +0200 Subject: [PATCH 040/122] refactor service_tracker --- .../services/service_tracker/__init__.py | 4 ++-- .../services/service_tracker/_api.py | 23 +++++++++++-------- .../services/service_tracker/_models.py | 12 ++++++---- .../tests/unit/service_tracker/test__api.py | 18 --------------- .../unit/service_tracker/test__models.py | 12 +++++----- 5 files changed, 29 insertions(+), 40 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index a01c366f8f0c..6040be720e2d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -3,10 +3,10 @@ get_all_tracked, get_tracked, remove_tracked, - set_check_status_after_to, set_if_status_changed, set_request_as_running, set_request_as_stopped, + set_scheduled_to_run, set_service_status_task_uid, ) from ._models import TrackedServiceModel @@ -17,10 +17,10 @@ "get_all_tracked", "get_tracked", "remove_tracked", - "set_check_status_after_to", "set_if_status_changed", "set_request_as_running", "set_request_as_stopped", + "set_scheduled_to_run", "set_service_status_task_uid", "setup_service_tracker", "TrackedServiceModel", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index c467dafffaf5..d09e962a2779 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -114,6 +114,7 @@ async def set_if_status_changed( # set new polling interval in the future 
model.set_check_status_after_to(_get_poll_interval(status)) model.service_status_task_uid = None + model.scheduled_to_run = False # check if model changed json_status = status.json() @@ -136,36 +137,38 @@ async def can_notify_frontend( """ tracker: Tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: return False - if status_changed: - return True - # check if too much time has passed since the last time an update was sent - current_timestamp = arrow.utcnow().timestamp() if ( - current_timestamp - model.last_status_notification - ) > _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES.total_seconds(): + status_changed + or (arrow.utcnow().timestamp() - model.last_status_notification) + > _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES.total_seconds() + ): + model.set_last_status_notification_to_now() + await tracker.save(node_id, model) return True return False -async def set_check_status_after_to( - app: FastAPI, node_id: NodeID, delay: timedelta +async def set_scheduled_to_run( + app: FastAPI, node_id: NodeID, delay_from_now: timedelta ) -> None: tracker: Tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: _logger.info( - "Could not find a %s entry for node_id %s: skipping set_check_status_after_to", + "Could not find a %s entry for node_id %s: skipping set_scheduled_to_start", TrackedServiceModel.__name__, node_id, ) return - model.set_check_status_after_to(delay) + model.scheduled_to_run = True + model.set_check_status_after_to(delay_from_now) await tracker.save(node_id, model) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 9ba1a7585d0f..e750f9f99533 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -1,5 +1,5 @@ import pickle -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import timedelta from enum import auto @@ -42,16 +42,20 @@ class TrackedServiceModel: ### SERVICE STATSU UPDATE ### ############################# + scheduled_to_run: bool = False + # stored for debug mainly this is used to compute ``current_state`` service_status: str = "" # uid of the job currently fetching the status service_status_task_uid: TaskUID | None = None # used to determine when to poll the status again - check_status_after: float | None = None + check_status_after: float = field( + default_factory=lambda: arrow.utcnow().timestamp() + ) - def set_check_status_after_to(self, delay: timedelta) -> None: - self.check_status_after = (arrow.utcnow() + delay).timestamp() + def set_check_status_after_to(self, delay_from_now: timedelta) -> None: + self.check_status_after = (arrow.utcnow() + delay_from_now).timestamp() # used to determine when was the last time the status was notified last_status_notification: float = 0 diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 686c622ba0cc..3458f9cc102c 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -5,7 +5,6 @@ from typing import Any, Final from uuid import uuid4 -import arrow import pytest from faker import Faker from fastapi import FastAPI @@ -22,7 +21,6 @@ get_all_tracked, get_tracked, remove_tracked, - set_check_status_after_to, set_if_status_changed, set_request_as_running, set_request_as_stopped, @@ -136,22 +134,6 @@ async def test_set_service_status_task_uid(app: FastAPI, node_id: NodeID, faker: assert model.service_status_task_uid == task_uid -async def 
test_set_check_status_after_to(app: FastAPI, node_id: NodeID): - await set_request_as_running(app, node_id) - - delay = timedelta(seconds=6) - - benfore = (arrow.utcnow() + delay).timestamp() - await set_check_status_after_to(app, node_id, delay) - after = (arrow.utcnow() + delay).timestamp() - - model = await get_tracked(app, node_id) - assert model - assert model.check_status_after - - assert benfore < model.check_status_after < after - - @pytest.mark.parametrize( "status, expected_poll_interval", [ diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py index 836a7cba1545..0e5901c4f87f 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py @@ -5,21 +5,21 @@ from faker import Faker from servicelib.deferred_tasks import TaskUID from simcore_service_dynamic_scheduler.services.service_tracker._models import ( - ServiceStates, + SchedulerServiceState, TrackedServiceModel, UserRequestedState, ) @pytest.mark.parametrize("requested_state", UserRequestedState) -@pytest.mark.parametrize("current_state", ServiceStates) -@pytest.mark.parametrize("check_status_after", [None, 1, arrow.utcnow().timestamp()]) +@pytest.mark.parametrize("current_state", SchedulerServiceState) +@pytest.mark.parametrize("check_status_after", [1, arrow.utcnow().timestamp()]) @pytest.mark.parametrize("service_status_task_uid", [None, TaskUID("ok")]) def test_serialization( faker: Faker, requested_state: UserRequestedState, - current_state: ServiceStates, - check_status_after: float | None, + current_state: SchedulerServiceState, + check_status_after: float, service_status_task_uid: TaskUID | None, ): tracked_model = TrackedServiceModel( @@ -37,7 +37,7 @@ def test_serialization( async def test_set_check_status_after_to(): model = TrackedServiceModel(UserRequestedState.RUNNING) - assert 
model.check_status_after is None + assert model.check_status_after < arrow.utcnow().timestamp() delay = timedelta(seconds=4) From 17b0ad9a192b70da5af0141760f753f37d23c7aa Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 10:44:14 +0200 Subject: [PATCH 041/122] basic working version of monitor --- .../services/status_monitor/_monitor.py | 75 ++++++++++--------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index c2acefad7a93..b82bf2adebfb 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -9,26 +9,30 @@ from pydantic import NonNegativeFloat, NonNegativeInt from servicelib.utils import logged_gather -from ..service_tracker import ( - TrackedServiceModel, - get_all_tracked, - set_check_status_after_to, -) +from ..service_tracker import TrackedServiceModel, get_all_tracked, set_scheduled_to_run from ._deferred_get_status import DeferredGetStatus _logger = logging.getLogger(__name__) _MAX_CONCURRENCY: Final[NonNegativeInt] = 10 +_NEXT_STATUS_CHECK_AFTER: Final[timedelta] = timedelta(seconds=0.1) + + +async def _start_get_status_deferred( + app: FastAPI, node_id: NodeID, *, next_check_delay: timedelta +) -> None: + await set_scheduled_to_run(app, node_id, next_check_delay) + await DeferredGetStatus.start(node_id=node_id) class Monitor: - def __init__(self, app: FastAPI, check_threshold: timedelta) -> None: + def __init__(self, app: FastAPI, status_worker_interval: timedelta) -> None: self.app = app - self.check_threshold = check_threshold + self.status_worker_interval = status_worker_interval @cached_property - def check_threshold_seconds(self) -> NonNegativeFloat: - 
return self.check_threshold.total_seconds() + def status_worker_interval_seconds(self) -> NonNegativeFloat: + return self.status_worker_interval.total_seconds() async def _worker_start_get_status_requests(self) -> None: # NOTE: this worker runs on only once across all instances of the scheduler @@ -36,43 +40,44 @@ async def _worker_start_get_status_requests(self) -> None: models: dict[NodeID, TrackedServiceModel] = await get_all_tracked(self.app) to_start: list[NodeID] = [] - to_set_check_status_after: list[NodeID] = [] current_timestamp = arrow.utcnow().timestamp() for node_id, model in models.items(): - if ( - model.check_status_after is None - or model.check_status_after > current_timestamp - ): - # status fetching is required - if model.service_status_task_uid is None: - to_start.append(node_id) - else: - _logger.info( - "Skipping status check for %s, since already running. Will check later", - node_id, - ) - if model.check_status_after is None: - to_set_check_status_after.append(node_id) - - # for services where the check never ran, make sure we are nto able to start the check while it's running + + job_not_running = not ( + model.scheduled_to_run + and model.service_status_task_uid is not None + and await DeferredGetStatus.is_present(model.service_status_task_uid) + ) + wait_period_finished = current_timestamp > model.check_status_after + if job_not_running and wait_period_finished: + to_start.append(node_id) + else: + _logger.info( + "Skipping status check for %s, because: %s or %s", + node_id, + f"{job_not_running=}", + ( + f"{wait_period_finished=}" + if wait_period_finished + else f"can_start_in={model.check_status_after - current_timestamp}" + ), + ) + + _logger.debug("DeferredGetStatus to start: '%s'", to_start) await logged_gather( *( - set_check_status_after_to(self.app, node_id, timedelta(seconds=5)) - for node_id in to_set_check_status_after + _start_get_status_deferred( + self.app, node_id, next_check_delay=_NEXT_STATUS_CHECK_AFTER + ) + for 
node_id in to_start ), max_concurrency=_MAX_CONCURRENCY, ) - await logged_gather( - *(DeferredGetStatus.start(node_id=node_id) for node_id in to_start), - max_concurrency=_MAX_CONCURRENCY, - ) - async def setup(self) -> None: - # TODO: start uniquely running task - # NOTE: THIS needs to be distributed only 1 at a time + # TODO: run uniquely across all processes pass async def shutdown(self) -> None: From 0238ee0efdd52238d85acb055db5a66859470854 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 11:02:52 +0200 Subject: [PATCH 042/122] refactor --- .../services/service_tracker/__init__.py | 2 ++ .../services/service_tracker/_api.py | 4 ++-- .../dynamic-scheduler/tests/unit/service_tracker/test__api.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index 6040be720e2d..24bd9d52effe 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -1,4 +1,5 @@ from ._api import ( + NORMAL_RATE_POLL_INTERVAL, can_notify_frontend, get_all_tracked, get_tracked, @@ -16,6 +17,7 @@ "can_notify_frontend", "get_all_tracked", "get_tracked", + "NORMAL_RATE_POLL_INTERVAL", "remove_tracked", "set_if_status_changed", "set_request_as_running", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index d09e962a2779..69f3ab2d85a3 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -18,7 +18,7 @@ _LOW_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=1) -_NORMAL_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=5) +NORMAL_RATE_POLL_INTERVAL: Final[timedelta] = timedelta(seconds=5) _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES: Final[timedelta] = timedelta(seconds=60) @@ -59,7 +59,7 @@ def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> tim if __get_state_str(status) != "running": return _LOW_RATE_POLL_INTERVAL - return _NORMAL_RATE_POLL_INTERVAL + return NORMAL_RATE_POLL_INTERVAL def _get_current_state( diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 3458f9cc102c..281db6e64d9e 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -28,7 +28,7 @@ ) from simcore_service_dynamic_scheduler.services.service_tracker._api import ( _LOW_RATE_POLL_INTERVAL, - _NORMAL_RATE_POLL_INTERVAL, + NORMAL_RATE_POLL_INTERVAL, _get_current_state, _get_poll_interval, ) @@ -142,7 +142,7 @@ async def test_set_service_status_task_uid(app: FastAPI, node_id: NodeID, faker: _LOW_RATE_POLL_INTERVAL, ), *[ - (DynamicServiceGet.parse_obj(x), _NORMAL_RATE_POLL_INTERVAL) + (DynamicServiceGet.parse_obj(x), NORMAL_RATE_POLL_INTERVAL) for x in DynamicServiceGet.Config.schema_extra["examples"] ], ( From 71ca28779a4b3e7e46df4ad1c92aa99668dcd8a4 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 11:03:10 +0200 Subject: [PATCH 043/122] basic working test --- .../unit/test_services_status_monitor.py | 145 ++++++++++++------ 1 file changed, 96 insertions(+), 49 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py 
index fd1814c3c675..13867989fd11 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py +++ b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py @@ -5,6 +5,7 @@ import re from collections.abc import AsyncIterable from copy import deepcopy +from datetime import timedelta from typing import Any from unittest.mock import AsyncMock from uuid import uuid4 @@ -23,8 +24,10 @@ from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( + _api, set_request_as_running, ) +from simcore_service_dynamic_scheduler.services.status_monitor import _monitor from simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status import ( DeferredGetStatus, ) @@ -51,9 +54,6 @@ def app_environment( return app_environment -# create service pattern for start & stop with the appropriate type of message types -# including idle and the ones for legacy services - _DEFAULT_NODE_ID: NodeID = uuid4() @@ -144,6 +144,54 @@ def get_status(self, node_id: NodeID) -> NodeGet | DynamicServiceGet | NodeGetId return status +async def _assert_call_to( + deferred_status_spies: dict[str, AsyncMock], *, method: str, count: NonNegativeInt +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(1), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + call_count = deferred_status_spies[method].call_count + assert ( + call_count == count + ), f"Received calls {call_count} != {count} (expected) to '{method}'" + + +async def _assert_result( + deferred_status_spies: dict[str, AsyncMock], + *, + timeline: list[NodeGet | DynamicServiceGet | NodeGetIdle], +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(1), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + + assert 
deferred_status_spies["on_result"].call_count == len(timeline) + assert [ + x.args[0] for x in deferred_status_spies["on_result"].call_args_list + ] == timeline + + +async def _assert_notification_count( + mock: AsyncMock, expected_count: NonNegativeInt +) -> None: + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(1), + wait=wait_fixed(0.01), + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert mock.call_count == expected_count + + @pytest.fixture async def mock_director_v2_status( app: FastAPI, response_timeline: _ResponseTimeline @@ -185,7 +233,9 @@ def monitor(mock_director_v2_status: None, app: FastAPI) -> Monitor: def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: results: dict[str, AsyncMock] = {} for method_name in ( + "start", "on_result", + "on_created", "run", "on_finished_with_error", ): @@ -196,74 +246,67 @@ def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: return results -async def _assert_call_to( - deferred_status_spies: dict[str, AsyncMock], *, method: str, count: NonNegativeInt -) -> None: - async for attempt in AsyncRetrying( - reraise=True, - stop=stop_after_delay(5), - wait=wait_fixed(0.01), - retry=retry_if_exception_type(AssertionError), - ): - with attempt: - assert deferred_status_spies[method].call_count == count - +@pytest.fixture +def node_id() -> NodeID: + return _DEFAULT_NODE_ID -async def _assert_result( - deferred_status_spies: dict[str, AsyncMock], - *, - timeline: list[NodeGet | DynamicServiceGet | NodeGetIdle], -) -> None: - async for attempt in AsyncRetrying( - reraise=True, - stop=stop_after_delay(5), - wait=wait_fixed(0.01), - retry=retry_if_exception_type(AssertionError), - ): - with attempt: - assert deferred_status_spies["on_result"].call_count == len(timeline) - assert [ - x.args[0] for x in deferred_status_spies["on_result"].call_args_list - ] == timeline +@pytest.fixture +def mocked_notify_frontend(mocker: MockerFixture) -> 
AsyncMock: + return mocker.patch( + "simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status.notify_frontend" + ) @pytest.fixture -def node_id() -> NodeID: - return _DEFAULT_NODE_ID +def mock_poll_rate_intervals(mocker: MockerFixture) -> None: + mocker.patch.object(_api, "_LOW_RATE_POLL_INTERVAL", timedelta(seconds=0.1)) + mocker.patch.object(_api, "NORMAL_RATE_POLL_INTERVAL", timedelta(seconds=0.2)) + mocker.patch.object(_monitor, "NORMAL_RATE_POLL_INTERVAL", timedelta(seconds=0.2)) @pytest.mark.parametrize( - "response_timeline", + "response_timeline, expected_notification_count", [ - _ResponseTimeline([_get_node_get_with("running")]), - _ResponseTimeline( - [ - __get_dynamic_service_get_legacy_with("running"), - __get_dynamic_service_get_legacy_with("running"), - ] + # TODO: below + # create service pattern for start & stop with the appropriate type of message types + # including idle and the ones for legacy services + (_ResponseTimeline([_get_node_get_with("running")]), 1), + ( + _ResponseTimeline( + [__get_dynamic_service_get_legacy_with("running") for _ in range(10)] + ), + 1, ), - _ResponseTimeline([__get_dynamic_service_get_new_style_with("running")]), - _ResponseTimeline([__get_node_get_idle()]), + (_ResponseTimeline([__get_dynamic_service_get_new_style_with("running")]), 1), + (_ResponseTimeline([__get_node_get_idle()]), 1), ], ) -async def test_basic_examples( +async def test_expected_calls_to_notify_frontend( + mock_poll_rate_intervals: None, + mocked_notify_frontend: AsyncMock, deferred_status_spies: dict[str, AsyncMock], app: FastAPI, monitor: Monitor, - response_timeline: _ResponseTimeline, node_id: NodeID, + response_timeline: _ResponseTimeline, + expected_notification_count: NonNegativeInt, ): await set_request_as_running(app, node_id) - # ADD some service to monitor, then mock the API to director-v2 to returns different - # statuses based on the times when it is called - entries_in_timeline = len(response_timeline) for i in 
range(entries_in_timeline): - await monitor._worker_start_get_status_requests() - await _assert_call_to(deferred_status_spies, method="on_result", count=i + 1) + async for attempt in AsyncRetrying( + reraise=True, stop=stop_after_delay(10), wait=wait_fixed(0.1) + ): + with attempt: + # pylint:disable=protected-access + await monitor._worker_start_get_status_requests() # noqa: SLF001 + for method in ("start", "on_created", "on_result"): + await _assert_call_to( + deferred_status_spies, method=method, count=i + 1 + ) await _assert_call_to( deferred_status_spies, method="run", count=entries_in_timeline @@ -273,3 +316,7 @@ async def test_basic_examples( ) await _assert_result(deferred_status_spies, timeline=response_timeline.entries) + + await _assert_notification_count( + mocked_notify_frontend, expected_notification_count + ) From 9b2d3dbc66d90ee2f06ab54230b365e54397bdb7 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 11:38:29 +0200 Subject: [PATCH 044/122] extended use cases --- .../unit/test_services_status_monitor.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py index 13867989fd11..f242247b10fe 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py +++ b/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py @@ -268,9 +268,6 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: @pytest.mark.parametrize( "response_timeline, expected_notification_count", [ - # TODO: below - # create service pattern for start & stop with the appropriate type of message types - # including idle and the ones for legacy services (_ResponseTimeline([_get_node_get_with("running")]), 1), ( _ResponseTimeline( @@ -280,6 +277,24 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: ), 
(_ResponseTimeline([__get_dynamic_service_get_new_style_with("running")]), 1), (_ResponseTimeline([__get_node_get_idle()]), 1), + ( + _ResponseTimeline( + [ + __get_node_get_idle(), + __get_dynamic_service_get_new_style_with("pending"), + __get_dynamic_service_get_new_style_with("pulling"), + __get_dynamic_service_get_new_style_with("starting"), + __get_dynamic_service_get_new_style_with("starting"), + __get_dynamic_service_get_new_style_with("starting"), + __get_dynamic_service_get_new_style_with("starting"), + __get_dynamic_service_get_new_style_with("running"), + __get_dynamic_service_get_new_style_with("stopping"), + __get_dynamic_service_get_new_style_with("complete"), + __get_node_get_idle(), + ] + ), + 8, + ), ], ) async def test_expected_calls_to_notify_frontend( From 66b15acda43b79d2a4a3128c579fe5d5e4caa1f0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 11:45:23 +0200 Subject: [PATCH 045/122] test location and names --- .../test_services_status_monitor__monitor.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename services/dynamic-scheduler/tests/unit/{test_services_status_monitor.py => status_monitor/test_services_status_monitor__monitor.py} (100%) diff --git a/services/dynamic-scheduler/tests/unit/test_services_status_monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py similarity index 100% rename from services/dynamic-scheduler/tests/unit/test_services_status_monitor.py rename to services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py From 01cf4a1e81e3f65439023fc80b5725a95752136c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 14:16:44 +0200 Subject: [PATCH 046/122] refactor removal from tracking --- .../status_monitor/_deferred_get_status.py | 5 - .../services/status_monitor/_monitor.py | 28 +++++- .../test_services_status_monitor__monitor.py | 91 ++++++++++++++++--- 3 files changed, 102 insertions(+), 22 
deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index 4e31ff3d4e89..cd5a4ac1392c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -12,7 +12,6 @@ from ..notifier import notify_frontend from ..service_tracker import ( can_notify_frontend, - remove_tracked, set_if_status_changed, set_service_status_task_uid, ) @@ -67,7 +66,3 @@ async def on_result( status_changed: bool = await set_if_status_changed(app, node_id, result) if await can_notify_frontend(app, node_id, status_changed=status_changed): await notify_frontend(app, node_id, result) - - # remove service if no longer running - if isinstance(result, NodeGetIdle): - await remove_tracked(app, node_id) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index b82bf2adebfb..a60f66e73c83 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -9,13 +9,19 @@ from pydantic import NonNegativeFloat, NonNegativeInt from servicelib.utils import logged_gather -from ..service_tracker import TrackedServiceModel, get_all_tracked, set_scheduled_to_run +from ..service_tracker import ( + NORMAL_RATE_POLL_INTERVAL, + TrackedServiceModel, + get_all_tracked, + remove_tracked, + set_scheduled_to_run, +) +from ..service_tracker._models import SchedulerServiceState, UserRequestedState from ._deferred_get_status 
import DeferredGetStatus _logger = logging.getLogger(__name__) _MAX_CONCURRENCY: Final[NonNegativeInt] = 10 -_NEXT_STATUS_CHECK_AFTER: Final[timedelta] = timedelta(seconds=0.1) async def _start_get_status_deferred( @@ -39,11 +45,19 @@ async def _worker_start_get_status_requests(self) -> None: models: dict[NodeID, TrackedServiceModel] = await get_all_tracked(self.app) + to_remove: list[NodeID] = [] to_start: list[NodeID] = [] current_timestamp = arrow.utcnow().timestamp() for node_id, model in models.items(): + # check if service is idle and status polling should stop + if ( + model.current_state == SchedulerServiceState.IDLE + and model.requested_sate == UserRequestedState.STOPPED + ): + to_remove.append(node_id) + continue job_not_running = not ( model.scheduled_to_run @@ -65,11 +79,17 @@ async def _worker_start_get_status_requests(self) -> None: ), ) - _logger.debug("DeferredGetStatus to start: '%s'", to_start) + _logger.debug("Removing tracked services: '%s'", to_remove) + await logged_gather( + *(remove_tracked(self.app, node_id) for node_id in to_remove), + max_concurrency=_MAX_CONCURRENCY, + ) + + _logger.debug("Poll status for tracked services: '%s'", to_start) await logged_gather( *( _start_get_status_deferred( - self.app, node_id, next_check_delay=_NEXT_STATUS_CHECK_AFTER + self.app, node_id, next_check_delay=NORMAL_RATE_POLL_INTERVAL ) for node_id in to_start ), diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index f242247b10fe..b6f8190d80f5 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -26,6 +26,7 @@ from simcore_service_dynamic_scheduler.services.service_tracker import ( _api, set_request_as_running, + set_request_as_stopped, ) from 
simcore_service_dynamic_scheduler.services.status_monitor import _monitor from simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status import ( @@ -246,6 +247,12 @@ def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: return results +@pytest.fixture +def remove_tracked_spy(mocker: MockerFixture) -> AsyncMock: + mock_method = mocker.AsyncMock(wraps=_monitor.remove_tracked) + return mocker.patch.object(_monitor, "remove_tracked", mock_method) + + @pytest.fixture def node_id() -> NodeID: return _DEFAULT_NODE_ID @@ -266,27 +273,49 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: @pytest.mark.parametrize( - "response_timeline, expected_notification_count", + "user_requests_running, response_timeline, expected_notification_count, remove_tracked_count", [ - (_ResponseTimeline([_get_node_get_with("running")]), 1), - ( + pytest.param( + True, + _ResponseTimeline([_get_node_get_with("running")]), + 1, + 0, + id="requested_running_state_changes_1_no_task_removal", + ), + pytest.param( + True, _ResponseTimeline( [__get_dynamic_service_get_legacy_with("running") for _ in range(10)] ), 1, + 0, + id="requested_running_state_changes_1_for_multiple_same_state_no_task_removal", + ), + pytest.param( + True, + _ResponseTimeline([__get_node_get_idle()]), + 1, + 0, + id="requested_running_state_idle_no_removal", ), - (_ResponseTimeline([__get_dynamic_service_get_new_style_with("running")]), 1), - (_ResponseTimeline([__get_node_get_idle()]), 1), - ( + pytest.param( + False, + _ResponseTimeline([__get_node_get_idle()]), + 1, + 1, + id="requested_stopped_state_idle_is_removed", + ), + pytest.param( + True, _ResponseTimeline( [ - __get_node_get_idle(), + *[__get_node_get_idle() for _ in range(10)], __get_dynamic_service_get_new_style_with("pending"), __get_dynamic_service_get_new_style_with("pulling"), - __get_dynamic_service_get_new_style_with("starting"), - __get_dynamic_service_get_new_style_with("starting"), - 
__get_dynamic_service_get_new_style_with("starting"), - __get_dynamic_service_get_new_style_with("starting"), + *[ + __get_dynamic_service_get_new_style_with("starting") + for _ in range(10) + ], __get_dynamic_service_get_new_style_with("running"), __get_dynamic_service_get_new_style_with("stopping"), __get_dynamic_service_get_new_style_with("complete"), @@ -294,20 +323,48 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: ] ), 8, + 0, + id="requested_running_state_changes_8_no_removal", + ), + pytest.param( + False, + _ResponseTimeline( + [ + __get_dynamic_service_get_new_style_with("pending"), + __get_dynamic_service_get_new_style_with("pulling"), + *[ + __get_dynamic_service_get_new_style_with("starting") + for _ in range(10) + ], + __get_dynamic_service_get_new_style_with("running"), + __get_dynamic_service_get_new_style_with("stopping"), + __get_dynamic_service_get_new_style_with("complete"), + __get_node_get_idle(), + ] + ), + 7, + 1, + id="requested_stopped_state_changes_7_is_removed", ), ], ) -async def test_expected_calls_to_notify_frontend( +async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arguments mock_poll_rate_intervals: None, mocked_notify_frontend: AsyncMock, deferred_status_spies: dict[str, AsyncMock], + remove_tracked_spy: AsyncMock, app: FastAPI, monitor: Monitor, node_id: NodeID, + user_requests_running: bool, response_timeline: _ResponseTimeline, expected_notification_count: NonNegativeInt, + remove_tracked_count: NonNegativeInt, ): - await set_request_as_running(app, node_id) + if user_requests_running: + await set_request_as_running(app, node_id) + else: + await set_request_as_stopped(app, node_id) entries_in_timeline = len(response_timeline) @@ -335,3 +392,11 @@ async def test_expected_calls_to_notify_frontend( await _assert_notification_count( mocked_notify_frontend, expected_notification_count ) + + async for attempt in AsyncRetrying( + reraise=True, stop=stop_after_delay(1), wait=wait_fixed(0.1) + 
): + with attempt: + # pylint:disable=protected-access + await monitor._worker_start_get_status_requests() # noqa: SLF001 + assert remove_tracked_spy.call_count == remove_tracked_count From b5df2013b2b03f69d0ff96d899bf8db2e23d20aa Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 14:16:58 +0200 Subject: [PATCH 047/122] add description --- .../services/service_tracker/_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index e750f9f99533..8a97fcd132a4 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -42,6 +42,7 @@ class TrackedServiceModel: ### SERVICE STATSU UPDATE ### ############################# + # set when a job will be immediately scheduled scheduled_to_run: bool = False # stored for debug mainly this is used to compute ``current_state`` From d480d60b8617dc55d6c07b6472536174058bbb6a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 6 Jun 2024 16:02:53 +0200 Subject: [PATCH 048/122] fixed test --- .../test_services_status_monitor__monitor.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index b6f8190d80f5..477d5f4f96d4 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -16,6 +16,9 @@ from fastapi.encoders import jsonable_encoder from httpx import Request, Response from 
models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceCreate, +) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID from pydantic import NonNegativeInt @@ -261,7 +264,7 @@ def node_id() -> NodeID: @pytest.fixture def mocked_notify_frontend(mocker: MockerFixture) -> AsyncMock: return mocker.patch( - "simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status.notify_frontend" + "simcore_service_dynamic_scheduler.services.status_monitor._deferred_get_status.notify_service_status_change" ) @@ -361,9 +364,13 @@ async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arg expected_notification_count: NonNegativeInt, remove_tracked_count: NonNegativeInt, ): - if user_requests_running: - await set_request_as_running(app, node_id) - else: + # request started (and also tracks service) + data_dict = deepcopy(RPCDynamicServiceCreate.Config.schema_extra["example"]) + data_dict["service_uuid"] = f"{node_id}" + await set_request_as_running(app, RPCDynamicServiceCreate.parse_obj(data_dict)) + + # request stopping only if tracked + if not user_requests_running: await set_request_as_stopped(app, node_id) entries_in_timeline = len(response_timeline) From 9f8158f857b970fb987aee4d8dc2130b90679b5f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 09:33:49 +0200 Subject: [PATCH 049/122] refactor stop service --- .../dynamic_services.py | 23 ++++++++++++++++++ .../dynamic_scheduler/services.py | 9 +++---- .../api/rpc/_services.py | 9 +++---- .../dynamic_scheduler/api.py | 19 ++++++++------- .../garbage_collector/_core_orphans.py | 13 +++++++--- .../projects/_nodes_handlers.py | 24 ++++++++++--------- 6 files changed, 64 insertions(+), 33 deletions(-) diff --git 
a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py index 6adb136b2c3f..e4d9d8ed13cf 100644 --- a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py +++ b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py @@ -1,9 +1,13 @@ from typing import Any, ClassVar from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceCreate +from models_library.projects import ProjectID +from models_library.projects_nodes_io import NodeID from models_library.resource_tracker import HardwareInfo, PricingInfo from models_library.services_resources import ServiceResourcesDictHelpers +from models_library.users import UserID from models_library.wallets import WalletInfo +from pydantic import BaseModel class RPCDynamicServiceCreate(DynamicServiceCreate): @@ -32,3 +36,22 @@ class Config: "hardware_info": HardwareInfo.Config.schema_extra["examples"][0], } } + + +class RPCDynamicServiceStop(BaseModel): + user_id: UserID + project_id: ProjectID + node_id: NodeID + simcore_user_agent: str + save_state: bool + + class Config: + schema_extra: ClassVar[dict[str, Any]] = { + "example": { + "user_id": 234, + "project_id": "dd1d04d9-d704-4f7e-8f0f-1ca60cc771fe", + "node_id": "75c7f3f4-18f9-4678-8610-54a2ade78eaa", + "simcore_user_agent": "", + "can_save": True, + } + } diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py index 662280faf277..5e09a02ebbb5 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py @@ -5,6 +5,7 @@ from 
models_library.api_schemas_dynamic_scheduler import DYNAMIC_SCHEDULER_RPC_NAMESPACE from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( RPCDynamicServiceCreate, + RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -60,17 +61,13 @@ async def run_dynamic_service( async def stop_dynamic_service( rabbitmq_rpc_client: RabbitMQRPCClient, *, - node_id: NodeID, - simcore_user_agent: str, - save_state: bool, + rpc_dynamic_service_stop: RPCDynamicServiceStop, timeout_s: NonNegativeInt, ) -> None: result = await rabbitmq_rpc_client.request( DYNAMIC_SCHEDULER_RPC_NAMESPACE, parse_obj_as(RPCMethodName, "stop_dynamic_service"), - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=rpc_dynamic_service_stop, timeout_s=timeout_s, ) assert result is None # nosec diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index 416775b9d252..ce08ed6e95fe 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -2,6 +2,7 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( RPCDynamicServiceCreate, + RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -40,13 +41,13 @@ async def run_dynamic_service( ) ) async def stop_dynamic_service( - app: FastAPI, *, node_id: NodeID, simcore_user_agent: str, save_state: bool + app: FastAPI, *, rpc_dynamic_service_stop: RPCDynamicServiceStop ) -> NodeGet | DynamicServiceGet: 
director_v2_client = DirectorV2Client.get_from_app_state(app) settings: ApplicationSettings = app.state.settings return await director_v2_client.stop_dynamic_service( - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + node_id=rpc_dynamic_service_stop.node_id, + simcore_user_agent=rpc_dynamic_service_stop.simcore_user_agent, + save_state=rpc_dynamic_service_stop.save_state, timeout=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) diff --git a/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py b/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py index 8709b6625414..7064fffab1c1 100644 --- a/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py +++ b/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py @@ -6,6 +6,7 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( RPCDynamicServiceCreate, + RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import ( NodeGet, @@ -53,9 +54,7 @@ async def run_dynamic_service( async def stop_dynamic_service( app: web.Application, *, - node_id: NodeID, - simcore_user_agent: str, - save_state: bool, + rpc_dynamic_service_stop: RPCDynamicServiceStop, progress: ProgressBarData | None = None, ) -> None: async with AsyncExitStack() as stack: @@ -65,9 +64,7 @@ async def stop_dynamic_service( settings: DynamicSchedulerSettings = get_plugin_settings(app) await services.stop_dynamic_service( get_rabbitmq_rpc_client(app), - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=rpc_dynamic_service_stop, timeout_s=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) @@ -118,9 +115,13 @@ async def stop_dynamic_services_in_project( services_to_stop = [ stop_dynamic_service( app=app, - node_id=service.node_uuid, - 
simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service.project_id, + node_id=service.node_uuid, + simcore_user_agent=simcore_user_agent, + save_state=save_state, + ), progress=progress_bar.sub_progress( 1, description=f"{service.node_uuid}" ), diff --git a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py index 01426c6ea117..69f8cbc97b37 100644 --- a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py +++ b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py @@ -3,6 +3,9 @@ from aiohttp import web from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceStop, +) from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID from servicelib.common_headers import UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE @@ -51,9 +54,13 @@ async def _remove_service( ): await dynamic_scheduler_api.stop_dynamic_service( app, - node_id=node_id, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=save_service_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=service.user_id, + project_id=service.project_id, + node_id=service.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=save_service_state, + ), ) diff --git a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py index 6a7109799e0a..4ebd137bb44d 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py +++ 
b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py @@ -11,6 +11,9 @@ ServiceAccessRightsGet, ) from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceStop, +) from models_library.api_schemas_webserver.projects_nodes import ( NodeCreate, NodeCreated, @@ -326,17 +329,12 @@ async def _stop_dynamic_service_task( _task_progress: TaskProgress, *, app: web.Application, - node_id: NodeID, - simcore_user_agent: str, - save_state: bool, + rpc_dynamic_service_stop: RPCDynamicServiceStop, ): # NOTE: _handle_project_nodes_exceptions only decorate handlers try: await dynamic_scheduler_api.stop_dynamic_service( - app, - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + app, rpc_dynamic_service_stop=rpc_dynamic_service_stop ) raise web.HTTPNoContent(content_type=MIMETYPE_APPLICATION_JSON) @@ -376,11 +374,15 @@ async def stop_node(request: web.Request) -> web.Response: task_context=jsonable_encoder(req_ctx), # task arguments from here on --- app=request.app, - node_id=path_params.node_id, - simcore_user_agent=request.headers.get( - X_SIMCORE_USER_AGENT, UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=req_ctx.user_id, + project_id=path_params.project_id, + node_id=path_params.node_id, + simcore_user_agent=request.headers.get( + X_SIMCORE_USER_AGENT, UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE + ), + save_state=save_state, ), - save_state=save_state, fire_and_forget=True, ) From 8970055246f4b7b917f8ce0607572d297b121ad3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 09:46:57 +0200 Subject: [PATCH 050/122] fixed broken tests --- .../unit/api_rpc/test_api_rpc__services.py | 52 ++++++++++++++----- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py 
b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py index 34387e93d7ff..71edaa075fec 100644 --- a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py +++ b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py @@ -12,9 +12,12 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( RPCDynamicServiceCreate, + RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID +from models_library.users import UserID from pytest_mock import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.rabbitmq import RabbitMQRPCClient, RPCServerError @@ -24,8 +27,10 @@ ServiceWasNotFoundError, ) from settings_library.rabbit import RabbitSettings +from settings_library.redis import RedisSettings pytest_simcore_core_services_selection = [ + "redis", "rabbit", ] @@ -125,9 +130,9 @@ def mock_director_v2_service_state( @pytest.fixture def app_environment( - disable_redis_setup: None, app_environment: EnvVarsDict, rabbit_service: RabbitSettings, + redis_service: RedisSettings, ) -> EnvVarsDict: return app_environment @@ -254,6 +259,16 @@ def node_id_manual_intervention(faker: Faker) -> NodeID: return faker.uuid4(cast_to=None) +@pytest.fixture +def user_id() -> UserID: + return 42 + + +@pytest.fixture +def project_id(faker: Faker) -> ProjectID: + return faker.uuid4(cast_to=None) + + @pytest.fixture def mock_director_v0_service_stop( fake_director_v0_base_url: str, @@ -344,18 +359,27 @@ async def test_stop_dynamic_service( mock_director_v0_service_stop: None, mock_director_v2_service_stop: None, rpc_client: RabbitMQRPCClient, + user_id: UserID, + project_id: ProjectID, node_id: NodeID, node_id_not_found: NodeID, node_id_manual_intervention: 
NodeID, simcore_user_agent: str, save_state: bool, ): + def _get_rpc_stop(with_node_id: NodeID) -> RPCDynamicServiceStop: + return RPCDynamicServiceStop( + user_id=user_id, + project_id=project_id, + node_id=with_node_id, + simcore_user_agent=simcore_user_agent, + save_state=save_state, + ) + # service was stopped result = await services.stop_dynamic_service( rpc_client, - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=_get_rpc_stop(node_id), timeout_s=5, ) assert result is None @@ -364,9 +388,7 @@ async def test_stop_dynamic_service( with pytest.raises(ServiceWasNotFoundError): await services.stop_dynamic_service( rpc_client, - node_id=node_id_not_found, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=_get_rpc_stop(node_id_not_found), timeout_s=5, ) @@ -374,9 +396,7 @@ async def test_stop_dynamic_service( with pytest.raises(ServiceWaitingForManualInterventionError): await services.stop_dynamic_service( rpc_client, - node_id=node_id_manual_intervention, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=_get_rpc_stop(node_id_manual_intervention), timeout_s=5, ) @@ -399,6 +419,8 @@ def mock_raise_generic_error( async def test_stop_dynamic_service_serializes_generic_errors( mock_raise_generic_error: None, rpc_client: RabbitMQRPCClient, + user_id: UserID, + project_id: ProjectID, node_id: NodeID, simcore_user_agent: str, save_state: bool, @@ -408,8 +430,12 @@ async def test_stop_dynamic_service_serializes_generic_errors( ): await services.stop_dynamic_service( rpc_client, - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=project_id, + node_id=node_id, + simcore_user_agent=simcore_user_agent, + save_state=save_state, + ), timeout_s=5, ) From a13532485e31f7ca97c28b781e72d729c9b6a37e Mon Sep 17 00:00:00 2001 
From: Andrei Neagu Date: Fri, 7 Jun 2024 10:00:53 +0200 Subject: [PATCH 051/122] fixed broken --- .../projects/projects_api.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/projects/projects_api.py b/services/web/server/src/simcore_service_webserver/projects/projects_api.py index 28655495bfbf..376f3265f72b 100644 --- a/services/web/server/src/simcore_service_webserver/projects/projects_api.py +++ b/services/web/server/src/simcore_service_webserver/projects/projects_api.py @@ -28,6 +28,7 @@ ) from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( RPCDynamicServiceCreate, + RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects import ProjectPatch from models_library.api_schemas_webserver.projects_nodes import NodePatch @@ -800,9 +801,13 @@ async def _remove_service_and_its_data_folders( # no need to save the state of the node when deleting it await dynamic_scheduler_api.stop_dynamic_service( app, - node_id=NodeID(node_uuid), - simcore_user_agent=user_agent, - save_state=False, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=project_uuid, + node_id=NodeID(node_uuid), + simcore_user_agent=user_agent, + save_state=False, + ), ) # remove the node's data if any From 41ce7f90fcaccf994ef895d675ecdae93b7f66c0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 10:07:21 +0200 Subject: [PATCH 052/122] fixed example --- .../api_schemas_dynamic_scheduler/dynamic_services.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py index e4d9d8ed13cf..33d19b0da93a 100644 --- a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py +++ 
b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py @@ -52,6 +52,6 @@ class Config: "project_id": "dd1d04d9-d704-4f7e-8f0f-1ca60cc771fe", "node_id": "75c7f3f4-18f9-4678-8610-54a2ade78eaa", "simcore_user_agent": "", - "can_save": True, + "save_state": True, } } From 3765525862c047f79a4a02fd104e3aab4573e578 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 10:14:26 +0200 Subject: [PATCH 053/122] fixed failing tests --- .../isolated/test_garbage_collector_core.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/services/web/server/tests/unit/isolated/test_garbage_collector_core.py b/services/web/server/tests/unit/isolated/test_garbage_collector_core.py index 8f1d27b2c217..763a5ac1a58b 100644 --- a/services/web/server/tests/unit/isolated/test_garbage_collector_core.py +++ b/services/web/server/tests/unit/isolated/test_garbage_collector_core.py @@ -9,9 +9,13 @@ import pytest from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceStop, +) from models_library.projects import ProjectID from models_library.users import UserID from pytest_mock import MockerFixture +from servicelib.common_headers import UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE from simcore_postgres_database.models.users import UserRole from simcore_service_webserver.garbage_collector._core_orphans import ( remove_orphaned_services, @@ -200,11 +204,16 @@ async def test_remove_orphaned_services( else: mock_get_user_role.assert_not_called() mock_has_write_permission.assert_not_called() + mock_stop_dynamic_service.assert_called_once_with( mock_app, - node_id=fake_running_service.node_uuid, - simcore_user_agent=mock.ANY, - save_state=expected_save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=fake_running_service.user_id, + 
project_id=fake_running_service.project_id, + node_id=fake_running_service.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + ), ) @@ -238,9 +247,13 @@ async def test_remove_orphaned_services_inexisting_user_does_not_save_state( mock_has_write_permission.assert_not_called() mock_stop_dynamic_service.assert_called_once_with( mock_app, - node_id=fake_running_service.node_uuid, - simcore_user_agent=mock.ANY, - save_state=False, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=fake_running_service.user_id, + project_id=fake_running_service.project_id, + node_id=fake_running_service.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=False, + ), ) From 5d07c6d998ae3945afd61da704ae7060c1e20f0c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 10:24:08 +0200 Subject: [PATCH 054/122] fixed broken test --- .../02/test_projects_crud_handlers__delete.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py b/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py index f5980ff24579..9b13f978a90b 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py @@ -16,6 +16,9 @@ from aiohttp.test_utils import TestClient from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceStop, +) from models_library.projects import ProjectID from models_library.projects_state import ProjectStatus from pytest_simcore.helpers.utils_assert import assert_status @@ -72,8 +75,10 @@ async def test_delete_project( await _request_delete_project(client, user_project, 
expected.no_content) + user_id: int = logged_user["id"] + tasks = _crud_api_delete.get_scheduled_tasks( - project_uuid=user_project["uuid"], user_id=logged_user["id"] + project_uuid=user_project["uuid"], user_id=user_id ) if expected.no_content == status.HTTP_204_NO_CONTENT: @@ -91,9 +96,13 @@ async def test_delete_project( expected_calls = [ call( app=client.app, - node_id=service.node_uuid, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=True, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service.project_id, + node_id=service.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=True, + ), progress=mock.ANY, ) for service in fakes From 8bce042ebd0a0a2156beb94def752524a6a15ede Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 10:40:32 +0200 Subject: [PATCH 055/122] fixed tests --- .../test_resource_manager.py | 92 ++++++++++++------- 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py b/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py index 85ad63e9aa6c..070b78c2b585 100644 --- a/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py +++ b/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py @@ -20,6 +20,9 @@ from aiohttp.test_utils import TestClient from aioresponses import aioresponses from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceStop, +) from models_library.utils.fastapi_encoders import jsonable_encoder from pytest_mock import MockerFixture from pytest_simcore.helpers.utils_assert import assert_status @@ -486,9 +489,9 @@ async def test_interactive_services_removed_after_logout( mocked_notifications_plugin: dict[str, 
mock.Mock], ): assert client.app - + user_id = logged_user["id"] service = await create_dynamic_service_mock( - user_id=logged_user["id"], project_id=empty_user_project["uuid"] + user_id=user_id, project_id=empty_user_project["uuid"] ) # create websocket client_session_id1 = client_session_id_factory() @@ -518,9 +521,13 @@ async def test_interactive_services_removed_after_logout( "dynamic_scheduler.api.stop_dynamic_service" ].assert_awaited_with( app=client.app, - node_id=service.node_uuid, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=expected_save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service.project_id, + node_id=service.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + ), progress=mock.ANY, ) @@ -548,9 +555,9 @@ async def test_interactive_services_remain_after_websocket_reconnection_from_2_t mocked_notifications_plugin: dict[str, mock.Mock], ): assert client.app - + user_id = logged_user["id"] service = await create_dynamic_service_mock( - user_id=logged_user["id"], project_id=empty_user_project["uuid"] + user_id=user_id, project_id=empty_user_project["uuid"] ) # create first websocket client_session_id1 = client_session_id_factory() @@ -584,7 +591,7 @@ async def test_interactive_services_remain_after_websocket_reconnection_from_2_t "locked": { "value": False, "owner": { - "user_id": logged_user["id"], + "user_id": user_id, "first_name": logged_user.get("first_name", None), "last_name": logged_user.get("last_name", None), }, @@ -635,9 +642,13 @@ async def test_interactive_services_remain_after_websocket_reconnection_from_2_t calls = [ call( app=client.app, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=expected_save_state, - node_id=service.node_uuid, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service.project_id, + 
simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + node_id=service.node_uuid, + ), progress=mock.ANY, ) ] @@ -682,15 +693,16 @@ async def test_interactive_services_removed_per_project( open_project: Callable, mocked_notifications_plugin: dict[str, mock.Mock], ): + user_id = logged_user["id"] # create server with delay set to DELAY service1 = await create_dynamic_service_mock( - user_id=logged_user["id"], project_id=empty_user_project["uuid"] + user_id=user_id, project_id=empty_user_project["uuid"] ) service2 = await create_dynamic_service_mock( - user_id=logged_user["id"], project_id=empty_user_project2["uuid"] + user_id=user_id, project_id=empty_user_project2["uuid"] ) service3 = await create_dynamic_service_mock( - user_id=logged_user["id"], project_id=empty_user_project2["uuid"] + user_id=user_id, project_id=empty_user_project2["uuid"] ) # create websocket1 from tab1 client_session_id1 = client_session_id_factory() @@ -714,9 +726,13 @@ async def test_interactive_services_removed_per_project( calls = [ call( app=client.app, - node_id=service1.node_uuid, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=expected_save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service1.project_id, + node_id=service1.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + ), progress=mock.ANY, ) ] @@ -739,16 +755,24 @@ async def test_interactive_services_removed_per_project( calls = [ call( app=client.server.app, - node_id=service2.node_uuid, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=expected_save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service2.project_id, + node_id=service2.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + ), progress=mock.ANY, ), call( 
app=client.server.app, - node_id=service3.node_uuid, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=expected_save_state, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service3.project_id, + node_id=service3.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + ), progress=mock.ANY, ), ] @@ -840,8 +864,9 @@ async def test_websocket_disconnected_remove_or_maintain_files_based_on_role( open_project: Callable, mocked_notifications_plugin: dict[str, mock.Mock], ): + user_id = logged_user["id"] service = await create_dynamic_service_mock( - user_id=logged_user["id"], project_id=empty_user_project["uuid"] + user_id=user_id, project_id=empty_user_project["uuid"] ) # create websocket client_session_id1 = client_session_id_factory() @@ -863,9 +888,13 @@ async def test_websocket_disconnected_remove_or_maintain_files_based_on_role( calls = [ call( app=client.server.app, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=expected_save_state, - node_id=service.node_uuid, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=user_id, + project_id=service.project_id, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=expected_save_state, + node_id=service.node_uuid, + ), progress=mock.ANY, ) ] @@ -899,26 +928,27 @@ async def test_regression_removing_unexisting_user( # regression test for https://github.com/ITISFoundation/osparc-simcore/issues/2504 assert client.app # remove project + user_id = logged_user["id"] delete_task = await submit_delete_project_task( app=client.app, project_uuid=empty_user_project["uuid"], - user_id=logged_user["id"], + user_id=user_id, simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, ) await delete_task # remove user - await delete_user_without_projects(app=client.app, user_id=logged_user["id"]) + await delete_user_without_projects(app=client.app, 
user_id=user_id) with pytest.raises(UserNotFoundError): await remove_project_dynamic_services( - user_id=logged_user["id"], + user_id=user_id, project_uuid=empty_user_project["uuid"], app=client.app, simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, ) with pytest.raises(ProjectNotFoundError): await remove_project_dynamic_services( - user_id=logged_user["id"], + user_id=user_id, project_uuid=empty_user_project["uuid"], app=client.app, user_name={"first_name": "my name is", "last_name": "pytest"}, From a82fb53fa9f2abc0249c0cdc923186eeccc993ef Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 11:02:15 +0200 Subject: [PATCH 056/122] fixed broken tests --- .../with_dbs/02/test_projects_nodes_handler.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py b/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py index ca456f236aeb..62cabb17fb56 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py @@ -21,6 +21,9 @@ from aioresponses import aioresponses from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + RPCDynamicServiceStop, +) from models_library.api_schemas_storage import FileMetaDataGet, PresignedLink from models_library.generics import Envelope from models_library.projects_nodes_io import NodeID @@ -634,6 +637,7 @@ async def test_creating_deprecated_node_returns_406_not_acceptable( @pytest.mark.parametrize(*standard_role_response(), ids=str) async def test_delete_node( client: TestClient, + logged_user: dict, user_project: ProjectDict, expected: ExpectedResponse, mocked_director_v2_api: dict[str, mock.MagicMock], @@ -681,9 +685,13 @@ async def test_delete_node( 
"dynamic_scheduler.api.stop_dynamic_service" ].assert_called_once_with( mock.ANY, - node_id=NodeID(node_id), - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=False, + rpc_dynamic_service_stop=RPCDynamicServiceStop( + user_id=logged_user["id"], + project_id=user_project["uuid"], + node_id=NodeID(node_id), + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=False, + ), ) mocked_director_v2_api[ "dynamic_scheduler.api.stop_dynamic_service" From f0878ced61e0b8e3c96c8cf1379031c3fd51b01d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 11:23:52 +0200 Subject: [PATCH 057/122] refactor name --- .../dynamic_services.py | 2 +- .../rpc_interfaces/dynamic_scheduler/services.py | 6 +++--- .../api/rpc/_services.py | 10 +++++----- .../tests/unit/api_rpc/test_api_rpc__services.py | 14 +++++++------- .../dynamic_scheduler/api.py | 8 ++++---- .../garbage_collector/_core_orphans.py | 4 ++-- .../projects/_nodes_handlers.py | 8 ++++---- .../projects/projects_api.py | 4 ++-- .../unit/isolated/test_garbage_collector_core.py | 6 +++--- .../02/test_projects_crud_handlers__delete.py | 4 ++-- .../with_dbs/02/test_projects_nodes_handler.py | 4 ++-- .../03/garbage_collector/test_resource_manager.py | 14 +++++++------- 12 files changed, 42 insertions(+), 42 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py index 33d19b0da93a..df6cf67f4b49 100644 --- a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py +++ b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py @@ -38,7 +38,7 @@ class Config: } -class RPCDynamicServiceStop(BaseModel): +class DynamicServiceStop(BaseModel): user_id: UserID project_id: ProjectID node_id: NodeID diff --git 
a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py index 5e09a02ebbb5..193616b8e773 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py @@ -4,8 +4,8 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler import DYNAMIC_SCHEDULER_RPC_NAMESPACE from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStop, RPCDynamicServiceCreate, - RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -61,13 +61,13 @@ async def run_dynamic_service( async def stop_dynamic_service( rabbitmq_rpc_client: RabbitMQRPCClient, *, - rpc_dynamic_service_stop: RPCDynamicServiceStop, + dynamic_service_stop: DynamicServiceStop, timeout_s: NonNegativeInt, ) -> None: result = await rabbitmq_rpc_client.request( DYNAMIC_SCHEDULER_RPC_NAMESPACE, parse_obj_as(RPCMethodName, "stop_dynamic_service"), - rpc_dynamic_service_stop=rpc_dynamic_service_stop, + dynamic_service_stop=dynamic_service_stop, timeout_s=timeout_s, ) assert result is None # nosec diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index ce08ed6e95fe..a50b8a2311f2 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -1,8 +1,8 @@ from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from 
models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStop, RPCDynamicServiceCreate, - RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -41,13 +41,13 @@ async def run_dynamic_service( ) ) async def stop_dynamic_service( - app: FastAPI, *, rpc_dynamic_service_stop: RPCDynamicServiceStop + app: FastAPI, *, dynamic_service_stop: DynamicServiceStop ) -> NodeGet | DynamicServiceGet: director_v2_client = DirectorV2Client.get_from_app_state(app) settings: ApplicationSettings = app.state.settings return await director_v2_client.stop_dynamic_service( - node_id=rpc_dynamic_service_stop.node_id, - simcore_user_agent=rpc_dynamic_service_stop.simcore_user_agent, - save_state=rpc_dynamic_service_stop.save_state, + node_id=dynamic_service_stop.node_id, + simcore_user_agent=dynamic_service_stop.simcore_user_agent, + save_state=dynamic_service_stop.save_state, timeout=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) diff --git a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py index 71edaa075fec..240532d6d1ed 100644 --- a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py +++ b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py @@ -11,8 +11,8 @@ from fastapi.encoders import jsonable_encoder from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStop, RPCDynamicServiceCreate, - RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects import ProjectID @@ -367,8 +367,8 @@ async def test_stop_dynamic_service( simcore_user_agent: str, save_state: bool, ): - def _get_rpc_stop(with_node_id: NodeID) -> 
RPCDynamicServiceStop: - return RPCDynamicServiceStop( + def _get_rpc_stop(with_node_id: NodeID) -> DynamicServiceStop: + return DynamicServiceStop( user_id=user_id, project_id=project_id, node_id=with_node_id, @@ -379,7 +379,7 @@ def _get_rpc_stop(with_node_id: NodeID) -> RPCDynamicServiceStop: # service was stopped result = await services.stop_dynamic_service( rpc_client, - rpc_dynamic_service_stop=_get_rpc_stop(node_id), + dynamic_service_stop=_get_rpc_stop(node_id), timeout_s=5, ) assert result is None @@ -388,7 +388,7 @@ def _get_rpc_stop(with_node_id: NodeID) -> RPCDynamicServiceStop: with pytest.raises(ServiceWasNotFoundError): await services.stop_dynamic_service( rpc_client, - rpc_dynamic_service_stop=_get_rpc_stop(node_id_not_found), + dynamic_service_stop=_get_rpc_stop(node_id_not_found), timeout_s=5, ) @@ -396,7 +396,7 @@ def _get_rpc_stop(with_node_id: NodeID) -> RPCDynamicServiceStop: with pytest.raises(ServiceWaitingForManualInterventionError): await services.stop_dynamic_service( rpc_client, - rpc_dynamic_service_stop=_get_rpc_stop(node_id_manual_intervention), + dynamic_service_stop=_get_rpc_stop(node_id_manual_intervention), timeout_s=5, ) @@ -430,7 +430,7 @@ async def test_stop_dynamic_service_serializes_generic_errors( ): await services.stop_dynamic_service( rpc_client, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=project_id, node_id=node_id, diff --git a/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py b/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py index 7064fffab1c1..d1bc4a3ff84d 100644 --- a/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py +++ b/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py @@ -5,8 +5,8 @@ from aiohttp import web from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from 
models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStop, RPCDynamicServiceCreate, - RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import ( NodeGet, @@ -54,7 +54,7 @@ async def run_dynamic_service( async def stop_dynamic_service( app: web.Application, *, - rpc_dynamic_service_stop: RPCDynamicServiceStop, + dynamic_service_stop: DynamicServiceStop, progress: ProgressBarData | None = None, ) -> None: async with AsyncExitStack() as stack: @@ -64,7 +64,7 @@ async def stop_dynamic_service( settings: DynamicSchedulerSettings = get_plugin_settings(app) await services.stop_dynamic_service( get_rabbitmq_rpc_client(app), - rpc_dynamic_service_stop=rpc_dynamic_service_stop, + dynamic_service_stop=dynamic_service_stop, timeout_s=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) @@ -115,7 +115,7 @@ async def stop_dynamic_services_in_project( services_to_stop = [ stop_dynamic_service( app=app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service.project_id, node_id=service.node_uuid, diff --git a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py index 69f8cbc97b37..491189039f66 100644 --- a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py +++ b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_orphans.py @@ -4,7 +4,7 @@ from aiohttp import web from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceStop, + DynamicServiceStop, ) from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID @@ -54,7 +54,7 @@ async def _remove_service( ): await dynamic_scheduler_api.stop_dynamic_service( app, - 
rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=service.user_id, project_id=service.project_id, node_id=service.node_uuid, diff --git a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py index 4ebd137bb44d..8fe5e00d69ba 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py +++ b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py @@ -12,7 +12,7 @@ ) from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceStop, + DynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import ( NodeCreate, @@ -329,12 +329,12 @@ async def _stop_dynamic_service_task( _task_progress: TaskProgress, *, app: web.Application, - rpc_dynamic_service_stop: RPCDynamicServiceStop, + dynamic_service_stop: DynamicServiceStop, ): # NOTE: _handle_project_nodes_exceptions only decorate handlers try: await dynamic_scheduler_api.stop_dynamic_service( - app, rpc_dynamic_service_stop=rpc_dynamic_service_stop + app, dynamic_service_stop=dynamic_service_stop ) raise web.HTTPNoContent(content_type=MIMETYPE_APPLICATION_JSON) @@ -374,7 +374,7 @@ async def stop_node(request: web.Request) -> web.Response: task_context=jsonable_encoder(req_ctx), # task arguments from here on --- app=request.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=req_ctx.user_id, project_id=path_params.project_id, node_id=path_params.node_id, diff --git a/services/web/server/src/simcore_service_webserver/projects/projects_api.py b/services/web/server/src/simcore_service_webserver/projects/projects_api.py index 376f3265f72b..88de576ed451 100644 --- 
a/services/web/server/src/simcore_service_webserver/projects/projects_api.py +++ b/services/web/server/src/simcore_service_webserver/projects/projects_api.py @@ -27,8 +27,8 @@ GetProjectInactivityResponse, ) from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStop, RPCDynamicServiceCreate, - RPCDynamicServiceStop, ) from models_library.api_schemas_webserver.projects import ProjectPatch from models_library.api_schemas_webserver.projects_nodes import NodePatch @@ -801,7 +801,7 @@ async def _remove_service_and_its_data_folders( # no need to save the state of the node when deleting it await dynamic_scheduler_api.stop_dynamic_service( app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=project_uuid, node_id=NodeID(node_uuid), diff --git a/services/web/server/tests/unit/isolated/test_garbage_collector_core.py b/services/web/server/tests/unit/isolated/test_garbage_collector_core.py index 763a5ac1a58b..78c0d96e46cd 100644 --- a/services/web/server/tests/unit/isolated/test_garbage_collector_core.py +++ b/services/web/server/tests/unit/isolated/test_garbage_collector_core.py @@ -10,7 +10,7 @@ from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceStop, + DynamicServiceStop, ) from models_library.projects import ProjectID from models_library.users import UserID @@ -207,7 +207,7 @@ async def test_remove_orphaned_services( mock_stop_dynamic_service.assert_called_once_with( mock_app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=fake_running_service.user_id, project_id=fake_running_service.project_id, node_id=fake_running_service.node_uuid, @@ -247,7 +247,7 @@ async def test_remove_orphaned_services_inexisting_user_does_not_save_state( 
mock_has_write_permission.assert_not_called() mock_stop_dynamic_service.assert_called_once_with( mock_app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=fake_running_service.user_id, project_id=fake_running_service.project_id, node_id=fake_running_service.node_uuid, diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py b/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py index 9b13f978a90b..f54537a6d0ef 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_crud_handlers__delete.py @@ -17,7 +17,7 @@ from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceStop, + DynamicServiceStop, ) from models_library.projects import ProjectID from models_library.projects_state import ProjectStatus @@ -96,7 +96,7 @@ async def test_delete_project( expected_calls = [ call( app=client.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service.project_id, node_id=service.node_uuid, diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py b/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py index 62cabb17fb56..24929f4f8ef9 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py @@ -22,7 +22,7 @@ from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceStop, + DynamicServiceStop, ) from models_library.api_schemas_storage import 
FileMetaDataGet, PresignedLink from models_library.generics import Envelope @@ -685,7 +685,7 @@ async def test_delete_node( "dynamic_scheduler.api.stop_dynamic_service" ].assert_called_once_with( mock.ANY, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=logged_user["id"], project_id=user_project["uuid"], node_id=NodeID(node_id), diff --git a/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py b/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py index 070b78c2b585..27ad5fe04f5e 100644 --- a/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py +++ b/services/web/server/tests/unit/with_dbs/03/garbage_collector/test_resource_manager.py @@ -21,7 +21,7 @@ from aioresponses import aioresponses from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceStop, + DynamicServiceStop, ) from models_library.utils.fastapi_encoders import jsonable_encoder from pytest_mock import MockerFixture @@ -521,7 +521,7 @@ async def test_interactive_services_removed_after_logout( "dynamic_scheduler.api.stop_dynamic_service" ].assert_awaited_with( app=client.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service.project_id, node_id=service.node_uuid, @@ -642,7 +642,7 @@ async def test_interactive_services_remain_after_websocket_reconnection_from_2_t calls = [ call( app=client.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service.project_id, simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, @@ -726,7 +726,7 @@ async def test_interactive_services_removed_per_project( calls = [ call( app=client.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + 
dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service1.project_id, node_id=service1.node_uuid, @@ -755,7 +755,7 @@ async def test_interactive_services_removed_per_project( calls = [ call( app=client.server.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service2.project_id, node_id=service2.node_uuid, @@ -766,7 +766,7 @@ async def test_interactive_services_removed_per_project( ), call( app=client.server.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service3.project_id, node_id=service3.node_uuid, @@ -888,7 +888,7 @@ async def test_websocket_disconnected_remove_or_maintain_files_based_on_role( calls = [ call( app=client.server.app, - rpc_dynamic_service_stop=RPCDynamicServiceStop( + dynamic_service_stop=DynamicServiceStop( user_id=user_id, project_id=service.project_id, simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, From aa93e579c0474a495064069766484e583b10b193 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 11:46:50 +0200 Subject: [PATCH 058/122] renaming --- .../dynamic_services.py | 2 +- .../dynamic_scheduler/services.py | 6 ++-- .../api/rpc/_services.py | 6 ++-- .../services/director_v2/_public_client.py | 8 ++--- .../services/director_v2/_thin_client.py | 34 +++++++++---------- .../unit/api_rpc/test_api_rpc__services.py | 12 +++---- .../dynamic_scheduler/api.py | 6 ++-- .../projects/projects_api.py | 4 +-- .../unit/isolated/test_dynamic_scheduler.py | 12 +++---- .../02/test_projects_nodes_handler.py | 4 +-- .../02/test_projects_states_handlers.py | 6 ++-- 11 files changed, 49 insertions(+), 51 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py index df6cf67f4b49..48ef3c484455 100644 --- 
a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py +++ b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/dynamic_services.py @@ -10,7 +10,7 @@ from pydantic import BaseModel -class RPCDynamicServiceCreate(DynamicServiceCreate): +class DynamicServiceStart(DynamicServiceCreate): request_dns: str request_scheme: str simcore_user_agent: str diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py index 193616b8e773..9da2dad425e2 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py @@ -4,8 +4,8 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler import DYNAMIC_SCHEDULER_RPC_NAMESPACE from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, DynamicServiceStop, - RPCDynamicServiceCreate, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -45,12 +45,12 @@ async def get_service_status( async def run_dynamic_service( rabbitmq_rpc_client: RabbitMQRPCClient, *, - rpc_dynamic_service_create: RPCDynamicServiceCreate, + dynamic_service_start: DynamicServiceStart, ) -> DynamicServiceGet | NodeGet: result = await rabbitmq_rpc_client.request( DYNAMIC_SCHEDULER_RPC_NAMESPACE, parse_obj_as(RPCMethodName, "run_dynamic_service"), - rpc_dynamic_service_create=rpc_dynamic_service_create, + dynamic_service_start=dynamic_service_start, timeout_s=_RPC_DEFAULT_TIMEOUT_S, ) assert isinstance(result, DynamicServiceGet | NodeGet) # nosec diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index a50b8a2311f2..7f27bdcb5dc5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -1,8 +1,8 @@ from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, DynamicServiceStop, - RPCDynamicServiceCreate, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -28,10 +28,10 @@ async def get_service_status( @router.expose() async def run_dynamic_service( - app: FastAPI, *, rpc_dynamic_service_create: RPCDynamicServiceCreate + app: FastAPI, *, dynamic_service_start: DynamicServiceStart ) -> NodeGet | DynamicServiceGet: director_v2_client = DirectorV2Client.get_from_app_state(app) - return await director_v2_client.run_dynamic_service(rpc_dynamic_service_create) + return await director_v2_client.run_dynamic_service(dynamic_service_start) @router.expose( diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_public_client.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_public_client.py index e9e6ee6fff01..6c514b48f790 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_public_client.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_public_client.py @@ -4,7 +4,7 @@ from fastapi import FastAPI, status from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from 
models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceCreate, + DynamicServiceStart, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -55,11 +55,9 @@ async def get_status( raise async def run_dynamic_service( - self, rpc_dynamic_service_create: RPCDynamicServiceCreate + self, dynamic_service_start: DynamicServiceStart ) -> NodeGet | DynamicServiceGet: - response = await self.thin_client.post_dynamic_service( - rpc_dynamic_service_create - ) + response = await self.thin_client.post_dynamic_service(dynamic_service_start) dict_response: dict[str, Any] = response.json() # legacy services diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_thin_client.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_thin_client.py index 8de10a032c84..e0a138ad18c5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_thin_client.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/director_v2/_thin_client.py @@ -3,7 +3,7 @@ from fastapi import FastAPI, status from httpx import Response, Timeout from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceCreate, + DynamicServiceStart, ) from models_library.projects_nodes_io import NodeID from models_library.services_resources import ServiceResourcesDictHelpers @@ -48,29 +48,29 @@ async def get_status(self, node_id: NodeID) -> Response: @retry_on_errors() @expect_status(status.HTTP_201_CREATED) async def post_dynamic_service( - self, rpc_dynamic_service_create: RPCDynamicServiceCreate + self, dynamic_service_start: DynamicServiceStart ) -> Response: post_data = { - "product_name": rpc_dynamic_service_create.product_name, - "can_save": rpc_dynamic_service_create.can_save, - "user_id": 
rpc_dynamic_service_create.user_id, - "project_id": rpc_dynamic_service_create.project_id, - "key": rpc_dynamic_service_create.key, - "version": rpc_dynamic_service_create.version, - "node_uuid": rpc_dynamic_service_create.node_uuid, - "basepath": f"/x/{rpc_dynamic_service_create.node_uuid}", + "product_name": dynamic_service_start.product_name, + "can_save": dynamic_service_start.can_save, + "user_id": dynamic_service_start.user_id, + "project_id": dynamic_service_start.project_id, + "key": dynamic_service_start.key, + "version": dynamic_service_start.version, + "node_uuid": dynamic_service_start.node_uuid, + "basepath": f"/x/{dynamic_service_start.node_uuid}", "service_resources": ServiceResourcesDictHelpers.create_jsonable( - rpc_dynamic_service_create.service_resources + dynamic_service_start.service_resources ), - "wallet_info": rpc_dynamic_service_create.wallet_info, - "pricing_info": rpc_dynamic_service_create.pricing_info, - "hardware_info": rpc_dynamic_service_create.hardware_info, + "wallet_info": dynamic_service_start.wallet_info, + "pricing_info": dynamic_service_start.pricing_info, + "hardware_info": dynamic_service_start.hardware_info, } headers = { - X_DYNAMIC_SIDECAR_REQUEST_DNS: rpc_dynamic_service_create.request_dns, - X_DYNAMIC_SIDECAR_REQUEST_SCHEME: rpc_dynamic_service_create.request_scheme, - X_SIMCORE_USER_AGENT: rpc_dynamic_service_create.simcore_user_agent, + X_DYNAMIC_SIDECAR_REQUEST_DNS: dynamic_service_start.request_dns, + X_DYNAMIC_SIDECAR_REQUEST_SCHEME: dynamic_service_start.request_scheme, + X_SIMCORE_USER_AGENT: dynamic_service_start.simcore_user_agent, } return await self.client.post( diff --git a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py index 240532d6d1ed..7c8dada1e183 100644 --- a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py +++ 
b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py @@ -11,8 +11,8 @@ from fastapi.encoders import jsonable_encoder from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, DynamicServiceStop, - RPCDynamicServiceCreate, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects import ProjectID @@ -171,10 +171,10 @@ async def test_get_state( @pytest.fixture -def rpc_dynamic_service_create() -> RPCDynamicServiceCreate: +def dynamic_service_start() -> DynamicServiceStart: # one for legacy and one for new style? - return RPCDynamicServiceCreate.parse_obj( - RPCDynamicServiceCreate.Config.schema_extra["example"] + return DynamicServiceStart.parse_obj( + DynamicServiceStart.Config.schema_extra["example"] ) @@ -226,11 +226,11 @@ async def test_run_dynamic_service( mock_director_v0_service_run: None, mock_director_v2_service_run: None, rpc_client: RabbitMQRPCClient, - rpc_dynamic_service_create: RPCDynamicServiceCreate, + dynamic_service_start: DynamicServiceStart, is_legacy: bool, ): result = await services.run_dynamic_service( - rpc_client, rpc_dynamic_service_create=rpc_dynamic_service_create + rpc_client, dynamic_service_start=dynamic_service_start ) if is_legacy: diff --git a/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py b/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py index d1bc4a3ff84d..637d308c56ef 100644 --- a/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py +++ b/services/web/server/src/simcore_service_webserver/dynamic_scheduler/api.py @@ -5,8 +5,8 @@ from aiohttp import web from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, DynamicServiceStop, - 
RPCDynamicServiceCreate, ) from models_library.api_schemas_webserver.projects_nodes import ( NodeGet, @@ -43,11 +43,11 @@ async def get_dynamic_service( async def run_dynamic_service( - app: web.Application, *, rpc_dynamic_service_create: RPCDynamicServiceCreate + app: web.Application, *, dynamic_service_start: DynamicServiceStart ) -> DynamicServiceGet | NodeGet: return await services.run_dynamic_service( get_rabbitmq_rpc_client(app), - rpc_dynamic_service_create=rpc_dynamic_service_create, + dynamic_service_start=dynamic_service_start, ) diff --git a/services/web/server/src/simcore_service_webserver/projects/projects_api.py b/services/web/server/src/simcore_service_webserver/projects/projects_api.py index 88de576ed451..28296fa40188 100644 --- a/services/web/server/src/simcore_service_webserver/projects/projects_api.py +++ b/services/web/server/src/simcore_service_webserver/projects/projects_api.py @@ -27,8 +27,8 @@ GetProjectInactivityResponse, ) from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, DynamicServiceStop, - RPCDynamicServiceCreate, ) from models_library.api_schemas_webserver.projects import ProjectPatch from models_library.api_schemas_webserver.projects_nodes import NodePatch @@ -680,7 +680,7 @@ async def _start_dynamic_service( ) await dynamic_scheduler_api.run_dynamic_service( app=request.app, - rpc_dynamic_service_create=RPCDynamicServiceCreate( + dynamic_service_start=DynamicServiceStart( product_name=product_name, can_save=save_state, project_id=project_uuid, diff --git a/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py b/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py index 83de45b76e43..0823f52b1b29 100644 --- a/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py +++ b/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py @@ -7,7 +7,7 @@ from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import 
DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceCreate, + DynamicServiceStart, ) from models_library.api_schemas_webserver.projects_nodes import ( NodeGet, @@ -46,9 +46,9 @@ def mock_rpc_client( @pytest.fixture -def rpc_dynamic_service_create() -> RPCDynamicServiceCreate: - return RPCDynamicServiceCreate.parse_obj( - RPCDynamicServiceCreate.Config.schema_extra["example"] +def dynamic_service_start() -> DynamicServiceStart: + return DynamicServiceStart.parse_obj( + DynamicServiceStart.Config.schema_extra["example"] ) @@ -108,11 +108,11 @@ async def test_run_dynamic_service( mock_rpc_client: None, mocked_app: AsyncMock, expected_response: NodeGet | NodeGetIdle | DynamicServiceGet, - rpc_dynamic_service_create: RPCDynamicServiceCreate, + dynamic_service_start: DynamicServiceStart, ): assert ( await run_dynamic_service( - mocked_app, rpc_dynamic_service_create=rpc_dynamic_service_create + mocked_app, dynamic_service_start=dynamic_service_start ) == expected_response ) diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py b/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py index 24929f4f8ef9..9ceec7318702 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_nodes_handler.py @@ -392,7 +392,7 @@ def num_services( def inc_running_services(self, *args, **kwargs): # noqa: ARG002 self.running_services_uuids.append( - kwargs["rpc_dynamic_service_create"].node_uuid + kwargs["dynamic_service_start"].node_uuid ) # let's count the started services @@ -515,7 +515,7 @@ async def inc_running_services(self, *args, **kwargs): # noqa: ARG002 # reproduces real world conditions and makes test to fail await asyncio.sleep(SERVICE_IS_RUNNING_AFTER_S) self.running_services_uuids.append( - kwargs["rpc_dynamic_service_create"].node_uuid + kwargs["dynamic_service_start"].node_uuid 
) # let's count the started services diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py b/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py index 6a556465bdce..46865db431c1 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py @@ -22,7 +22,7 @@ from faker import Faker from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - RPCDynamicServiceCreate, + DynamicServiceStart, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects import ProjectID @@ -370,7 +370,7 @@ async def test_open_project( calls.append( call( app=client.app, - rpc_dynamic_service_create=RPCDynamicServiceCreate( + dynamic_service_start=DynamicServiceStart( project_id=user_project["uuid"], service_key=service["key"], service_uuid=service_uuid, @@ -449,7 +449,7 @@ async def test_open_template_project_for_edition( calls.append( call( app=client.app, - rpc_dynamic_service_create=RPCDynamicServiceCreate( + dynamic_service_start=DynamicServiceStart( project_id=template_project["uuid"], service_key=service["key"], service_uuid=service_uuid, From 4a3c16bd051efd1432462dccb1f851034442d73c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 12:04:09 +0200 Subject: [PATCH 059/122] fixed broken test --- .../02/test_projects_states_handlers.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py b/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py index 46865db431c1..d95fb1313e1e 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py +++ 
b/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py @@ -23,6 +23,7 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( DynamicServiceStart, + DynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects import ProjectID @@ -798,6 +799,8 @@ async def test_close_project( "dynamic_scheduler.api.list_dynamic_services" ].return_value = fake_dynamic_services + user_id = logged_user["id"] + assert client.app # open project client_id = client_session_id_factory() @@ -809,7 +812,7 @@ async def test_close_project( client.app, ProjectID(user_project["uuid"]) ) mocked_director_v2_api["director_v2.api.list_dynamic_services"].assert_any_call( - client.app, logged_user["id"], user_project["uuid"] + client.app, user_id, user_project["uuid"] ) mocked_director_v2_api["director_v2.api.list_dynamic_services"].reset_mock() else: @@ -830,7 +833,7 @@ async def test_close_project( calls = [ call( client.app, - user_id=logged_user["id"], + user_id=user_id, project_id=user_project["uuid"], ), ] @@ -841,9 +844,13 @@ async def test_close_project( calls = [ call( app=client.app, - node_id=service.node_uuid, - simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, - save_state=True, + dynamic_service_stop=DynamicServiceStop( + user_id=user_id, + project_id=service.project_id, + node_id=service.node_uuid, + simcore_user_agent=UNDEFINED_DEFAULT_SIMCORE_USER_AGENT_VALUE, + save_state=True, + ), progress=mock.ANY, ) for service in fake_dynamic_services From 4e2643f825aef5d2c33e25efab7b2c5263f13565 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 12:43:12 +0200 Subject: [PATCH 060/122] refactor --- .../api/rpc/_services.py | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index 5a5404449a8f..368c8395864f 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -30,10 +30,11 @@ async def get_service_status( async def run_dynamic_service( app: FastAPI, *, rpc_dynamic_service_create: RPCDynamicServiceCreate ) -> NodeGet | DynamicServiceGet: - await set_request_as_running(app, rpc_dynamic_service_create.node_uuid) - director_v2_client = DirectorV2Client.get_from_app_state(app) - return await director_v2_client.run_dynamic_service(rpc_dynamic_service_create) + result = await director_v2_client.run_dynamic_service(rpc_dynamic_service_create) + + await set_request_as_running(app, rpc_dynamic_service_create) + return result @router.expose( @@ -43,15 +44,21 @@ async def run_dynamic_service( ) ) async def stop_dynamic_service( - app: FastAPI, *, node_id: NodeID, simcore_user_agent: str, save_state: bool + app: FastAPI, *, rpc_dynamic_service_stop: RPCDynamicServiceStop ) -> NodeGet | DynamicServiceGet: - await set_request_as_stopped(app, node_id) - director_v2_client = DirectorV2Client.get_from_app_state(app) settings: ApplicationSettings = app.state.settings - return await director_v2_client.stop_dynamic_service( - node_id=node_id, - simcore_user_agent=simcore_user_agent, - save_state=save_state, + result = await director_v2_client.stop_dynamic_service( + node_id=rpc_dynamic_service_stop.node_id, + simcore_user_agent=rpc_dynamic_service_stop.simcore_user_agent, + save_state=rpc_dynamic_service_stop.save_state, timeout=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) + + await set_request_as_stopped( + app, + user_id=rpc_dynamic_service_stop.user_id, + project_id=rpc_dynamic_service_stop.project_id, + 
node_id=rpc_dynamic_service_stop.node_id, + ) + return result From 56121043a3540b9068c2ea933ee43f4768f07527 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 12:46:13 +0200 Subject: [PATCH 061/122] added requirement --- services/dynamic-scheduler/requirements/_base.in | 1 + services/dynamic-scheduler/requirements/_base.txt | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/services/dynamic-scheduler/requirements/_base.in b/services/dynamic-scheduler/requirements/_base.in index 8bbd3daa1d3d..ab95aec0daa5 100644 --- a/services/dynamic-scheduler/requirements/_base.in +++ b/services/dynamic-scheduler/requirements/_base.in @@ -18,5 +18,6 @@ arrow fastapi httpx packaging +python-socketio typer[all] uvicorn[standard] diff --git a/services/dynamic-scheduler/requirements/_base.txt b/services/dynamic-scheduler/requirements/_base.txt index d19eef44cf79..4d6fdbcd374b 100644 --- a/services/dynamic-scheduler/requirements/_base.txt +++ b/services/dynamic-scheduler/requirements/_base.txt @@ -57,6 +57,8 @@ attrs==23.2.0 # aiohttp # jsonschema # referencing +bidict==0.23.1 + # via python-socketio certifi==2024.2.2 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -106,6 +108,7 @@ h11==0.14.0 # via # httpcore # uvicorn + # wsproto httpcore==1.0.5 # via httpx httptools==0.6.1 @@ -208,6 +211,10 @@ python-dateutil==2.9.0.post0 # via arrow python-dotenv==1.0.1 # via uvicorn +python-engineio==4.9.1 + # via python-socketio +python-socketio==5.11.2 + # via -r requirements/_base.in pyyaml==6.0.1 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -248,6 +255,8 @@ rpds-py==0.18.0 # referencing shellingham==1.5.4 # via typer +simple-websocket==1.0.0 + # via python-engineio six==1.16.0 # via python-dateutil sniffio==1.3.1 @@ -317,6 +326,8 @@ watchfiles==0.21.0 # via uvicorn websockets==12.0 # via uvicorn +wsproto==1.2.0 + # via simple-websocket 
yarl==1.9.4 # via # -r requirements/../../../packages/postgres-database/requirements/_base.in From 74f467aad8563255f19d804dd6e8bc2af7db9d78 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 13:41:00 +0200 Subject: [PATCH 062/122] can switch states form running to stopped --- .../services/service_tracker/_api.py | 18 ++- .../services/service_tracker/_models.py | 2 +- .../tests/unit/service_tracker/test__api.py | 126 +++++++++++------- .../unit/service_tracker/test__models.py | 12 +- .../unit/service_tracker/test__tracker.py | 16 ++- 5 files changed, 109 insertions(+), 65 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 73dcccb3618c..5cdfaeeae12e 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -37,18 +37,16 @@ async def set_request_as_running( model: TrackedServiceModel | None = await tracker.load(node_id) if model is not None: - _logger.info( - "Could track as running %s since an entry node_id %s already exists", - TrackedServiceModel.__name__, - node_id, - ) - return + model.dynamic_service_start = dynamic_service_start + model.requested_state = UserRequestedState.RUNNING + model.project_id = dynamic_service_start.project_id + model.user_id = dynamic_service_start.user_id await tracker.save( node_id, TrackedServiceModel( dynamic_service_start=dynamic_service_start, - requested_sate=UserRequestedState.RUNNING, + requested_state=UserRequestedState.RUNNING, project_id=dynamic_service_start.project_id, user_id=dynamic_service_start.user_id, ), @@ -67,10 +65,10 @@ async def set_request_as_stopped( dynamic_service_start=None, user_id=dynamic_service_stop.user_id, project_id=dynamic_service_stop.project_id, - 
requested_sate=UserRequestedState.STOPPED, + requested_state=UserRequestedState.STOPPED, ) - model.requested_sate = UserRequestedState.STOPPED + model.requested_state = UserRequestedState.STOPPED await tracker.save(dynamic_service_stop.node_id, model) @@ -150,7 +148,7 @@ async def set_if_status_changed( json_status = status.json() if model.service_status != json_status: model.service_status = json_status - model.current_state = _get_current_state(model.requested_sate, status) + model.current_state = _get_current_state(model.requested_state, status) await tracker.save(node_id, model) return True diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index c28afda433a6..1a2c2f42e834 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -46,7 +46,7 @@ class TrackedServiceModel: project_id: ProjectID | None # what the user desires (RUNNING or STOPPED) - requested_sate: UserRequestedState + requested_state: UserRequestedState # set this after parsing the incoming state via the API calls current_state: SchedulerServiceState = SchedulerServiceState.UNKNOWN # type: ignore diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 281db6e64d9e..ccc9fd92c250 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -1,6 +1,7 @@ # pylint:disable=redefined-outer-name # pylint:disable=unused-argument +from collections.abc import Callable from datetime import timedelta from typing import Any, Final from uuid import uuid4 @@ -9,6 +10,10 @@ from faker import Faker 
from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, + DynamicServiceStop, +) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID from models_library.services_enums import ServiceState @@ -58,42 +63,57 @@ def node_id() -> NodeID: return uuid4() -@pytest.mark.parametrize("item_count", [100]) -async def test_services_tracer_workflow( - app: FastAPI, node_id: NodeID, item_count: NonNegativeInt +async def test_services_tracer_set_as_running_set_as_stopped( + app: FastAPI, + node_id: NodeID, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], + get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], ): + async def _remove_service() -> None: + await remove_tracked(app, node_id) + assert await get_tracked(app, node_id) is None + assert await get_all_tracked(app) == {} + + async def _set_as_running() -> None: + await set_request_as_running(app, get_dynamic_service_start(node_id)) + tracked_model = await get_tracked(app, node_id) + assert tracked_model + assert tracked_model.requested_state == UserRequestedState.RUNNING + + async def _set_as_stopped() -> None: + await set_request_as_stopped(app, get_dynamic_service_stop(node_id)) + tracked_model = await get_tracked(app, node_id) + assert tracked_model + assert tracked_model.requested_state == UserRequestedState.STOPPED + + # request as running then as stopped + await _remove_service() + await _set_as_running() + await _set_as_stopped() + + # request as stopped then as running + await _remove_service() + await _set_as_stopped() + await _set_as_running() - # service does not exist - assert await get_tracked(app, node_id) is None - - # service requested as to be in RUNNING - await set_request_as_running(app, node_id) - tracked_model = await get_tracked(app, 
node_id) - assert tracked_model - assert tracked_model.requested_sate == UserRequestedState.RUNNING - - # service requested as to be in STOPPED - await set_request_as_stopped(app, node_id) - tracked_model = await get_tracked(app, node_id) - assert tracked_model - assert tracked_model.requested_sate == UserRequestedState.STOPPED - # remove service - await remove_tracked(app, node_id) - assert await get_tracked(app, node_id) is None - - # check listing services - assert await get_all_tracked(app) == {} - - await logged_gather( - *[set_request_as_stopped(app, uuid4()) for _ in range(item_count)], - max_concurrency=100, - ) +@pytest.mark.parametrize("item_count", [100]) +async def test_services_tracer_workflow( + app: FastAPI, + node_id: NodeID, + item_count: NonNegativeInt, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], + get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], +): + # ensure more than one service can be tracked await logged_gather( - *[set_request_as_running(app, uuid4()) for _ in range(item_count)], + *[ + set_request_as_stopped(app, get_dynamic_service_stop(uuid4())) + for _ in range(item_count) + ], max_concurrency=100, ) - assert len(await get_all_tracked(app)) == item_count * 2 + assert len(await get_all_tracked(app)) == item_count @pytest.mark.parametrize( @@ -108,9 +128,12 @@ async def test_services_tracer_workflow( ], ) async def test_set_if_status_changed( - app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle + app: FastAPI, + node_id: NodeID, + status: NodeGet | DynamicServiceGet | NodeGetIdle, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], ): - await set_request_as_running(app, node_id) + await set_request_as_running(app, get_dynamic_service_start(node_id)) assert await set_if_status_changed(app, node_id, status) is True @@ -122,8 +145,13 @@ async def test_set_if_status_changed( assert model.service_status == status.json() -async def 
test_set_service_status_task_uid(app: FastAPI, node_id: NodeID, faker: Faker): - await set_request_as_running(app, node_id) +async def test_set_service_status_task_uid( + app: FastAPI, + node_id: NodeID, + faker: Faker, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], +): + await set_request_as_running(app, get_dynamic_service_start(node_id)) task_uid = TaskUID(faker.uuid4()) await set_service_status_task_uid(app, node_id, task_uid) @@ -186,37 +214,37 @@ def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: # UserRequestedState.RUNNING ( UserRequestedState.RUNNING, - get_status(ServiceState.PENDING), + status_generator(ServiceState.PENDING), SchedulerServiceState.STARTING, ), ( UserRequestedState.RUNNING, - get_status(ServiceState.PULLING), + status_generator(ServiceState.PULLING), SchedulerServiceState.STARTING, ), ( UserRequestedState.RUNNING, - get_status(ServiceState.STARTING), + status_generator(ServiceState.STARTING), SchedulerServiceState.STARTING, ), ( UserRequestedState.RUNNING, - get_status(ServiceState.RUNNING), + status_generator(ServiceState.RUNNING), SchedulerServiceState.RUNNING, ), ( UserRequestedState.RUNNING, - get_status(ServiceState.COMPLETE), + status_generator(ServiceState.COMPLETE), SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( UserRequestedState.RUNNING, - get_status(ServiceState.FAILED), + status_generator(ServiceState.FAILED), SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( UserRequestedState.RUNNING, - get_status(ServiceState.STOPPING), + status_generator(ServiceState.STOPPING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( @@ -227,37 +255,37 @@ def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: # UserRequestedState.STOPPED ( UserRequestedState.STOPPED, - get_status(ServiceState.PENDING), + status_generator(ServiceState.PENDING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( UserRequestedState.STOPPED, - get_status(ServiceState.PULLING), + status_generator(ServiceState.PULLING), 
SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( UserRequestedState.STOPPED, - get_status(ServiceState.STARTING), + status_generator(ServiceState.STARTING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( UserRequestedState.STOPPED, - get_status(ServiceState.RUNNING), + status_generator(ServiceState.RUNNING), SchedulerServiceState.STOPPING, ), ( UserRequestedState.STOPPED, - get_status(ServiceState.COMPLETE), + status_generator(ServiceState.COMPLETE), SchedulerServiceState.STOPPING, ), ( UserRequestedState.STOPPED, - get_status(ServiceState.FAILED), + status_generator(ServiceState.FAILED), SchedulerServiceState.UNEXPECTED_OUTCOME, ), ( UserRequestedState.STOPPED, - get_status(ServiceState.STOPPING), + status_generator(ServiceState.STOPPING), SchedulerServiceState.STOPPING, ), ( @@ -266,7 +294,7 @@ def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: SchedulerServiceState.IDLE, ), ] - for get_status in ( + for status_generator in ( _get_node_get_from, _get_dynamic_service_get_from, ) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py index 0e5901c4f87f..6b8e31321b38 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__models.py @@ -23,7 +23,10 @@ def test_serialization( service_status_task_uid: TaskUID | None, ): tracked_model = TrackedServiceModel( - requested_sate=requested_state, + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=requested_state, current_state=current_state, service_status=faker.pystr(), check_status_after=check_status_after, @@ -36,7 +39,12 @@ def test_serialization( async def test_set_check_status_after_to(): - model = TrackedServiceModel(UserRequestedState.RUNNING) + model = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=UserRequestedState.RUNNING, + ) 
assert model.check_status_after < arrow.utcnow().timestamp() delay = timedelta(seconds=4) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py index 953d71243651..0dce5e355bae 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -48,7 +48,12 @@ async def test_tracker_workflow(tracker: Tracker): assert result is None # node creation - model = TrackedServiceModel(requested_sate=UserRequestedState.RUNNING) + model = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=UserRequestedState.RUNNING, + ) await tracker.save(node_id, model) # check if exists @@ -65,7 +70,12 @@ async def test_tracker_workflow(tracker: Tracker): async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> None: assert await tracker.all() == {} - model_to_insert = TrackedServiceModel(requested_sate=UserRequestedState.RUNNING) + model_to_insert = TrackedServiceModel( + dynamic_service_start=None, + user_id=None, + project_id=None, + requested_state=UserRequestedState.RUNNING, + ) data_to_insert = {uuid4(): model_to_insert for _ in range(item_count)} @@ -74,7 +84,7 @@ async def test_tracker_listing(tracker: Tracker, item_count: NonNegativeInt) -> ) response = await tracker.all() - for key in response.keys(): + for key in response: assert isinstance(key, NodeID) assert response == data_to_insert From 176be12c9664c1d104338f1c55968a1467136f43 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 13:41:27 +0200 Subject: [PATCH 063/122] refactor --- .../status_monitor/_deferred_get_status.py | 4 ++-- .../test_services_status_monitor__monitor.py | 20 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index cd5a4ac1392c..931f1d3fc414 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -9,7 +9,7 @@ from servicelib.deferred_tasks._base_deferred_handler import DeferredContext from ..director_v2 import DirectorV2Client -from ..notifier import notify_frontend +from ..notifier import notify_service_status_change from ..service_tracker import ( can_notify_frontend, set_if_status_changed, @@ -65,4 +65,4 @@ async def on_result( status_changed: bool = await set_if_status_changed(app, node_id, result) if await can_notify_frontend(app, node_id, status_changed=status_changed): - await notify_frontend(app, node_id, result) + await notify_service_status_change(app, node_id, result) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index 477d5f4f96d4..40068fb9779e 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -3,7 +3,7 @@ import json import re -from collections.abc import AsyncIterable +from collections.abc import AsyncIterable, Callable from copy import deepcopy from datetime import timedelta from typing import Any @@ -17,7 +17,8 @@ from httpx import Request, Response from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( - 
RPCDynamicServiceCreate, + DynamicServiceStart, + DynamicServiceStop, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID @@ -363,15 +364,14 @@ async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arg response_timeline: _ResponseTimeline, expected_notification_count: NonNegativeInt, remove_tracked_count: NonNegativeInt, + get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], + get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], ): - # request started (and also tracks service) - data_dict = deepcopy(RPCDynamicServiceCreate.Config.schema_extra["example"]) - data_dict["service_uuid"] = f"{node_id}" - await set_request_as_running(app, RPCDynamicServiceCreate.parse_obj(data_dict)) - - # request stopping only if tracked - if not user_requests_running: - await set_request_as_stopped(app, node_id) + # ensure it does not exist before running this + if user_requests_running: + await set_request_as_running(app, get_dynamic_service_start(node_id)) + else: + await set_request_as_stopped(app, get_dynamic_service_stop(node_id)) entries_in_timeline = len(response_timeline) From b80edc59c0448275e2fb5301565802a94814b672 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 13:41:42 +0200 Subject: [PATCH 064/122] added missing --- .../dynamic-scheduler/tests/unit/conftest.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 services/dynamic-scheduler/tests/unit/conftest.py diff --git a/services/dynamic-scheduler/tests/unit/conftest.py b/services/dynamic-scheduler/tests/unit/conftest.py new file mode 100644 index 000000000000..642ed2170ce1 --- /dev/null +++ b/services/dynamic-scheduler/tests/unit/conftest.py @@ -0,0 +1,29 @@ +from collections.abc import Callable +from copy import deepcopy + +import pytest +from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( + DynamicServiceStart, + 
DynamicServiceStop, +) +from models_library.projects_nodes_io import NodeID + + +@pytest.fixture +def get_dynamic_service_start() -> Callable[[NodeID], DynamicServiceStart]: + def _(node_id: NodeID) -> DynamicServiceStart: + dict_data = deepcopy(DynamicServiceStart.Config.schema_extra["example"]) + dict_data["service_uuid"] = f"{node_id}" + return DynamicServiceStart.parse_obj(dict_data) + + return _ + + +@pytest.fixture +def get_dynamic_service_stop() -> Callable[[NodeID], DynamicServiceStop]: + def _(node_id: NodeID) -> DynamicServiceStop: + dict_data = deepcopy(DynamicServiceStop.Config.schema_extra["example"]) + dict_data["node_id"] = f"{node_id}" + return DynamicServiceStop.parse_obj(dict_data) + + return _ From 8a47d45544ddcdbcb45b3034f8d70100b9ef7799 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 13:43:33 +0200 Subject: [PATCH 065/122] refactored test --- .../status_monitor/test_services_status_monitor__monitor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index 40068fb9779e..b6ee225c6a0f 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -29,6 +29,7 @@ from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( _api, + get_all_tracked, set_request_as_running, set_request_as_stopped, ) @@ -367,7 +368,8 @@ async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arg get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], ): - # ensure it does not exist before running this + assert await get_all_tracked(app) == {} + 
if user_requests_running: await set_request_as_running(app, get_dynamic_service_start(node_id)) else: From 0d02a76cb34b964c8500edbddd93a3a74b9f2653 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 13:43:53 +0200 Subject: [PATCH 066/122] rename --- .../services/status_monitor/_monitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index a60f66e73c83..56fbf664623d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -54,7 +54,7 @@ async def _worker_start_get_status_requests(self) -> None: # check if service is idle and status polling should stop if ( model.current_state == SchedulerServiceState.IDLE - and model.requested_sate == UserRequestedState.STOPPED + and model.requested_state == UserRequestedState.STOPPED ): to_remove.append(node_id) continue From 256091199f5845cd68307c4d06d7dd717a77927e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 14:21:26 +0200 Subject: [PATCH 067/122] fixed broken app --- .../service-library/tests/deferred_tasks/example_app.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/service-library/tests/deferred_tasks/example_app.py b/packages/service-library/tests/deferred_tasks/example_app.py index e363204d2495..fb33c8fe5ee3 100644 --- a/packages/service-library/tests/deferred_tasks/example_app.py +++ b/packages/service-library/tests/deferred_tasks/example_app.py @@ -55,7 +55,8 @@ async def on_result(cls, result: str, context: DeferredContext) -> None: class InMemoryLists: def __init__(self, redis_settings: RedisSettings, port: int) -> None: self.redis_sdk = RedisClientSDK( - 
redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS) + redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS), + decode_responses=True, ) self.port = port @@ -80,7 +81,8 @@ def __init__( max_workers: NonNegativeInt, ) -> None: self._redis_client = RedisClientSDKHealthChecked( - redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS) + redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS), + decode_responses=False, ) self._manager = DeferredManager( rabbit_settings, From ac0ea32aec61294148c0c945eca39f325136cf19 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 15:13:09 +0200 Subject: [PATCH 068/122] pushing notification to FE --- .../dynamic_scheduler/services.py | 17 +++++- .../services/notifier.py | 57 +++++++++++++++++++ .../services/service_tracker/__init__.py | 2 + .../services/service_tracker/_api.py | 8 +++ .../status_monitor/_deferred_get_status.py | 27 ++++++--- .../services/status_monitor/_monitor.py | 22 +++---- .../test_services_status_monitor__monitor.py | 4 +- .../projects/_nodes_handlers.py | 9 ++- 8 files changed, 118 insertions(+), 28 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py index 9da2dad425e2..eb9abf33907b 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py @@ -7,7 +7,11 @@ DynamicServiceStart, DynamicServiceStop, ) -from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.api_schemas_webserver.projects_nodes import ( + NodeGet, + NodeGetIdle, + NodeGetUnknown, +) from models_library.projects_nodes_io import NodeID from 
models_library.rabbitmq_basic_types import RPCMethodName from pydantic import NonNegativeInt, parse_obj_as @@ -71,3 +75,14 @@ async def stop_dynamic_service( timeout_s=timeout_s, ) assert result is None # nosec + + +def get_dict_from_status( + status: NodeGetIdle | NodeGetUnknown | DynamicServiceGet | NodeGet, +) -> dict: + """shared between different backend services to guarantee same result to frontend""" + return ( + status.dict(by_alias=True) + if isinstance(status, DynamicServiceGet) + else status.dict() + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py new file mode 100644 index 000000000000..d6226801a4a0 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py @@ -0,0 +1,57 @@ +import contextlib + +import socketio +from fastapi import FastAPI +from fastapi.encoders import jsonable_encoder +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_dynamic_sidecar.socketio import ( + SOCKET_IO_SERVICE_DISK_USAGE_EVENT, +) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle +from models_library.api_schemas_webserver.socketio import SocketIORoomStr +from models_library.users import UserID +from servicelib.fastapi.app_state import SingletonInAppStateMixin +from servicelib.rabbitmq.rpc_interfaces.dynamic_scheduler.services import ( + get_dict_from_status, +) + + +class Notifier(SingletonInAppStateMixin): + app_state_name: str = "notifier" + + def __init__(self, sio_manager: socketio.AsyncAioPikaManager): + self._sio_manager = sio_manager + + async def notify_service_status( + self, user_id: UserID, status: NodeGet | DynamicServiceGet | NodeGetIdle + ) -> None: + await self._sio_manager.emit( + SOCKET_IO_SERVICE_DISK_USAGE_EVENT, + data=jsonable_encoder(get_dict_from_status(status)), + 
room=SocketIORoomStr.from_user_id(user_id), + ) + + +async def notify_service_status_change( + app: FastAPI, user_id: UserID, status: NodeGet | DynamicServiceGet | NodeGetIdle +) -> None: + notifier: Notifier = Notifier.get_from_app_state(app) + await notifier.notify_service_status(user_id=user_id, status=status) + + +def setup_notifier(app: FastAPI): + async def _on_startup() -> None: + assert app.state.external_socketio # nosec + + notifier = Notifier( + sio_manager=app.state.external_socketio, + ) + notifier.set_to_app_state(app) + assert Notifier.get_from_app_state(app) == notifier # nosec + + async def _on_shutdown() -> None: + with contextlib.suppress(AttributeError): + Notifier.pop_from_app_state(app) + + app.add_event_handler("startup", _on_startup) + app.add_event_handler("shutdown", _on_shutdown) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index 24bd9d52effe..95de4a751470 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -3,6 +3,7 @@ can_notify_frontend, get_all_tracked, get_tracked, + get_user_id, remove_tracked, set_if_status_changed, set_request_as_running, @@ -17,6 +18,7 @@ "can_notify_frontend", "get_all_tracked", "get_tracked", + "get_user_id", "NORMAL_RATE_POLL_INTERVAL", "remove_tracked", "set_if_status_changed", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 5cdfaeeae12e..21e32c495c95 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -12,6 +12,7 @@ from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID from models_library.services_enums import ServiceState +from models_library.users import UserID from servicelib.deferred_tasks import TaskUID from ._models import SchedulerServiceState, TrackedServiceModel, UserRequestedState @@ -234,3 +235,10 @@ async def get_all_tracked(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: """Returns all tracked services""" tracker: Tracker = get_tracker(app) return await tracker.all() + + +async def get_user_id(app: FastAPI, node_id: NodeID) -> UserID | None: + """returns user_id for the user""" + tracker: Tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + return model.user_id if model else None diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index 931f1d3fc414..38542886a785 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -5,16 +5,13 @@ from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID +from models_library.users import UserID from servicelib.deferred_tasks import BaseDeferredHandler, TaskUID from servicelib.deferred_tasks._base_deferred_handler import DeferredContext +from .. 
import service_tracker from ..director_v2 import DirectorV2Client from ..notifier import notify_service_status_change -from ..service_tracker import ( - can_notify_frontend, - set_if_status_changed, - set_service_status_task_uid, -) _logger = logging.getLogger(__name__) @@ -38,7 +35,7 @@ async def on_created(cls, task_uid: TaskUID, context: DeferredContext) -> None: app: FastAPI = context["app"] node_id: NodeID = context["node_id"] - await set_service_status_task_uid(app, node_id, task_uid) + await service_tracker.set_service_status_task_uid(app, node_id, task_uid) @classmethod async def run( @@ -63,6 +60,18 @@ async def on_result( _logger.debug("Received status for service '%s': '%s'", node_id, result) - status_changed: bool = await set_if_status_changed(app, node_id, result) - if await can_notify_frontend(app, node_id, status_changed=status_changed): - await notify_service_status_change(app, node_id, result) + status_changed: bool = await service_tracker.set_if_status_changed( + app, node_id, result + ) + if await service_tracker.can_notify_frontend( + app, node_id, status_changed=status_changed + ): + user_id: UserID | None = await service_tracker.get_user_id(app, node_id) + if user_id: + await notify_service_status_change(app, user_id, result) + else: + _logger.info( + "Did not find a user for '%s', skipping status delivery of: %s", + node_id, + result, + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index 56fbf664623d..ac16fb2b9ab3 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -9,13 +9,8 @@ from pydantic import NonNegativeFloat, NonNegativeInt from servicelib.utils import logged_gather -from ..service_tracker import 
( - NORMAL_RATE_POLL_INTERVAL, - TrackedServiceModel, - get_all_tracked, - remove_tracked, - set_scheduled_to_run, -) +from .. import service_tracker +from ..service_tracker import NORMAL_RATE_POLL_INTERVAL, TrackedServiceModel from ..service_tracker._models import SchedulerServiceState, UserRequestedState from ._deferred_get_status import DeferredGetStatus @@ -27,7 +22,7 @@ async def _start_get_status_deferred( app: FastAPI, node_id: NodeID, *, next_check_delay: timedelta ) -> None: - await set_scheduled_to_run(app, node_id, next_check_delay) + await service_tracker.set_scheduled_to_run(app, node_id, next_check_delay) await DeferredGetStatus.start(node_id=node_id) @@ -43,7 +38,9 @@ def status_worker_interval_seconds(self) -> NonNegativeFloat: async def _worker_start_get_status_requests(self) -> None: # NOTE: this worker runs on only once across all instances of the scheduler - models: dict[NodeID, TrackedServiceModel] = await get_all_tracked(self.app) + models: dict[ + NodeID, TrackedServiceModel + ] = await service_tracker.get_all_tracked(self.app) to_remove: list[NodeID] = [] to_start: list[NodeID] = [] @@ -81,7 +78,10 @@ async def _worker_start_get_status_requests(self) -> None: _logger.debug("Removing tracked services: '%s'", to_remove) await logged_gather( - *(remove_tracked(self.app, node_id) for node_id in to_remove), + *( + service_tracker.remove_tracked(self.app, node_id) + for node_id in to_remove + ), max_concurrency=_MAX_CONCURRENCY, ) @@ -97,7 +97,7 @@ async def _worker_start_get_status_requests(self) -> None: ) async def setup(self) -> None: - # TODO: run uniquely across all processes + # TODO: finish uniquely run across all processes pass async def shutdown(self) -> None: diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index b6ee225c6a0f..c819ec303207 100644 --- 
a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -254,8 +254,8 @@ def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: @pytest.fixture def remove_tracked_spy(mocker: MockerFixture) -> AsyncMock: - mock_method = mocker.AsyncMock(wraps=_monitor.remove_tracked) - return mocker.patch.object(_monitor, "remove_tracked", mock_method) + mock_method = mocker.AsyncMock(wraps=_monitor.service_tracker.remove_tracked) + return mocker.patch.object(_monitor.service_tracker, "remove_tracked", mock_method) @pytest.fixture diff --git a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py index 8fe5e00d69ba..364d063b0c30 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py +++ b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py @@ -54,6 +54,9 @@ ServiceWaitingForManualInterventionError, ServiceWasNotFoundError, ) +from servicelib.rabbitmq.rpc_interfaces.dynamic_scheduler.services import ( + get_dict_from_status, +) from simcore_postgres_database.models.users import UserRole from .._meta import API_VTAG as VTAG @@ -201,11 +204,7 @@ async def get_node(request: web.Request) -> web.Response: ) ) - return envelope_json_response( - service_data.dict(by_alias=True) - if isinstance(service_data, DynamicServiceGet) - else service_data.dict() - ) + return envelope_json_response(get_dict_from_status(service_data)) @routes.patch( From ee22e17761d01ed4913d89398ca3696853de12cc Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 15:18:21 +0200 Subject: [PATCH 069/122] using correct event name --- .../api_schemas_dynamic_scheduler/socketio.py | 3 +++ .../simcore_service_dynamic_scheduler/services/notifier.py | 6 +++--- 2 files changed, 6 
insertions(+), 3 deletions(-) create mode 100644 packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py diff --git a/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py new file mode 100644 index 000000000000..89a493a56cce --- /dev/null +++ b/packages/models-library/src/models_library/api_schemas_dynamic_scheduler/socketio.py @@ -0,0 +1,3 @@ +from typing import Final + +SOCKET_IO_SERVICE_STATUS_EVENT: Final[str] = "serviceStatus" diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py index d6226801a4a0..e650bbd2e5c1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py @@ -4,8 +4,8 @@ from fastapi import FastAPI from fastapi.encoders import jsonable_encoder from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet -from models_library.api_schemas_dynamic_sidecar.socketio import ( - SOCKET_IO_SERVICE_DISK_USAGE_EVENT, +from models_library.api_schemas_dynamic_scheduler.socketio import ( + SOCKET_IO_SERVICE_STATUS_EVENT, ) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.api_schemas_webserver.socketio import SocketIORoomStr @@ -26,7 +26,7 @@ async def notify_service_status( self, user_id: UserID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> None: await self._sio_manager.emit( - SOCKET_IO_SERVICE_DISK_USAGE_EVENT, + SOCKET_IO_SERVICE_STATUS_EVENT, data=jsonable_encoder(get_dict_from_status(status)), room=SocketIORoomStr.from_user_id(user_id), ) From b8c9236000f0d239ba3f78cbcf73378f0b9e1dff Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 7 Jun 2024 16:09:07 
+0200 Subject: [PATCH 070/122] first running version --- .../core/application.py | 5 ++- .../services/notifier/__init__.py | 7 ++++ .../{notifier.py => notifier/_notifier.py} | 0 .../services/notifier/_setup.py | 8 +++++ .../services/notifier/_socketio.py | 32 +++++++++++++++++++ .../services/redis.py | 13 ++++++-- .../services/status_monitor/__init__.py | 3 ++ .../services/status_monitor/_monitor.py | 16 ++++++++-- .../services/status_monitor/_setup.py | 5 ++- 9 files changed, 81 insertions(+), 8 deletions(-) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py rename services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/{notifier.py => notifier/_notifier.py} (100%) create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py create mode 100644 services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py index d4b5f357ede1..b564fa56de7c 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/core/application.py @@ -17,10 +17,11 @@ from ..api.rpc.routes import setup_rpc_api_routes from ..services.deferred_manager import setup_deferred_manager from ..services.director_v2 import setup_director_v2 +from ..services.notifier import setup_notifier from ..services.rabbitmq import setup_rabbitmq from ..services.redis import setup_redis from ..services.service_tracker import setup_service_tracker -from ..services.status_monitor._setup import setup_status_monitor +from ..services.status_monitor import setup_status_monitor from .settings import ApplicationSettings @@ -58,6 +59,8 @@ def create_app(settings: 
ApplicationSettings | None = None) -> FastAPI: setup_redis(app) + setup_notifier(app) + setup_service_tracker(app) setup_deferred_manager(app) setup_status_monitor(app) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py new file mode 100644 index 000000000000..8cd33e12808f --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/__init__.py @@ -0,0 +1,7 @@ +from ._notifier import notify_service_status_change +from ._setup import setup_notifier + +__all__: tuple[str, ...] = ( + "setup_notifier", + "notify_service_status_change", +) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py similarity index 100% rename from services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier.py rename to services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py new file mode 100644 index 000000000000..935a301f045d --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py @@ -0,0 +1,8 @@ +from fastapi import FastAPI + +from . 
import _notifier, _socketio + + +def setup_notifier(app: FastAPI): + _socketio.setup(app) + _notifier.setup_notifier(app) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py new file mode 100644 index 000000000000..521786004818 --- /dev/null +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py @@ -0,0 +1,32 @@ +import logging + +import socketio +from fastapi import FastAPI +from servicelib.socketio_utils import cleanup_socketio_async_pubsub_manager + +from ...core.settings import ApplicationSettings + +_logger = logging.getLogger(__name__) + + +def setup(app: FastAPI): + settings: ApplicationSettings = app.state.settings + + async def _on_startup() -> None: + assert app.state.rabbitmq_client # nosec + + # Connect to the as an external process in write-only mode + # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes + assert settings.DYNAMIC_SCHEDULER_RABBITMQ # nosec + app.state.external_socketio = socketio.AsyncAioPikaManager( + url=settings.DYNAMIC_SCHEDULER_RABBITMQ.dsn, logger=_logger, write_only=True + ) + + async def _on_shutdown() -> None: + if external_socketio := getattr(app.state, "external_socketio"): # noqa: B009 + await cleanup_socketio_async_pubsub_manager( + server_manager=external_socketio + ) + + app.add_event_handler("startup", _on_startup) + app.add_event_handler("shutdown", _on_shutdown) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py index db8f51e653c3..fbe7cab2727b 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/redis.py @@ -8,18 +8,25 @@ ) 
from settings_library.redis import RedisDatabase, RedisSettings -_REDIS_DATABASES: Final[set[RedisDatabase]] = { +_DECODE_DBS: Final[set[RedisDatabase]] = { + RedisDatabase.LOCKS, +} + +_BINARY_DBS: Final[set[RedisDatabase]] = { RedisDatabase.DEFERRED_TASKS, RedisDatabase.DYNAMIC_SERVICES, } +_ALL_REDIS_DATABASES: Final[set[RedisDatabase]] = _DECODE_DBS | _BINARY_DBS + def setup_redis(app: FastAPI) -> None: settings: RedisSettings = app.state.settings.DYNAMIC_SCHEDULER_REDIS async def on_startup() -> None: app.state.redis_clients_manager = manager = RedisClientsManager( - {RedisManagerDBConfig(x, decode_responses=False) for x in _REDIS_DATABASES}, + {RedisManagerDBConfig(x, decode_responses=False) for x in _BINARY_DBS} + | {RedisManagerDBConfig(x, decode_responses=True) for x in _DECODE_DBS}, settings, ) await manager.setup() @@ -42,4 +49,4 @@ def get_redis_client( def get_all_redis_clients( app: FastAPI, ) -> dict[RedisDatabase, RedisClientSDKHealthChecked]: - return {d: get_redis_client(app, d) for d in _REDIS_DATABASES} + return {d: get_redis_client(app, d) for d in _ALL_REDIS_DATABASES} diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py index e69de29bb2d1..263451243252 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/__init__.py @@ -0,0 +1,3 @@ +from ._setup import setup_status_monitor + +__all__: tuple[str, ...] 
= ("setup_status_monitor",) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index ac16fb2b9ab3..05d7ec733cf5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -7,9 +7,13 @@ from fastapi import FastAPI from models_library.projects_nodes_io import NodeID from pydantic import NonNegativeFloat, NonNegativeInt +from servicelib.background_task import stop_periodic_task +from servicelib.redis_utils import start_exclusive_periodic_task from servicelib.utils import logged_gather +from settings_library.redis import RedisDatabase from .. import service_tracker +from ..redis import get_redis_client from ..service_tracker import NORMAL_RATE_POLL_INTERVAL, TrackedServiceModel from ..service_tracker._models import SchedulerServiceState, UserRequestedState from ._deferred_get_status import DeferredGetStatus @@ -97,8 +101,14 @@ async def _worker_start_get_status_requests(self) -> None: ) async def setup(self) -> None: - # TODO: finish uniquely run across all processes - pass + self.app.state.status_monitor_background_task = start_exclusive_periodic_task( + get_redis_client(self.app, RedisDatabase.LOCKS), + self._worker_start_get_status_requests, + task_period=timedelta(seconds=1), + retry_after=timedelta(seconds=1), + task_name="periodic_service_status_update", + ) async def shutdown(self) -> None: - pass + if self.app.state.status_monitor_background_task: + await stop_periodic_task(self.app.state.status_monitor_background_task) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py index 
c3ee9d64d6df..8f9601464bcb 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_setup.py @@ -1,14 +1,17 @@ from datetime import timedelta +from typing import Final from fastapi import FastAPI from ._monitor import Monitor +_STATUS_WORKER_DEFAULT_INTERVAL: Final[timedelta] = timedelta(seconds=1) + def setup_status_monitor(app: FastAPI) -> None: async def on_startup() -> None: app.state.status_monitor = monitor = Monitor( - app, check_threshold=timedelta(seconds=1) + app, status_worker_interval=_STATUS_WORKER_DEFAULT_INTERVAL ) await monitor.setup() From 18c4f876c47bcb6c33a3d6d6e1faad1016452476 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 10 Jun 2024 08:13:42 +0200 Subject: [PATCH 071/122] pylint --- .../services/service_tracker/_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 1a2c2f42e834..4ed2da9cd139 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -36,7 +36,7 @@ class SchedulerServiceState(StrAutoEnum): @dataclass -class TrackedServiceModel: +class TrackedServiceModel: # pylint:disable=too-many-instance-attributes # used to create the service in any given moment if the requested_state is RUNNING # can be set to None only when stopping the service dynamic_service_start: DynamicServiceStart | None From 27644be96691b22f47e46fb63c3f8879fd4068c9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 10 Jun 2024 08:47:17 +0200 Subject: [PATCH 072/122] mypy --- .../src/models_library/utils/enums.py | 5 
++++ .../services/service_tracker/_api.py | 15 ++++++------ .../services/service_tracker/_models.py | 23 +++++++++---------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/packages/models-library/src/models_library/utils/enums.py b/packages/models-library/src/models_library/utils/enums.py index 2e0be0e4865a..3cd19a288e2a 100644 --- a/packages/models-library/src/models_library/utils/enums.py +++ b/packages/models-library/src/models_library/utils/enums.py @@ -1,8 +1,13 @@ +import enum import inspect from enum import Enum, unique from typing import Any +class auto_str(enum.auto): # noqa: N801 + value: str = enum._auto_null # pylint:disable=protected-access # noqa: SLF001 + + @unique class StrAutoEnum(str, Enum): @staticmethod diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 21e32c495c95..b8048277b139 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -81,7 +81,8 @@ def __get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: state_key = "state" if isinstance(status, DynamicServiceGet) else "service_state" state: ServiceState | str = getattr(status, state_key) - return state.value if isinstance(state, ServiceState) else state + result: str = state.value if isinstance(state, ServiceState) else state + return result def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: @@ -101,26 +102,26 @@ def _get_current_state( """ if isinstance(status, NodeGetIdle): - return SchedulerServiceState.IDLE + return SchedulerServiceState.IDLE # type:ignore service_state: ServiceState = ServiceState(__get_state_str(status)) if requested_sate == UserRequestedState.RUNNING: if service_state == 
ServiceState.RUNNING: - return SchedulerServiceState.RUNNING + return SchedulerServiceState.RUNNING # type:ignore if ServiceState.PENDING <= service_state <= ServiceState.STARTING: - return SchedulerServiceState.STARTING + return SchedulerServiceState.STARTING # type:ignore if service_state < ServiceState.PENDING or service_state > ServiceState.RUNNING: - return SchedulerServiceState.UNEXPECTED_OUTCOME + return SchedulerServiceState.UNEXPECTED_OUTCOME # type:ignore if requested_sate == UserRequestedState.STOPPED: if service_state >= ServiceState.RUNNING: - return SchedulerServiceState.STOPPING + return SchedulerServiceState.STOPPING # type:ignore if service_state < ServiceState.RUNNING: - return SchedulerServiceState.UNEXPECTED_OUTCOME + return SchedulerServiceState.UNEXPECTED_OUTCOME # type:ignore msg = f"Could not determine current_state from: '{requested_sate=}', '{status=}'" raise TypeError(msg) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 4ed2da9cd139..8e577dae13a0 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -1,7 +1,6 @@ import pickle from dataclasses import dataclass, field from datetime import timedelta -from enum import auto import arrow from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( @@ -9,30 +8,30 @@ ) from models_library.projects import ProjectID from models_library.users import UserID -from models_library.utils.enums import StrAutoEnum +from models_library.utils.enums import StrAutoEnum, auto_str from servicelib.deferred_tasks import TaskUID class UserRequestedState(StrAutoEnum): - RUNNING = auto() - STOPPED = auto() + RUNNING = auto_str() + STOPPED = auto_str() class 
SchedulerServiceState(StrAutoEnum): # service was started and is running as expected - RUNNING = auto() + RUNNING = auto_str() # service is not present - IDLE = auto() + IDLE = auto_str() # something went wrong while starting/stopping service - UNEXPECTED_OUTCOME = auto() + UNEXPECTED_OUTCOME = auto_str() # service is being started - STARTING = auto() + STARTING = auto_str() # service is being stopped - STOPPING = auto() + STOPPING = auto_str() # service status has not been determined - UNKNOWN = auto() + UNKNOWN = auto_str() @dataclass @@ -49,7 +48,7 @@ class TrackedServiceModel: # pylint:disable=too-many-instance-attributes requested_state: UserRequestedState # set this after parsing the incoming state via the API calls - current_state: SchedulerServiceState = SchedulerServiceState.UNKNOWN # type: ignore + current_state: SchedulerServiceState = SchedulerServiceState.UNKNOWN ############################# ### SERVICE STATUS UPDATE ### @@ -86,4 +85,4 @@ def to_bytes(self) -> bytes: @classmethod def from_bytes(cls, data: bytes) -> "TrackedServiceModel": - return pickle.loads(data) # noqa: S301 + return pickle.loads(data) # type: ignore # noqa: S301 From d3035444c3444ee7f0c1d3b156a9e35daf67fc55 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 10 Jun 2024 09:23:23 +0200 Subject: [PATCH 073/122] feedback --- packages/service-library/src/servicelib/redis.py | 13 +++++++------ packages/service-library/tests/test_redis.py | 10 ++++++++-- .../simcore_service_director_v2/modules/redis.py | 2 +- .../server/src/simcore_service_webserver/redis.py | 6 +++--- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/packages/service-library/src/servicelib/redis.py b/packages/service-library/src/servicelib/redis.py index fbad667293e6..36dd29cb6aa4 100644 --- a/packages/service-library/src/servicelib/redis.py +++ b/packages/service-library/src/servicelib/redis.py @@ -20,7 +20,6 @@ from .background_task import periodic_task, start_periodic_task, stop_periodic_task from 
.logging_utils import log_catch, log_context from .retry_policies import RedisRetryPolicyUponInitialization -from .utils import logged_gather _DEFAULT_LOCK_TTL: Final[datetime.timedelta] = datetime.timedelta(seconds=10) _DEFAULT_SOCKET_TIMEOUT: Final[datetime.timedelta] = datetime.timedelta(seconds=30) @@ -219,7 +218,7 @@ async def setup(self) -> None: async def shutdown(self) -> None: if self._health_check_task: - await stop_periodic_task(self._health_check_task, timeout=1) + await stop_periodic_task(self._health_check_task) await super().shutdown() @@ -236,7 +235,7 @@ class RedisClientsManager: Manages the lifetime of redis client sdk connections """ - db_configs: set[RedisManagerDBConfig] + databases_configs: set[RedisManagerDBConfig] settings: RedisSettings _client_sdks: dict[RedisDatabase, RedisClientSDKHealthChecked] = field( @@ -244,17 +243,19 @@ class RedisClientsManager: ) async def setup(self) -> None: - for config in self.db_configs: + for config in self.databases_configs: self._client_sdks[config.database] = RedisClientSDKHealthChecked( redis_dsn=self.settings.build_redis_dsn(config.database), decode_responses=config.decode_responses, health_check_interval=config.health_check_interval, ) - await logged_gather(*(c.setup() for c in self._client_sdks.values())) + for client in self._client_sdks.values(): + await client.setup() async def shutdown(self) -> None: - await logged_gather(*(c.shutdown() for c in self._client_sdks.values())) + for client in self._client_sdks.values(): + await client.shutdown() def client(self, database: RedisDatabase) -> RedisClientSDKHealthChecked: return self._client_sdks[database] diff --git a/packages/service-library/tests/test_redis.py b/packages/service-library/tests/test_redis.py index b2bf5515d2c0..8b23ab2b801f 100644 --- a/packages/service-library/tests/test_redis.py +++ b/packages/service-library/tests/test_redis.py @@ -251,9 +251,11 @@ async def _inc_counter() -> None: async def 
test_redis_client_sdks_manager(redis_service: RedisSettings): all_redis_configs: set[RedisManagerDBConfig] = { - RedisManagerDBConfig(x) for x in RedisDatabase + RedisManagerDBConfig(db) for db in RedisDatabase } - manager = RedisClientsManager(db_configs=all_redis_configs, settings=redis_service) + manager = RedisClientsManager( + databases_configs=all_redis_configs, settings=redis_service + ) await manager.setup() @@ -269,6 +271,10 @@ async def test_redis_client_sdk_health_checked(redis_service: RedisSettings): client = RedisClientSDKHealthChecked(redis_resources_dns) assert client assert client.redis_dsn == redis_resources_dns + + # ensure nothing happens if shutdown is called before setup + await client.shutdown() + await client.setup() await client._check_health() # noqa: SLF001 diff --git a/services/director-v2/src/simcore_service_director_v2/modules/redis.py b/services/director-v2/src/simcore_service_director_v2/modules/redis.py index dfd66c1d715e..7cb6f86cc824 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/redis.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/redis.py @@ -10,7 +10,7 @@ async def on_startup() -> None: settings: AppSettings = app.state.settings app.state.redis_clients_manager = redis_clients_manager = RedisClientsManager( - db_configs={ + databases_configs={ RedisManagerDBConfig(db) for db in ( RedisDatabase.LOCKS, diff --git a/services/web/server/src/simcore_service_webserver/redis.py b/services/web/server/src/simcore_service_webserver/redis.py index cf870d53f35f..deee93f1fbd4 100644 --- a/services/web/server/src/simcore_service_webserver/redis.py +++ b/services/web/server/src/simcore_service_webserver/redis.py @@ -32,9 +32,9 @@ async def setup_redis_client(app: web.Application): """ redis_settings: RedisSettings = get_plugin_settings(app) app[_APP_REDIS_CLIENTS_MANAGER] = manager = RedisClientsManager( - db_configs={ - RedisManagerDBConfig(x) - for x in ( + databases_configs={ + 
RedisManagerDBConfig(db) + for db in ( RedisDatabase.RESOURCES, RedisDatabase.LOCKS, RedisDatabase.VALIDATION_CODES, From a1a7bc166387342067883c2bc2ce55ca299ab2d6 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 12 Jul 2024 15:13:40 +0200 Subject: [PATCH 074/122] refactor test --- .../tests/deferred_tasks/test__utils.py | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/packages/service-library/tests/deferred_tasks/test__utils.py b/packages/service-library/tests/deferred_tasks/test__utils.py index baf942f2d39a..c336c5adc983 100644 --- a/packages/service-library/tests/deferred_tasks/test__utils.py +++ b/packages/service-library/tests/deferred_tasks/test__utils.py @@ -2,6 +2,7 @@ # pylint:disable=unused-argument import asyncio +import operator import time from collections.abc import AsyncIterator, Callable from contextlib import AbstractAsyncContextManager, asynccontextmanager @@ -17,7 +18,7 @@ RabbitRouter, TestRabbitBroker, ) -from pydantic import NonNegativeFloat +from pydantic import NonNegativeInt from servicelib.deferred_tasks._utils import stop_retry_for_unintended_errors from settings_library.rabbit import RabbitSettings from tenacity._asyncio import AsyncRetrying @@ -59,12 +60,19 @@ def rabbit_exchange() -> RabbitExchange: return RabbitExchange("test_exchange") -async def _get_call_count( - handler: HandlerCallWrapper, *, wait_for: NonNegativeFloat = 0.1 -) -> int: - await asyncio.sleep(wait_for) - assert handler.mock - return len(handler.mock.call_args_list) +async def _assert_call_count( + handler: HandlerCallWrapper, *, expected_count: NonNegativeInt, operator=operator.eq +) -> None: + async for attempt in AsyncRetrying( + wait=wait_fixed(0.01), + stop=stop_after_delay(5), + reraise=True, + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert handler.mock + count = len(handler.mock.call_args_list) + assert operator(count) == expected_count async def test_handler_called_as_expected( @@ 
-85,12 +93,12 @@ async def print_message_with_deco(some_value: int) -> None: await test_broker.publish( 24, queue="print_message_no_deco", exchange=rabbit_exchange ) - assert await _get_call_count(print_message_no_deco) == 1 + await _assert_call_count(print_message_no_deco, expected_count=1) await test_broker.publish( 42, queue="print_message_with_deco", exchange=rabbit_exchange ) - assert await _get_call_count(print_message_with_deco) == 1 + await _assert_call_count(print_message_with_deco, expected_count=1) async def test_handler_nacks_message( @@ -115,12 +123,16 @@ async def nacked_message_with_deco(msg: str) -> None: await test_broker.publish( "", queue="nacked_message_no_deco", exchange=rabbit_exchange ) - assert await _get_call_count(nacked_message_no_deco) > 10 + await _assert_call_count( + nacked_message_no_deco, expected_count=10, operator=operator.gt + ) await test_broker.publish( "", queue="nacked_message_with_deco", exchange=rabbit_exchange ) - assert await _get_call_count(nacked_message_with_deco) > 10 + await _assert_call_count( + nacked_message_with_deco, expected_count=10, operator=operator.gt + ) async def test_handler_rejects_message( @@ -146,12 +158,12 @@ async def rejected_message_with_deco(msg: str) -> None: await test_broker.publish( "", queue="rejected_message_no_deco", exchange=rabbit_exchange ) - assert await _get_call_count(rejected_message_no_deco) == 1 + await _assert_call_count(rejected_message_no_deco, expected_count=1) await test_broker.publish( "", queue="rejected_message_with_deco", exchange=rabbit_exchange ) - assert await _get_call_count(rejected_message_with_deco) == 1 + await _assert_call_count(rejected_message_with_deco, expected_count=1) async def test_handler_unintended_error( @@ -178,12 +190,14 @@ async def unintended_error_with_deco(msg: str) -> None: await test_broker.publish( "", queue="unintended_error_no_deco", exchange=rabbit_exchange ) - assert await _get_call_count(unintended_error_no_deco) > 10 + await 
_assert_call_count( + unintended_error_no_deco, expected_count=10, operator=operator.gt + ) await test_broker.publish( "", queue="unintended_error_with_deco", exchange=rabbit_exchange ) - assert await _get_call_count(unintended_error_with_deco) == 1 + await _assert_call_count(unintended_error_with_deco, expected_count=1) async def test_handler_parallelism( From 4edbe0adeb6dd63259dcad7592176862c95681d2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 12 Jul 2024 15:14:18 +0200 Subject: [PATCH 075/122] added typing --- packages/service-library/tests/deferred_tasks/test__utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/service-library/tests/deferred_tasks/test__utils.py b/packages/service-library/tests/deferred_tasks/test__utils.py index c336c5adc983..c34913b50770 100644 --- a/packages/service-library/tests/deferred_tasks/test__utils.py +++ b/packages/service-library/tests/deferred_tasks/test__utils.py @@ -61,7 +61,10 @@ def rabbit_exchange() -> RabbitExchange: async def _assert_call_count( - handler: HandlerCallWrapper, *, expected_count: NonNegativeInt, operator=operator.eq + handler: HandlerCallWrapper, + *, + expected_count: NonNegativeInt, + operator: Callable = operator.eq ) -> None: async for attempt in AsyncRetrying( wait=wait_fixed(0.01), From a2a268a2c320a046c252d9c240cb55254b33bde7 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 12 Jul 2024 15:14:52 +0200 Subject: [PATCH 076/122] avoids obscuring --- .../tests/deferred_tasks/test__utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/service-library/tests/deferred_tasks/test__utils.py b/packages/service-library/tests/deferred_tasks/test__utils.py index c34913b50770..30c8c7326637 100644 --- a/packages/service-library/tests/deferred_tasks/test__utils.py +++ b/packages/service-library/tests/deferred_tasks/test__utils.py @@ -64,7 +64,7 @@ async def _assert_call_count( handler: HandlerCallWrapper, *, expected_count: 
NonNegativeInt, - operator: Callable = operator.eq + operation: Callable = operator.eq ) -> None: async for attempt in AsyncRetrying( wait=wait_fixed(0.01), @@ -75,7 +75,7 @@ async def _assert_call_count( with attempt: assert handler.mock count = len(handler.mock.call_args_list) - assert operator(count) == expected_count + assert operation(count) == expected_count async def test_handler_called_as_expected( @@ -127,14 +127,14 @@ async def nacked_message_with_deco(msg: str) -> None: "", queue="nacked_message_no_deco", exchange=rabbit_exchange ) await _assert_call_count( - nacked_message_no_deco, expected_count=10, operator=operator.gt + nacked_message_no_deco, expected_count=10, operation=operator.gt ) await test_broker.publish( "", queue="nacked_message_with_deco", exchange=rabbit_exchange ) await _assert_call_count( - nacked_message_with_deco, expected_count=10, operator=operator.gt + nacked_message_with_deco, expected_count=10, operation=operator.gt ) @@ -194,7 +194,7 @@ async def unintended_error_with_deco(msg: str) -> None: "", queue="unintended_error_no_deco", exchange=rabbit_exchange ) await _assert_call_count( - unintended_error_no_deco, expected_count=10, operator=operator.gt + unintended_error_no_deco, expected_count=10, operation=operator.gt ) await test_broker.publish( From c6a97f40eb2f39cddf0b391b8556d90d5f3640d2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 12 Jul 2024 15:16:08 +0200 Subject: [PATCH 077/122] refactor to be correct --- packages/service-library/tests/deferred_tasks/test__utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/service-library/tests/deferred_tasks/test__utils.py b/packages/service-library/tests/deferred_tasks/test__utils.py index 30c8c7326637..535a26bc71d8 100644 --- a/packages/service-library/tests/deferred_tasks/test__utils.py +++ b/packages/service-library/tests/deferred_tasks/test__utils.py @@ -75,7 +75,7 @@ async def _assert_call_count( with attempt: assert handler.mock count = 
len(handler.mock.call_args_list) - assert operation(count) == expected_count + assert operation(count, expected_count) async def test_handler_called_as_expected( From d0a5415e0f70dc5bd3a4d586441b804dea2b0f08 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 10:58:44 +0200 Subject: [PATCH 078/122] removed redefined --- packages/service-library/src/servicelib/redis.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/packages/service-library/src/servicelib/redis.py b/packages/service-library/src/servicelib/redis.py index 497acaf348dd..03847ae0b046 100644 --- a/packages/service-library/src/servicelib/redis.py +++ b/packages/service-library/src/servicelib/redis.py @@ -230,13 +230,6 @@ class RedisManagerDBConfig: health_check_interval: datetime.timedelta = _DEFAULT_HEALTH_CHECK_INTERVAL -@dataclass(frozen=True) -class RedisManagerDBConfig: - database: RedisDatabase - decode_responses: bool = _DEFAULT_DECODE_RESPONSES - health_check_interval: datetime.timedelta = _DEFAULT_HEALTH_CHECK_INTERVAL - - @dataclass class RedisClientsManager: """ From b0a7bbee41b3d541d22e0788e6ec1b7b14129a66 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 11:00:59 +0200 Subject: [PATCH 079/122] removed unused --- .../api/rest/_dependencies.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py index 370e2f12feda..ce43766f5a33 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rest/_dependencies.py @@ -4,10 +4,7 @@ from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient from servicelib.redis import RedisClientSDK from settings_library.redis import RedisDatabase -from simcore_service_dynamic_scheduler.services.redis 
import ( - get_all_redis_clients, - get_redis_client, -) +from simcore_service_dynamic_scheduler.services.redis import get_all_redis_clients from ...services.rabbitmq import get_rabbitmq_client, get_rabbitmq_rpc_server @@ -29,10 +26,6 @@ def get_redis_clients_from_request( return get_all_redis_clients(request.app) -def get_redis_client_from_request(request: Request) -> RedisClientSDK: - return get_redis_client(request.app) - - __all__: tuple[str, ...] = ( "get_app", "get_reverse_url_mapper", From 40b01ba4b4de16dcd987df31c01ba950c27d1316 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 12:35:26 +0200 Subject: [PATCH 080/122] refactor fixture --- packages/service-library/tests/conftest.py | 6 ++++-- packages/service-library/tests/deferred_tasks/conftest.py | 6 ++++-- .../tests/deferred_tasks/test__base_deferred_handler.py | 5 ++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/packages/service-library/tests/conftest.py b/packages/service-library/tests/conftest.py index f069aeedd768..712746ccce97 100644 --- a/packages/service-library/tests/conftest.py +++ b/packages/service-library/tests/conftest.py @@ -76,9 +76,11 @@ async def get_redis_client_sdk( Callable[[RedisDatabase], AbstractAsyncContextManager[RedisClientSDK]] ]: @asynccontextmanager - async def _(database: RedisDatabase) -> AsyncIterator[RedisClientSDK]: + async def _( + database: RedisDatabase, decode_response: bool = True # noqa: FBT002 + ) -> AsyncIterator[RedisClientSDK]: redis_resources_dns = redis_service.build_redis_dsn(database) - client = RedisClientSDK(redis_resources_dns) + client = RedisClientSDK(redis_resources_dns, decode_responses=decode_response) assert client assert client.redis_dsn == redis_resources_dns await client.setup() diff --git a/packages/service-library/tests/deferred_tasks/conftest.py b/packages/service-library/tests/deferred_tasks/conftest.py index 642a67336b6b..3b5dcc75c049 100644 --- a/packages/service-library/tests/deferred_tasks/conftest.py 
+++ b/packages/service-library/tests/deferred_tasks/conftest.py @@ -9,8 +9,10 @@ @pytest.fixture async def redis_client_sdk_deferred_tasks( get_redis_client_sdk: Callable[ - [RedisDatabase], AbstractAsyncContextManager[RedisClientSDK] + [RedisDatabase, bool], AbstractAsyncContextManager[RedisClientSDK] ] ) -> AsyncIterator[RedisClientSDK]: - async with get_redis_client_sdk(RedisDatabase.DEFERRED_TASKS) as client: + async with get_redis_client_sdk( + RedisDatabase.DEFERRED_TASKS, False # noqa: FBT003 + ) as client: yield client diff --git a/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py b/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py index 80a6090a63eb..de12baaca587 100644 --- a/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py +++ b/packages/service-library/tests/deferred_tasks/test__base_deferred_handler.py @@ -52,7 +52,10 @@ class MockKeys(StrAutoEnum): async def redis_client_sdk( redis_service: RedisSettings, ) -> AsyncIterable[RedisClientSDK]: - sdk = RedisClientSDK(redis_service.build_redis_dsn(RedisDatabase.DEFERRED_TASKS)) + sdk = RedisClientSDK( + redis_service.build_redis_dsn(RedisDatabase.DEFERRED_TASKS), + decode_responses=False, + ) await sdk.setup() yield sdk await sdk.shutdown() From 5f8eb55e9df268d0e3b63b7fae419165e635a82c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 14:41:45 +0200 Subject: [PATCH 081/122] fixed failing tests --- services/dynamic-scheduler/tests/conftest.py | 29 ++++++++++++------- .../tests/unit/api_rest/conftest.py | 20 ++++++++++++- .../unit/api_rest/test_api_rest__health.py | 4 --- .../unit/api_rest/test_api_rest__meta.py | 15 ---------- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index 1212ff529bc8..464db028147e 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ 
b/services/dynamic-scheduler/tests/conftest.py @@ -4,6 +4,7 @@ import string from collections.abc import AsyncIterator from pathlib import Path +from typing import Final import pytest import simcore_service_dynamic_scheduler @@ -13,7 +14,6 @@ from pytest_mock import MockerFixture from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict from pytest_simcore.helpers.typing_env import EnvVarsDict -from pytest_simcore.helpers.utils_envs import setenvs_from_dict from servicelib.redis import RedisClientsManager, RedisManagerDBConfig from servicelib.utils import logged_gather from settings_library.redis import RedisDatabase, RedisSettings @@ -77,29 +77,38 @@ def app_environment( ) +_PATH_APPLICATION: Final[str] = "simcore_service_dynamic_scheduler.core.application" + + @pytest.fixture def disable_rabbitmq_setup(mocker: MockerFixture) -> None: - base_path = "simcore_service_dynamic_scheduler.core.application" - mocker.patch(f"{base_path}.setup_rabbitmq") - mocker.patch(f"{base_path}.setup_rpc_api_routes") + mocker.patch(f"{_PATH_APPLICATION}.setup_rabbitmq") + mocker.patch(f"{_PATH_APPLICATION}.setup_rpc_api_routes") @pytest.fixture def disable_redis_setup(mocker: MockerFixture) -> None: - base_path = "simcore_service_dynamic_scheduler.core.application" - mocker.patch(f"{base_path}.setup_redis") + mocker.patch(f"{_PATH_APPLICATION}.setup_redis") @pytest.fixture def disable_service_tracker_setup(mocker: MockerFixture) -> None: - base_path = "simcore_service_dynamic_scheduler.core.application" - mocker.patch(f"{base_path}.setup_service_tracker") + mocker.patch(f"{_PATH_APPLICATION}.setup_service_tracker") @pytest.fixture def disable_deferred_manager_setup(mocker: MockerFixture) -> None: - base_path = "simcore_service_dynamic_scheduler.core.application" - mocker.patch(f"{base_path}.setup_deferred_manager") + mocker.patch(f"{_PATH_APPLICATION}.setup_deferred_manager") + + +@pytest.fixture +def disable_notifier_setup(mocker: MockerFixture) -> None: + 
mocker.patch(f"{_PATH_APPLICATION}.setup_notifier") + + +@pytest.fixture +def disable_status_monitor_setup(mocker: MockerFixture) -> None: + mocker.patch(f"{_PATH_APPLICATION}.setup_status_monitor") MAX_TIME_FOR_APP_TO_STARTUP = 10 diff --git a/services/dynamic-scheduler/tests/unit/api_rest/conftest.py b/services/dynamic-scheduler/tests/unit/api_rest/conftest.py index 987ed8c4d851..efef4241d981 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/conftest.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/conftest.py @@ -1,13 +1,31 @@ +# pylint:disable=redefined-outer-name +# pylint:disable=unused-argument from collections.abc import AsyncIterator import pytest from fastapi import FastAPI from httpx import AsyncClient from httpx._transports.asgi import ASGITransport +from pytest_simcore.helpers.typing_env import EnvVarsDict @pytest.fixture -async def client(app: FastAPI) -> AsyncIterator[AsyncClient]: +def app_environment( + disable_rabbitmq_setup: None, + disable_redis_setup: None, + disable_service_tracker_setup: None, + disable_deferred_manager_setup: None, + disable_notifier_setup: None, + disable_status_monitor_setup: None, + app_environment: EnvVarsDict, +) -> EnvVarsDict: + return app_environment + + +@pytest.fixture +async def client( + app_environment: EnvVarsDict, app: FastAPI +) -> AsyncIterator[AsyncClient]: # - Needed for app to trigger start/stop event handlers # - Prefer this client instead of fastapi.testclient.TestClient async with AsyncClient( diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py index 8365a72670e9..9b5648e12b4e 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__health.py @@ -21,7 +21,6 @@ def __init__(self, is_ok: bool) -> None: @pytest.fixture def mock_rabbitmq_clients( - disable_rabbitmq_setup: None, mocker: 
MockerFixture, rabbit_client_ok: bool, rabbit_rpc_server_ok: bool, @@ -39,8 +38,6 @@ def mock_rabbitmq_clients( @pytest.fixture def mock_redis_client( - disable_redis_setup: None, - disable_service_tracker_setup: None, mocker: MockerFixture, redis_client_ok: bool, ) -> None: @@ -55,7 +52,6 @@ def mock_redis_client( def app_environment( mock_rabbitmq_clients: None, mock_redis_client: None, - disable_deferred_manager_setup: None, app_environment: EnvVarsDict, ) -> EnvVarsDict: return app_environment diff --git a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py index 79b59ab204df..8d986dfe60ed 100644 --- a/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py +++ b/services/dynamic-scheduler/tests/unit/api_rest/test_api_rest__meta.py @@ -1,26 +1,11 @@ # pylint:disable=redefined-outer-name # pylint:disable=unused-argument - - -import pytest from fastapi import status from httpx import AsyncClient -from pytest_simcore.helpers.typing_env import EnvVarsDict from simcore_service_dynamic_scheduler._meta import API_VTAG from simcore_service_dynamic_scheduler.models.schemas.meta import Meta -@pytest.fixture -def app_environment( - disable_rabbitmq_setup: None, - disable_redis_setup: None, - disable_service_tracker_setup: None, - disable_deferred_manager_setup: None, - app_environment: EnvVarsDict, -) -> EnvVarsDict: - return app_environment - - async def test_health(client: AsyncClient): response = await client.get(f"/{API_VTAG}/meta") assert response.status_code == status.HTTP_200_OK From d906cd6ce94c4fcbc95192b4f3a29f5fc4d01399 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 14:41:57 +0200 Subject: [PATCH 082/122] renaming --- .../services/notifier/_notifier.py | 2 +- .../services/notifier/_setup.py | 2 +- .../services/service_tracker/_tracker.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py index e650bbd2e5c1..8b64403a38d1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py @@ -39,7 +39,7 @@ async def notify_service_status_change( await notifier.notify_service_status(user_id=user_id, status=status) -def setup_notifier(app: FastAPI): +def setup(app: FastAPI): async def _on_startup() -> None: assert app.state.external_socketio # nosec diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py index 935a301f045d..1542afa8a87d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_setup.py @@ -5,4 +5,4 @@ def setup_notifier(app: FastAPI): _socketio.setup(app) - _notifier.setup_notifier(app) + _notifier.setup(app) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py index fa67a9c488dc..489cee153105 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_tracker.py @@ -2,7 +2,7 @@ from typing import Final from models_library.projects_nodes_io import NodeID -from servicelib.redis import RedisClientSDKHealthChecked +from servicelib.redis import RedisClientSDK from ._models import TrackedServiceModel @@ -15,7 +15,7 @@ 
def _get_key(node_id: NodeID) -> str: @dataclass class Tracker: - redis_client_sdk: RedisClientSDKHealthChecked + redis_client_sdk: RedisClientSDK async def save(self, node_id: NodeID, model: TrackedServiceModel) -> None: await self.redis_client_sdk.redis.set(_get_key(node_id), model.to_bytes()) From be81d86dd3964e644858a298c4b3b969c459df7c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 14:46:05 +0200 Subject: [PATCH 083/122] fix failing tests --- .../dynamic-scheduler/tests/unit/service_tracker/test__api.py | 1 + .../tests/unit/service_tracker/test__tracker.py | 1 + services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py | 2 ++ services/dynamic-scheduler/tests/unit/test_services_redis.py | 2 ++ 4 files changed, 6 insertions(+) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index ccc9fd92c250..64aea4802964 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -51,6 +51,7 @@ def app_environment( disable_rabbitmq_setup: None, disable_deferred_manager_setup: None, + disable_notifier_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, remove_redis_data: None, diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py index 0dce5e355bae..59739ddf8f60 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__tracker.py @@ -28,6 +28,7 @@ def app_environment( disable_rabbitmq_setup: None, disable_deferred_manager_setup: None, + disable_notifier_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, remove_redis_data: None, diff --git a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py 
b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py index 4600f18a0927..eadb7c9ee038 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py +++ b/services/dynamic-scheduler/tests/unit/test_services_rabbitmq.py @@ -23,6 +23,8 @@ def app_environment( disable_redis_setup: None, disable_service_tracker_setup: None, disable_deferred_manager_setup: None, + disable_notifier_setup: None, + disable_status_monitor_setup: None, app_environment: EnvVarsDict, rabbit_service: RabbitSettings, ) -> EnvVarsDict: diff --git a/services/dynamic-scheduler/tests/unit/test_services_redis.py b/services/dynamic-scheduler/tests/unit/test_services_redis.py index 090f27ff5b38..059a17aeb0fc 100644 --- a/services/dynamic-scheduler/tests/unit/test_services_redis.py +++ b/services/dynamic-scheduler/tests/unit/test_services_redis.py @@ -17,6 +17,8 @@ def app_environment( disable_rabbitmq_setup: None, disable_deferred_manager_setup: None, + disable_notifier_setup: None, + disable_status_monitor_setup: None, app_environment: EnvVarsDict, redis_service: RedisSettings, ) -> EnvVarsDict: From 2d0bf027f292c0815fbf93b7582b60cd33388862 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 2 Aug 2024 14:55:40 +0200 Subject: [PATCH 084/122] fixed some issues --- .../test_services_status_monitor__monitor.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index c819ec303207..d3abc42e048e 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -81,7 +81,7 @@ def _get_node_get_with(state: str, node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGe return NodeGet.parse_obj(dict_data) -def 
__get_dynamic_service_get_legacy_with( +def _get_dynamic_service_get_legacy_with( state: str, node_id: NodeID = _DEFAULT_NODE_ID ) -> DynamicServiceGet: dict_data = deepcopy(DynamicServiceGet.Config.schema_extra["examples"][0]) @@ -96,7 +96,7 @@ def __get_dynamic_service_get_legacy_with( return DynamicServiceGet.parse_obj(dict_data) -def __get_dynamic_service_get_new_style_with( +def _get_dynamic_service_get_new_style_with( state: str, node_id: NodeID = _DEFAULT_NODE_ID ) -> DynamicServiceGet: dict_data = deepcopy(DynamicServiceGet.Config.schema_extra["examples"][1]) @@ -111,7 +111,7 @@ def __get_dynamic_service_get_new_style_with( return DynamicServiceGet.parse_obj(dict_data) -def __get_node_get_idle(node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGetIdle: +def _get_node_get_idle(node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGetIdle: dict_data = NodeGetIdle.Config.schema_extra["example"] _add_to_dict( dict_data, @@ -143,7 +143,7 @@ def get_status(self, node_id: NodeID) -> NodeGet | DynamicServiceGet | NodeGetId # always return node idle when timeline finished playing if self._client_access_history[node_id] >= len(self._timeline): - return __get_node_get_idle() + return _get_node_get_idle() status = self._timeline[self._client_access_history[node_id]] self._client_access_history[node_id] += 1 @@ -217,7 +217,7 @@ def _side_effect_node_status_response(request: Request) -> Response: if isinstance(service_status, NodeGetIdle): return Response(status.HTTP_404_NOT_FOUND) - raise TypeError() + raise TypeError with respx.mock( base_url=app.state.settings.DYNAMIC_SCHEDULER_DIRECTOR_V2_SETTINGS.api_base_url, @@ -290,7 +290,7 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: pytest.param( True, _ResponseTimeline( - [__get_dynamic_service_get_legacy_with("running") for _ in range(10)] + [_get_dynamic_service_get_legacy_with("running") for _ in range(10)] ), 1, 0, @@ -298,14 +298,14 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: ), pytest.param( True, - 
_ResponseTimeline([__get_node_get_idle()]), + _ResponseTimeline([_get_node_get_idle()]), 1, 0, id="requested_running_state_idle_no_removal", ), pytest.param( False, - _ResponseTimeline([__get_node_get_idle()]), + _ResponseTimeline([_get_node_get_idle()]), 1, 1, id="requested_stopped_state_idle_is_removed", @@ -314,17 +314,17 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: True, _ResponseTimeline( [ - *[__get_node_get_idle() for _ in range(10)], - __get_dynamic_service_get_new_style_with("pending"), - __get_dynamic_service_get_new_style_with("pulling"), + *[_get_node_get_idle() for _ in range(10)], + _get_dynamic_service_get_new_style_with("pending"), + _get_dynamic_service_get_new_style_with("pulling"), *[ - __get_dynamic_service_get_new_style_with("starting") + _get_dynamic_service_get_new_style_with("starting") for _ in range(10) ], - __get_dynamic_service_get_new_style_with("running"), - __get_dynamic_service_get_new_style_with("stopping"), - __get_dynamic_service_get_new_style_with("complete"), - __get_node_get_idle(), + _get_dynamic_service_get_new_style_with("running"), + _get_dynamic_service_get_new_style_with("stopping"), + _get_dynamic_service_get_new_style_with("complete"), + _get_node_get_idle(), ] ), 8, @@ -335,16 +335,16 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: False, _ResponseTimeline( [ - __get_dynamic_service_get_new_style_with("pending"), - __get_dynamic_service_get_new_style_with("pulling"), + _get_dynamic_service_get_new_style_with("pending"), + _get_dynamic_service_get_new_style_with("pulling"), *[ - __get_dynamic_service_get_new_style_with("starting") + _get_dynamic_service_get_new_style_with("starting") for _ in range(10) ], - __get_dynamic_service_get_new_style_with("running"), - __get_dynamic_service_get_new_style_with("stopping"), - __get_dynamic_service_get_new_style_with("complete"), - __get_node_get_idle(), + _get_dynamic_service_get_new_style_with("running"), + 
_get_dynamic_service_get_new_style_with("stopping"), + _get_dynamic_service_get_new_style_with("complete"), + _get_node_get_idle(), ] ), 7, From d4d2580c7f38229b4a16dff9828772311329f48e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 19 Aug 2024 15:57:28 +0200 Subject: [PATCH 085/122] revert and update credentials --- services/docker-compose-ops.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/services/docker-compose-ops.yml b/services/docker-compose-ops.yml index daba6fdd1f0b..f78a93f32d55 100644 --- a/services/docker-compose-ops.yml +++ b/services/docker-compose-ops.yml @@ -75,15 +75,15 @@ services: environment: - >- REDIS_HOSTS= - resources:${REDIS_HOST}:${REDIS_PORT}:0, - locks:${REDIS_HOST}:${REDIS_PORT}:1, - validation_codes:${REDIS_HOST}:${REDIS_PORT}:2, - scheduled_maintenance:${REDIS_HOST}:${REDIS_PORT}:3, - user_notifications:${REDIS_HOST}:${REDIS_PORT}:4, - announcements:${REDIS_HOST}:${REDIS_PORT}:5, - distributed_identifiers:${REDIS_HOST}:${REDIS_PORT}:6, - deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7, - dynamic_services:${REDIS_HOST}:${REDIS_PORT}:8 + resources:${REDIS_HOST}:${REDIS_PORT}:0:${REDIS_PASSWORD}, + locks:${REDIS_HOST}:${REDIS_PORT}:1:${REDIS_PASSWORD}, + validation_codes:${REDIS_HOST}:${REDIS_PORT}:2:${REDIS_PASSWORD}, + scheduled_maintenance:${REDIS_HOST}:${REDIS_PORT}:3:${REDIS_PASSWORD}, + user_notifications:${REDIS_HOST}:${REDIS_PORT}:4:${REDIS_PASSWORD}, + announcements:${REDIS_HOST}:${REDIS_PORT}:5:${REDIS_PASSWORD}, + distributed_identifiers:${REDIS_HOST}:${REDIS_PORT}:6:${REDIS_PASSWORD}, + deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7:${REDIS_PASSWORD}, + dynamic_services:${REDIS_HOST}:${REDIS_PORT}:8:${REDIS_PASSWORD} # If you add/remove a db, do not forget to update the --databases entry in the docker-compose.yml ports: - "18081:8081" From 8d5d006089e627ecf505541bd1237c72bb4e96f3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 19 Aug 2024 15:58:21 +0200 Subject: [PATCH 
086/122] remove spaces --- services/docker-compose-ops.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/services/docker-compose-ops.yml b/services/docker-compose-ops.yml index f78a93f32d55..5b2b8b689615 100644 --- a/services/docker-compose-ops.yml +++ b/services/docker-compose-ops.yml @@ -76,14 +76,14 @@ services: - >- REDIS_HOSTS= resources:${REDIS_HOST}:${REDIS_PORT}:0:${REDIS_PASSWORD}, - locks:${REDIS_HOST}:${REDIS_PORT}:1:${REDIS_PASSWORD}, - validation_codes:${REDIS_HOST}:${REDIS_PORT}:2:${REDIS_PASSWORD}, - scheduled_maintenance:${REDIS_HOST}:${REDIS_PORT}:3:${REDIS_PASSWORD}, - user_notifications:${REDIS_HOST}:${REDIS_PORT}:4:${REDIS_PASSWORD}, - announcements:${REDIS_HOST}:${REDIS_PORT}:5:${REDIS_PASSWORD}, - distributed_identifiers:${REDIS_HOST}:${REDIS_PORT}:6:${REDIS_PASSWORD}, - deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7:${REDIS_PASSWORD}, - dynamic_services:${REDIS_HOST}:${REDIS_PORT}:8:${REDIS_PASSWORD} + locks:${REDIS_HOST}:${REDIS_PORT}:1:${REDIS_PASSWORD}, + validation_codes:${REDIS_HOST}:${REDIS_PORT}:2:${REDIS_PASSWORD}, + scheduled_maintenance:${REDIS_HOST}:${REDIS_PORT}:3:${REDIS_PASSWORD}, + user_notifications:${REDIS_HOST}:${REDIS_PORT}:4:${REDIS_PASSWORD}, + announcements:${REDIS_HOST}:${REDIS_PORT}:5:${REDIS_PASSWORD}, + distributed_identifiers:${REDIS_HOST}:${REDIS_PORT}:6:${REDIS_PASSWORD}, + deferred_tasks:${REDIS_HOST}:${REDIS_PORT}:7:${REDIS_PASSWORD}, + dynamic_services:${REDIS_HOST}:${REDIS_PORT}:8:${REDIS_PASSWORD} # If you add/remove a db, do not forget to update the --databases entry in the docker-compose.yml ports: - "18081:8081" From fa4f6ef35f2df9ab748548343ad768df4f25812a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 19 Aug 2024 16:21:38 +0200 Subject: [PATCH 087/122] fixed broken test --- .../services/status_monitor/_monitor.py | 2 +- .../test_services_status_monitor__monitor.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index 05d7ec733cf5..b80daf3abdb5 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -110,5 +110,5 @@ async def setup(self) -> None: ) async def shutdown(self) -> None: - if self.app.state.status_monitor_background_task: + if getattr(self.app.state, "status_monitor_background_task", None): await stop_periodic_task(self.app.state.status_monitor_background_task) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index d3abc42e048e..f5d9d6521fa0 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -5,7 +5,6 @@ import re from collections.abc import AsyncIterable, Callable from copy import deepcopy -from datetime import timedelta from typing import Any from unittest.mock import AsyncMock from uuid import uuid4 @@ -28,7 +27,6 @@ from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( - _api, get_all_tracked, set_request_as_running, set_request_as_stopped, @@ -271,10 +269,10 @@ def mocked_notify_frontend(mocker: MockerFixture) -> AsyncMock: @pytest.fixture -def mock_poll_rate_intervals(mocker: MockerFixture) -> None: - mocker.patch.object(_api, "_LOW_RATE_POLL_INTERVAL", timedelta(seconds=0.1)) - mocker.patch.object(_api, "NORMAL_RATE_POLL_INTERVAL", timedelta(seconds=0.2)) - 
mocker.patch.object(_monitor, "NORMAL_RATE_POLL_INTERVAL", timedelta(seconds=0.2)) +def disable_status_monitor_background_task(mocker: MockerFixture) -> None: + mocker.patch( + "simcore_service_dynamic_scheduler.services.status_monitor._monitor.Monitor.setup" + ) @pytest.mark.parametrize( @@ -354,7 +352,7 @@ def mock_poll_rate_intervals(mocker: MockerFixture) -> None: ], ) async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arguments - mock_poll_rate_intervals: None, + disable_status_monitor_background_task: None, mocked_notify_frontend: AsyncMock, deferred_status_spies: dict[str, AsyncMock], remove_tracked_spy: AsyncMock, From cc310eee9db383b240df9de9d451dd29911ea840 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 19 Aug 2024 16:42:07 +0200 Subject: [PATCH 088/122] fix mypy --- .../services/notifier/_notifier.py | 2 +- .../services/notifier/_socketio.py | 2 +- .../services/service_tracker/_api.py | 20 +++++++++++-------- .../status_monitor/_deferred_get_status.py | 11 +++++++--- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py index 8b64403a38d1..d0bf384b2c8d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py @@ -1,6 +1,6 @@ import contextlib -import socketio +import socketio # type: ignore[import-untyped] from fastapi import FastAPI from fastapi.encoders import jsonable_encoder from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py index 
521786004818..2f0abfbd3af1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py @@ -1,6 +1,6 @@ import logging -import socketio +import socketio # type: ignore[import-untyped] from fastapi import FastAPI from servicelib.socketio_utils import cleanup_socketio_async_pubsub_manager diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index b8048277b139..39deec0087d6 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -102,26 +102,30 @@ def _get_current_state( """ if isinstance(status, NodeGetIdle): - return SchedulerServiceState.IDLE # type:ignore + return SchedulerServiceState.IDLE service_state: ServiceState = ServiceState(__get_state_str(status)) if requested_sate == UserRequestedState.RUNNING: if service_state == ServiceState.RUNNING: - return SchedulerServiceState.RUNNING # type:ignore + return SchedulerServiceState.RUNNING - if ServiceState.PENDING <= service_state <= ServiceState.STARTING: - return SchedulerServiceState.STARTING # type:ignore + if ( + ServiceState.PENDING # type:ignore[operator] + <= service_state + <= ServiceState.STARTING + ): + return SchedulerServiceState.STARTING if service_state < ServiceState.PENDING or service_state > ServiceState.RUNNING: - return SchedulerServiceState.UNEXPECTED_OUTCOME # type:ignore + return SchedulerServiceState.UNEXPECTED_OUTCOME if requested_sate == UserRequestedState.STOPPED: - if service_state >= ServiceState.RUNNING: - return SchedulerServiceState.STOPPING # type:ignore + if service_state >= ServiceState.RUNNING: # type:ignore[operator] + 
return SchedulerServiceState.STOPPING if service_state < ServiceState.RUNNING: - return SchedulerServiceState.UNEXPECTED_OUTCOME # type:ignore + return SchedulerServiceState.UNEXPECTED_OUTCOME msg = f"Could not determine current_state from: '{requested_sate=}', '{status=}'" raise TypeError(msg) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index 38542886a785..d34dd1c15e3a 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -3,6 +3,9 @@ from fastapi import FastAPI from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_directorv2.dynamic_services_service import ( + RunningDynamicServiceDetails, +) from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID from models_library.users import UserID @@ -23,7 +26,7 @@ async def get_timeout(cls, context: DeferredContext) -> timedelta: return timedelta(seconds=5) @classmethod - async def start( # pylint:disable=arguments-differ + async def start( # type:ignore[override] # pylint:disable=arguments-differ cls, node_id: NodeID ) -> DeferredContext: _logger.debug("Getting service status for %s", node_id) @@ -44,8 +47,10 @@ async def run( app: FastAPI = context["app"] node_id: NodeID = context["node_id"] - director_v2_client = DirectorV2Client.get_from_app_state(app) - service_status = await director_v2_client.get_status(node_id) + director_v2_client: DirectorV2Client = DirectorV2Client.get_from_app_state(app) + service_status: NodeGet | RunningDynamicServiceDetails | NodeGetIdle = ( + await 
director_v2_client.get_status(node_id) + ) _logger.debug( "Service status type=%s, %s", type(service_status), service_status ) From f50e10076459f3f402d9447f0f0dc9d8c61d15d5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 19 Aug 2024 17:08:58 +0200 Subject: [PATCH 089/122] refactor docstrings --- .../services/status_monitor/_monitor.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index b80daf3abdb5..3a2f8fcd267d 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -20,6 +20,7 @@ _logger = logging.getLogger(__name__) +_INTERVAL_BETWEEN_CHECKS: Final[timedelta] = timedelta(seconds=1) _MAX_CONCURRENCY: Final[NonNegativeInt] = 10 @@ -40,6 +41,12 @@ def status_worker_interval_seconds(self) -> NonNegativeFloat: return self.status_worker_interval.total_seconds() async def _worker_start_get_status_requests(self) -> None: + """ + Check if a service requires its status to be polled. + Note that the interval at which the status is polled can vary. + This is a relatively low resource check. 
+ """ + # NOTE: this worker runs on only once across all instances of the scheduler models: dict[ @@ -104,8 +111,8 @@ async def setup(self) -> None: self.app.state.status_monitor_background_task = start_exclusive_periodic_task( get_redis_client(self.app, RedisDatabase.LOCKS), self._worker_start_get_status_requests, - task_period=timedelta(seconds=1), - retry_after=timedelta(seconds=1), + task_period=_INTERVAL_BETWEEN_CHECKS, + retry_after=_INTERVAL_BETWEEN_CHECKS, task_name="periodic_service_status_update", ) From b79f975c1e34c3c6c7c7735288fae5f72d70ea09 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 10:18:52 +0200 Subject: [PATCH 090/122] expanded exmaple --- .../api_schemas_webserver/projects_nodes.py | 43 +++++++++++++------ .../tests/unit/service_tracker/test__api.py | 6 +-- .../test_services_status_monitor__monitor.py | 2 +- .../unit/isolated/test_dynamic_scheduler.py | 4 +- .../02/test_projects_states_handlers.py | 2 +- 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py b/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py index be2ae33bfa8d..0c2bdd07c7fd 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/projects_nodes.py @@ -93,19 +93,36 @@ class NodeGet(OutputSchema): class Config: schema_extra: ClassVar[dict[str, Any]] = { - "example": { - "published_port": 30000, - "entrypoint": "/the/entry/point/is/here", - "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", - "service_key": "simcore/services/dynamic/some-dynamic-service", - "service_version": "1.2.3", - "service_host": "jupyter_E1O2E-LAH", - "service_port": 8081, - "service_basepath": "/x/E1O2E-LAH", - "service_state": "pending", - "service_message": "no suitable node (insufficient resources on 1 node)", - "user_id": 123, - } + "examples": 
[ + # computational + { + "published_port": 30000, + "entrypoint": "/the/entry/point/is/here", + "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "service_key": "simcore/services/comp/itis/sleeper", + "service_version": "1.2.3", + "service_host": "jupyter_E1O2E-LAH", + "service_port": 8081, + "service_basepath": "/x/E1O2E-LAH", + "service_state": "pending", + "service_message": "no suitable node (insufficient resources on 1 node)", + "user_id": 123, + }, + # dynamic + { + "published_port": 30000, + "entrypoint": "/the/entry/point/is/here", + "service_uuid": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "service_key": "simcore/services/dynamic/some-dynamic-service", + "service_version": "1.2.3", + "service_host": "jupyter_E1O2E-LAH", + "service_port": 8081, + "service_basepath": "/x/E1O2E-LAH", + "service_state": "pending", + "service_message": "no suitable node (insufficient resources on 1 node)", + "user_id": 123, + }, + ] } diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 64aea4802964..847caba48212 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -120,7 +120,7 @@ async def test_services_tracer_workflow( @pytest.mark.parametrize( "status", [ - NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + *[NodeGet.parse_obj(x) for x in NodeGet.Config.schema_extra["examples"]], *[ DynamicServiceGet.parse_obj(x) for x in DynamicServiceGet.Config.schema_extra["examples"] @@ -167,7 +167,7 @@ async def test_set_service_status_task_uid( "status, expected_poll_interval", [ ( - NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + NodeGet.parse_obj(NodeGet.Config.schema_extra["examples"][1]), _LOW_RATE_POLL_INTERVAL, ), *[ @@ -187,7 +187,7 @@ def test__get_poll_interval( def _get_node_get_from(service_state: ServiceState) -> NodeGet: - dict_data = 
NodeGet.Config.schema_extra["example"] + dict_data = NodeGet.Config.schema_extra["examples"][1] assert "service_state" in dict_data dict_data["service_state"] = service_state return NodeGet.parse_obj(dict_data) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index f5d9d6521fa0..8098b67d41f7 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -68,7 +68,7 @@ def _add_to_dict(dict_data: dict, entries: list[tuple[str, Any]]) -> None: def _get_node_get_with(state: str, node_id: NodeID = _DEFAULT_NODE_ID) -> NodeGet: - dict_data = deepcopy(NodeGet.Config.schema_extra["example"]) + dict_data = deepcopy(NodeGet.Config.schema_extra["examples"][1]) _add_to_dict( dict_data, [ diff --git a/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py b/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py index 0823f52b1b29..6308141d2540 100644 --- a/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py +++ b/services/web/server/tests/unit/isolated/test_dynamic_scheduler.py @@ -55,7 +55,7 @@ def dynamic_service_start() -> DynamicServiceStart: @pytest.mark.parametrize( "expected_response", [ - NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + *[NodeGet.parse_obj(x) for x in NodeGet.Config.schema_extra["examples"]], NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), DynamicServiceGet.parse_obj( DynamicServiceGet.Config.schema_extra["examples"][0] @@ -98,7 +98,7 @@ async def test_get_service_status_raises_rpc_server_error( @pytest.mark.parametrize( "expected_response", [ - NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]), + *[NodeGet.parse_obj(x) for x in NodeGet.Config.schema_extra["examples"]], 
DynamicServiceGet.parse_obj( DynamicServiceGet.Config.schema_extra["examples"][0] ), diff --git a/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py b/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py index 8613fbc83193..d34adace8ae1 100644 --- a/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py +++ b/services/web/server/tests/unit/with_dbs/02/test_projects_states_handlers.py @@ -1036,7 +1036,7 @@ async def test_project_node_lifetime( # noqa: PLR0915 project_id=user_project["uuid"], node_id=dynamic_node_id ) - node_sample = deepcopy(NodeGet.Config.schema_extra["example"]) + node_sample = deepcopy(NodeGet.Config.schema_extra["examples"][1]) mocked_director_v2_api[ "dynamic_scheduler.api.get_dynamic_service" ].return_value = NodeGet.parse_obj( From d43fda72dd7115c948d5bae47daf18fff117982c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 10:32:48 +0200 Subject: [PATCH 091/122] refactor --- packages/models-library/src/models_library/utils/enums.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/models-library/src/models_library/utils/enums.py b/packages/models-library/src/models_library/utils/enums.py index 3cd19a288e2a..40a785457eef 100644 --- a/packages/models-library/src/models_library/utils/enums.py +++ b/packages/models-library/src/models_library/utils/enums.py @@ -5,6 +5,11 @@ class auto_str(enum.auto): # noqa: N801 + """ + To be used in place of `auto()` when inheriting from `StrAutoEnum` + Makes typechecking play nice. 
+ """ + value: str = enum._auto_null # pylint:disable=protected-access # noqa: SLF001 From 329f617a05bb9af34feacfe9c77ac0220bc8768f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 10:40:28 +0200 Subject: [PATCH 092/122] renamed and moved --- .../dynamic_scheduler/services.py | 17 +---------------- .../src/servicelib/services_utils.py | 17 +++++++++++++++++ .../services/notifier/_notifier.py | 6 ++---- .../projects/_nodes_handlers.py | 6 ++---- 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py index eb9abf33907b..9da2dad425e2 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/dynamic_scheduler/services.py @@ -7,11 +7,7 @@ DynamicServiceStart, DynamicServiceStop, ) -from models_library.api_schemas_webserver.projects_nodes import ( - NodeGet, - NodeGetIdle, - NodeGetUnknown, -) +from models_library.api_schemas_webserver.projects_nodes import NodeGet, NodeGetIdle from models_library.projects_nodes_io import NodeID from models_library.rabbitmq_basic_types import RPCMethodName from pydantic import NonNegativeInt, parse_obj_as @@ -75,14 +71,3 @@ async def stop_dynamic_service( timeout_s=timeout_s, ) assert result is None # nosec - - -def get_dict_from_status( - status: NodeGetIdle | NodeGetUnknown | DynamicServiceGet | NodeGet, -) -> dict: - """shared between different backend services to guarantee same result to frontend""" - return ( - status.dict(by_alias=True) - if isinstance(status, DynamicServiceGet) - else status.dict() - ) diff --git a/packages/service-library/src/servicelib/services_utils.py b/packages/service-library/src/servicelib/services_utils.py index 60a9caf92a53..98aace49c6c6 100644 --- 
a/packages/service-library/src/servicelib/services_utils.py +++ b/packages/service-library/src/servicelib/services_utils.py @@ -1,5 +1,11 @@ import urllib.parse +from models_library.api_schemas_directorv2.dynamic_services import DynamicServiceGet +from models_library.api_schemas_webserver.projects_nodes import ( + NodeGet, + NodeGetIdle, + NodeGetUnknown, +) from models_library.services import ServiceType @@ -9,3 +15,14 @@ def get_service_from_key(service_key: str) -> ServiceType: if encoded_service_type == "comp": encoded_service_type = "computational" return ServiceType(encoded_service_type) + + +def get_status_as_dict( + status: NodeGetIdle | NodeGetUnknown | DynamicServiceGet | NodeGet, +) -> dict: + """shared between different backend services to guarantee same result to frontend""" + return ( + status.dict(by_alias=True) + if isinstance(status, DynamicServiceGet) + else status.dict() + ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py index d0bf384b2c8d..0b8690a96766 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_notifier.py @@ -11,9 +11,7 @@ from models_library.api_schemas_webserver.socketio import SocketIORoomStr from models_library.users import UserID from servicelib.fastapi.app_state import SingletonInAppStateMixin -from servicelib.rabbitmq.rpc_interfaces.dynamic_scheduler.services import ( - get_dict_from_status, -) +from servicelib.services_utils import get_status_as_dict class Notifier(SingletonInAppStateMixin): @@ -27,7 +25,7 @@ async def notify_service_status( ) -> None: await self._sio_manager.emit( SOCKET_IO_SERVICE_STATUS_EVENT, - data=jsonable_encoder(get_dict_from_status(status)), + data=jsonable_encoder(get_status_as_dict(status)), 
room=SocketIORoomStr.from_user_id(user_id), ) diff --git a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py index ccdeb5144d85..3cdd3332bb33 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py +++ b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py @@ -57,9 +57,7 @@ ServiceWaitingForManualInterventionError, ServiceWasNotFoundError, ) -from servicelib.rabbitmq.rpc_interfaces.dynamic_scheduler.services import ( - get_dict_from_status, -) +from servicelib.services_utils import get_status_as_dict from simcore_postgres_database.models.users import UserRole from .._meta import API_VTAG as VTAG @@ -211,7 +209,7 @@ async def get_node(request: web.Request) -> web.Response: ) ) - return envelope_json_response(get_dict_from_status(service_data)) + return envelope_json_response(get_status_as_dict(service_data)) @routes.patch( From a5734743ca13e6a88960ea072295303a1011b2b5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 10:42:12 +0200 Subject: [PATCH 093/122] using keywordargument --- packages/service-library/tests/deferred_tasks/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/service-library/tests/deferred_tasks/conftest.py b/packages/service-library/tests/deferred_tasks/conftest.py index 3b5dcc75c049..00881e614715 100644 --- a/packages/service-library/tests/deferred_tasks/conftest.py +++ b/packages/service-library/tests/deferred_tasks/conftest.py @@ -13,6 +13,6 @@ async def redis_client_sdk_deferred_tasks( ] ) -> AsyncIterator[RedisClientSDK]: async with get_redis_client_sdk( - RedisDatabase.DEFERRED_TASKS, False # noqa: FBT003 + RedisDatabase.DEFERRED_TASKS, decode_response=False ) as client: yield client From 01d6fafbc59610dca1c2e2e86b06b9b0bcd448c2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 11:09:54 +0200 Subject: [PATCH 
094/122] removed unrequired keywork argument --- .../src/simcore_service_dynamic_scheduler/api/rpc/_services.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py index 8aa6eaf5ba8c..0687c58bac12 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/api/rpc/_services.py @@ -61,5 +61,5 @@ async def stop_dynamic_service( timeout=settings.DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT, ) ) - await set_request_as_stopped(app, dynamic_service_stop=dynamic_service_stop) + await set_request_as_stopped(app, dynamic_service_stop) return response From f21eae92df7df598c483ab2d6875d1e9a03f6ba5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 11:10:08 +0200 Subject: [PATCH 095/122] removed unrequired typing ignore --- packages/service-library/tests/deferred_tasks/example_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/service-library/tests/deferred_tasks/example_app.py b/packages/service-library/tests/deferred_tasks/example_app.py index cb0d99fd95a0..e6acf3fb8c1a 100644 --- a/packages/service-library/tests/deferred_tasks/example_app.py +++ b/packages/service-library/tests/deferred_tasks/example_app.py @@ -64,7 +64,7 @@ def _get_queue_name(self, queue_name: str) -> str: return f"in_memory_lists::{queue_name}.{self.port}" async def append_to(self, queue_name: str, value: Any) -> None: - await self.redis_sdk.redis.rpush(self._get_queue_name(queue_name), value) # type: ignore + await self.redis_sdk.redis.rpush(self._get_queue_name(queue_name), value) async def get_all_from(self, queue_name: str) -> list: return await self.redis_sdk.redis.lrange( From a5e0b65e8cee9d483124663c9e1d9b9c6c78b126 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: 
Wed, 21 Aug 2024 11:24:18 +0200 Subject: [PATCH 096/122] update comment --- .../src/simcore_service_director_v2/modules/socketio.py | 2 +- .../services/notifier/_socketio.py | 2 +- .../modules/system_monitor/_socketio.py | 2 +- .../payments/src/simcore_service_payments/services/socketio.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/socketio.py b/services/director-v2/src/simcore_service_director_v2/modules/socketio.py index 5a6a561e9733..27e8025dca88 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/socketio.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/socketio.py @@ -15,7 +15,7 @@ def setup(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the as an external process in write-only mode + # Connect to the webserver's external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes app.state.external_socketio = socketio.AsyncAioPikaManager( url=settings.DIRECTOR_V2_RABBITMQ.dsn, diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py index 2f0abfbd3af1..1de03a9aa97b 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py @@ -15,7 +15,7 @@ def setup(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the as an external process in write-only mode + # Connect to the webserver's external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes assert settings.DYNAMIC_SCHEDULER_RABBITMQ # nosec 
app.state.external_socketio = socketio.AsyncAioPikaManager( diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py index bdbe9808a8a0..18e286ec1bc3 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py @@ -15,7 +15,7 @@ def setup_socketio(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the as an external process in write-only mode + # Connect to the webserver's external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes assert settings.RABBIT_SETTINGS # nosec app.state.external_socketio = socketio.AsyncAioPikaManager( diff --git a/services/payments/src/simcore_service_payments/services/socketio.py b/services/payments/src/simcore_service_payments/services/socketio.py index bfa4d98c5d6f..0cabedea47a9 100644 --- a/services/payments/src/simcore_service_payments/services/socketio.py +++ b/services/payments/src/simcore_service_payments/services/socketio.py @@ -16,7 +16,7 @@ def setup_socketio(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the as an external process in write-only mode + # Connect to the webserver's external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes app.state.external_socketio = socketio.AsyncAioPikaManager( url=settings.dsn, logger=_logger, write_only=True From e1b2612e254e4364426f1edf6047da7fd4ea6729 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 11:36:09 +0200 Subject: [PATCH 097/122] rename and remove not meaningful types --- 
.../services/service_tracker/_api.py | 37 ++++++++----------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 39deec0087d6..bd4475f12c0b 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -17,7 +17,6 @@ from ._models import SchedulerServiceState, TrackedServiceModel, UserRequestedState from ._setup import get_tracker -from ._tracker import Tracker _logger = logging.getLogger(__name__) @@ -31,12 +30,12 @@ async def set_request_as_running( app: FastAPI, dynamic_service_start: DynamicServiceStart, ) -> None: - """Stores the intention fo the user: ``start`` requested""" - tracker: Tracker = get_tracker(app) + """Stores intention to `start` request""" + tracker = get_tracker(app) - node_id: NodeID = dynamic_service_start.node_uuid - - model: TrackedServiceModel | None = await tracker.load(node_id) + model: TrackedServiceModel | None = await tracker.load( + dynamic_service_start.node_uuid + ) if model is not None: model.dynamic_service_start = dynamic_service_start model.requested_state = UserRequestedState.RUNNING @@ -44,7 +43,7 @@ async def set_request_as_running( model.user_id = dynamic_service_start.user_id await tracker.save( - node_id, + dynamic_service_start.node_uuid, TrackedServiceModel( dynamic_service_start=dynamic_service_start, requested_state=UserRequestedState.RUNNING, @@ -57,8 +56,8 @@ async def set_request_as_running( async def set_request_as_stopped( app: FastAPI, dynamic_service_stop: DynamicServiceStop ) -> None: - """Stores the intention of the user: ``stop`` requested""" - tracker: Tracker = get_tracker(app) + """Stores intention to `stop` request""" + tracker = 
get_tracker(app) model: TrackedServiceModel | None = await tracker.load(dynamic_service_stop.node_id) if model is None: @@ -135,7 +134,7 @@ async def set_if_status_changed( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> bool: """returns ``True`` if the tracker detected a status change""" - tracker: Tracker = get_tracker(app) + tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: _logger.info( @@ -169,7 +168,7 @@ async def can_notify_frontend( The frontend will be notified at regular intervals and on changes Avoids sending too many updates. """ - tracker: Tracker = get_tracker(app) + tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: @@ -191,7 +190,7 @@ async def can_notify_frontend( async def set_scheduled_to_run( app: FastAPI, node_id: NodeID, delay_from_now: timedelta ) -> None: - tracker: Tracker = get_tracker(app) + tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: _logger.info( @@ -209,7 +208,7 @@ async def set_scheduled_to_run( async def set_service_status_task_uid( app: FastAPI, node_id: NodeID, task_uid: TaskUID ) -> None: - tracker: Tracker = get_tracker(app) + tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: _logger.info( @@ -226,24 +225,20 @@ async def set_service_status_task_uid( async def remove_tracked(app: FastAPI, node_id: NodeID) -> None: """Removes the service from tracking (usually after stop completes)""" # NOTE: does not raise if node_id is not found - tracker: Tracker = get_tracker(app) - await tracker.delete(node_id) + await get_tracker(app).delete(node_id) async def get_tracked(app: FastAPI, node_id: NodeID) -> TrackedServiceModel | None: """Returns information about the tracked service""" - tracker: Tracker = get_tracker(app) - return await tracker.load(node_id) + return 
await get_tracker(app).load(node_id) async def get_all_tracked(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: """Returns all tracked services""" - tracker: Tracker = get_tracker(app) - return await tracker.all() + return await get_tracker(app).all() async def get_user_id(app: FastAPI, node_id: NodeID) -> UserID | None: """returns user_id for the user""" - tracker: Tracker = get_tracker(app) - model: TrackedServiceModel | None = await tracker.load(node_id) + model: TrackedServiceModel | None = await get_tracker(app).load(node_id) return model.user_id if model else None From 3f5f33453c6923cf7cf0167d7305408d1a329f16 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 12:55:49 +0200 Subject: [PATCH 098/122] simplified code --- .../services/service_tracker/_api.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index bd4475f12c0b..e520438a5b84 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -31,18 +31,7 @@ async def set_request_as_running( dynamic_service_start: DynamicServiceStart, ) -> None: """Stores intention to `start` request""" - tracker = get_tracker(app) - - model: TrackedServiceModel | None = await tracker.load( - dynamic_service_start.node_uuid - ) - if model is not None: - model.dynamic_service_start = dynamic_service_start - model.requested_state = UserRequestedState.RUNNING - model.project_id = dynamic_service_start.project_id - model.user_id = dynamic_service_start.user_id - - await tracker.save( + await get_tracker(app).save( dynamic_service_start.node_uuid, TrackedServiceModel( dynamic_service_start=dynamic_service_start, From 
f1d323eb0a9244f6cab8fc033cd013fdd20d65ca Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 13:00:11 +0200 Subject: [PATCH 099/122] rename --- .../services/service_tracker/_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index e520438a5b84..43fc66da48c4 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -61,7 +61,7 @@ async def set_request_as_stopped( await tracker.save(dynamic_service_stop.node_id, model) -def __get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: +def _get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: # Attributes where to find the state # NodeGet -> service_state # DynamicServiceGet -> state @@ -74,7 +74,7 @@ def __get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: - if __get_state_str(status) != "running": + if _get_state_str(status) != "running": return _LOW_RATE_POLL_INTERVAL return NORMAL_RATE_POLL_INTERVAL @@ -92,7 +92,7 @@ def _get_current_state( if isinstance(status, NodeGetIdle): return SchedulerServiceState.IDLE - service_state: ServiceState = ServiceState(__get_state_str(status)) + service_state: ServiceState = ServiceState(_get_state_str(status)) if requested_sate == UserRequestedState.RUNNING: if service_state == ServiceState.RUNNING: From 690fdfa2fd73f21c244942dc728efc1ef8e534ca Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 13:03:24 +0200 Subject: [PATCH 100/122] refactor interface --- .../services/service_tracker/_api.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 43fc66da48c4..1802cf365d77 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -61,7 +61,7 @@ async def set_request_as_stopped( await tracker.save(dynamic_service_stop.node_id, model) -def _get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: +def _get_state(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> ServiceState: # Attributes where to find the state # NodeGet -> service_state # DynamicServiceGet -> state @@ -70,11 +70,11 @@ def _get_state_str(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> str: state: ServiceState | str = getattr(status, state_key) result: str = state.value if isinstance(state, ServiceState) else state - return result + return ServiceState(result) def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: - if _get_state_str(status) != "running": + if _get_state(status) != ServiceState.RUNNING: return _LOW_RATE_POLL_INTERVAL return NORMAL_RATE_POLL_INTERVAL @@ -92,7 +92,7 @@ def _get_current_state( if isinstance(status, NodeGetIdle): return SchedulerServiceState.IDLE - service_state: ServiceState = ServiceState(_get_state_str(status)) + service_state: ServiceState = _get_state(status) if requested_sate == UserRequestedState.RUNNING: if service_state == ServiceState.RUNNING: From 162e5987b129c9047d27f058ea324b2766ab5ae0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 13:37:12 +0200 Subject: [PATCH 101/122] renaming --- .../services/service_tracker/_api.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 1802cf365d77..525c66be1902 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -61,7 +61,9 @@ async def set_request_as_stopped( await tracker.save(dynamic_service_stop.node_id, model) -def _get_state(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> ServiceState: +def _get_service_state( + status: NodeGet | DynamicServiceGet | NodeGetIdle, +) -> ServiceState: # Attributes where to find the state # NodeGet -> service_state # DynamicServiceGet -> state @@ -74,14 +76,14 @@ def _get_state(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> ServiceStat def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> timedelta: - if _get_state(status) != ServiceState.RUNNING: + if _get_service_state(status) != ServiceState.RUNNING: return _LOW_RATE_POLL_INTERVAL return NORMAL_RATE_POLL_INTERVAL def _get_current_state( - requested_sate: UserRequestedState, + requested_state: UserRequestedState, status: NodeGet | DynamicServiceGet | NodeGetIdle, ) -> SchedulerServiceState: """ @@ -92,9 +94,9 @@ def _get_current_state( if isinstance(status, NodeGetIdle): return SchedulerServiceState.IDLE - service_state: ServiceState = _get_state(status) + service_state: ServiceState = _get_service_state(status) - if requested_sate == UserRequestedState.RUNNING: + if requested_state == UserRequestedState.RUNNING: if service_state == ServiceState.RUNNING: return SchedulerServiceState.RUNNING @@ -108,14 +110,14 @@ def _get_current_state( if service_state < ServiceState.PENDING or service_state > ServiceState.RUNNING: return SchedulerServiceState.UNEXPECTED_OUTCOME - if requested_sate == 
UserRequestedState.STOPPED: + if requested_state == UserRequestedState.STOPPED: if service_state >= ServiceState.RUNNING: # type:ignore[operator] return SchedulerServiceState.STOPPING if service_state < ServiceState.RUNNING: return SchedulerServiceState.UNEXPECTED_OUTCOME - msg = f"Could not determine current_state from: '{requested_sate=}', '{status=}'" + msg = f"Could not determine current_state from: '{requested_state=}', '{status=}'" raise TypeError(msg) From 98e900889e05abd201dc05ce8e6afbb93a5a6daf Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 13:45:46 +0200 Subject: [PATCH 102/122] renaming --- .../services/service_tracker/__init__.py | 4 ++-- .../services/service_tracker/_api.py | 8 +++++--- .../services/status_monitor/_deferred_get_status.py | 2 +- .../tests/unit/service_tracker/test__api.py | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index 95de4a751470..c1b80a19e464 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -1,6 +1,5 @@ from ._api import ( NORMAL_RATE_POLL_INTERVAL, - can_notify_frontend, get_all_tracked, get_tracked, get_user_id, @@ -10,12 +9,12 @@ set_request_as_stopped, set_scheduled_to_run, set_service_status_task_uid, + should_notify_frontend, ) from ._models import TrackedServiceModel from ._setup import setup_service_tracker __all__: tuple[str, ...] 
= ( - "can_notify_frontend", "get_all_tracked", "get_tracked", "get_user_id", @@ -27,5 +26,6 @@ "set_scheduled_to_run", "set_service_status_task_uid", "setup_service_tracker", + "should_notify_frontend", "TrackedServiceModel", ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 525c66be1902..d80973f0d198 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -82,7 +82,7 @@ def _get_poll_interval(status: NodeGet | DynamicServiceGet | NodeGetIdle) -> tim return NORMAL_RATE_POLL_INTERVAL -def _get_current_state( +def _get_current_scheduler_service_state( requested_state: UserRequestedState, status: NodeGet | DynamicServiceGet | NodeGetIdle, ) -> SchedulerServiceState: @@ -144,14 +144,16 @@ async def set_if_status_changed( json_status = status.json() if model.service_status != json_status: model.service_status = json_status - model.current_state = _get_current_state(model.requested_state, status) + model.current_state = _get_current_scheduler_service_state( + model.requested_state, status + ) await tracker.save(node_id, model) return True return False -async def can_notify_frontend( +async def should_notify_frontend( app: FastAPI, node_id: NodeID, *, status_changed: bool ) -> bool: """ diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index d34dd1c15e3a..ad598bb5ca48 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -68,7 +68,7 @@ async def on_result( status_changed: bool = await service_tracker.set_if_status_changed( app, node_id, result ) - if await service_tracker.can_notify_frontend( + if await service_tracker.should_notify_frontend( app, node_id, status_changed=status_changed ): user_id: UserID | None = await service_tracker.get_user_id(app, node_id) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 847caba48212..eedd2f328b5e 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -34,7 +34,7 @@ from simcore_service_dynamic_scheduler.services.service_tracker._api import ( _LOW_RATE_POLL_INTERVAL, NORMAL_RATE_POLL_INTERVAL, - _get_current_state, + _get_current_scheduler_service_state, _get_poll_interval, ) from simcore_service_dynamic_scheduler.services.service_tracker._models import ( @@ -318,4 +318,4 @@ def test__get_current_state( status: NodeGet | DynamicServiceGet | NodeGetIdle, expected: SchedulerServiceState, ): - assert _get_current_state(requested_state, status) == expected + assert _get_current_scheduler_service_state(requested_state, status) == expected From 2f04e9b8aeb19177da8e79a6e54c6b073a130ecb Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 13:46:35 +0200 Subject: [PATCH 103/122] docstring refactor --- .../services/service_tracker/_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index d80973f0d198..15bdd3b70f7d 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -216,8 +216,10 @@ async def set_service_status_task_uid( async def remove_tracked(app: FastAPI, node_id: NodeID) -> None: - """Removes the service from tracking (usually after stop completes)""" + """ + Removes the service from tracking (usually after stop completes) # NOTE: does not raise if node_id is not found + """ await get_tracker(app).delete(node_id) From d606bbded522fa5c7295048f06d284f0f8b65426 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 13:49:17 +0200 Subject: [PATCH 104/122] moving postion of field --- .../services/service_tracker/_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 8e577dae13a0..3f02532ad071 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -67,12 +67,12 @@ class TrackedServiceModel: # pylint:disable=too-many-instance-attributes default_factory=lambda: arrow.utcnow().timestamp() ) - def set_check_status_after_to(self, delay_from_now: timedelta) -> None: - self.check_status_after = (arrow.utcnow() + delay_from_now).timestamp() - # used to determine when was the last time the status was notified last_status_notification: float = 0 + def set_check_status_after_to(self, delay_from_now: timedelta) -> None: + self.check_status_after = (arrow.utcnow() + delay_from_now).timestamp() + def set_last_status_notification_to_now(self) -> None: self.last_status_notification = arrow.utcnow().timestamp() From 
32a6c5cdbbae7ec4b54c0a1dd8277dc86a4182fa Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 14:02:52 +0200 Subject: [PATCH 105/122] reusign node_id --- packages/models-library/tests/conftest.py | 1 + packages/models-library/tests/test_utils_nodes.py | 6 ------ services/director-v2/tests/conftest.py | 3 ++- services/director-v2/tests/unit/test_modules_dask_client.py | 5 ----- services/dynamic-scheduler/tests/conftest.py | 1 + .../tests/unit/service_tracker/test__api.py | 5 ----- 6 files changed, 4 insertions(+), 17 deletions(-) diff --git a/packages/models-library/tests/conftest.py b/packages/models-library/tests/conftest.py index 9169e570b510..8bf433b901d7 100644 --- a/packages/models-library/tests/conftest.py +++ b/packages/models-library/tests/conftest.py @@ -9,6 +9,7 @@ import pytest pytest_plugins = [ + "pytest_simcore.faker_projects_data", "pytest_simcore.pydantic_models", "pytest_simcore.pytest_global_environs", "pytest_simcore.repository_paths", diff --git a/packages/models-library/tests/test_utils_nodes.py b/packages/models-library/tests/test_utils_nodes.py index 47465ce236d3..b4634770a97e 100644 --- a/packages/models-library/tests/test_utils_nodes.py +++ b/packages/models-library/tests/test_utils_nodes.py @@ -16,12 +16,6 @@ from models_library.utils.nodes import compute_node_hash from pydantic import AnyUrl, parse_obj_as - -@pytest.fixture() -def node_id() -> NodeID: - return uuid4() - - ANOTHER_NODE_ID = uuid4() ANOTHER_NODE_OUTPUT_KEY = "the_output_link" ANOTHER_NODE_PAYLOAD = {"outputs": {ANOTHER_NODE_OUTPUT_KEY: 36}} diff --git a/services/director-v2/tests/conftest.py b/services/director-v2/tests/conftest.py index eafe6bb15fcd..0084d9cc040f 100644 --- a/services/director-v2/tests/conftest.py +++ b/services/director-v2/tests/conftest.py @@ -42,12 +42,12 @@ "pytest_simcore.docker_registry", "pytest_simcore.docker_swarm", "pytest_simcore.environment_configs", + "pytest_simcore.faker_projects_data", "pytest_simcore.faker_users_data", 
"pytest_simcore.minio_service", "pytest_simcore.postgres_service", "pytest_simcore.pydantic_models", "pytest_simcore.pytest_global_environs", - "pytest_simcore.socketio", "pytest_simcore.rabbit_service", "pytest_simcore.redis_service", "pytest_simcore.repository_paths", @@ -55,6 +55,7 @@ "pytest_simcore.simcore_dask_service", "pytest_simcore.simcore_services", "pytest_simcore.simcore_storage_service", + "pytest_simcore.socketio", ] logger = logging.getLogger(__name__) diff --git a/services/director-v2/tests/unit/test_modules_dask_client.py b/services/director-v2/tests/unit/test_modules_dask_client.py index a01980027c02..f63381c538bc 100644 --- a/services/director-v2/tests/unit/test_modules_dask_client.py +++ b/services/director-v2/tests/unit/test_modules_dask_client.py @@ -284,11 +284,6 @@ def project_id() -> ProjectID: return uuid4() -@pytest.fixture -def node_id() -> NodeID: - return uuid4() - - @dataclass class ImageParams: image: Image diff --git a/services/dynamic-scheduler/tests/conftest.py b/services/dynamic-scheduler/tests/conftest.py index 464db028147e..2cb14086b2a2 100644 --- a/services/dynamic-scheduler/tests/conftest.py +++ b/services/dynamic-scheduler/tests/conftest.py @@ -24,6 +24,7 @@ "pytest_simcore.docker_compose", "pytest_simcore.docker_swarm", "pytest_simcore.environment_configs", + "pytest_simcore.faker_projects_data", "pytest_simcore.rabbit_service", "pytest_simcore.redis_service", "pytest_simcore.repository_paths", diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index eedd2f328b5e..7ea8786c0777 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -59,11 +59,6 @@ def app_environment( return app_environment -@pytest.fixture -def node_id() -> NodeID: - return uuid4() - - async def test_services_tracer_set_as_running_set_as_stopped( app: FastAPI, 
node_id: NodeID, From 67276db4e0219c63d7e6aa41d6056503c3e1dec4 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 14:36:26 +0200 Subject: [PATCH 106/122] refactor to support idle state --- .../src/models_library/services_enums.py | 2 ++ .../tests/unit/service_tracker/test__api.py | 13 +++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/models-library/src/models_library/services_enums.py b/packages/models-library/src/models_library/services_enums.py index b084ff7f4725..ec5414218e3c 100644 --- a/packages/models-library/src/models_library/services_enums.py +++ b/packages/models-library/src/models_library/services_enums.py @@ -21,6 +21,7 @@ class ServiceState(Enum): STOPPING = "stopping" COMPLETE = "complete" + IDLE = "idle" def __lt__(self, other): if self.__class__ is other.__class__: @@ -42,6 +43,7 @@ def comparison_order() -> dict["ServiceState", int]: ServiceState.RUNNING: 4, ServiceState.STOPPING: 5, ServiceState.COMPLETE: 6, + ServiceState.IDLE: 7, } diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 7ea8786c0777..b44a7ec849c8 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -20,7 +20,7 @@ from pydantic import NonNegativeInt from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.deferred_tasks import TaskUID -from servicelib.utils import logged_gather +from servicelib.utils import limited_gather from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( get_all_tracked, @@ -102,12 +102,12 @@ async def test_services_tracer_workflow( get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], ): # ensure more than one service can be tracked - await logged_gather( + await limited_gather( *[ set_request_as_stopped(app, 
get_dynamic_service_stop(uuid4())) for _ in range(item_count) ], - max_concurrency=100, + limit=100, ) assert len(await get_all_tracked(app)) == item_count @@ -189,7 +189,7 @@ def _get_node_get_from(service_state: ServiceState) -> NodeGet: def _get_dynamic_service_get_from( - service_state: DynamicServiceGet, + service_state: ServiceState, ) -> DynamicServiceGet: dict_data = DynamicServiceGet.Config.schema_extra["examples"][1] assert "state" in dict_data @@ -297,13 +297,10 @@ def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: ] _FLAT_EXPECTED_TEST_CASES = __get_flat_list(_EXPECTED_TEST_CASES) # ensure enum changes do not break above rules -_IDLE_ITEM_COUNT: Final[int] = 1 _NODE_STATUS_FORMATS_COUNT: Final[int] = 2 assert ( len(_FLAT_EXPECTED_TEST_CASES) - == (len(ServiceState) + _IDLE_ITEM_COUNT) - * len(UserRequestedState) - * _NODE_STATUS_FORMATS_COUNT + == (len(ServiceState)) * len(UserRequestedState) * _NODE_STATUS_FORMATS_COUNT ) From 610103cae0f8459c19c836be5202f6074fb2d654 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 14:38:58 +0200 Subject: [PATCH 107/122] rename --- .../tests/unit/service_tracker/test__api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index b44a7ec849c8..290ab6ac3f9c 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -115,10 +115,10 @@ async def test_services_tracer_workflow( @pytest.mark.parametrize( "status", [ - *[NodeGet.parse_obj(x) for x in NodeGet.Config.schema_extra["examples"]], + *[NodeGet.parse_obj(o) for o in NodeGet.Config.schema_extra["examples"]], *[ - DynamicServiceGet.parse_obj(x) - for x in DynamicServiceGet.Config.schema_extra["examples"] + DynamicServiceGet.parse_obj(o) + for o in 
DynamicServiceGet.Config.schema_extra["examples"] ], NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), ], @@ -166,8 +166,8 @@ async def test_set_service_status_task_uid( _LOW_RATE_POLL_INTERVAL, ), *[ - (DynamicServiceGet.parse_obj(x), NORMAL_RATE_POLL_INTERVAL) - for x in DynamicServiceGet.Config.schema_extra["examples"] + (DynamicServiceGet.parse_obj(o), NORMAL_RATE_POLL_INTERVAL) + for o in DynamicServiceGet.Config.schema_extra["examples"] ], ( NodeGetIdle.parse_obj(NodeGetIdle.Config.schema_extra["example"]), From 6e7769df537d12e4c53f918a8a384d6d3a977c41 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 14:48:18 +0200 Subject: [PATCH 108/122] refactor --- .../tests/unit/service_tracker/test__api.py | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 290ab6ac3f9c..9bf2f59c1486 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -3,7 +3,7 @@ from collections.abc import Callable from datetime import timedelta -from typing import Any, Final +from typing import Any, Final, NamedTuple from uuid import uuid4 import pytest @@ -205,86 +205,92 @@ def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: return [item for sublist in nested_list for item in sublist] -_EXPECTED_TEST_CASES: list[list[tuple]] = [ +class ServiceStatusToSchedulerState(NamedTuple): + requested: UserRequestedState + service_status: NodeGet | DynamicServiceGet | NodeGetIdle + expected: SchedulerServiceState + + +_EXPECTED_TEST_CASES: list[list[ServiceStatusToSchedulerState]] = [ [ # UserRequestedState.RUNNING - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, status_generator(ServiceState.PENDING), SchedulerServiceState.STARTING, ), - ( + ServiceStatusToSchedulerState( 
UserRequestedState.RUNNING, status_generator(ServiceState.PULLING), SchedulerServiceState.STARTING, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, status_generator(ServiceState.STARTING), SchedulerServiceState.STARTING, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, status_generator(ServiceState.RUNNING), SchedulerServiceState.RUNNING, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, status_generator(ServiceState.COMPLETE), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, status_generator(ServiceState.FAILED), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, status_generator(ServiceState.STOPPING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.RUNNING, _get_node_get_idle(), SchedulerServiceState.IDLE, ), # UserRequestedState.STOPPED - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.PENDING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.PULLING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.STARTING), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.RUNNING), SchedulerServiceState.STOPPING, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.COMPLETE), SchedulerServiceState.STOPPING, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.FAILED), SchedulerServiceState.UNEXPECTED_OUTCOME, ), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, status_generator(ServiceState.STOPPING), SchedulerServiceState.STOPPING, 
), - ( + ServiceStatusToSchedulerState( UserRequestedState.STOPPED, _get_node_get_idle(), SchedulerServiceState.IDLE, @@ -295,19 +301,25 @@ def __get_flat_list(nested_list: list[list[Any]]) -> list[Any]: _get_dynamic_service_get_from, ) ] -_FLAT_EXPECTED_TEST_CASES = __get_flat_list(_EXPECTED_TEST_CASES) +_FLAT_EXPECTED_TEST_CASES: list[ServiceStatusToSchedulerState] = __get_flat_list( + _EXPECTED_TEST_CASES +) # ensure enum changes do not break above rules _NODE_STATUS_FORMATS_COUNT: Final[int] = 2 assert ( len(_FLAT_EXPECTED_TEST_CASES) - == (len(ServiceState)) * len(UserRequestedState) * _NODE_STATUS_FORMATS_COUNT + == len(ServiceState) * len(UserRequestedState) * _NODE_STATUS_FORMATS_COUNT ) -@pytest.mark.parametrize("requested_state, status, expected", _FLAT_EXPECTED_TEST_CASES) -def test__get_current_state( - requested_state: UserRequestedState, - status: NodeGet | DynamicServiceGet | NodeGetIdle, - expected: SchedulerServiceState, +@pytest.mark.parametrize("service_status_to_scheduler_state", _FLAT_EXPECTED_TEST_CASES) +def test__get_current_scheduler_service_state( + service_status_to_scheduler_state: ServiceStatusToSchedulerState, ): - assert _get_current_scheduler_service_state(requested_state, status) == expected + assert ( + _get_current_scheduler_service_state( + service_status_to_scheduler_state.requested, + service_status_to_scheduler_state.service_status, + ) + == service_status_to_scheduler_state.expected + ) From 771e16cb31dc593a3a4a305e8e60a705e3b2325a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 21 Aug 2024 15:36:36 +0200 Subject: [PATCH 109/122] fixeed broken tests --- .../tests/unit/api_rpc/test_api_rpc__services.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py index 7c8dada1e183..c484f722ff95 100644 --- 
a/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py +++ b/services/dynamic-scheduler/tests/unit/api_rpc/test_api_rpc__services.py @@ -59,7 +59,7 @@ def service_status_new_style() -> DynamicServiceGet: @pytest.fixture def service_status_legacy() -> NodeGet: - return NodeGet.parse_obj(NodeGet.Config.schema_extra["example"]) + return NodeGet.parse_obj(NodeGet.Config.schema_extra["examples"][1]) @pytest.fixture From 3e4d54353a0c122632dfdad3d37eb611f180ac54 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 13:33:00 +0200 Subject: [PATCH 110/122] update docstring --- .../models-library/src/models_library/utils/enums.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/models-library/src/models_library/utils/enums.py b/packages/models-library/src/models_library/utils/enums.py index 62d81cbcffe7..575f97bf185e 100644 --- a/packages/models-library/src/models_library/utils/enums.py +++ b/packages/models-library/src/models_library/utils/enums.py @@ -7,7 +7,16 @@ class auto_str(enum.auto): # noqa: N801 """ To be used in place of `auto()` when inheriting form `StrAutoEnum` - Makes typechecking play nice. 
+ + Helps avoding tooling to report the following error: + `Type "int" is not assignable to declared type "str"` + + Usage: + + ``` + class MyEnum(StrAutoEnum): + FIELD_ONE = auto_str() + ``` """ value: str = enum._auto_null # pylint:disable=protected-access # noqa: SLF001 From 5907c4d89acfb8e31633365093ab73066ffaf371 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 13:54:22 +0200 Subject: [PATCH 111/122] refactor --- .../tests/deferred_tasks/example_app.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/service-library/tests/deferred_tasks/example_app.py b/packages/service-library/tests/deferred_tasks/example_app.py index e6acf3fb8c1a..e9e9571d737f 100644 --- a/packages/service-library/tests/deferred_tasks/example_app.py +++ b/packages/service-library/tests/deferred_tasks/example_app.py @@ -7,6 +7,7 @@ from typing import Any from uuid import uuid4 +import aioredis from pydantic import NonNegativeInt from servicelib.deferred_tasks import ( BaseDeferredHandler, @@ -54,22 +55,22 @@ async def on_result(cls, result: str, context: DeferredContext) -> None: class InMemoryLists: def __init__(self, redis_settings: RedisSettings, port: int) -> None: - self.redis_sdk = RedisClientSDK( + # NOTE: RedisClientSDK is not required here but it's used to easily construct + # a redis connection + self.redis: aioredis.Redis = RedisClientSDK( redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS), decode_responses=True, - ) + ).redis self.port = port def _get_queue_name(self, queue_name: str) -> str: return f"in_memory_lists::{queue_name}.{self.port}" async def append_to(self, queue_name: str, value: Any) -> None: - await self.redis_sdk.redis.rpush(self._get_queue_name(queue_name), value) + await self.redis.rpush(self._get_queue_name(queue_name), value) # type: ignore async def get_all_from(self, queue_name: str) -> list: - return await self.redis_sdk.redis.lrange( - self._get_queue_name(queue_name), 0, -1 - ) # type: 
ignore + return await self.redis.lrange(self._get_queue_name(queue_name), 0, -1) # type: ignore class ExampleApp: From 4c99a26a711b7e66338132895161cf601f21224e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 13:58:05 +0200 Subject: [PATCH 112/122] revert rename --- .../src/simcore_service_director_v2/modules/socketio.py | 2 +- .../services/notifier/_socketio.py | 2 +- .../modules/system_monitor/_socketio.py | 2 +- .../payments/src/simcore_service_payments/services/socketio.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/socketio.py b/services/director-v2/src/simcore_service_director_v2/modules/socketio.py index 27e8025dca88..5a6a561e9733 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/socketio.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/socketio.py @@ -15,7 +15,7 @@ def setup(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the webserver's external process in write-only mode + # Connect to the as an external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes app.state.external_socketio = socketio.AsyncAioPikaManager( url=settings.DIRECTOR_V2_RABBITMQ.dsn, diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py index 1de03a9aa97b..2f0abfbd3af1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/notifier/_socketio.py @@ -15,7 +15,7 @@ def setup(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the webserver's external process in write-only mode + # 
Connect to the as an external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes assert settings.DYNAMIC_SCHEDULER_RABBITMQ # nosec app.state.external_socketio = socketio.AsyncAioPikaManager( diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py index 18e286ec1bc3..bdbe9808a8a0 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/system_monitor/_socketio.py @@ -15,7 +15,7 @@ def setup_socketio(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the webserver's external process in write-only mode + # Connect to the as an external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes assert settings.RABBIT_SETTINGS # nosec app.state.external_socketio = socketio.AsyncAioPikaManager( diff --git a/services/payments/src/simcore_service_payments/services/socketio.py b/services/payments/src/simcore_service_payments/services/socketio.py index 0cabedea47a9..bfa4d98c5d6f 100644 --- a/services/payments/src/simcore_service_payments/services/socketio.py +++ b/services/payments/src/simcore_service_payments/services/socketio.py @@ -16,7 +16,7 @@ def setup_socketio(app: FastAPI): async def _on_startup() -> None: assert app.state.rabbitmq_client # nosec - # Connect to the webserver's external process in write-only mode + # Connect to the as an external process in write-only mode # SEE https://python-socketio.readthedocs.io/en/stable/server.html#emitting-from-external-processes app.state.external_socketio = socketio.AsyncAioPikaManager( url=settings.dsn, logger=_logger, write_only=True From 
806410d5920a94977598983f894a3c1e267c9f5d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 14:13:39 +0200 Subject: [PATCH 113/122] removed code that does not help solve any issue --- .../src/models_library/utils/enums.py | 19 ------------------- .../services/service_tracker/_models.py | 19 ++++++++++--------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/packages/models-library/src/models_library/utils/enums.py b/packages/models-library/src/models_library/utils/enums.py index 575f97bf185e..7f0ff7eaf486 100644 --- a/packages/models-library/src/models_library/utils/enums.py +++ b/packages/models-library/src/models_library/utils/enums.py @@ -1,27 +1,8 @@ -import enum import inspect from enum import Enum, StrEnum, unique from typing import Any -class auto_str(enum.auto): # noqa: N801 - """ - To be used in place of `auto()` when inheriting form `StrAutoEnum` - - Helps avoding tooling to report the following error: - `Type "int" is not assignable to declared type "str"` - - Usage: - - ``` - class MyEnum(StrAutoEnum): - FIELD_ONE = auto_str() - ``` - """ - - value: str = enum._auto_null # pylint:disable=protected-access # noqa: SLF001 - - @unique class StrAutoEnum(StrEnum): @staticmethod diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index 3f02532ad071..e6f2d995961b 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -1,6 +1,7 @@ import pickle from dataclasses import dataclass, field from datetime import timedelta +from enum import auto import arrow from models_library.api_schemas_dynamic_scheduler.dynamic_services import ( @@ -8,30 +9,30 @@ ) from models_library.projects import ProjectID from 
models_library.users import UserID -from models_library.utils.enums import StrAutoEnum, auto_str +from models_library.utils.enums import StrAutoEnum from servicelib.deferred_tasks import TaskUID class UserRequestedState(StrAutoEnum): - RUNNING = auto_str() - STOPPED = auto_str() + RUNNING = auto() + STOPPED = auto() class SchedulerServiceState(StrAutoEnum): # service was started and is running as expected - RUNNING = auto_str() + RUNNING = auto() # service is not present - IDLE = auto_str() + IDLE = auto() # something went wrong while starting/stopping service - UNEXPECTED_OUTCOME = auto_str() + UNEXPECTED_OUTCOME = auto() # service is being started - STARTING = auto_str() + STARTING = auto() # service is being stopped - STOPPING = auto_str() + STOPPING = auto() # service status has not been determined - UNKNOWN = auto_str() + UNKNOWN = auto() @dataclass From 519446d36d5c6e954c53782946020a8051df3644 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 15:04:59 +0200 Subject: [PATCH 114/122] refactor to be formally correct --- .../services/service_tracker/__init__.py | 2 ++ .../services/service_tracker/_api.py | 24 +++++++++++++------ .../status_monitor/_deferred_get_status.py | 1 + 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index c1b80a19e464..abf8e3b34ed9 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -4,6 +4,7 @@ get_tracked, get_user_id, remove_tracked, + set_frontned_notified, set_if_status_changed, set_request_as_running, set_request_as_stopped, @@ -20,6 +21,7 @@ "get_user_id", "NORMAL_RATE_POLL_INTERVAL", "remove_tracked", + 
"set_frontned_notified", "set_if_status_changed", "set_request_as_running", "set_request_as_stopped", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 15bdd3b70f7d..8703fd017700 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -168,16 +168,26 @@ async def should_notify_frontend( return False # check if too much time has passed since the last time an update was sent - if ( + return ( status_changed - or (arrow.utcnow().timestamp() - model.last_status_notification) + or arrow.utcnow().timestamp() - model.last_status_notification > _MAX_PERIOD_WITHOUT_SERVICE_STATUS_UPDATES.total_seconds() - ): - model.set_last_status_notification_to_now() - await tracker.save(node_id, model) - return True + ) - return False + +async def set_frontned_notified(app: FastAPI, node_id: NodeID) -> None: + tracker = get_tracker(app) + model: TrackedServiceModel | None = await tracker.load(node_id) + if model is None: + _logger.info( + "Could not find a %s entry for node_id %s: skipping set_last_status_notification_to_now", + TrackedServiceModel.__name__, + node_id, + ) + return + + model.set_last_status_notification_to_now() + await tracker.save(node_id, model) async def set_scheduled_to_run( diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index ad598bb5ca48..ba030c3a6edb 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -74,6 +74,7 @@ async def on_result( user_id: UserID | None = await service_tracker.get_user_id(app, node_id) if user_id: await notify_service_status_change(app, user_id, result) + await service_tracker.set_frontned_notified(app, node_id) else: _logger.info( "Did not find a user for '%s', skipping status delivery of: %s", From ca7ec56c182e927ad450127df9ea2ed9c21792c2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 15:13:20 +0200 Subject: [PATCH 115/122] refactor logging --- .../services/service_tracker/_api.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 8703fd017700..031e2d43a47e 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -1,3 +1,4 @@ +import inspect import logging from datetime import timedelta from typing import Final @@ -121,6 +122,18 @@ def _get_current_scheduler_service_state( raise TypeError(msg) +def _log_skipping_operation(node_id: NodeID) -> None: + # the caller is at index 1 (index 0 is the current function) + caller_name = inspect.stack()[1].function + + _logger.info( + "Could not find a %s entry for node_id %s: skipping %s", + TrackedServiceModel.__name__, + node_id, + caller_name, + ) + + async def set_if_status_changed( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> bool: @@ -128,11 +141,7 @@ async def set_if_status_changed( tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: - _logger.info( - "Could not find 
a %s entry for node_id %s: skipping set_if_status_changed", - TrackedServiceModel.__name__, - node_id, - ) + _log_skipping_operation(node_id) return False # set new polling interval in the future @@ -179,11 +188,7 @@ async def set_frontned_notified(app: FastAPI, node_id: NodeID) -> None: tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: - _logger.info( - "Could not find a %s entry for node_id %s: skipping set_last_status_notification_to_now", - TrackedServiceModel.__name__, - node_id, - ) + _log_skipping_operation(node_id) return model.set_last_status_notification_to_now() @@ -196,11 +201,7 @@ async def set_scheduled_to_run( tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: - _logger.info( - "Could not find a %s entry for node_id %s: skipping set_scheduled_to_start", - TrackedServiceModel.__name__, - node_id, - ) + _log_skipping_operation(node_id) return model.scheduled_to_run = True @@ -214,11 +215,7 @@ async def set_service_status_task_uid( tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: - _logger.info( - "Could not find a %s entry for node_id %s: skipping set_service_status_task_uid", - TrackedServiceModel.__name__, - node_id, - ) + _log_skipping_operation(node_id) return model.service_status_task_uid = task_uid From c529d4dd25d7931911bef45fb83840e1bc03feaa Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 15:14:36 +0200 Subject: [PATCH 116/122] docstring --- .../services/service_tracker/_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 031e2d43a47e..2ff44980d39c 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -241,6 +241,6 @@ async def get_all_tracked(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: async def get_user_id(app: FastAPI, node_id: NodeID) -> UserID | None: - """returns user_id for the user""" + """returns user_id for the service""" model: TrackedServiceModel | None = await get_tracker(app).load(node_id) return model.user_id if model else None From a7f1ac6bd5361e73d366e63cfc596c2b738c00bf Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 15:21:11 +0200 Subject: [PATCH 117/122] renamed functions --- .../services/service_tracker/__init__.py | 32 +++++++++---------- .../services/service_tracker/_api.py | 18 ++++++----- .../status_monitor/_deferred_get_status.py | 10 +++--- .../services/status_monitor/_monitor.py | 6 ++-- .../tests/unit/service_tracker/test__api.py | 28 ++++++++-------- .../test_services_status_monitor__monitor.py | 8 +++-- 6 files changed, 54 insertions(+), 48 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py index abf8e3b34ed9..abf543d1befa 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/__init__.py @@ -1,33 +1,33 @@ from ._api import ( NORMAL_RATE_POLL_INTERVAL, - get_all_tracked, - get_tracked, - get_user_id, - remove_tracked, - set_frontned_notified, - set_if_status_changed, + get_all_tracked_services, + get_tracked_service, + get_user_id_for_service, + remove_tracked_service, + set_frontned_notified_for_service, + set_if_status_changed_for_service, set_request_as_running, 
set_request_as_stopped, - set_scheduled_to_run, + set_service_scheduled_to_run, set_service_status_task_uid, - should_notify_frontend, + should_notify_frontend_for_service, ) from ._models import TrackedServiceModel from ._setup import setup_service_tracker __all__: tuple[str, ...] = ( - "get_all_tracked", - "get_tracked", - "get_user_id", + "get_all_tracked_services", + "get_tracked_service", + "get_user_id_for_service", "NORMAL_RATE_POLL_INTERVAL", - "remove_tracked", - "set_frontned_notified", - "set_if_status_changed", + "remove_tracked_service", + "set_frontned_notified_for_service", + "set_if_status_changed_for_service", "set_request_as_running", "set_request_as_stopped", - "set_scheduled_to_run", + "set_service_scheduled_to_run", "set_service_status_task_uid", "setup_service_tracker", - "should_notify_frontend", + "should_notify_frontend_for_service", "TrackedServiceModel", ) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py index 2ff44980d39c..1b1b4a0d9f8f 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_api.py @@ -134,7 +134,7 @@ def _log_skipping_operation(node_id: NodeID) -> None: ) -async def set_if_status_changed( +async def set_if_status_changed_for_service( app: FastAPI, node_id: NodeID, status: NodeGet | DynamicServiceGet | NodeGetIdle ) -> bool: """returns ``True`` if the tracker detected a status change""" @@ -162,7 +162,7 @@ async def set_if_status_changed( return False -async def should_notify_frontend( +async def should_notify_frontend_for_service( app: FastAPI, node_id: NodeID, *, status_changed: bool ) -> bool: """ @@ -184,7 +184,7 @@ async def should_notify_frontend( ) -async def set_frontned_notified(app: FastAPI, node_id: 
NodeID) -> None: +async def set_frontned_notified_for_service(app: FastAPI, node_id: NodeID) -> None: tracker = get_tracker(app) model: TrackedServiceModel | None = await tracker.load(node_id) if model is None: @@ -195,7 +195,7 @@ async def set_frontned_notified(app: FastAPI, node_id: NodeID) -> None: await tracker.save(node_id, model) -async def set_scheduled_to_run( +async def set_service_scheduled_to_run( app: FastAPI, node_id: NodeID, delay_from_now: timedelta ) -> None: tracker = get_tracker(app) @@ -222,7 +222,7 @@ async def set_service_status_task_uid( await tracker.save(node_id, model) -async def remove_tracked(app: FastAPI, node_id: NodeID) -> None: +async def remove_tracked_service(app: FastAPI, node_id: NodeID) -> None: """ Removes the service from tracking (usually after stop completes) # NOTE: does not raise if node_id is not found @@ -230,17 +230,19 @@ async def remove_tracked(app: FastAPI, node_id: NodeID) -> None: await get_tracker(app).delete(node_id) -async def get_tracked(app: FastAPI, node_id: NodeID) -> TrackedServiceModel | None: +async def get_tracked_service( + app: FastAPI, node_id: NodeID +) -> TrackedServiceModel | None: """Returns information about the tracked service""" return await get_tracker(app).load(node_id) -async def get_all_tracked(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: +async def get_all_tracked_services(app: FastAPI) -> dict[NodeID, TrackedServiceModel]: """Returns all tracked services""" return await get_tracker(app).all() -async def get_user_id(app: FastAPI, node_id: NodeID) -> UserID | None: +async def get_user_id_for_service(app: FastAPI, node_id: NodeID) -> UserID | None: """returns user_id for the service""" model: TrackedServiceModel | None = await get_tracker(app).load(node_id) return model.user_id if model else None diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py 
b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py index ba030c3a6edb..f710204504c2 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_deferred_get_status.py @@ -65,16 +65,18 @@ async def on_result( _logger.debug("Received status for service '%s': '%s'", node_id, result) - status_changed: bool = await service_tracker.set_if_status_changed( + status_changed: bool = await service_tracker.set_if_status_changed_for_service( app, node_id, result ) - if await service_tracker.should_notify_frontend( + if await service_tracker.should_notify_frontend_for_service( app, node_id, status_changed=status_changed ): - user_id: UserID | None = await service_tracker.get_user_id(app, node_id) + user_id: UserID | None = await service_tracker.get_user_id_for_service( + app, node_id + ) if user_id: await notify_service_status_change(app, user_id, result) - await service_tracker.set_frontned_notified(app, node_id) + await service_tracker.set_frontned_notified_for_service(app, node_id) else: _logger.info( "Did not find a user for '%s', skipping status delivery of: %s", diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index 3a2f8fcd267d..74ef067d18c1 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -27,7 +27,7 @@ async def _start_get_status_deferred( app: FastAPI, node_id: NodeID, *, next_check_delay: timedelta ) -> None: - await service_tracker.set_scheduled_to_run(app, node_id, next_check_delay) + await 
service_tracker.set_service_scheduled_to_run(app, node_id, next_check_delay) await DeferredGetStatus.start(node_id=node_id) @@ -51,7 +51,7 @@ async def _worker_start_get_status_requests(self) -> None: models: dict[ NodeID, TrackedServiceModel - ] = await service_tracker.get_all_tracked(self.app) + ] = await service_tracker.get_all_tracked_services(self.app) to_remove: list[NodeID] = [] to_start: list[NodeID] = [] @@ -90,7 +90,7 @@ async def _worker_start_get_status_requests(self) -> None: _logger.debug("Removing tracked services: '%s'", to_remove) await logged_gather( *( - service_tracker.remove_tracked(self.app, node_id) + service_tracker.remove_tracked_service(self.app, node_id) for node_id in to_remove ), max_concurrency=_MAX_CONCURRENCY, diff --git a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py index 9bf2f59c1486..0755f7e5d786 100644 --- a/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py +++ b/services/dynamic-scheduler/tests/unit/service_tracker/test__api.py @@ -23,10 +23,10 @@ from servicelib.utils import limited_gather from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( - get_all_tracked, - get_tracked, - remove_tracked, - set_if_status_changed, + get_all_tracked_services, + get_tracked_service, + remove_tracked_service, + set_if_status_changed_for_service, set_request_as_running, set_request_as_stopped, set_service_status_task_uid, @@ -66,19 +66,19 @@ async def test_services_tracer_set_as_running_set_as_stopped( get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], ): async def _remove_service() -> None: - await remove_tracked(app, node_id) - assert await get_tracked(app, node_id) is None - assert await get_all_tracked(app) == {} + await remove_tracked_service(app, node_id) + assert await get_tracked_service(app, node_id) is None + assert await 
get_all_tracked_services(app) == {} async def _set_as_running() -> None: await set_request_as_running(app, get_dynamic_service_start(node_id)) - tracked_model = await get_tracked(app, node_id) + tracked_model = await get_tracked_service(app, node_id) assert tracked_model assert tracked_model.requested_state == UserRequestedState.RUNNING async def _set_as_stopped() -> None: await set_request_as_stopped(app, get_dynamic_service_stop(node_id)) - tracked_model = await get_tracked(app, node_id) + tracked_model = await get_tracked_service(app, node_id) assert tracked_model assert tracked_model.requested_state == UserRequestedState.STOPPED @@ -109,7 +109,7 @@ async def test_services_tracer_workflow( ], limit=100, ) - assert len(await get_all_tracked(app)) == item_count + assert len(await get_all_tracked_services(app)) == item_count @pytest.mark.parametrize( @@ -131,11 +131,11 @@ async def test_set_if_status_changed( ): await set_request_as_running(app, get_dynamic_service_start(node_id)) - assert await set_if_status_changed(app, node_id, status) is True + assert await set_if_status_changed_for_service(app, node_id, status) is True - assert await set_if_status_changed(app, node_id, status) is False + assert await set_if_status_changed_for_service(app, node_id, status) is False - model = await get_tracked(app, node_id) + model = await get_tracked_service(app, node_id) assert model assert model.service_status == status.json() @@ -152,7 +152,7 @@ async def test_set_service_status_task_uid( task_uid = TaskUID(faker.uuid4()) await set_service_status_task_uid(app, node_id, task_uid) - model = await get_tracked(app, node_id) + model = await get_tracked_service(app, node_id) assert model assert model.service_status_task_uid == task_uid diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index 8098b67d41f7..cb4814c9187d 100644 --- 
a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -27,7 +27,7 @@ from settings_library.rabbit import RabbitSettings from settings_library.redis import RedisSettings from simcore_service_dynamic_scheduler.services.service_tracker import ( - get_all_tracked, + get_all_tracked_services, set_request_as_running, set_request_as_stopped, ) @@ -252,7 +252,9 @@ def deferred_status_spies(mocker: MockerFixture) -> dict[str, AsyncMock]: @pytest.fixture def remove_tracked_spy(mocker: MockerFixture) -> AsyncMock: - mock_method = mocker.AsyncMock(wraps=_monitor.service_tracker.remove_tracked) + mock_method = mocker.AsyncMock( + wraps=_monitor.service_tracker.remove_tracked_service + ) return mocker.patch.object(_monitor.service_tracker, "remove_tracked", mock_method) @@ -366,7 +368,7 @@ async def test_expected_calls_to_notify_frontend( # pylint:disable=too-many-arg get_dynamic_service_start: Callable[[NodeID], DynamicServiceStart], get_dynamic_service_stop: Callable[[NodeID], DynamicServiceStop], ): - assert await get_all_tracked(app) == {} + assert await get_all_tracked_services(app) == {} if user_requests_running: await set_request_as_running(app, get_dynamic_service_start(node_id)) From 7f779d427c2a4a0792416135310b46aff778fcc8 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 18 Sep 2024 15:27:20 +0200 Subject: [PATCH 118/122] added description via field --- .../services/service_tracker/_models.py | 74 ++++++++++++++----- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py index e6f2d995961b..985ca8feef5a 100644 --- 
a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/service_tracker/_models.py @@ -37,39 +37,73 @@ class SchedulerServiceState(StrAutoEnum): @dataclass class TrackedServiceModel: # pylint:disable=too-many-instance-attributes - # used to create the service in any given moment if the requested_state is RUNNING - # can be set to None only when stopping the service - dynamic_service_start: DynamicServiceStart | None - # required for propagating status changes to the frontend - user_id: UserID | None - project_id: ProjectID | None + dynamic_service_start: DynamicServiceStart | None = field( + metadata={ + "description": ( + "used to create the service in any given moment if the requested_state is RUNNING" + "can be set to None only when stopping the service" + ) + } + ) - # what the user desires (RUNNING or STOPPED) - requested_state: UserRequestedState + user_id: UserID | None = field( + metadata={ + "description": "required for propagating status changes to the frontend" + } + ) + project_id: ProjectID | None = field( + metadata={ + "description": "required for propagating status changes to the frontend" + } + ) + + requested_state: UserRequestedState = field( + metadata={ + "description": ( + "status of the service desidered by the user RUNNING or STOPPED" + ) + } + ) - # set this after parsing the incoming state via the API calls - current_state: SchedulerServiceState = SchedulerServiceState.UNKNOWN + current_state: SchedulerServiceState = field( + default=SchedulerServiceState.UNKNOWN, + metadata={ + "description": "to set after parsing the incoming state via the API calls" + }, + ) ############################# ### SERVICE STATUS UPDATE ### ############################# - # set when a job will be immediately scheduled - scheduled_to_run: bool = False + scheduled_to_run: bool = field( + default=False, + metadata={"description": "set 
when a job will be immediately scheduled"}, + ) - # stored for debug mainly this is used to compute ``current_state`` - service_status: str = "" - # uid of the job currently fetching the status - service_status_task_uid: TaskUID | None = None + service_status: str = field( + default="", + metadata={ + "description": "stored for debug mainly this is used to compute ``current_state``" + }, + ) + service_status_task_uid: TaskUID | None = field( + default=None, + metadata={"description": "uid of the job currently fetching the status"}, + ) - # used to determine when to poll the status again check_status_after: float = field( - default_factory=lambda: arrow.utcnow().timestamp() + default_factory=lambda: arrow.utcnow().timestamp(), + metadata={"description": "used to determine when to poll the status again"}, ) - # used to determine when was the last time the status was notified - last_status_notification: float = 0 + last_status_notification: float = field( + default=0, + metadata={ + "description": "used to determine when was the last time the status was notified" + }, + ) def set_check_status_after_to(self, delay_from_now: timedelta) -> None: self.check_status_after = (arrow.utcnow() + delay_from_now).timestamp() From 08208edd684511c60f506c7d70d42d7c210ac115 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 24 Sep 2024 09:14:04 +0200 Subject: [PATCH 119/122] using limited_gather --- .../services/status_monitor/_monitor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py index 74ef067d18c1..0d8b5a2723f3 100644 --- a/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py +++ b/services/dynamic-scheduler/src/simcore_service_dynamic_scheduler/services/status_monitor/_monitor.py @@ -9,7 +9,7 @@ 
from pydantic import NonNegativeFloat, NonNegativeInt from servicelib.background_task import stop_periodic_task from servicelib.redis_utils import start_exclusive_periodic_task -from servicelib.utils import logged_gather +from servicelib.utils import limited_gather from settings_library.redis import RedisDatabase from .. import service_tracker @@ -88,23 +88,23 @@ async def _worker_start_get_status_requests(self) -> None: ) _logger.debug("Removing tracked services: '%s'", to_remove) - await logged_gather( + await limited_gather( *( service_tracker.remove_tracked_service(self.app, node_id) for node_id in to_remove ), - max_concurrency=_MAX_CONCURRENCY, + limit=_MAX_CONCURRENCY, ) _logger.debug("Poll status for tracked services: '%s'", to_start) - await logged_gather( + await limited_gather( *( _start_get_status_deferred( self.app, node_id, next_check_delay=NORMAL_RATE_POLL_INTERVAL ) for node_id in to_start ), - max_concurrency=_MAX_CONCURRENCY, + limit=_MAX_CONCURRENCY, ) async def setup(self) -> None: From 299fab447952a2f97b13f5f6d3eb79bf6f6c8624 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 24 Sep 2024 09:15:53 +0200 Subject: [PATCH 120/122] refactor test fixture --- .../status_monitor/test_services_status_monitor__monitor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py index cb4814c9187d..e3d6acffa39c 100644 --- a/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py +++ b/services/dynamic-scheduler/tests/unit/status_monitor/test_services_status_monitor__monitor.py @@ -255,7 +255,11 @@ def remove_tracked_spy(mocker: MockerFixture) -> AsyncMock: mock_method = mocker.AsyncMock( wraps=_monitor.service_tracker.remove_tracked_service ) - return mocker.patch.object(_monitor.service_tracker, "remove_tracked", 
mock_method) + return mocker.patch.object( + _monitor.service_tracker, + _monitor.service_tracker.remove_tracked_service.__name__, + mock_method, + ) @pytest.fixture From 6c5c9d7566dd3b84f6b2fb57043c9947eaf05006 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 24 Sep 2024 12:47:42 +0200 Subject: [PATCH 121/122] fixed failing test --- packages/service-library/tests/deferred_tasks/example_app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/service-library/tests/deferred_tasks/example_app.py b/packages/service-library/tests/deferred_tasks/example_app.py index e9e9571d737f..3e0e138eba57 100644 --- a/packages/service-library/tests/deferred_tasks/example_app.py +++ b/packages/service-library/tests/deferred_tasks/example_app.py @@ -7,8 +7,8 @@ from typing import Any from uuid import uuid4 -import aioredis from pydantic import NonNegativeInt +from redis import Redis from servicelib.deferred_tasks import ( BaseDeferredHandler, DeferredContext, @@ -57,7 +57,7 @@ class InMemoryLists: def __init__(self, redis_settings: RedisSettings, port: int) -> None: # NOTE: RedisClientSDK is not required here but it's used to easily construct # a redis connection - self.redis: aioredis.Redis = RedisClientSDK( + self.redis: Redis = RedisClientSDK( redis_settings.build_redis_dsn(RedisDatabase.DEFERRED_TASKS), decode_responses=True, ).redis From a54edfa156803d914d43e603810d1ef499dcb7e1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 24 Sep 2024 12:51:46 +0200 Subject: [PATCH 122/122] fixed typing --- packages/service-library/tests/deferred_tasks/example_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/service-library/tests/deferred_tasks/example_app.py b/packages/service-library/tests/deferred_tasks/example_app.py index 3e0e138eba57..0ba848178d8e 100644 --- a/packages/service-library/tests/deferred_tasks/example_app.py +++ b/packages/service-library/tests/deferred_tasks/example_app.py @@ -8,7 +8,7 @@ from uuid 
import uuid4 from pydantic import NonNegativeInt -from redis import Redis +from redis.asyncio import Redis from servicelib.deferred_tasks import ( BaseDeferredHandler, DeferredContext,