From 61e7bb61f58919cfce6a867bcc541f49e772d37f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 09:12:40 +0200 Subject: [PATCH 01/52] fix warning pollution --- .../simcore_service_director_v2/api/dependencies/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py b/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py index 75a8a8848ac8..df91e1e85938 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py +++ b/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py @@ -30,7 +30,7 @@ def get_base_repository(engine: AsyncEngine, repo_type: type[RepoType]) -> RepoT checkedout = engine.pool.checkedout() # type: ignore # connections in use total_size = engine.pool.size() # type: ignore # current total connections - if checkedin <= 1: + if (checkedin <= 1) and (total_size > 1): logger.warning( "Database connection pool near limits: total=%d, in_use=%d, available=%d", total_size, From a53db9f229c1a3340763fe0dced09f1a0bdf2166 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 09:13:05 +0200 Subject: [PATCH 02/52] ensure comp run is marked as started --- .../modules/comp_scheduler/_scheduler_base.py | 9 ++++++++- .../modules/db/repositories/comp_runs.py | 16 +++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py index 98e5d5432c5b..4c27cb4fa543 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py @@ 
-198,7 +198,7 @@ async def _update_run_result_from_tasks( iteration: Iteration, pipeline_tasks: dict[NodeIDStr, CompTaskAtDB], ) -> RunningState: - pipeline_state_from_tasks: RunningState = get_pipeline_state_from_task_states( + pipeline_state_from_tasks = get_pipeline_state_from_task_states( list(pipeline_tasks.values()), ) _logger.debug( @@ -441,6 +441,13 @@ async def _process_started_tasks( for t in tasks ) ) + if tasks: + await CompRunsRepository.instance(self.db_engine).mark_as_started( + user_id=user_id, + project_id=tasks[0].project_id, + iteration=iteration, + started_time=utc_now, + ) async def _process_waiting_tasks(self, tasks: list[CompTaskAtDB]) -> None: comp_tasks_repo = CompTasksRepository(self.db_engine) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_runs.py b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_runs.py index 507ac369b173..b2f366b99d47 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_runs.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_runs.py @@ -295,7 +295,6 @@ async def create( project_uuid=f"{project_id}", iteration=iteration, result=RUNNING_STATE_TO_DB[RunningState.PUBLISHED], - started=datetime.datetime.now(tz=datetime.UTC), metadata=jsonable_encoder(metadata), use_on_demand_clusters=use_on_demand_clusters, ) @@ -343,6 +342,21 @@ async def set_run_result( **values, ) + async def mark_as_started( + self, + *, + user_id: UserID, + project_id: ProjectID, + iteration: PositiveInt, + started_time: datetime.datetime, + ) -> CompRunsAtDB | None: + return await self.update( + user_id, + project_id, + iteration, + started=started_time, + ) + async def mark_for_cancellation( self, *, user_id: UserID, project_id: ProjectID, iteration: PositiveInt ) -> CompRunsAtDB | None: From 87872c414926a4efe4c1630c75ce921d011b2485 Mon Sep 17 00:00:00 2001 From: sanderegg 
<35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 10:25:12 +0200 Subject: [PATCH 03/52] ensure comp_runs is set correctly --- .../modules/comp_scheduler/_scheduler_base.py | 15 ++++++++------- .../modules/comp_scheduler/_scheduler_dask.py | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py index 4c27cb4fa543..49465084d5d7 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_base.py @@ -342,6 +342,7 @@ async def _process_started_tasks( tasks: list[CompTaskAtDB], *, user_id: UserID, + project_id: ProjectID, iteration: Iteration, run_metadata: RunMetadataDict, ) -> None: @@ -441,13 +442,12 @@ async def _process_started_tasks( for t in tasks ) ) - if tasks: - await CompRunsRepository.instance(self.db_engine).mark_as_started( - user_id=user_id, - project_id=tasks[0].project_id, - iteration=iteration, - started_time=utc_now, - ) + await CompRunsRepository.instance(self.db_engine).mark_as_started( + user_id=user_id, + project_id=project_id, + iteration=iteration, + started_time=utc_now, + ) async def _process_waiting_tasks(self, tasks: list[CompTaskAtDB]) -> None: comp_tasks_repo = CompTasksRepository(self.db_engine) @@ -495,6 +495,7 @@ async def _update_states_from_comp_backend( await self._process_started_tasks( sorted_tasks.started, user_id=user_id, + project_id=project_id, iteration=iteration, run_metadata=comp_run.metadata, ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py index 955ca1d1c9ec..cb133a73dd4d 100644 --- 
a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py @@ -363,6 +363,7 @@ async def _task_progress_change_handler(self, event: str) -> None: await self._process_started_tasks( [task], user_id=user_id, + project_id=project_id, iteration=run.iteration, run_metadata=run.metadata, ) From 0c94f09bc2edba03e8cc320b9a78cd8172be2dcf Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 10:33:07 +0200 Subject: [PATCH 04/52] refactor --- .../dask-sidecar/src/simcore_service_dask_sidecar/cli.py | 6 ++++-- .../computational_sidecar/core.py | 4 ++-- .../computational_sidecar/docker_utils.py | 4 ++-- .../src/simcore_service_dask_sidecar/settings.py | 8 +------- .../src/simcore_service_dask_sidecar/tasks.py | 4 ++-- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/cli.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/cli.py index 827d23d3491c..55fef7662329 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/cli.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/cli.py @@ -4,7 +4,7 @@ from settings_library.utils_cli import create_settings_command, create_version_callback from ._meta import PROJECT_NAME, __version__ -from .settings import Settings +from .settings import ApplicationSettings # SEE setup entrypoint 'simcore_service_dask_sidecar.cli:the_app' _logger = logging.getLogger(__name__) @@ -15,4 +15,6 @@ # COMMANDS # main.callback()(create_version_callback(__version__)) -main.command()(create_settings_command(settings_cls=Settings, logger=_logger)) +main.command()( + create_settings_command(settings_cls=ApplicationSettings, logger=_logger) +) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py 
b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py index 7b753e306207..8083caa0deb5 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py @@ -27,7 +27,7 @@ from ..dask_utils import TaskPublisher from ..file_utils import pull_file_from_remote, push_file_to_remote -from ..settings import Settings +from ..settings import ApplicationSettings from .docker_utils import ( create_container_config, get_computational_shared_data_mount_point, @@ -172,7 +172,7 @@ async def run(self, command: list[str]) -> TaskOutputData: # NOTE: this is for tracing purpose _logger.info("Running task owner: %s", self.task_parameters.task_owner) - settings = Settings.create_from_envs() + settings = ApplicationSettings.create_from_envs() run_id = f"{uuid4()}" async with Docker() as docker_client, TaskSharedVolumes( Path(f"{settings.SIDECAR_COMP_SERVICES_SHARED_FOLDER}/{run_id}") diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py index 5fd4f24e71d7..d50e84a34182 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py @@ -40,7 +40,7 @@ from ..dask_utils import TaskPublisher from ..file_utils import push_file_to_remote -from ..settings import Settings +from ..settings import ApplicationSettings from .constants import LEGACY_SERVICE_LOG_FILE_NAME from .models import ( LEGACY_INTEGRATION_VERSION, @@ -474,7 +474,7 @@ async def get_image_labels( async def get_computational_shared_data_mount_point(docker_client: Docker) -> Path: - app_settings = Settings.create_from_envs() + app_settings = ApplicationSettings.create_from_envs() try: 
logger.debug( "getting computational shared data mount point for %s", diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py index b77811fd57fb..cf740853ec75 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py @@ -8,9 +8,7 @@ from settings_library.utils_logging import MixinLoggingSettings -class Settings(BaseCustomSettings, MixinLoggingSettings): - """Dask-sidecar app settings""" - +class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): SC_BUILD_TARGET: str | None = None SC_BOOT_MODE: str | None = None LOG_LEVEL: Annotated[ @@ -22,15 +20,11 @@ class Settings(BaseCustomSettings, MixinLoggingSettings): ), ] = LogLevel.INFO - # sidecar config --- - SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME: str SIDECAR_COMP_SERVICES_SHARED_FOLDER: Path SIDECAR_INTERVAL_TO_CHECK_TASK_ABORTED_S: int | None = 5 - # dask config ---- - DASK_START_AS_SCHEDULER: bool | None = Field( default=False, description="If this env is set, then the app boots as scheduler" ) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py index cc061d6cd39d..e9d24b30ea8f 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py @@ -18,7 +18,7 @@ from ._meta import print_dask_sidecar_banner from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion -from .settings import Settings +from .settings import ApplicationSettings _logger = logging.getLogger(__name__) @@ -53,7 +53,7 @@ def exit_gracefully(self, *_args): async def dask_setup(worker: distributed.Worker) -> None: """This is a special function recognized by the dask worker when starting with 
flag --preload""" - settings = Settings.create_from_envs() + settings = ApplicationSettings.create_from_envs() # set up logging logging.basicConfig(level=settings.LOG_LEVEL.value) logging.root.setLevel(level=settings.LOG_LEVEL.value) From c174aea196949798392a077a22c100bc274eba99 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 10:37:03 +0200 Subject: [PATCH 05/52] clean --- .../simcore_service_dask_sidecar/settings.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py index cf740853ec75..cfff3bbf420a 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py @@ -4,14 +4,12 @@ from models_library.basic_types import LogLevel from pydantic import AliasChoices, Field, field_validator from servicelib.logging_utils_filtering import LoggerName, MessageSubstring -from settings_library.base import BaseCustomSettings +from settings_library.application import BaseApplicationSettings from settings_library.utils_logging import MixinLoggingSettings -class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): - SC_BUILD_TARGET: str | None = None - SC_BOOT_MODE: str | None = None - LOG_LEVEL: Annotated[ +class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): + DASK_SIDECAR_LOGLEVEL: Annotated[ LogLevel, Field( validation_alias=AliasChoices( @@ -23,7 +21,7 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME: str SIDECAR_COMP_SERVICES_SHARED_FOLDER: Path - SIDECAR_INTERVAL_TO_CHECK_TASK_ABORTED_S: int | None = 5 + DASK_SIDECAR_INTERVAL_TO_CHECK_TASK_ABORTED_S: int | None = 5 DASK_START_AS_SCHEDULER: bool | None = Field( default=False, description="If this env is set, then the 
app boots as scheduler" @@ -48,16 +46,7 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): description="is a dictionary that maps specific loggers (such as 'uvicorn.access' or 'gunicorn.access') to a list of log message patterns that should be filtered out.", ) - def as_scheduler(self) -> bool: - return bool(self.DASK_START_AS_SCHEDULER) - - def as_worker(self) -> bool: - as_worker = not self.as_scheduler() - if as_worker: - assert self.DASK_SCHEDULER_HOST is not None # nosec - return as_worker - - @field_validator("LOG_LEVEL", mode="before") + @field_validator("DASK_SIDECAR_LOGLEVEL", mode="before") @classmethod def _validate_loglevel(cls, value: Any) -> str: return cls.validate_log_level(f"{value}") From f61b4e05743369bb2610934d5b0a3015c79a3283 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 10:41:54 +0200 Subject: [PATCH 06/52] added RABBIT_MQ --- .../simcore_service_dask_sidecar/settings.py | 53 ++++++++++++------- services/docker-compose.yml | 5 ++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py index cfff3bbf420a..e0a3e41d3a58 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/settings.py @@ -5,6 +5,7 @@ from pydantic import AliasChoices, Field, field_validator from servicelib.logging_utils_filtering import LoggerName, MessageSubstring from settings_library.application import BaseApplicationSettings +from settings_library.rabbit import RabbitSettings from settings_library.utils_logging import MixinLoggingSettings @@ -23,28 +24,42 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): DASK_SIDECAR_INTERVAL_TO_CHECK_TASK_ABORTED_S: int | None = 5 - DASK_START_AS_SCHEDULER: bool | None = Field( - default=False, 
description="If this env is set, then the app boots as scheduler" - ) + DASK_START_AS_SCHEDULER: Annotated[ + bool | None, + Field(description="If this env is set, then the app boots as scheduler"), + ] = False - DASK_SCHEDULER_HOST: str | None = Field( - None, - description="Address of the scheduler to register (only if started as worker )", - ) + DASK_SCHEDULER_HOST: Annotated[ + str | None, + Field( + description="Address of the scheduler to register (only if started as worker )", + ), + ] = None - DASK_LOG_FORMAT_LOCAL_DEV_ENABLED: bool = Field( - default=False, - validation_alias=AliasChoices( - "DASK_LOG_FORMAT_LOCAL_DEV_ENABLED", - "LOG_FORMAT_LOCAL_DEV_ENABLED", + DASK_LOG_FORMAT_LOCAL_DEV_ENABLED: Annotated[ + bool, + Field( + validation_alias=AliasChoices( + "DASK_LOG_FORMAT_LOCAL_DEV_ENABLED", + "LOG_FORMAT_LOCAL_DEV_ENABLED", + ), + description="Enables local development log format. WARNING: make sure it is disabled if you want to have structured logs!", ), - description="Enables local development log format. 
WARNING: make sure it is disabled if you want to have structured logs!", - ) - DASK_LOG_FILTER_MAPPING: dict[LoggerName, list[MessageSubstring]] = Field( - default_factory=dict, - validation_alias=AliasChoices("DASK_LOG_FILTER_MAPPING", "LOG_FILTER_MAPPING"), - description="is a dictionary that maps specific loggers (such as 'uvicorn.access' or 'gunicorn.access') to a list of log message patterns that should be filtered out.", - ) + ] = False + DASK_LOG_FILTER_MAPPING: Annotated[ + dict[LoggerName, list[MessageSubstring]], + Field( + default_factory=dict, + validation_alias=AliasChoices( + "DASK_LOG_FILTER_MAPPING", "LOG_FILTER_MAPPING" + ), + description="is a dictionary that maps specific loggers (such as 'uvicorn.access' or 'gunicorn.access') to a list of log message patterns that should be filtered out.", + ), + ] + + DASK_SIDECAR_RABBITMQ: Annotated[ + RabbitSettings | None, Field(json_schema_extra={"auto_default_from_env": True}) + ] @field_validator("DASK_SIDECAR_LOGLEVEL", mode="before") @classmethod diff --git a/services/docker-compose.yml b/services/docker-compose.yml index f61c6156f47b..828c75709b1d 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -1164,6 +1164,11 @@ services: DASK_SIDECAR_LOGLEVEL: ${DASK_SIDECAR_LOGLEVEL} SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME: ${SWARM_STACK_NAME}_computational_shared_data SIDECAR_COMP_SERVICES_SHARED_FOLDER: ${SIDECAR_COMP_SERVICES_SHARED_FOLDER:-/home/scu/computational_shared_data} + RABBIT_HOST: ${RABBIT_HOST} + RABBIT_PASSWORD: ${RABBIT_PASSWORD} + RABBIT_PORT: ${RABBIT_PORT} + RABBIT_SECURE: ${RABBIT_SECURE} + RABBIT_USER: ${RABBIT_USER} networks: - computational_services_subnet secrets: *dask_tls_secrets From 4a44811eb1b2eadc3b20acf7b68d4e49f3a202a0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 10:45:02 +0200 Subject: [PATCH 07/52] refactor --- services/dask-sidecar/tests/unit/test_cli.py | 6 ++--- 
.../dask-sidecar/tests/unit/test_settings.py | 25 ------------------- 2 files changed, 3 insertions(+), 28 deletions(-) delete mode 100644 services/dask-sidecar/tests/unit/test_settings.py diff --git a/services/dask-sidecar/tests/unit/test_cli.py b/services/dask-sidecar/tests/unit/test_cli.py index 7a359d44cc0e..479a60f4bf62 100644 --- a/services/dask-sidecar/tests/unit/test_cli.py +++ b/services/dask-sidecar/tests/unit/test_cli.py @@ -10,7 +10,7 @@ from pytest_simcore.helpers.typing_env import EnvVarsDict from simcore_service_dask_sidecar._meta import API_VERSION from simcore_service_dask_sidecar.cli import main -from simcore_service_dask_sidecar.settings import Settings +from simcore_service_dask_sidecar.settings import ApplicationSettings from typer.testing import CliRunner @@ -28,5 +28,5 @@ def test_list_settings(cli_runner: CliRunner, app_environment: EnvVarsDict): result = cli_runner.invoke(main, ["settings", "--show-secrets", "--as-json"]) assert result.exit_code == os.EX_OK, result.output - settings = Settings(result.output) - assert settings.model_dump() == Settings.create_from_envs().model_dump() + settings = ApplicationSettings(result.output) + assert settings.model_dump() == ApplicationSettings.create_from_envs().model_dump() diff --git a/services/dask-sidecar/tests/unit/test_settings.py b/services/dask-sidecar/tests/unit/test_settings.py deleted file mode 100644 index 3f7596469a62..000000000000 --- a/services/dask-sidecar/tests/unit/test_settings.py +++ /dev/null @@ -1,25 +0,0 @@ -# pylint: disable=redefined-outer-name -# pylint: disable=unused-argument -# pylint: disable=unused-variable - - -import pytest -from pytest_simcore.helpers.typing_env import EnvVarsDict -from simcore_service_dask_sidecar.settings import Settings - - -def test_settings_as_worker( - app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch -): - settings = Settings.create_from_envs() - assert settings.as_worker() - - -def test_settings_as_scheduler( - app_environment: 
EnvVarsDict, monkeypatch: pytest.MonkeyPatch -): - assert app_environment.get("DASK_START_AS_SCHEDULER", None) != "1" - monkeypatch.setenv("DASK_START_AS_SCHEDULER", "1") - - settings = Settings.create_from_envs() - assert settings.as_scheduler() From 63d78ab56f0dcada75abecb892271df8c3e39835 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 11:22:16 +0200 Subject: [PATCH 08/52] added rabbitmq client --- .../computational_sidecar/errors.py | 6 +-- .../simcore_service_dask_sidecar/errors.py | 8 ++++ .../simcore_service_dask_sidecar/rabbitmq.py | 46 +++++++++++++++++++ .../src/simcore_service_dask_sidecar/tasks.py | 38 ++++++++------- 4 files changed, 77 insertions(+), 21 deletions(-) create mode 100644 services/dask-sidecar/src/simcore_service_dask_sidecar/errors.py create mode 100644 services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/errors.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/errors.py index 8e5d1e8794ff..009ae95f650e 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/errors.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/errors.py @@ -1,8 +1,4 @@ -from common_library.errors_classes import OsparcErrorMixin - - -class ComputationalSidecarRuntimeError(OsparcErrorMixin, RuntimeError): - ... 
+from ..errors import ComputationalSidecarRuntimeError class ServiceBadFormattedOutputError(ComputationalSidecarRuntimeError): diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/errors.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/errors.py new file mode 100644 index 000000000000..1400bf1a2699 --- /dev/null +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/errors.py @@ -0,0 +1,8 @@ +from common_library.errors_classes import OsparcErrorMixin + + +class ComputationalSidecarRuntimeError(OsparcErrorMixin, RuntimeError): ... + + +class ConfigurationError(ComputationalSidecarRuntimeError): + msg_template: str = "Application misconfiguration: {msg}" diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py new file mode 100644 index 000000000000..2140e4f40e02 --- /dev/null +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py @@ -0,0 +1,46 @@ +import contextlib +import logging +from typing import cast + +import distributed +from models_library.rabbitmq_messages import RabbitMessageBase +from servicelib.logging_utils import log_catch +from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive +from settings_library.rabbit import RabbitSettings + +from .errors import ConfigurationError + +logger = logging.getLogger(__name__) + + +async def on_startup( + worker: distributed.Worker, rabbit_settings: RabbitSettings +) -> None: + worker.rabbitmq_client = None + settings: RabbitSettings | None = rabbit_settings + if not settings: + logger.warning("Rabbit MQ client is de-activated in the settings") + return + await wait_till_rabbitmq_responsive(settings.dsn) + worker.rabbitmq_client = RabbitMQClient( + client_name="dask-sidecar", settings=settings + ) + + +async def on_shutdown(worker: distributed.Worker) -> None: + if worker.rabbitmq_client: + await worker.rabbitmq_client.close() + + +def 
get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: + if not worker.rabbitmq_client: + raise ConfigurationError( + msg="RabbitMQ client is not available. Please check the configuration." + ) + return cast(RabbitMQClient, worker.rabbitmq_client) + + +async def post_message(worker: distributed.Worker, message: RabbitMessageBase) -> None: + with log_catch(logger, reraise=False), contextlib.suppress(ConfigurationError): + # NOTE: if rabbitmq was not initialized the error does not need to flood the logs + await get_rabbitmq_client(worker).publish(message.channel_name, message) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py index e9d24b30ea8f..e744636eab5b 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py @@ -11,13 +11,15 @@ ContainerTaskParameters, LogFileUploadURL, ) -from distributed.worker import logger -from servicelib.logging_utils import config_all_loggers +from distributed.worker import logger as dask_worker_logger +from servicelib.logging_utils import config_all_loggers, log_context from settings_library.s3 import S3Settings from ._meta import print_dask_sidecar_banner from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion +from .rabbitmq import on_shutdown as shutdown_rabbitmq +from .rabbitmq import on_startup as setup_rabbitmq from .settings import ApplicationSettings _logger = logging.getLogger(__name__) @@ -40,7 +42,7 @@ def __init__(self, worker: distributed.Worker): def exit_gracefully(self, *_args): tasks = asyncio.all_tasks() - logger.warning( + dask_worker_logger.warning( "Application shutdown detected!\n %s", pformat([t.get_name() for t in tasks]), ) @@ -55,9 +57,9 @@ async def dask_setup(worker: distributed.Worker) -> None: """This is a special function 
recognized by the dask worker when starting with flag --preload""" settings = ApplicationSettings.create_from_envs() # set up logging - logging.basicConfig(level=settings.LOG_LEVEL.value) - logging.root.setLevel(level=settings.LOG_LEVEL.value) - logger.setLevel(level=settings.LOG_LEVEL.value) + logging.basicConfig(level=settings.DASK_SIDECAR_LOGLEVEL.value) + logging.root.setLevel(level=settings.DASK_SIDECAR_LOGLEVEL.value) + dask_worker_logger.setLevel(level=settings.DASK_SIDECAR_LOGLEVEL.value) # NOTE: Dask attaches a StreamHandler to the logger in distributed # removing them solves dual propagation of logs for handler in logging.getLogger("distributed").handlers: @@ -68,21 +70,25 @@ async def dask_setup(worker: distributed.Worker) -> None: tracing_settings=None, # no tracing for dask sidecar ) - logger.info("Setting up worker...") - logger.info("Settings: %s", pformat(settings.model_dump())) + with log_context(dask_worker_logger, logging.INFO, "Launch dask worker"): + dask_worker_logger.info("app settings: %s", settings.model_dump_json(indent=1)) - print_dask_sidecar_banner() + print_dask_sidecar_banner() - if threading.current_thread() is threading.main_thread(): - loop = asyncio.get_event_loop() - logger.info("We do have a running loop in the main thread: %s", f"{loop=}") + if threading.current_thread() is threading.main_thread(): + GracefulKiller(worker) - if threading.current_thread() is threading.main_thread(): - GracefulKiller(worker) + loop = asyncio.get_event_loop() + dask_worker_logger.info( + "We do have a running loop in the main thread: %s", f"{loop=}" + ) + if settings.DASK_SIDECAR_RABBITMQ: + await setup_rabbitmq(worker, settings.DASK_SIDECAR_RABBITMQ) -async def dask_teardown(_worker: distributed.Worker) -> None: - logger.warning("Tearing down worker!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") +async def dask_teardown(worker: distributed.Worker) -> None: + with log_context(dask_worker_logger, logging.INFO, "tear down dask worker"): + await 
shutdown_rabbitmq(worker) async def _run_computational_sidecar_async( From ecfca3117c0046016e8e7e0fc5a3eec368a59e60 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 13:33:03 +0200 Subject: [PATCH 09/52] creating dask plugin --- .../simcore_service_dask_sidecar/rabbitmq.py | 98 +++++++++++++------ .../src/simcore_service_dask_sidecar/tasks.py | 4 +- .../tests/unit/test_deployment.py | 4 +- 3 files changed, 74 insertions(+), 32 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py index 2140e4f40e02..60ae3beff7a1 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py @@ -1,46 +1,88 @@ -import contextlib import logging -from typing import cast import distributed from models_library.rabbitmq_messages import RabbitMessageBase -from servicelib.logging_utils import log_catch from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive from settings_library.rabbit import RabbitSettings from .errors import ConfigurationError -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) -async def on_startup( - worker: distributed.Worker, rabbit_settings: RabbitSettings -) -> None: - worker.rabbitmq_client = None - settings: RabbitSettings | None = rabbit_settings - if not settings: - logger.warning("Rabbit MQ client is de-activated in the settings") - return - await wait_till_rabbitmq_responsive(settings.dsn) - worker.rabbitmq_client = RabbitMQClient( - client_name="dask-sidecar", settings=settings - ) +class RabbitMQPlugin(distributed.WorkerPlugin): + """Dask Worker Plugin for RabbitMQ integration""" + name = "rabbitmq_plugin" + _client: RabbitMQClient | None = None + _settings: RabbitSettings | None = None -async def on_shutdown(worker: distributed.Worker) -> None: - if 
worker.rabbitmq_client: - await worker.rabbitmq_client.close() + def __init__(self, settings: RabbitSettings): + self._settings = settings + async def setup(self, worker: distributed.Worker) -> None: + """Called when the plugin is attached to a worker""" + _logger.info("Setting up RabbitMQ plugin") + if not self._settings: + _logger.warning("RabbitMQ client is de-activated (no settings provided)") + return -def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: - if not worker.rabbitmq_client: - raise ConfigurationError( - msg="RabbitMQ client is not available. Please check the configuration." + await wait_till_rabbitmq_responsive(self._settings.dsn) + self._client = RabbitMQClient( + client_name="dask-sidecar", settings=self._settings ) - return cast(RabbitMQClient, worker.rabbitmq_client) + _logger.info("RabbitMQ client initialized successfully") + async def teardown(self, worker: distributed.Worker) -> None: + """Called when the worker shuts down or the plugin is removed""" + _logger.info("Tearing down RabbitMQ plugin") + if self._client: + await self._client.close() + self._client = None + _logger.info("RabbitMQ client closed") -async def post_message(worker: distributed.Worker, message: RabbitMessageBase) -> None: - with log_catch(logger, reraise=False), contextlib.suppress(ConfigurationError): - # NOTE: if rabbitmq was not initialized the error does not need to flood the logs - await get_rabbitmq_client(worker).publish(message.channel_name, message) + def get_client(self) -> RabbitMQClient: + """Returns the RabbitMQ client or raises an error if not available""" + if not self._client: + raise ConfigurationError( + msg="RabbitMQ client is not available. Please check the configuration." 
+ ) + return self._client + + async def publish(self, channel_name: str, message: RabbitMessageBase) -> None: + """Publishes a message to the specified channel""" + if self._client: + await self._client.publish(channel_name, message) + + +# async def on_startup( +# worker: distributed.Worker, rabbit_settings: RabbitSettings +# ) -> None: +# worker.rabbitmq_client = None +# settings: RabbitSettings | None = rabbit_settings +# if not settings: +# __logger.warning("Rabbit MQ client is de-activated in the settings") +# return +# await wait_till_rabbitmq_responsive(settings.dsn) +# worker.rabbitmq_client = RabbitMQClient( +# client_name="dask-sidecar", settings=settings +# ) + + +# async def on_shutdown(worker: distributed.Worker) -> None: +# if worker.rabbitmq_client: +# await worker.rabbitmq_client.close() + + +# def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: +# if not worker.rabbitmq_client: +# raise ConfigurationError( +# msg="RabbitMQ client is not available. Please check the configuration." 
+# ) +# return cast(RabbitMQClient, worker.rabbitmq_client) + + +# async def post_message(worker: distributed.Worker, message: RabbitMessageBase) -> None: +# with log_catch(__logger, reraise=False), contextlib.suppress(ConfigurationError): +# # NOTE: if rabbitmq was not initialized the error does not need to flood the logs +# await get_rabbitmq_client(worker).publish(message.channel_name, message) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py index e744636eab5b..a1e7bdeca62e 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py @@ -18,8 +18,8 @@ from ._meta import print_dask_sidecar_banner from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion +from .rabbitmq import RabbitMQPlugin from .rabbitmq import on_shutdown as shutdown_rabbitmq -from .rabbitmq import on_startup as setup_rabbitmq from .settings import ApplicationSettings _logger = logging.getLogger(__name__) @@ -83,7 +83,7 @@ async def dask_setup(worker: distributed.Worker) -> None: "We do have a running loop in the main thread: %s", f"{loop=}" ) if settings.DASK_SIDECAR_RABBITMQ: - await setup_rabbitmq(worker, settings.DASK_SIDECAR_RABBITMQ) + await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) async def dask_teardown(worker: distributed.Worker) -> None: diff --git a/services/dask-sidecar/tests/unit/test_deployment.py b/services/dask-sidecar/tests/unit/test_deployment.py index 08beb0cd2e7d..ee4fedf3d387 100644 --- a/services/dask-sidecar/tests/unit/test_deployment.py +++ b/services/dask-sidecar/tests/unit/test_deployment.py @@ -1,8 +1,8 @@ -from typing import Any, Dict +from typing import Any def test_sidecar_service_is_deployed_in_global_mode( - simcore_docker_compose: Dict[str, Any] + 
simcore_docker_compose: dict[str, Any], ): dask_sidecar_deploy_config = simcore_docker_compose["services"]["dask-sidecar"][ "deploy" From b7c4097b06548d9d23304fc58ba8b4226f9b84cc Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 22:47:19 +0200 Subject: [PATCH 10/52] clean --- .../dask-sidecar/src/simcore_service_dask_sidecar/tasks.py | 5 ++--- services/dask-sidecar/tests/unit/test__requirements.py | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py index a1e7bdeca62e..80cff026b171 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py @@ -19,7 +19,6 @@ from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion from .rabbitmq import RabbitMQPlugin -from .rabbitmq import on_shutdown as shutdown_rabbitmq from .settings import ApplicationSettings _logger = logging.getLogger(__name__) @@ -86,9 +85,9 @@ async def dask_setup(worker: distributed.Worker) -> None: await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) -async def dask_teardown(worker: distributed.Worker) -> None: +async def dask_teardown(_worker: distributed.Worker) -> None: with log_context(dask_worker_logger, logging.INFO, "tear down dask worker"): - await shutdown_rabbitmq(worker) + ... 
async def _run_computational_sidecar_async( diff --git a/services/dask-sidecar/tests/unit/test__requirements.py b/services/dask-sidecar/tests/unit/test__requirements.py index 737f4417a9fb..de6bd947e8c9 100644 --- a/services/dask-sidecar/tests/unit/test__requirements.py +++ b/services/dask-sidecar/tests/unit/test__requirements.py @@ -4,6 +4,7 @@ import re from pathlib import Path +from typing import TypeAlias import pytest @@ -16,11 +17,13 @@ def requirements_folder(project_slug_dir: Path) -> Path: return reqs_dir +NameVersionTuple: TypeAlias = tuple[str, str] + + def test_dask_requirements_in_sync(requirements_folder: Path): """If this test fails, do update requirements to re-sync all listings""" REQS_ENTRY_REGEX = re.compile(r"(\w+)==([\.\w]+)") - NameVersionTuple = tuple[str, str] def get_reqs(fname: str) -> set[NameVersionTuple]: return set(REQS_ENTRY_REGEX.findall((requirements_folder / fname).read_text())) From 756943f92172c0d1dc779dc6eba98f2c8d13b68b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 1 May 2025 23:16:53 +0200 Subject: [PATCH 11/52] renaming and testing --- .../simcore_service_dask_sidecar/rabbitmq.py | 88 ------------------- .../rabbitmq_plugin.py | 63 +++++++++++++ .../src/simcore_service_dask_sidecar/tasks.py | 16 ++-- services/dask-sidecar/tests/unit/conftest.py | 3 +- .../tests/unit/test_rabbitmq_plugin.py | 31 +++++++ 5 files changed, 103 insertions(+), 98 deletions(-) delete mode 100644 services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py create mode 100644 services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py create mode 100644 services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py deleted file mode 100644 index 60ae3beff7a1..000000000000 --- 
a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq.py +++ /dev/null @@ -1,88 +0,0 @@ -import logging - -import distributed -from models_library.rabbitmq_messages import RabbitMessageBase -from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive -from settings_library.rabbit import RabbitSettings - -from .errors import ConfigurationError - -_logger = logging.getLogger(__name__) - - -class RabbitMQPlugin(distributed.WorkerPlugin): - """Dask Worker Plugin for RabbitMQ integration""" - - name = "rabbitmq_plugin" - _client: RabbitMQClient | None = None - _settings: RabbitSettings | None = None - - def __init__(self, settings: RabbitSettings): - self._settings = settings - - async def setup(self, worker: distributed.Worker) -> None: - """Called when the plugin is attached to a worker""" - _logger.info("Setting up RabbitMQ plugin") - if not self._settings: - _logger.warning("RabbitMQ client is de-activated (no settings provided)") - return - - await wait_till_rabbitmq_responsive(self._settings.dsn) - self._client = RabbitMQClient( - client_name="dask-sidecar", settings=self._settings - ) - _logger.info("RabbitMQ client initialized successfully") - - async def teardown(self, worker: distributed.Worker) -> None: - """Called when the worker shuts down or the plugin is removed""" - _logger.info("Tearing down RabbitMQ plugin") - if self._client: - await self._client.close() - self._client = None - _logger.info("RabbitMQ client closed") - - def get_client(self) -> RabbitMQClient: - """Returns the RabbitMQ client or raises an error if not available""" - if not self._client: - raise ConfigurationError( - msg="RabbitMQ client is not available. Please check the configuration." 
- ) - return self._client - - async def publish(self, channel_name: str, message: RabbitMessageBase) -> None: - """Publishes a message to the specified channel""" - if self._client: - await self._client.publish(channel_name, message) - - -# async def on_startup( -# worker: distributed.Worker, rabbit_settings: RabbitSettings -# ) -> None: -# worker.rabbitmq_client = None -# settings: RabbitSettings | None = rabbit_settings -# if not settings: -# __logger.warning("Rabbit MQ client is de-activated in the settings") -# return -# await wait_till_rabbitmq_responsive(settings.dsn) -# worker.rabbitmq_client = RabbitMQClient( -# client_name="dask-sidecar", settings=settings -# ) - - -# async def on_shutdown(worker: distributed.Worker) -> None: -# if worker.rabbitmq_client: -# await worker.rabbitmq_client.close() - - -# def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: -# if not worker.rabbitmq_client: -# raise ConfigurationError( -# msg="RabbitMQ client is not available. Please check the configuration." 
-# ) -# return cast(RabbitMQClient, worker.rabbitmq_client) - - -# async def post_message(worker: distributed.Worker, message: RabbitMessageBase) -> None: -# with log_catch(__logger, reraise=False), contextlib.suppress(ConfigurationError): -# # NOTE: if rabbitmq was not initialized the error does not need to flood the logs -# await get_rabbitmq_client(worker).publish(message.channel_name, message) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py new file mode 100644 index 000000000000..e77b7fb1be8f --- /dev/null +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -0,0 +1,63 @@ +import logging + +import distributed +from models_library.rabbitmq_messages import RabbitMessageBase +from servicelib.logging_utils import log_catch, log_context +from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive +from settings_library.rabbit import RabbitSettings + +from .errors import ConfigurationError + +_logger = logging.getLogger(__name__) + + +class RabbitMQPlugin(distributed.WorkerPlugin): + """Dask Worker Plugin for RabbitMQ integration""" + + name = "rabbitmq_plugin" + _client: RabbitMQClient | None = None + _settings: RabbitSettings | None = None + + def __init__(self, settings: RabbitSettings): + self._settings = settings + + async def setup(self, worker: distributed.Worker) -> None: + """Called when the plugin is attached to a worker""" + if not self._settings: + _logger.warning("RabbitMQ client is de-activated (no settings provided)") + return + + with log_context( + _logger, + logging.INFO, + f"RabbitMQ client initialization for worker {worker.address}", + ): + await wait_till_rabbitmq_responsive(self._settings.dsn) + self._client = RabbitMQClient( + client_name="dask-sidecar", settings=self._settings + ) + + async def teardown(self, worker: distributed.Worker) -> None: + """Called when the worker shuts 
down or the plugin is removed""" + with log_context( + _logger, + logging.INFO, + f"RabbitMQ client teardown for worker {worker.address}", + ): + if self._client: + await self._client.close() + self._client = None + + def get_client(self) -> RabbitMQClient: + """Returns the RabbitMQ client or raises an error if not available""" + if not self._client: + raise ConfigurationError( + msg="RabbitMQ client is not available. Please check the configuration." + ) + return self._client + + async def publish(self, *, channel_name: str, message: RabbitMessageBase) -> None: + """Publishes a message to the specified channel""" + with log_catch(_logger, reraise=False): + if self._client: + await self._client.publish(channel_name, message) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py index 80cff026b171..2c518de8104b 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py @@ -18,7 +18,7 @@ from ._meta import print_dask_sidecar_banner from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion -from .rabbitmq import RabbitMQPlugin +from .rabbitmq_plugin import RabbitMQPlugin from .settings import ApplicationSettings _logger = logging.getLogger(__name__) @@ -41,7 +41,7 @@ def __init__(self, worker: distributed.Worker): def exit_gracefully(self, *_args): tasks = asyncio.all_tasks() - dask_worker_logger.warning( + _logger.warning( "Application shutdown detected!\n %s", pformat([t.get_name() for t in tasks]), ) @@ -69,8 +69,8 @@ async def dask_setup(worker: distributed.Worker) -> None: tracing_settings=None, # no tracing for dask sidecar ) - with log_context(dask_worker_logger, logging.INFO, "Launch dask worker"): - dask_worker_logger.info("app settings: %s", settings.model_dump_json(indent=1)) + with 
log_context(_logger, logging.INFO, "Launch dask worker"): + _logger.info("app settings: %s", settings.model_dump_json(indent=1)) print_dask_sidecar_banner() @@ -78,15 +78,13 @@ async def dask_setup(worker: distributed.Worker) -> None: GracefulKiller(worker) loop = asyncio.get_event_loop() - dask_worker_logger.info( - "We do have a running loop in the main thread: %s", f"{loop=}" - ) + _logger.info("We do have a running loop in the main thread: %s", f"{loop=}") if settings.DASK_SIDECAR_RABBITMQ: await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) -async def dask_teardown(_worker: distributed.Worker) -> None: - with log_context(dask_worker_logger, logging.INFO, "tear down dask worker"): +async def dask_teardown(worker: distributed.Worker) -> None: + with log_context(_logger, logging.INFO, f"tear down dask {worker.address}"): ... diff --git a/services/dask-sidecar/tests/unit/conftest.py b/services/dask-sidecar/tests/unit/conftest.py index 4d4801752d94..b3f2f1f35a33 100644 --- a/services/dask-sidecar/tests/unit/conftest.py +++ b/services/dask-sidecar/tests/unit/conftest.py @@ -37,6 +37,7 @@ "pytest_simcore.docker_swarm", "pytest_simcore.environment_configs", "pytest_simcore.faker_users_data", + "pytest_simcore.rabbit_service", "pytest_simcore.repository_paths", ] @@ -231,7 +232,7 @@ def creator() -> AnyUrl: open_file = fsspec.open(f"{new_remote_file}", mode="wt", **s3_storage_kwargs) with open_file as fp: fp.write( # type: ignore - f"This is the file contents of file #'{(len(list_of_created_files)+1):03}'\n" + f"This is the file contents of file #'{(len(list_of_created_files) + 1):03}'\n" ) for s in faker.sentences(5): fp.write(f"{s}\n") # type: ignore diff --git a/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py b/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py new file mode 100644 index 000000000000..b0ff910ba11b --- /dev/null +++ b/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py @@ -0,0 +1,31 @@ +import distributed 
+import pytest +from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict +from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.rabbit import RabbitSettings + +# Selection of core and tool services started in this swarm fixture (integration) +pytest_simcore_core_services_selection = [ + "rabbit", +] + +pytest_simcore_ops_services_selection = [] + + +@pytest.fixture +def app_environment( + app_environment: EnvVarsDict, + monkeypatch: pytest.MonkeyPatch, + rabbit_service: RabbitSettings, +) -> EnvVarsDict: + # configured as worker + envs = setenvs_from_dict( + monkeypatch, + { + "DASK_WORKER_RABBITMQ": rabbit_service.model_dump_json(), + }, + ) + return app_environment | envs + + +def test_rabbitmq_plugin_initializes(dask_client: distributed.Client): ... From f506ac38273752b9fbe2cec1a52f7269ee31cc57 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 18:32:00 +0200 Subject: [PATCH 12/52] rename files --- services/dask-sidecar/docker/boot.sh | 4 ++-- .../src/simcore_service_dask_sidecar/{tasks.py => worker.py} | 0 services/dask-sidecar/tests/unit/conftest.py | 4 ++-- services/dask-sidecar/tests/unit/test_tasks.py | 2 +- .../src/simcore_service_director_v2/modules/dask_client.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) rename services/dask-sidecar/src/simcore_service_dask_sidecar/{tasks.py => worker.py} (100%) diff --git a/services/dask-sidecar/docker/boot.sh b/services/dask-sidecar/docker/boot.sh index 89a4d14afc68..a7212f834a70 100755 --- a/services/dask-sidecar/docker/boot.sh +++ b/services/dask-sidecar/docker/boot.sh @@ -173,7 +173,7 @@ else exec watchmedo auto-restart --recursive --pattern="*.py;*/src/*" --ignore-patterns="*test*;pytest_simcore/*;setup.py;*ignore*" --ignore-directories -- \ dask worker "${DASK_SCHEDULER_URL}" \ --local-directory /tmp/dask-sidecar \ - --preload simcore_service_dask_sidecar.tasks \ + --preload 
simcore_service_dask_sidecar.worker \ --nworkers ${DASK_NPROCS} \ --nthreads "${DASK_NTHREADS}" \ --dashboard-address 8787 \ @@ -183,7 +183,7 @@ else else exec dask worker "${DASK_SCHEDULER_URL}" \ --local-directory /tmp/dask-sidecar \ - --preload simcore_service_dask_sidecar.tasks \ + --preload simcore_service_dask_sidecar.worker \ --nworkers ${DASK_NPROCS} \ --nthreads "${DASK_NTHREADS}" \ --dashboard-address 8787 \ diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py similarity index 100% rename from services/dask-sidecar/src/simcore_service_dask_sidecar/tasks.py rename to services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py diff --git a/services/dask-sidecar/tests/unit/conftest.py b/services/dask-sidecar/tests/unit/conftest.py index b3f2f1f35a33..7487e763c949 100644 --- a/services/dask-sidecar/tests/unit/conftest.py +++ b/services/dask-sidecar/tests/unit/conftest.py @@ -108,7 +108,7 @@ def local_cluster(app_environment: EnvVarsDict) -> Iterator[distributed.LocalClu with distributed.LocalCluster( worker_class=distributed.Worker, resources={"CPU": 10, "GPU": 10}, - preload="simcore_service_dask_sidecar.tasks", + preload="simcore_service_dask_sidecar.worker", ) as cluster: assert cluster assert isinstance(cluster, distributed.LocalCluster) @@ -131,7 +131,7 @@ async def async_local_cluster( async with distributed.LocalCluster( worker_class=distributed.Worker, resources={"CPU": 10, "GPU": 10}, - preload="simcore_service_dask_sidecar.tasks", + preload="simcore_service_dask_sidecar.worker", asynchronous=True, ) as cluster: assert cluster diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 423f057b3779..33160d0442ac 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -58,7 +58,7 @@ ) from simcore_service_dask_sidecar.dask_utils import 
_DEFAULT_MAX_RESOURCES from simcore_service_dask_sidecar.file_utils import _s3fs_settings_from_s3_settings -from simcore_service_dask_sidecar.tasks import run_computational_sidecar +from simcore_service_dask_sidecar.worker import run_computational_sidecar logger = logging.getLogger(__name__) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py b/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py index 63617a3eb617..a51da58fc2a3 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py @@ -223,7 +223,7 @@ def _comp_sidecar_fct( ) -> TaskOutputData: """This function is serialized by the Dask client and sent over to the Dask sidecar(s) Therefore, (screaming here) DO NOT MOVE THAT IMPORT ANYWHERE ELSE EVER!!""" - from simcore_service_dask_sidecar.tasks import ( # type: ignore[import-not-found] # this runs inside the dask-sidecar + from simcore_service_dask_sidecar.worker import ( # type: ignore[import-not-found] # this runs inside the dask-sidecar run_computational_sidecar, ) From 238ffdee09a269a31063f4c2f0c04e1e8f75080a Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 18:41:44 +0200 Subject: [PATCH 13/52] refactor --- .../simcore_service_dask_sidecar/app_utils.py | 20 ++++++++++++++++++ .../simcore_service_dask_sidecar/scheduler.py | 19 +++++++++++------ .../simcore_service_dask_sidecar/worker.py | 21 +++++-------------- 3 files changed, 38 insertions(+), 22 deletions(-) create mode 100644 services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py new file mode 100644 index 000000000000..13a86ee3dafe --- /dev/null +++ 
b/services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py @@ -0,0 +1,20 @@ +import logging + +from servicelib.logging_utils import config_all_loggers + +from .settings import ApplicationSettings + + +def setup_app_logging(settings: ApplicationSettings) -> None: + # set up logging + logging.basicConfig(level=settings.DASK_SIDECAR_LOGLEVEL.value) + logging.root.setLevel(level=settings.DASK_SIDECAR_LOGLEVEL.value) + # NOTE: Dask attaches a StreamHandler to the logger in distributed + # removing them solves dual propagation of logs + for handler in logging.getLogger("distributed").handlers: + logging.getLogger("distributed").removeHandler(handler) + config_all_loggers( + log_format_local_dev_enabled=settings.DASK_LOG_FORMAT_LOCAL_DEV_ENABLED, + logger_filter_mapping=settings.DASK_LOG_FILTER_MAPPING, + tracing_settings=None, # no tracing for dask sidecar + ) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py index 8d229c9c8a87..fdd5ef73fe4f 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py @@ -1,20 +1,27 @@ import logging -import dask.config import distributed +from servicelib.logging_utils import log_context from ._meta import print_dask_scheduler_banner +from .app_utils import setup_app_logging +from .settings import ApplicationSettings _logger = logging.getLogger(__name__) async def dask_setup(scheduler: distributed.Scheduler) -> None: - """This is a special function recognized by the dask worker when starting with flag --preload""" - _logger.info("Setting up scheduler...") + """This is a special function recognized by dask when starting with flag --preload""" assert scheduler # nosec - print(f"dask config: {dask.config.config}", flush=True) # noqa: T201 - print_dask_scheduler_banner() + + settings = ApplicationSettings.create_from_envs() + 
setup_app_logging(settings) + + with log_context(_logger, logging.INFO, "Launch dask scheduler"): + _logger.info("app settings: %s", settings.model_dump_json(indent=1)) + print_dask_scheduler_banner() async def dask_teardown(_worker: distributed.Worker) -> None: - _logger.info("Shutting down scheduler") + with log_context(_logger, logging.INFO, "Tear down dask scheduler"): + ... diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index 2c518de8104b..0b33737e6bde 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -11,11 +11,11 @@ ContainerTaskParameters, LogFileUploadURL, ) -from distributed.worker import logger as dask_worker_logger -from servicelib.logging_utils import config_all_loggers, log_context +from servicelib.logging_utils import log_context from settings_library.s3 import S3Settings from ._meta import print_dask_sidecar_banner +from .app_utils import setup_app_logging from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion from .rabbitmq_plugin import RabbitMQPlugin @@ -53,21 +53,9 @@ def exit_gracefully(self, *_args): async def dask_setup(worker: distributed.Worker) -> None: - """This is a special function recognized by the dask worker when starting with flag --preload""" + """This is a special function recognized by dask when starting with flag --preload""" settings = ApplicationSettings.create_from_envs() - # set up logging - logging.basicConfig(level=settings.DASK_SIDECAR_LOGLEVEL.value) - logging.root.setLevel(level=settings.DASK_SIDECAR_LOGLEVEL.value) - dask_worker_logger.setLevel(level=settings.DASK_SIDECAR_LOGLEVEL.value) - # NOTE: Dask attaches a StreamHandler to the logger in distributed - # removing them solves dual propagation of logs - for handler in 
logging.getLogger("distributed").handlers: - logging.getLogger("distributed").removeHandler(handler) - config_all_loggers( - log_format_local_dev_enabled=settings.DASK_LOG_FORMAT_LOCAL_DEV_ENABLED, - logger_filter_mapping=settings.DASK_LOG_FILTER_MAPPING, - tracing_settings=None, # no tracing for dask sidecar - ) + setup_app_logging(settings) with log_context(_logger, logging.INFO, "Launch dask worker"): _logger.info("app settings: %s", settings.model_dump_json(indent=1)) @@ -79,6 +67,7 @@ async def dask_setup(worker: distributed.Worker) -> None: loop = asyncio.get_event_loop() _logger.info("We do have a running loop in the main thread: %s", f"{loop=}") + if settings.DASK_SIDECAR_RABBITMQ: await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) From 634fbb5306245e1b0ec65b495403f68693faf755 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 18:45:07 +0200 Subject: [PATCH 14/52] refactor --- .../rabbitmq_plugin.py | 57 +++++++++++-------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index e77b7fb1be8f..99877a4ec95a 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -1,4 +1,5 @@ import logging +from collections.abc import Awaitable import distributed from models_library.rabbitmq_messages import RabbitMessageBase @@ -21,32 +22,42 @@ class RabbitMQPlugin(distributed.WorkerPlugin): def __init__(self, settings: RabbitSettings): self._settings = settings - async def setup(self, worker: distributed.Worker) -> None: + def setup(self, worker: distributed.Worker) -> Awaitable[None]: """Called when the plugin is attached to a worker""" - if not self._settings: - _logger.warning("RabbitMQ client is de-activated (no settings 
provided)") - return - - with log_context( - _logger, - logging.INFO, - f"RabbitMQ client initialization for worker {worker.address}", - ): - await wait_till_rabbitmq_responsive(self._settings.dsn) - self._client = RabbitMQClient( - client_name="dask-sidecar", settings=self._settings - ) - async def teardown(self, worker: distributed.Worker) -> None: + async def _() -> None: + if not self._settings: + _logger.warning( + "RabbitMQ client is de-activated (no settings provided)" + ) + return + + with log_context( + _logger, + logging.INFO, + f"RabbitMQ client initialization for worker {worker.address}", + ): + await wait_till_rabbitmq_responsive(self._settings.dsn) + self._client = RabbitMQClient( + client_name="dask-sidecar", settings=self._settings + ) + + return _() + + def teardown(self, worker: distributed.Worker) -> Awaitable[None]: """Called when the worker shuts down or the plugin is removed""" - with log_context( - _logger, - logging.INFO, - f"RabbitMQ client teardown for worker {worker.address}", - ): - if self._client: - await self._client.close() - self._client = None + + async def _() -> None: + with log_context( + _logger, + logging.INFO, + f"RabbitMQ client teardown for worker {worker.address}", + ): + if self._client: + await self._client.close() + self._client = None + + return _() def get_client(self) -> RabbitMQClient: """Returns the RabbitMQ client or raises an error if not available""" From aebe90485ef4177eed5eca5d8c3a89349ffac64c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 18:57:15 +0200 Subject: [PATCH 15/52] create message --- .../dask_utils.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py index d04682dac075..1bd675c9311a 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py 
+++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py @@ -17,6 +17,7 @@ from distributed.worker import get_worker from distributed.worker_state_machine import TaskState from models_library.progress_bar import ProgressReport +from models_library.rabbitmq_messages import LoggerRabbitMessage from servicelib.logging_utils import LogLevelInt, LogMessageStr, log_catch _logger = logging.getLogger(__name__) @@ -82,13 +83,32 @@ def publish_progress(self, report: ProgressReport) -> None: self._last_published_progress_value = rounded_value _logger.debug("PROGRESS: %s", rounded_value) - def publish_logs( + async def publish_logs( self, *, message: LogMessageStr, log_level: LogLevelInt, ) -> None: with log_catch(logger=_logger, reraise=False): + + base_message = LoggerRabbitMessage( + user_id=self.task_owner.user_id, + project_id=self.task_owner.project_id, + node_id=self.task_owner.node_id, + messages=[message], + log_level=log_level, + ) + if self.task_owner.has_parent: + assert self.task_owner.parent_project_id # nosec + assert self.task_owner.parent_node_id # nosec + parent_message = LoggerRabbitMessage( + user_id=self.task_owner.user_id, + project_id=self.task_owner.parent_project_id, + node_id=self.task_owner.parent_node_id, + messages=[message], + log_level=log_level, + ) + publish_event( self.logs, TaskLogEvent.from_dask_worker( From 2a9ea358efd278673c9189b7fd9b2f9cbe528ace Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 18:59:44 +0200 Subject: [PATCH 16/52] simplify --- .../src/simcore_service_dask_sidecar/rabbitmq_plugin.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index 99877a4ec95a..2f9ed17754d6 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ 
b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -2,8 +2,7 @@ from collections.abc import Awaitable import distributed -from models_library.rabbitmq_messages import RabbitMessageBase -from servicelib.logging_utils import log_catch, log_context +from servicelib.logging_utils import log_context from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive from settings_library.rabbit import RabbitSettings @@ -66,9 +65,3 @@ def get_client(self) -> RabbitMQClient: msg="RabbitMQ client is not available. Please check the configuration." ) return self._client - - async def publish(self, *, channel_name: str, message: RabbitMessageBase) -> None: - """Publishes a message to the specified channel""" - with log_catch(_logger, reraise=False): - if self._client: - await self._client.publish(channel_name, message) From a3d577b35e3a8a1a73c1769c59811bf000426062 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 19:04:07 +0200 Subject: [PATCH 17/52] simplify --- .../rabbitmq_plugin.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index 2f9ed17754d6..699174573347 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -65,3 +65,17 @@ def get_client(self) -> RabbitMQClient: msg="RabbitMQ client is not available. Please check the configuration." ) return self._client + + +def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: + """Returns the RabbitMQ client or raises an error if not available""" + if not worker.plugins: + raise ConfigurationError( + msg="RabbitMQ client is not available. Please check the configuration." 
+ ) + rabbitmq_plugin = worker.plugins.get(RabbitMQPlugin.name) + if not isinstance(rabbitmq_plugin, RabbitMQPlugin): + raise ConfigurationError( + msg="RabbitMQ client is not available. Please check the configuration." + ) + return rabbitmq_plugin.get_client() From 1b0a967990e1c154148c2ed1306b8166c0e943c5 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 5 May 2025 19:07:34 +0200 Subject: [PATCH 18/52] done --- .../src/simcore_service_dask_sidecar/dask_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py index 1bd675c9311a..9f9b413936b9 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py @@ -20,6 +20,8 @@ from models_library.rabbitmq_messages import LoggerRabbitMessage from servicelib.logging_utils import LogLevelInt, LogMessageStr, log_catch +from .rabbitmq_plugin import get_rabbitmq_client + _logger = logging.getLogger(__name__) @@ -90,7 +92,7 @@ async def publish_logs( log_level: LogLevelInt, ) -> None: with log_catch(logger=_logger, reraise=False): - + rabbitmq_client = get_rabbitmq_client(get_worker()) base_message = LoggerRabbitMessage( user_id=self.task_owner.user_id, project_id=self.task_owner.project_id, @@ -98,6 +100,7 @@ async def publish_logs( messages=[message], log_level=log_level, ) + await rabbitmq_client.publish(base_message.channel_name, base_message) if self.task_owner.has_parent: assert self.task_owner.parent_project_id # nosec assert self.task_owner.parent_node_id # nosec @@ -108,6 +111,7 @@ async def publish_logs( messages=[message], log_level=log_level, ) + await rabbitmq_client.publish(parent_message.channel_name, base_message) publish_event( self.logs, From 33a2e57f8c6382efccbc20aaef85b4dc443222c4 Mon Sep 17 00:00:00 2001 
From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 09:04:57 +0200 Subject: [PATCH 19/52] bootmode --- services/dask-sidecar/tests/unit/conftest.py | 3 +++ .../tests/unit/test_rabbitmq_plugin.py | 20 ------------------- .../dask-sidecar/tests/unit/test_tasks.py | 14 ++++++++----- 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/services/dask-sidecar/tests/unit/conftest.py b/services/dask-sidecar/tests/unit/conftest.py index 7487e763c949..45a586a30ff5 100644 --- a/services/dask-sidecar/tests/unit/conftest.py +++ b/services/dask-sidecar/tests/unit/conftest.py @@ -25,6 +25,7 @@ from pytest_mock.plugin import MockerFixture from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict from pytest_simcore.helpers.typing_env import EnvVarsDict +from settings_library.rabbit import RabbitSettings from settings_library.s3 import S3Settings from simcore_service_dask_sidecar.file_utils import _s3fs_settings_from_s3_settings from yarl import URL @@ -81,6 +82,7 @@ def app_environment( monkeypatch: pytest.MonkeyPatch, env_devel_dict: EnvVarsDict, shared_data_folder: Path, + rabbit_service: RabbitSettings, ) -> EnvVarsDict: # configured as worker envs = setenvs_from_dict( @@ -89,6 +91,7 @@ def app_environment( # .env-devel **env_devel_dict, # Variables directly define inside Dockerfile + "DASK_WORKER_RABBITMQ": rabbit_service.model_dump_json(), "SC_BOOT_MODE": "debug", "SIDECAR_LOGLEVEL": "DEBUG", "SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME": "simcore_computational_shared_data", diff --git a/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py b/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py index b0ff910ba11b..1510cc99e079 100644 --- a/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py +++ b/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py @@ -1,8 +1,4 @@ import distributed -import pytest -from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict -from pytest_simcore.helpers.typing_env 
import EnvVarsDict -from settings_library.rabbit import RabbitSettings # Selection of core and tool services started in this swarm fixture (integration) pytest_simcore_core_services_selection = [ @@ -12,20 +8,4 @@ pytest_simcore_ops_services_selection = [] -@pytest.fixture -def app_environment( - app_environment: EnvVarsDict, - monkeypatch: pytest.MonkeyPatch, - rabbit_service: RabbitSettings, -) -> EnvVarsDict: - # configured as worker - envs = setenvs_from_dict( - monkeypatch, - { - "DASK_WORKER_RABBITMQ": rabbit_service.model_dump_json(), - }, - ) - return app_environment | envs - - def test_rabbitmq_plugin_initializes(dask_client: distributed.Client): ... diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 33160d0442ac..96057d52b3dc 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -60,7 +60,11 @@ from simcore_service_dask_sidecar.file_utils import _s3fs_settings_from_s3_settings from simcore_service_dask_sidecar.worker import run_computational_sidecar -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) + +pytest_simcore_core_services_selection = [ + "rabbit", +] @pytest.fixture() @@ -159,7 +163,10 @@ def _bash_check_env_exist(variable_name: str, variable_value: str) -> list[str]: ] -@pytest.fixture(params=list(BootMode), ids=str) +@pytest.fixture( + params=list(BootMode), + ids=lambda v: f"boot_mode.{v.name}", +) def boot_mode(request: pytest.FixtureRequest) -> BootMode: return request.param @@ -443,9 +450,6 @@ def caplog_info_level( yield caplog -# from pydantic.json_schema import JsonDict - - @pytest.fixture def mocked_get_image_labels( integration_version: version.Version, mocker: MockerFixture From 7fb3592ec87e9aff76d3875f81ea5b87c97dcc51 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 09:15:09 +0200 Subject: [PATCH 20/52] cleanup --- 
.../computational_sidecar/core.py | 62 +++++++++++-------- .../computational_sidecar/docker_utils.py | 2 +- .../dask_utils.py | 4 +- .../simcore_service_dask_sidecar/scheduler.py | 2 +- .../{app_utils.py => utils/logs.py} | 2 +- .../simcore_service_dask_sidecar/worker.py | 2 +- 6 files changed, 41 insertions(+), 33 deletions(-) rename services/dask-sidecar/src/simcore_service_dask_sidecar/{app_utils.py => utils/logs.py} (94%) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py index 8083caa0deb5..53c79ace1e36 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py @@ -160,7 +160,7 @@ async def _retrieve_output_data( async def _publish_sidecar_log( self, log: LogMessageStr, log_level: LogLevelInt = logging.INFO ) -> None: - self.task_publishers.publish_logs( + await self.task_publishers.publish_logs( message=f"[sidecar] {log}", log_level=log_level ) @@ -174,14 +174,18 @@ async def run(self, command: list[str]) -> TaskOutputData: settings = ApplicationSettings.create_from_envs() run_id = f"{uuid4()}" - async with Docker() as docker_client, TaskSharedVolumes( - Path(f"{settings.SIDECAR_COMP_SERVICES_SHARED_FOLDER}/{run_id}") - ) as task_volumes, ProgressBarData( - num_steps=3, - step_weights=[5 / 100, 90 / 100, 5 / 100], - progress_report_cb=self.task_publishers.publish_progress, - description="running", - ) as progress_bar: + async with ( + Docker() as docker_client, + TaskSharedVolumes( + Path(f"{settings.SIDECAR_COMP_SERVICES_SHARED_FOLDER}/{run_id}") + ) as task_volumes, + ProgressBarData( + num_steps=3, + step_weights=[5 / 100, 90 / 100, 5 / 100], + progress_report_cb=self.task_publishers.publish_progress, + description="running", + ) as progress_bar, + ): # PRE-PROCESSING await pull_image( 
docker_client, @@ -216,24 +220,28 @@ async def run(self, command: list[str]) -> TaskOutputData: ) await progress_bar.update() # NOTE: (1 step weighting 5%) # PROCESSING (1 step weighted 90%) - async with managed_container( - docker_client, - config, - name=f"{self.task_parameters.image.split(sep='/')[-1]}_{run_id}", - ) as container, progress_bar.sub_progress( - 100, description="processing" - ) as processing_progress_bar, managed_monitor_container_log_task( - container=container, - progress_regexp=image_labels.get_progress_regexp(), - service_key=self.task_parameters.image, - service_version=self.task_parameters.tag, - task_publishers=self.task_publishers, - integration_version=image_labels.get_integration_version(), - task_volumes=task_volumes, - log_file_url=self.log_file_url, - log_publishing_cb=self._publish_sidecar_log, - s3_settings=self.s3_settings, - progress_bar=processing_progress_bar, + async with ( + managed_container( + docker_client, + config, + name=f"{self.task_parameters.image.split(sep='/')[-1]}_{run_id}", + ) as container, + progress_bar.sub_progress( + 100, description="processing" + ) as processing_progress_bar, + managed_monitor_container_log_task( + container=container, + progress_regexp=image_labels.get_progress_regexp(), + service_key=self.task_parameters.image, + service_version=self.task_parameters.tag, + task_publishers=self.task_publishers, + integration_version=image_labels.get_integration_version(), + task_volumes=task_volumes, + log_file_url=self.log_file_url, + log_publishing_cb=self._publish_sidecar_log, + s3_settings=self.s3_settings, + progress_bar=processing_progress_bar, + ), ): await container.start() await self._publish_sidecar_log( diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py index d50e84a34182..bcd2d400dbd6 100644 --- 
a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py @@ -188,7 +188,7 @@ async def _parse_and_publish_logs( if progress_value is not None: await progress_bar.set_(round(progress_value * 100.0)) - task_publishers.publish_logs( + await task_publishers.publish_logs( message=log_line, log_level=guess_message_log_level(log_line) ) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py index 9f9b413936b9..6df8c218baa8 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py @@ -138,7 +138,7 @@ async def cancel_task(task_name: str) -> None: if task := next( (t for t in asyncio.all_tasks() if t.get_name() == task_name), None ): - task_publishers.publish_logs( + await task_publishers.publish_logs( message="[sidecar] cancelling task...", log_level=logging.INFO ) task.cancel() @@ -158,7 +158,7 @@ async def periodicaly_check_if_aborted(task_name: str) -> None: yield except asyncio.CancelledError as exc: - task_publishers.publish_logs( + await task_publishers.publish_logs( message="[sidecar] task run was aborted", log_level=logging.INFO ) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py index fdd5ef73fe4f..4127fca2528b 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/scheduler.py @@ -4,8 +4,8 @@ from servicelib.logging_utils import log_context from ._meta import print_dask_scheduler_banner -from .app_utils import setup_app_logging from .settings import ApplicationSettings +from .utils.logs import setup_app_logging _logger = logging.getLogger(__name__) 
diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/logs.py similarity index 94% rename from services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py rename to services/dask-sidecar/src/simcore_service_dask_sidecar/utils/logs.py index 13a86ee3dafe..74b158de9e2e 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/app_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/logs.py @@ -2,7 +2,7 @@ from servicelib.logging_utils import config_all_loggers -from .settings import ApplicationSettings +from ..settings import ApplicationSettings def setup_app_logging(settings: ApplicationSettings) -> None: diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index 0b33737e6bde..711e53e4f363 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -15,11 +15,11 @@ from settings_library.s3 import S3Settings from ._meta import print_dask_sidecar_banner -from .app_utils import setup_app_logging from .computational_sidecar.core import ComputationalSidecar from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion from .rabbitmq_plugin import RabbitMQPlugin from .settings import ApplicationSettings +from .utils.logs import setup_app_logging _logger = logging.getLogger(__name__) From a3808854233f382a63eb57bab5a565bb24cd1f04 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 09:18:45 +0200 Subject: [PATCH 21/52] use construct --- .../src/simcore_service_dask_sidecar/dask_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py 
b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py index 6df8c218baa8..54850c0404c1 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py @@ -93,7 +93,7 @@ async def publish_logs( ) -> None: with log_catch(logger=_logger, reraise=False): rabbitmq_client = get_rabbitmq_client(get_worker()) - base_message = LoggerRabbitMessage( + base_message = LoggerRabbitMessage.model_construct( user_id=self.task_owner.user_id, project_id=self.task_owner.project_id, node_id=self.task_owner.node_id, @@ -104,7 +104,7 @@ async def publish_logs( if self.task_owner.has_parent: assert self.task_owner.parent_project_id # nosec assert self.task_owner.parent_node_id # nosec - parent_message = LoggerRabbitMessage( + parent_message = LoggerRabbitMessage.model_construct( user_id=self.task_owner.user_id, project_id=self.task_owner.parent_project_id, node_id=self.task_owner.parent_node_id, From 9c617adb3696f26dffe6331da2785531aef0b3df Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 09:57:24 +0200 Subject: [PATCH 22/52] removing TaskLogEvent --- .../container_tasks/events.py | 47 ------------------- .../dask_utils.py | 9 ---- .../utils/__init__.py | 0 .../dask-sidecar/tests/unit/test_tasks.py | 45 +++++++++++------- 4 files changed, 28 insertions(+), 73 deletions(-) create mode 100644 services/dask-sidecar/src/simcore_service_dask_sidecar/utils/__init__.py diff --git a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py index a27bb027e948..ea9292d483c4 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/events.py @@ -1,6 +1,4 @@ -import logging from abc 
import ABC, abstractmethod -from typing import TypeAlias import dask.typing from distributed.worker import get_worker @@ -85,48 +83,3 @@ def ensure_between_0_1(cls, v): if 0 <= v <= 1: return v return min(max(0, v), 1) - - -LogMessageStr: TypeAlias = str -LogLevelInt: TypeAlias = int - - -class TaskLogEvent(BaseTaskEvent): - log: LogMessageStr - log_level: LogLevelInt - - @staticmethod - def topic_name() -> str: - return "task_logs" - - @classmethod - def from_dask_worker( - cls, log: str, log_level: LogLevelInt, *, task_owner: TaskOwner - ) -> "TaskLogEvent": - worker = get_worker() - job_id = worker.get_current_task() - return cls( - job_id=_dask_key_to_dask_task_id(job_id), - log=log, - log_level=log_level, - task_owner=task_owner, - ) - - model_config = ConfigDict( - json_schema_extra={ - "examples": [ - { - "job_id": "simcore/services/comp/sleeper:1.1.0:projectid_ec7e595a-63ee-46a1-a04a-901b11b649f8:nodeid_39467d89-b659-4914-9359-c40b1b6d1d6d:uuid_5ee5c655-450d-4711-a3ec-32ffe16bc580", - "log": "some logs", - "log_level": logging.INFO, - "task_owner": { - "user_id": 32, - "project_id": "ec7e595a-63ee-46a1-a04a-901b11b649f8", - "node_id": "39467d89-b659-4914-9359-c40b1b6d1d6d", - "parent_project_id": None, - "parent_node_id": None, - }, - }, - ] - } - ) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py index 54850c0404c1..6e332cee0cbe 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py @@ -9,7 +9,6 @@ from dask_task_models_library.container_tasks.errors import TaskCancelledError from dask_task_models_library.container_tasks.events import ( BaseTaskEvent, - TaskLogEvent, TaskProgressEvent, ) from dask_task_models_library.container_tasks.io import TaskCancelEventName @@ -66,11 +65,9 @@ class TaskPublisher: task_owner: TaskOwner progress: distributed.Pub = 
field(init=False) _last_published_progress_value: float = -1 - logs: distributed.Pub = field(init=False) def __post_init__(self) -> None: self.progress = distributed.Pub(TaskProgressEvent.topic_name()) - self.logs = distributed.Pub(TaskLogEvent.topic_name()) def publish_progress(self, report: ProgressReport) -> None: rounded_value = round(report.percent_value, ndigits=2) @@ -113,12 +110,6 @@ async def publish_logs( ) await rabbitmq_client.publish(parent_message.channel_name, base_message) - publish_event( - self.logs, - TaskLogEvent.from_dask_worker( - log=message, log_level=log_level, task_owner=self.task_owner - ), - ) _logger.log(log_level, message) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/__init__.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 96057d52b3dc..22ff405e7bbe 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -8,7 +8,7 @@ import json import logging import re -from collections.abc import Callable, Coroutine, Iterable +from collections.abc import AsyncIterator, Callable, Coroutine, Iterable # copied out from dask from dataclasses import dataclass @@ -23,10 +23,7 @@ from common_library.json_serialization import json_dumps from dask_task_models_library.container_tasks.docker import DockerBasicAuth from dask_task_models_library.container_tasks.errors import ServiceRuntimeError -from dask_task_models_library.container_tasks.events import ( - TaskLogEvent, - TaskProgressEvent, -) +from dask_task_models_library.container_tasks.events import TaskProgressEvent from dask_task_models_library.container_tasks.io import ( FileUrl, TaskInputData, @@ -39,12 +36,14 @@ ) from faker import Faker from models_library.basic_types import EnvVarKey +from 
models_library.rabbitmq_messages import LoggerRabbitMessage from models_library.services import ServiceMetaDataPublished from models_library.services_resources import BootMode from packaging import version from pydantic import AnyUrl, SecretStr, TypeAdapter from pytest_mock.plugin import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict +from servicelib.rabbitmq._client import RabbitMQClient from settings_library.s3 import S3Settings from simcore_service_dask_sidecar.computational_sidecar.docker_utils import ( LEGACY_SERVICE_LOG_FILE_NAME, @@ -466,6 +465,19 @@ def mocked_get_image_labels( ) +@pytest.fixture +async def log_rabbit_client_parser( + create_rabbitmq_client: Callable[[str], RabbitMQClient], mocker: MockerFixture +) -> AsyncIterator[mock.AsyncMock]: + client = create_rabbitmq_client("dask_sidecar_pytest_logs_consumer") + mock = mocker.AsyncMock(return_value=True) + queue_name, _ = await client.subscribe( + LoggerRabbitMessage.get_channel_name(), mock, exclusive_queue=False + ) + yield mock + await client.unsubscribe(queue_name) + + def test_run_computational_sidecar_real_fct( caplog_info_level: pytest.LogCaptureFixture, event_loop: asyncio.AbstractEventLoop, @@ -474,6 +486,7 @@ def test_run_computational_sidecar_real_fct( sleeper_task: ServiceExampleParam, mocked_get_image_labels: mock.Mock, s3_settings: S3Settings, + log_rabbit_client_parser: mock.AsyncMock, ): output_data = run_computational_sidecar( **sleeper_task.sidecar_params(), @@ -484,10 +497,11 @@ def test_run_computational_sidecar_real_fct( sleeper_task.service_key, sleeper_task.service_version, ) - for event in [TaskProgressEvent, TaskLogEvent]: + for event in [TaskProgressEvent]: dask_subsystem_mock["dask_event_publish"].assert_any_call( name=event.topic_name() ) + log_rabbit_client_parser.assert_called_once() # check that the task produces expected logs for log in sleeper_task.expected_logs: @@ -561,13 +575,6 @@ def test_run_multiple_computational_sidecar_dask( 
mocked_get_image_labels.assert_called() -@pytest.fixture -def log_sub( - dask_client: distributed.Client, -) -> distributed.Sub: - return distributed.Sub(TaskLogEvent.topic_name(), client=dask_client) - - @pytest.fixture def progress_sub(dask_client: distributed.Client) -> distributed.Sub: return distributed.Sub(TaskProgressEvent.topic_name(), client=dask_client) @@ -579,10 +586,10 @@ def progress_sub(dask_client: distributed.Client) -> distributed.Sub: async def test_run_computational_sidecar_dask( dask_client: distributed.Client, sleeper_task: ServiceExampleParam, - log_sub: distributed.Sub, progress_sub: distributed.Sub, mocked_get_image_labels: mock.Mock, s3_settings: S3Settings, + log_rabbit_client_parser: mock.AsyncMock, ): future = dask_client.submit( run_computational_sidecar, @@ -607,7 +614,9 @@ async def test_run_computational_sidecar_dask( ), "ordering of progress values incorrectly sorted!" assert worker_progresses[0] == 0, "missing/incorrect initial progress value" assert worker_progresses[-1] == 1, "missing/incorrect final progress value" - worker_logs = [TaskLogEvent.model_validate_json(msg).log for msg in log_sub.buffer] + log_rabbit_client_parser.assert_called_once() + # worker_logs = [TaskLogEvent.model_validate_json(msg).log for msg in log_sub.buffer] + worker_logs = [] print(f"<-- we got {len(worker_logs)} lines of logs") for log in sleeper_task.expected_logs: @@ -641,9 +650,9 @@ async def test_run_computational_sidecar_dask( async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub( dask_client: distributed.Client, sidecar_task: Callable[..., ServiceExampleParam], - log_sub: distributed.Sub, progress_sub: distributed.Sub, mocked_get_image_labels: mock.Mock, + log_rabbit_client_parser: mock.AsyncMock, ): mocked_get_image_labels.assert_not_called() NUMBER_OF_LOGS = 20000 @@ -679,7 +688,9 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub assert worker_progresses[0] == 0, "missing/incorrect 
initial progress value" assert worker_progresses[-1] == 1, "missing/incorrect final progress value" - worker_logs = [TaskLogEvent.model_validate_json(msg).log for msg in log_sub.buffer] + log_rabbit_client_parser.assert_called_once() + # worker_logs = [TaskLogEvent.model_validate_json(msg).log for msg in log_sub.buffer] + worker_logs = [] # check all the awaited logs are in there filtered_worker_logs = filter(lambda log: "This is iteration" in log, worker_logs) assert len(list(filtered_worker_logs)) == NUMBER_OF_LOGS From d7d95aa9a04c44d0b4a79196902e478481d820aa Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 11:14:45 +0200 Subject: [PATCH 23/52] ruff --- packages/pytest-simcore/src/pytest_simcore/rabbit_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/pytest-simcore/src/pytest_simcore/rabbit_service.py b/packages/pytest-simcore/src/pytest_simcore/rabbit_service.py index 61aed94151a4..c42075704b06 100644 --- a/packages/pytest-simcore/src/pytest_simcore/rabbit_service.py +++ b/packages/pytest-simcore/src/pytest_simcore/rabbit_service.py @@ -43,7 +43,7 @@ def rabbit_env_vars_dict( assert f"{prefix}_rabbit" in docker_stack["services"] port = get_service_published_port( - "rabbit", env_vars_for_docker_compose["RABBIT_PORT"] + "rabbit", int(env_vars_for_docker_compose["RABBIT_PORT"]) ) return { From 732cbebe36615f1fad182565a29209b1018ca612 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 11:15:06 +0200 Subject: [PATCH 24/52] not in main thread only --- .../dask-sidecar/src/simcore_service_dask_sidecar/worker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index 711e53e4f363..ea0bb5e96e1f 100644 --- 
a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -68,8 +68,8 @@ async def dask_setup(worker: distributed.Worker) -> None: loop = asyncio.get_event_loop() _logger.info("We do have a running loop in the main thread: %s", f"{loop=}") - if settings.DASK_SIDECAR_RABBITMQ: - await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) + if settings.DASK_SIDECAR_RABBITMQ: + await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) async def dask_teardown(worker: distributed.Worker) -> None: From 2439aa74aca612bcccc87925c1fca35e2b572b4f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 13:52:05 +0200 Subject: [PATCH 25/52] use correct ENV --- services/dask-sidecar/tests/unit/conftest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/dask-sidecar/tests/unit/conftest.py b/services/dask-sidecar/tests/unit/conftest.py index 45a586a30ff5..a6b29096df23 100644 --- a/services/dask-sidecar/tests/unit/conftest.py +++ b/services/dask-sidecar/tests/unit/conftest.py @@ -15,6 +15,8 @@ import pytest import simcore_service_dask_sidecar from aiobotocore.session import AioBaseClient, get_session +from common_library.json_serialization import json_dumps +from common_library.serialization import model_dump_with_secrets from dask_task_models_library.container_tasks.protocol import TaskOwner from faker import Faker from models_library.projects import ProjectID @@ -91,7 +93,9 @@ def app_environment( # .env-devel **env_devel_dict, # Variables directly define inside Dockerfile - "DASK_WORKER_RABBITMQ": rabbit_service.model_dump_json(), + "DASK_SIDECAR_RABBITMQ": json_dumps( + model_dump_with_secrets(rabbit_service, show_secrets=True) + ), "SC_BOOT_MODE": "debug", "SIDECAR_LOGLEVEL": "DEBUG", "SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME": "simcore_computational_shared_data", From 
b6b033100fae80126ee9f7d28965ee1ef884f9f1 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 13:52:26 +0200 Subject: [PATCH 26/52] tests are almost good to go --- .../dask-sidecar/tests/unit/test_tasks.py | 47 +++++++++++++------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 22ff405e7bbe..e8e2376ed040 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -44,6 +44,7 @@ from pytest_mock.plugin import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict from servicelib.rabbitmq._client import RabbitMQClient +from servicelib.rabbitmq._constants import BIND_TO_ALL_TOPICS from settings_library.s3 import S3Settings from simcore_service_dask_sidecar.computational_sidecar.docker_utils import ( LEGACY_SERVICE_LOG_FILE_NAME, @@ -67,7 +68,9 @@ @pytest.fixture() -def dask_subsystem_mock(mocker: MockerFixture) -> dict[str, mock.Mock]: +def dask_subsystem_mock( + mocker: MockerFixture, create_rabbitmq_client: Callable[[str], RabbitMQClient] +) -> dict[str, mock.Mock]: # mock dask client dask_client_mock = mocker.patch("distributed.Client", autospec=True) @@ -107,6 +110,13 @@ def dask_subsystem_mock(mocker: MockerFixture) -> dict[str, mock.Mock]: autospec=True, return_value=False, ) + # mock dask rabbitmq plugin + mock_rabbitmq_client = create_rabbitmq_client("pytest_dask_sidecar_logs_publisher") + mocker.patch( + "simcore_service_dask_sidecar.dask_utils.get_rabbitmq_client", + autospec=True, + return_value=mock_rabbitmq_client, + ) return { "dask_client": dask_client_mock, @@ -151,10 +161,6 @@ def sidecar_params(self) -> dict[str, Any]: } -pytest_simcore_core_services_selection = ["postgres"] -pytest_simcore_ops_services_selection = [] - - def _bash_check_env_exist(variable_name: str, variable_value: str) -> list[str]: return 
[ f"if [ -z ${{{variable_name}+x}} ];then echo {variable_name} does not exist && exit 9;fi", @@ -472,7 +478,10 @@ async def log_rabbit_client_parser( client = create_rabbitmq_client("dask_sidecar_pytest_logs_consumer") mock = mocker.AsyncMock(return_value=True) queue_name, _ = await client.subscribe( - LoggerRabbitMessage.get_channel_name(), mock, exclusive_queue=False + LoggerRabbitMessage.get_channel_name(), + mock, + exclusive_queue=False, + topics=[BIND_TO_ALL_TOPICS], ) yield mock await client.unsubscribe(queue_name) @@ -501,7 +510,7 @@ def test_run_computational_sidecar_real_fct( dask_subsystem_mock["dask_event_publish"].assert_any_call( name=event.topic_name() ) - log_rabbit_client_parser.assert_called_once() + assert log_rabbit_client_parser.called # check that the task produces expected logs for log in sleeper_task.expected_logs: @@ -584,12 +593,13 @@ def progress_sub(dask_client: distributed.Client) -> distributed.Sub: "integration_version, boot_mode", [("1.0.0", BootMode.CPU)], indirect=True ) async def test_run_computational_sidecar_dask( - dask_client: distributed.Client, + app_environment: EnvVarsDict, sleeper_task: ServiceExampleParam, progress_sub: distributed.Sub, mocked_get_image_labels: mock.Mock, s3_settings: S3Settings, log_rabbit_client_parser: mock.AsyncMock, + dask_client: distributed.Client, ): future = dask_client.submit( run_computational_sidecar, @@ -614,9 +624,14 @@ async def test_run_computational_sidecar_dask( ), "ordering of progress values incorrectly sorted!" 
assert worker_progresses[0] == 0, "missing/incorrect initial progress value" assert worker_progresses[-1] == 1, "missing/incorrect final progress value" - log_rabbit_client_parser.assert_called_once() - # worker_logs = [TaskLogEvent.model_validate_json(msg).log for msg in log_sub.buffer] - worker_logs = [] + await asyncio.sleep(5) + assert log_rabbit_client_parser.called + worker_logs = [ + message + for msg in log_rabbit_client_parser.call_args_list + for message in LoggerRabbitMessage.model_validate_json(msg.args[0]).messages + ] + print(f"<-- we got {len(worker_logs)} lines of logs") for log in sleeper_task.expected_logs: @@ -688,9 +703,13 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub assert worker_progresses[0] == 0, "missing/incorrect initial progress value" assert worker_progresses[-1] == 1, "missing/incorrect final progress value" - log_rabbit_client_parser.assert_called_once() - # worker_logs = [TaskLogEvent.model_validate_json(msg).log for msg in log_sub.buffer] - worker_logs = [] + await asyncio.sleep(5) + assert log_rabbit_client_parser.called + worker_logs = [ + message + for msg in log_rabbit_client_parser.call_args_list + for message in LoggerRabbitMessage.model_validate_json(msg.args[0]).messages + ] # check all the awaited logs are in there filtered_worker_logs = filter(lambda log: "This is iteration" in log, worker_logs) assert len(list(filtered_worker_logs)) == NUMBER_OF_LOGS From 919b236f79cd3991d03fc897181a045115ea1e8b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 18:15:34 +0200 Subject: [PATCH 27/52] ongoing --- .../dask-sidecar/tests/unit/test_tasks.py | 60 +++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index e8e2376ed040..6a08a4bbb903 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ 
b/services/dask-sidecar/tests/unit/test_tasks.py @@ -8,6 +8,7 @@ import json import logging import re +import threading from collections.abc import AsyncIterator, Callable, Coroutine, Iterable # copied out from dask @@ -475,16 +476,55 @@ def mocked_get_image_labels( async def log_rabbit_client_parser( create_rabbitmq_client: Callable[[str], RabbitMQClient], mocker: MockerFixture ) -> AsyncIterator[mock.AsyncMock]: - client = create_rabbitmq_client("dask_sidecar_pytest_logs_consumer") - mock = mocker.AsyncMock(return_value=True) - queue_name, _ = await client.subscribe( - LoggerRabbitMessage.get_channel_name(), - mock, - exclusive_queue=False, - topics=[BIND_TO_ALL_TOPICS], + # Create a threading event to track when subscription is ready + ready_event = threading.Event() + shutdown_event = threading.Event() + the_mock = mocker.AsyncMock(return_value=True) + + # Worker function to process messages in a separate thread + def message_processor(a_mock: mock.AsyncMock): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + client = create_rabbitmq_client("dask_sidecar_pytest_logs_consumer") + + async def subscribe_and_process(a_mock: mock.AsyncMock): + queue_name, _ = await client.subscribe( + LoggerRabbitMessage.get_channel_name(), + a_mock, + exclusive_queue=False, + topics=[BIND_TO_ALL_TOPICS], + ) + ready_event.set() + + # Wait until the test is done + while not shutdown_event.is_set(): + await asyncio.sleep(0.1) + + # Cleanup + await client.unsubscribe(queue_name) + + loop.run_until_complete(subscribe_and_process(a_mock)) + loop.run_until_complete(client.close()) + loop.close() + + # Start the worker thread + worker = threading.Thread( + target=message_processor, kwargs={"a_mock": the_mock}, daemon=False ) - yield mock - await client.unsubscribe(queue_name) + worker.start() + + # Wait for subscription to be ready + assert ready_event.wait(timeout=10), "Failed to initialize RabbitMQ subscription" + + try: + yield the_mock + finally: + # Signal the 
worker thread to shut down + shutdown_event.set() + worker.join(timeout=5) + if worker.is_alive(): + _logger.warning("RabbitMQ worker thread did not terminate properly") def test_run_computational_sidecar_real_fct( @@ -670,7 +710,7 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub log_rabbit_client_parser: mock.AsyncMock, ): mocked_get_image_labels.assert_not_called() - NUMBER_OF_LOGS = 20000 + NUMBER_OF_LOGS = 200 future = dask_client.submit( run_computational_sidecar, **sidecar_task( From 52c78497b5730939355bfc9b1a94ca9e6ed80a97 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 22:47:27 +0200 Subject: [PATCH 28/52] tests are passing --- services/dask-sidecar/docker/boot.sh | 4 +-- .../rabbitmq_plugin.py | 16 +++++++++-- .../{utils.py => utils/gpus.py} | 0 services/dask-sidecar/tests/unit/test_cli.py | 4 +++ .../tests/unit/test_dask_utils.py | 28 ++++++++++--------- .../dask-sidecar/tests/unit/test_tasks.py | 12 ++++++-- .../dask-sidecar/tests/unit/test_utils.py | 8 ++++-- 7 files changed, 50 insertions(+), 22 deletions(-) rename services/dask-sidecar/src/simcore_service_dask_sidecar/{utils.py => utils/gpus.py} (100%) diff --git a/services/dask-sidecar/docker/boot.sh b/services/dask-sidecar/docker/boot.sh index a7212f834a70..e86c8518abc2 100755 --- a/services/dask-sidecar/docker/boot.sh +++ b/services/dask-sidecar/docker/boot.sh @@ -117,7 +117,7 @@ else fi # GPUs - num_gpus=$(python -c "from simcore_service_dask_sidecar.utils import num_available_gpus; print(num_available_gpus());") + num_gpus=$(python -c "from simcore_service_dask_sidecar.utils.gpus import num_available_gpus; print(num_available_gpus());") # RAM (is computed similarly as the default dask-sidecar computation) _value=$(python -c "import psutil; print(int(psutil.virtual_memory().total * $num_cpus/$(nproc)))") @@ -128,7 +128,7 @@ else # add the GPUs if there are any if [ "$num_gpus" -gt 0 ]; then - 
total_vram=$(python -c "from simcore_service_dask_sidecar.utils import video_memory; print(video_memory());") + total_vram=$(python -c "from simcore_service_dask_sidecar.utils.gpus import video_memory; print(video_memory());") resources="$resources,GPU=$num_gpus,VRAM=$total_vram" fi diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index 699174573347..eed7eabc330c 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -1,8 +1,10 @@ +import asyncio import logging +from asyncio import AbstractEventLoop from collections.abc import Awaitable import distributed -from servicelib.logging_utils import log_context +from servicelib.logging_utils import log_catch, log_context from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive from settings_library.rabbit import RabbitSettings @@ -15,6 +17,7 @@ class RabbitMQPlugin(distributed.WorkerPlugin): """Dask Worker Plugin for RabbitMQ integration""" name = "rabbitmq_plugin" + _loop: AbstractEventLoop | None = None _client: RabbitMQClient | None = None _settings: RabbitSettings | None = None @@ -36,6 +39,7 @@ async def _() -> None: logging.INFO, f"RabbitMQ client initialization for worker {worker.address}", ): + self._loop = asyncio.get_event_loop() await wait_till_rabbitmq_responsive(self._settings.dsn) self._client = RabbitMQClient( client_name="dask-sidecar", settings=self._settings @@ -53,7 +57,15 @@ async def _() -> None: f"RabbitMQ client teardown for worker {worker.address}", ): if self._client: - await self._client.close() + current_loop = asyncio.get_event_loop() + if self._loop != current_loop: + _logger.warning( + "RabbitMQ client is de-activated (loop mismatch)" + ) + assert self._loop # nosec + with log_catch(_logger, reraise=False): + await 
asyncio.wait_for(self._client.close(), timeout=5.0) + self._client = None return _() diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/gpus.py similarity index 100% rename from services/dask-sidecar/src/simcore_service_dask_sidecar/utils.py rename to services/dask-sidecar/src/simcore_service_dask_sidecar/utils/gpus.py diff --git a/services/dask-sidecar/tests/unit/test_cli.py b/services/dask-sidecar/tests/unit/test_cli.py index 479a60f4bf62..09762400f4e7 100644 --- a/services/dask-sidecar/tests/unit/test_cli.py +++ b/services/dask-sidecar/tests/unit/test_cli.py @@ -13,6 +13,10 @@ from simcore_service_dask_sidecar.settings import ApplicationSettings from typer.testing import CliRunner +pytest_simcore_core_services_selection = [ + "rabbit", +] + def test_cli_help_and_version(cli_runner: CliRunner): # invitations-maker --help diff --git a/services/dask-sidecar/tests/unit/test_dask_utils.py b/services/dask-sidecar/tests/unit/test_dask_utils.py index 214a95502009..a6779743ec33 100644 --- a/services/dask-sidecar/tests/unit/test_dask_utils.py +++ b/services/dask-sidecar/tests/unit/test_dask_utils.py @@ -6,7 +6,6 @@ import asyncio import concurrent.futures -import logging import time from collections.abc import AsyncIterator, Callable, Coroutine from typing import Any @@ -14,7 +13,7 @@ import distributed import pytest from dask_task_models_library.container_tasks.errors import TaskCancelledError -from dask_task_models_library.container_tasks.events import TaskLogEvent +from dask_task_models_library.container_tasks.events import TaskProgressEvent from dask_task_models_library.container_tasks.io import TaskCancelEventName from dask_task_models_library.container_tasks.protocol import TaskOwner from simcore_service_dask_sidecar.dask_utils import ( @@ -33,16 +32,20 @@ DASK_TASK_STARTED_EVENT = "task_started" DASK_TESTING_TIMEOUT_S = 25 +pytest_simcore_core_services_selection = [ + "rabbit", 
+] + def test_publish_event( dask_client: distributed.Client, job_id: str, task_owner: TaskOwner ): dask_pub = distributed.Pub("some_topic", client=dask_client) dask_sub = distributed.Sub("some_topic", client=dask_client) - event_to_publish = TaskLogEvent( + event_to_publish = TaskProgressEvent( job_id=job_id, - log="the log", - log_level=logging.INFO, + msg="the log", + progress=1, task_owner=task_owner, ) publish_event(dask_pub=dask_pub, event=event_to_publish) @@ -53,7 +56,7 @@ def test_publish_event( message = dask_sub.get(timeout=DASK_TESTING_TIMEOUT_S) assert message is not None assert isinstance(message, str) - received_task_log_event = TaskLogEvent.model_validate_json(message) + received_task_log_event = TaskProgressEvent.model_validate_json(message) assert received_task_log_event == event_to_publish @@ -62,8 +65,8 @@ async def test_publish_event_async( ): dask_pub = distributed.Pub("some_topic", client=async_dask_client) dask_sub = distributed.Sub("some_topic", client=async_dask_client) - event_to_publish = TaskLogEvent( - job_id=job_id, log="the log", log_level=logging.INFO, task_owner=task_owner + event_to_publish = TaskProgressEvent( + job_id=job_id, msg="the log", progress=2, task_owner=task_owner ) publish_event(dask_pub=dask_pub, event=event_to_publish) @@ -74,7 +77,7 @@ async def test_publish_event_async( assert isinstance(message, Coroutine) message = await message assert message is not None - received_task_log_event = TaskLogEvent.model_validate_json(message) + received_task_log_event = TaskProgressEvent.model_validate_json(message) assert received_task_log_event == event_to_publish @@ -117,11 +120,10 @@ async def _dask_sub_consumer_task(sub: distributed.Sub) -> None: async def _dask_publisher_task(pub: distributed.Pub) -> None: print("--> starting publisher task") - for n in range(NUMBER_OF_MESSAGES): - event_to_publish = TaskLogEvent( + for _ in range(NUMBER_OF_MESSAGES): + event_to_publish = TaskProgressEvent( job_id=job_id, - log=f"the log 
{n}", - log_level=logging.INFO, + progress=0.5, task_owner=task_owner, ) publish_event(dask_pub=pub, event=event_to_publish) diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 6a08a4bbb903..638f4b01cf1e 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -9,7 +9,7 @@ import logging import re import threading -from collections.abc import AsyncIterator, Callable, Coroutine, Iterable +from collections.abc import AsyncIterator, Callable, Iterable # copied out from dask from dataclasses import dataclass @@ -613,7 +613,7 @@ def test_run_multiple_computational_sidecar_dask( results = dask_client.gather(futures) assert results - assert not isinstance(results, Coroutine) + assert isinstance(results, list) # for result in results: # check that the task produce the expected data, not less not more for output_data in results: @@ -708,6 +708,7 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub progress_sub: distributed.Sub, mocked_get_image_labels: mock.Mock, log_rabbit_client_parser: mock.AsyncMock, + task_owner: TaskOwner, ): mocked_get_image_labels.assert_not_called() NUMBER_OF_LOGS = 200 @@ -745,6 +746,7 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub await asyncio.sleep(5) assert log_rabbit_client_parser.called + worker_logs = [ message for msg in log_rabbit_client_parser.call_args_list @@ -752,7 +754,11 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub ] # check all the awaited logs are in there filtered_worker_logs = filter(lambda log: "This is iteration" in log, worker_logs) - assert len(list(filtered_worker_logs)) == NUMBER_OF_LOGS + assert ( + len(list(filtered_worker_logs)) == (2 * NUMBER_OF_LOGS) + if task_owner.has_parent + else NUMBER_OF_LOGS + ) mocked_get_image_labels.assert_called() diff --git 
a/services/dask-sidecar/tests/unit/test_utils.py b/services/dask-sidecar/tests/unit/test_utils.py index f3d162952ff8..88e2d5ecec57 100644 --- a/services/dask-sidecar/tests/unit/test_utils.py +++ b/services/dask-sidecar/tests/unit/test_utils.py @@ -10,13 +10,17 @@ import pytest from pytest_mock.plugin import MockerFixture from pytest_simcore.helpers.typing_env import EnvVarsDict -from simcore_service_dask_sidecar.utils import num_available_gpus +from simcore_service_dask_sidecar.utils.gpus import num_available_gpus + +pytest_simcore_core_services_selection = [ + "rabbit", +] @pytest.fixture def mock_aiodocker(mocker: MockerFixture) -> mock.MagicMock: return mocker.patch( - "simcore_service_dask_sidecar.utils.aiodocker.Docker", autospec=True + "simcore_service_dask_sidecar.utils.gpus.aiodocker.Docker", autospec=True ) From 2b6e7391538f915a6eb0d93f0a3506d5180e2c82 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 6 May 2025 22:50:36 +0200 Subject: [PATCH 29/52] moved utils to folder --- .../computational_sidecar/core.py | 4 ++-- .../computational_sidecar/docker_utils.py | 4 ++-- .../{dask_utils.py => utils/dask.py} | 2 +- .../{file_utils.py => utils/files.py} | 0 .../simcore_service_dask_sidecar/worker.py | 6 +++++- services/dask-sidecar/tests/unit/conftest.py | 4 +++- .../tests/unit/test_dask_utils.py | 2 +- .../dask-sidecar/tests/unit/test_tasks.py | 18 +++++++++------- ...test_file_utils.py => test_utils_files.py} | 21 +++++++++++-------- .../{test_utils.py => test_utils_gpus.py} | 0 10 files changed, 36 insertions(+), 25 deletions(-) rename services/dask-sidecar/src/simcore_service_dask_sidecar/{dask_utils.py => utils/dask.py} (99%) rename services/dask-sidecar/src/simcore_service_dask_sidecar/{file_utils.py => utils/files.py} (100%) rename services/dask-sidecar/tests/unit/{test_file_utils.py => test_utils_files.py} (97%) rename services/dask-sidecar/tests/unit/{test_utils.py => test_utils_gpus.py} (100%) diff --git 
a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py index 53c79ace1e36..2bd094306fbd 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/core.py @@ -25,9 +25,9 @@ from settings_library.s3 import S3Settings from yarl import URL -from ..dask_utils import TaskPublisher -from ..file_utils import pull_file_from_remote, push_file_to_remote from ..settings import ApplicationSettings +from ..utils.dask import TaskPublisher +from ..utils.files import pull_file_from_remote, push_file_to_remote from .docker_utils import ( create_container_config, get_computational_shared_data_mount_point, diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py index bcd2d400dbd6..9b472fa2f1c4 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/computational_sidecar/docker_utils.py @@ -38,9 +38,9 @@ from servicelib.progress_bar import ProgressBarData from settings_library.s3 import S3Settings -from ..dask_utils import TaskPublisher -from ..file_utils import push_file_to_remote from ..settings import ApplicationSettings +from ..utils.dask import TaskPublisher +from ..utils.files import push_file_to_remote from .constants import LEGACY_SERVICE_LOG_FILE_NAME from .models import ( LEGACY_INTEGRATION_VERSION, diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py similarity index 99% rename from services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py rename to 
services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py index 6e332cee0cbe..92296a2407b6 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/dask_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py @@ -19,7 +19,7 @@ from models_library.rabbitmq_messages import LoggerRabbitMessage from servicelib.logging_utils import LogLevelInt, LogMessageStr, log_catch -from .rabbitmq_plugin import get_rabbitmq_client +from ..rabbitmq_plugin import get_rabbitmq_client _logger = logging.getLogger(__name__) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/files.py similarity index 100% rename from services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py rename to services/dask-sidecar/src/simcore_service_dask_sidecar/utils/files.py diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index ea0bb5e96e1f..a080dcb92957 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -16,9 +16,13 @@ from ._meta import print_dask_sidecar_banner from .computational_sidecar.core import ComputationalSidecar -from .dask_utils import TaskPublisher, get_current_task_resources, monitor_task_abortion from .rabbitmq_plugin import RabbitMQPlugin from .settings import ApplicationSettings +from .utils.dask import ( + TaskPublisher, + get_current_task_resources, + monitor_task_abortion, +) from .utils.logs import setup_app_logging _logger = logging.getLogger(__name__) diff --git a/services/dask-sidecar/tests/unit/conftest.py b/services/dask-sidecar/tests/unit/conftest.py index a6b29096df23..edc61b84db6e 100644 --- a/services/dask-sidecar/tests/unit/conftest.py +++ b/services/dask-sidecar/tests/unit/conftest.py @@ -29,7 +29,9 @@ from 
pytest_simcore.helpers.typing_env import EnvVarsDict from settings_library.rabbit import RabbitSettings from settings_library.s3 import S3Settings -from simcore_service_dask_sidecar.file_utils import _s3fs_settings_from_s3_settings +from simcore_service_dask_sidecar.utils.files import ( + _s3fs_settings_from_s3_settings, +) from yarl import URL pytest_plugins = [ diff --git a/services/dask-sidecar/tests/unit/test_dask_utils.py b/services/dask-sidecar/tests/unit/test_dask_utils.py index a6779743ec33..9a1f6c7d18ae 100644 --- a/services/dask-sidecar/tests/unit/test_dask_utils.py +++ b/services/dask-sidecar/tests/unit/test_dask_utils.py @@ -16,7 +16,7 @@ from dask_task_models_library.container_tasks.events import TaskProgressEvent from dask_task_models_library.container_tasks.io import TaskCancelEventName from dask_task_models_library.container_tasks.protocol import TaskOwner -from simcore_service_dask_sidecar.dask_utils import ( +from simcore_service_dask_sidecar.utils.dask import ( _DEFAULT_MAX_RESOURCES, TaskPublisher, get_current_task_resources, diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 638f4b01cf1e..62fab3633acb 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -57,8 +57,10 @@ LEGACY_INTEGRATION_VERSION, ImageLabels, ) -from simcore_service_dask_sidecar.dask_utils import _DEFAULT_MAX_RESOURCES -from simcore_service_dask_sidecar.file_utils import _s3fs_settings_from_s3_settings +from simcore_service_dask_sidecar.utils.dask import _DEFAULT_MAX_RESOURCES +from simcore_service_dask_sidecar.utils.files import ( + _s3fs_settings_from_s3_settings, +) from simcore_service_dask_sidecar.worker import run_computational_sidecar _logger = logging.getLogger(__name__) @@ -77,10 +79,10 @@ def dask_subsystem_mock( # mock tasks get worker and state dask_distributed_worker_mock = mocker.patch( - 
"simcore_service_dask_sidecar.dask_utils.get_worker", autospec=True + "simcore_service_dask_sidecar.utils.dask.get_worker", autospec=True ) dask_task_mock = mocker.patch( - "simcore_service_dask_sidecar.dask_utils.TaskState", autospec=True + "simcore_service_dask_sidecar.utils.dask.TaskState", autospec=True ) dask_task_mock.resource_restrictions = {} dask_distributed_worker_mock.return_value.state.tasks.get.return_value = ( @@ -99,22 +101,22 @@ def dask_subsystem_mock( ) # mock dask event publishing dask_utils_publish_event_mock = mocker.patch( - "simcore_service_dask_sidecar.dask_utils.distributed.Pub", + "simcore_service_dask_sidecar.utils.dask.distributed.Pub", autospec=True, ) mocker.patch( - "simcore_service_dask_sidecar.dask_utils.distributed.Sub", + "simcore_service_dask_sidecar.utils.dask.distributed.Sub", autospec=True, ) mocker.patch( - "simcore_service_dask_sidecar.dask_utils.is_current_task_aborted", + "simcore_service_dask_sidecar.utils.dask.is_current_task_aborted", autospec=True, return_value=False, ) # mock dask rabbitmq plugin mock_rabbitmq_client = create_rabbitmq_client("pytest_dask_sidecar_logs_publisher") mocker.patch( - "simcore_service_dask_sidecar.dask_utils.get_rabbitmq_client", + "simcore_service_dask_sidecar.utils.dask.get_rabbitmq_client", autospec=True, return_value=mock_rabbitmq_client, ) diff --git a/services/dask-sidecar/tests/unit/test_file_utils.py b/services/dask-sidecar/tests/unit/test_utils_files.py similarity index 97% rename from services/dask-sidecar/tests/unit/test_file_utils.py rename to services/dask-sidecar/tests/unit/test_utils_files.py index b31980b46a50..770d05e3cb87 100644 --- a/services/dask-sidecar/tests/unit/test_file_utils.py +++ b/services/dask-sidecar/tests/unit/test_utils_files.py @@ -19,7 +19,7 @@ from pytest_localftpserver.servers import ProcessFTPServer from pytest_mock.plugin import MockerFixture from settings_library.s3 import S3Settings -from simcore_service_dask_sidecar.file_utils import ( +from 
simcore_service_dask_sidecar.utils.files import ( _s3fs_settings_from_s3_settings, pull_file_from_remote, push_file_to_remote, @@ -310,14 +310,17 @@ async def test_pull_compressed_zip_file_from_remote( if remote_parameters.s3_settings: storage_kwargs = _s3fs_settings_from_s3_settings(remote_parameters.s3_settings) - with cast( - fsspec.core.OpenFile, - fsspec.open( - f"{destination_url}", - mode="wb", - **storage_kwargs, - ), - ) as dest_fp, local_zip_file_path.open("rb") as src_fp: + with ( + cast( + fsspec.core.OpenFile, + fsspec.open( + f"{destination_url}", + mode="wb", + **storage_kwargs, + ), + ) as dest_fp, + local_zip_file_path.open("rb") as src_fp, + ): dest_fp.write(src_fp.read()) # now we want to download that file so it becomes the source diff --git a/services/dask-sidecar/tests/unit/test_utils.py b/services/dask-sidecar/tests/unit/test_utils_gpus.py similarity index 100% rename from services/dask-sidecar/tests/unit/test_utils.py rename to services/dask-sidecar/tests/unit/test_utils_gpus.py From 2e6463c0ff17e22012266db9d70a6625948c190f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 08:48:52 +0200 Subject: [PATCH 30/52] cleanup --- .../container_tasks/io.py | 190 +++++++++++------- .../container_tasks/protocol.py | 56 +++--- .../tests/container_tasks/test_events.py | 44 ++-- .../tests/container_tasks/test_io.py | 11 +- .../tests/container_tasks/test_protocol.py | 6 +- 5 files changed, 175 insertions(+), 132 deletions(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/io.py b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/io.py index dc87c52b1210..71eecbbe362d 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/io.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/io.py @@ -17,6 +17,7 @@ StrictInt, StrictStr, ) +from pydantic.config import 
JsonDict TaskCancelEventName = "cancel_event_{}" @@ -24,18 +25,24 @@ class PortSchema(BaseModel): required: bool + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "required": True, + }, + { + "required": False, + }, + ] + } + ) + model_config = ConfigDict( extra="forbid", - json_schema_extra={ - "examples": [ - { - "required": True, - }, - { - "required": False, - }, - ] - }, + json_schema_extra=_update_json_schema_extra, ) @@ -43,20 +50,26 @@ class FilePortSchema(PortSchema): mapping: str | None = None url: AnyUrl + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "mapping": "some_filename.txt", + "url": "sftp://some_file_url", + "required": True, + }, + { + "required": False, + "url": "s3://another_file_url", + }, + ] + } + ) + model_config = ConfigDict( - json_schema_extra={ - "examples": [ - { - "mapping": "some_filename.txt", - "url": "sftp://some_file_url", - "required": True, - }, - { - "required": False, - "url": "s3://another_file_url", - }, - ] - } + json_schema_extra=_update_json_schema_extra, ) @@ -70,18 +83,27 @@ class FileUrl(BaseModel): default=None, description="the file MIME type", pattern=MIME_TYPE_RE ) + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "url": "https://some_file_url", + "file_mime_type": "application/json", + }, + { + "url": "https://some_file_url", + "file_mapping": "some_file_name.txt", + "file_mime_type": "application/json", + }, + ] + } + ) + model_config = ConfigDict( extra="forbid", - json_schema_extra={ - "examples": [ - {"url": "https://some_file_url", "file_mime_type": "application/json"}, - { - "url": "https://some_file_url", - "file_mapping": "some_file_name.txt", - "file_mime_type": "application/json", - }, - ] - }, + json_schema_extra=_update_json_schema_extra, ) @@ -99,18 +121,24 @@ class FileUrl(BaseModel): class 
TaskInputData(DictModel[ServicePortKey, PortValue]): + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "boolean_input": False, + "int_input": -45, + "float_input": 4564.45, + "string_input": "nobody thinks like a string", + "file_input": {"url": "s3://thatis_file_url"}, + }, + ] + } + ) + model_config = ConfigDict( - json_schema_extra={ - "examples": [ - { - "boolean_input": False, - "int_input": -45, - "float_input": 4564.45, - "string_input": "nobody thinks like a string", - "file_input": {"url": "s3://thatis_file_url"}, - }, - ] - } + json_schema_extra=_update_json_schema_extra, ) @@ -126,26 +154,32 @@ class TaskOutputDataSchema(DictModel[ServicePortKey, PortSchemaValue]): # does not work well in that case. For that reason, the schema is # sent as a json-schema instead of with a dynamically-created model class # - model_config = ConfigDict( - json_schema_extra={ - "examples": [ - { - "boolean_output": {"required": False}, - "int_output": {"required": True}, - "float_output": {"required": True}, - "string_output": {"required": False}, - "file_output": { - "required": True, - "url": "https://some_file_url", - "mapping": "the_output_filename", - }, - "optional_file_output": { - "required": False, - "url": "s3://one_file_url", + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "boolean_output": {"required": False}, + "int_output": {"required": True}, + "float_output": {"required": True}, + "string_output": {"required": False}, + "file_output": { + "required": True, + "url": "https://some_file_url", + "mapping": "the_output_filename", + }, + "optional_file_output": { + "required": False, + "url": "s3://one_file_url", + }, }, - }, - ] - } + ] + } + ) + + model_config = ConfigDict( + json_schema_extra=_update_json_schema_extra, ) @@ -181,16 +215,20 @@ def from_task_output( return cls.model_validate(data) - model_config = ConfigDict( - 
json_schema_extra={ - "examples": [ - { - "boolean_output": False, - "int_output": -45, - "float_output": 4564.45, - "string_output": "nobody thinks like a string", - "file_output": {"url": "s3://yet_another_file_url"}, - }, - ] - } - ) + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "boolean_output": False, + "int_output": -45, + "float_output": 4564.45, + "string_output": "nobody thinks like a string", + "file_output": {"url": "s3://yet_another_file_url"}, + }, + ] + } + ) + + model_config = ConfigDict(json_schema_extra=_update_json_schema_extra) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py index fd6acf554e02..27fe4e97995b 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py @@ -7,6 +7,7 @@ from models_library.services_resources import BootMode from models_library.users import UserID from pydantic import AnyUrl, BaseModel, ConfigDict, model_validator +from pydantic.config import JsonDict from settings_library.s3 import S3Settings from .docker import DockerBasicAuth @@ -44,25 +45,31 @@ def check_parent_valid(cls, values: dict[str, Any]) -> dict[str, Any]: raise ValueError(msg) return values + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "user_id": 32, + "project_id": "ec7e595a-63ee-46a1-a04a-901b11b649f8", + "node_id": "39467d89-b659-4914-9359-c40b1b6d1d6d", + "parent_project_id": None, + "parent_node_id": None, + }, + { + "user_id": 32, + "project_id": "ec7e595a-63ee-46a1-a04a-901b11b649f8", + "node_id": "39467d89-b659-4914-9359-c40b1b6d1d6d", + "parent_project_id": "887e595a-63ee-46a1-a04a-901b11b649f8", + "parent_node_id": 
"aa467d89-b659-4914-9359-c40b1b6d1d6d", + }, + ] + } + ) + model_config = ConfigDict( - json_schema_extra={ - "examples": [ - { - "user_id": 32, - "project_id": "ec7e595a-63ee-46a1-a04a-901b11b649f8", - "node_id": "39467d89-b659-4914-9359-c40b1b6d1d6d", - "parent_project_id": None, - "parent_node_id": None, - }, - { - "user_id": 32, - "project_id": "ec7e595a-63ee-46a1-a04a-901b11b649f8", - "node_id": "39467d89-b659-4914-9359-c40b1b6d1d6d", - "parent_project_id": "887e595a-63ee-46a1-a04a-901b11b649f8", - "parent_node_id": "aa467d89-b659-4914-9359-c40b1b6d1d6d", - }, - ] - } + json_schema_extra=_update_json_schema_extra, ) @@ -83,13 +90,17 @@ class ContainerTaskParameters(BaseModel): { "image": "ubuntu", "tag": "latest", - "input_data": TaskInputData.model_config["json_schema_extra"]["examples"][0], # type: ignore[index] - "output_data_keys": TaskOutputDataSchema.model_config["json_schema_extra"]["examples"][0], # type: ignore[index] + "input_data": TaskInputData.model_json_schema()["examples"][0], + "output_data_keys": TaskOutputDataSchema.model_json_schema()[ + "examples" + ][ + 0 + ], # type: ignore[index] "command": ["sleep 10", "echo hello"], "envs": {"MYENV": "is an env"}, "labels": {"io.simcore.thelabel": "is amazing"}, "boot_mode": BootMode.CPU.value, - "task_owner": TaskOwner.model_config["json_schema_extra"]["examples"][0], # type: ignore[index] + "task_owner": TaskOwner.model_json_schema()["examples"][0], }, ] } @@ -104,5 +115,4 @@ def __call__( docker_auth: DockerBasicAuth, log_file_url: LogFileUploadURL, s3_settings: S3Settings | None, - ) -> TaskOutputData: - ... + ) -> TaskOutputData: ... 
diff --git a/packages/dask-task-models-library/tests/container_tasks/test_events.py b/packages/dask-task-models-library/tests/container_tasks/test_events.py index 1aa4139720d6..2d49f7d0310d 100644 --- a/packages/dask-task-models-library/tests/container_tasks/test_events.py +++ b/packages/dask-task-models-library/tests/container_tasks/test_events.py @@ -5,12 +5,10 @@ # pylint:disable=protected-access # pylint:disable=too-many-arguments -import logging import pytest from dask_task_models_library.container_tasks.events import ( BaseTaskEvent, - TaskLogEvent, TaskProgressEvent, ) from dask_task_models_library.container_tasks.protocol import TaskOwner @@ -24,7 +22,7 @@ def test_task_event_abstract(): BaseTaskEvent(job_id="some_fake") # type: ignore -@pytest.mark.parametrize("model_cls", [TaskProgressEvent, TaskLogEvent]) +@pytest.mark.parametrize("model_cls", [TaskProgressEvent]) def test_events_models_examples(model_cls): examples = model_cls.model_config["json_schema_extra"]["examples"] @@ -37,13 +35,15 @@ def test_events_models_examples(model_cls): assert model_instance.topic_name() -@pytest.fixture -def job_id(faker: Faker) -> str: - return faker.pystr() +@pytest.fixture(params=["string", "bytes"]) +def job_id(faker: Faker, request: pytest.FixtureRequest) -> str | bytes: + return faker.pystr() if request.param == "string" else faker.pystr().encode() @pytest.fixture() -def mocked_dask_worker_job_id(mocker: MockerFixture, job_id: str) -> str: +def mocked_dask_worker_job_id( + mocker: MockerFixture, job_id: str | bytes +) -> str | bytes: mock_get_worker = mocker.patch( "dask_task_models_library.container_tasks.events.get_worker", autospec=True ) @@ -51,41 +51,41 @@ def mocked_dask_worker_job_id(mocker: MockerFixture, job_id: str) -> str: return job_id -@pytest.fixture(params=TaskOwner.model_config["json_schema_extra"]["examples"]) +@pytest.fixture(params=TaskOwner.model_json_schema()["examples"]) def task_owner(request: pytest.FixtureRequest) -> TaskOwner: return 
TaskOwner(**request.param) def test_task_progress_from_worker( - mocked_dask_worker_job_id: str, task_owner: TaskOwner + mocked_dask_worker_job_id: str | bytes, task_owner: TaskOwner ): event = TaskProgressEvent.from_dask_worker(0.7, task_owner=task_owner) - assert event.job_id == mocked_dask_worker_job_id - assert event.progress == 0.7 - - -def test_task_log_from_worker(mocked_dask_worker_job_id: str, task_owner: TaskOwner): - event = TaskLogEvent.from_dask_worker( - log="here is the amazing logs", log_level=logging.INFO, task_owner=task_owner + assert ( + event.job_id == mocked_dask_worker_job_id.decode() + if isinstance(mocked_dask_worker_job_id, bytes) + else mocked_dask_worker_job_id ) - - assert event.job_id == mocked_dask_worker_job_id - assert event.log == "here is the amazing logs" - assert event.log_level == logging.INFO + assert event.progress == 0.7 @pytest.mark.parametrize( "progress_value, expected_progress", [(1.5, 1), (-0.5, 0), (0.75, 0.75)] ) def test_task_progress_progress_value_is_capped_between_0_and_1( - mocked_dask_worker_job_id: str, + mocked_dask_worker_job_id: str | bytes, task_owner: TaskOwner, progress_value: float, expected_progress: float, ): event = TaskProgressEvent( - job_id=mocked_dask_worker_job_id, task_owner=task_owner, progress=progress_value + job_id=( + mocked_dask_worker_job_id.decode() + if isinstance(mocked_dask_worker_job_id, bytes) + else mocked_dask_worker_job_id + ), + task_owner=task_owner, + progress=progress_value, ) assert event assert event.progress == expected_progress diff --git a/packages/dask-task-models-library/tests/container_tasks/test_io.py b/packages/dask-task-models-library/tests/container_tasks/test_io.py index db6357d930cd..f5340d379c08 100644 --- a/packages/dask-task-models-library/tests/container_tasks/test_io.py +++ b/packages/dask-task-models-library/tests/container_tasks/test_io.py @@ -53,9 +53,9 @@ def _create_fake_outputs( a_file.write_text(faker.text(max_nb_chars=450)) assert a_file.exists() 
else: - jsonable_data[ - key - ] = "some value just for testing, does not represent any kind of type" + jsonable_data[key] = ( + "some value just for testing, does not represent any kind of type" + ) if jsonable_data: output_file = output_folder / faker.file_name() with output_file.open("wt") as fp: @@ -69,10 +69,7 @@ def _create_fake_outputs( def test_create_task_output_from_task_with_optional_fields_as_required( tmp_path: Path, optional_fields_set: bool, faker: Faker ): - for schema_example in TaskOutputDataSchema.model_config["json_schema_extra"][ - "examples" - ]: - + for schema_example in TaskOutputDataSchema.model_json_schema()["examples"]: task_output_schema = TaskOutputDataSchema.model_validate(schema_example) outputs_file_name = _create_fake_outputs( task_output_schema, tmp_path, optional_fields_set, faker diff --git a/packages/dask-task-models-library/tests/container_tasks/test_protocol.py b/packages/dask-task-models-library/tests/container_tasks/test_protocol.py index 3c70924a0437..68f8aec751ae 100644 --- a/packages/dask-task-models-library/tests/container_tasks/test_protocol.py +++ b/packages/dask-task-models-library/tests/container_tasks/test_protocol.py @@ -9,7 +9,7 @@ @pytest.mark.parametrize("model_cls", [TaskOwner, ContainerTaskParameters]) def test_events_models_examples(model_cls): - examples = model_cls.model_config["json_schema_extra"]["examples"] + examples = model_cls.model_json_schema()["examples"] for index, example in enumerate(examples): print(f"{index:-^10}:\n", example) @@ -19,9 +19,7 @@ def test_events_models_examples(model_cls): def test_task_owner_parent_valid(faker: Faker): - invalid_task_owner_example = TaskOwner.model_config["json_schema_extra"][ - "examples" - ][0] + invalid_task_owner_example = TaskOwner.model_json_schema()["examples"][0] invalid_task_owner_example["parent_project_id"] = faker.uuid4() assert invalid_task_owner_example["parent_node_id"] is None with pytest.raises(ValidationError, match=r".+ are None or both are 
set!"): From d144bf1ff716a152bf8e916d28defdbfb5db0165 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 08:56:17 +0200 Subject: [PATCH 31/52] removed logs sub --- .../modules/comp_scheduler/_scheduler_dask.py | 27 ------------------- .../modules/dask_client.py | 1 - .../utils/dask_client_utils.py | 4 --- .../tests/unit/test_modules_dask_client.py | 5 +--- 4 files changed, 1 insertion(+), 36 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py index cb133a73dd4d..77a4c807e6e8 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/comp_scheduler/_scheduler_dask.py @@ -8,7 +8,6 @@ import arrow from dask_task_models_library.container_tasks.errors import TaskCancelledError from dask_task_models_library.container_tasks.events import ( - TaskLogEvent, TaskProgressEvent, ) from dask_task_models_library.container_tasks.io import TaskOutputData @@ -38,7 +37,6 @@ ) from ...utils.dask_client_utils import TaskHandlers from ...utils.rabbitmq import ( - publish_service_log, publish_service_progress, publish_service_resource_tracking_stopped, publish_service_stopped_metrics, @@ -92,7 +90,6 @@ def __post_init__(self) -> None: self.dask_clients_pool.register_handlers( TaskHandlers( self._task_progress_change_handler, - self._task_log_change_handler, ) ) @@ -378,27 +375,3 @@ async def _task_progress_change_handler(self, event: str) -> None: node_id=node_id, progress=task_progress_event.progress, ) - - async def _task_log_change_handler(self, event: str) -> None: - with log_catch(_logger, reraise=False): - task_log_event = TaskLogEvent.model_validate_json(event) - _logger.debug("received task log update: %s", task_log_event) - await 
publish_service_log( - self.rabbitmq_client, - user_id=task_log_event.task_owner.user_id, - project_id=task_log_event.task_owner.project_id, - node_id=task_log_event.task_owner.node_id, - log=task_log_event.log, - log_level=task_log_event.log_level, - ) - if task_log_event.task_owner.has_parent: - assert task_log_event.task_owner.parent_project_id # nosec - assert task_log_event.task_owner.parent_node_id # nosec - await publish_service_log( - self.rabbitmq_client, - user_id=task_log_event.task_owner.user_id, - project_id=task_log_event.task_owner.parent_project_id, - node_id=task_log_event.task_owner.parent_node_id, - log=task_log_event.log, - log_level=task_log_event.log_level, - ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py b/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py index a51da58fc2a3..6ac294238e84 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dask_client.py @@ -187,7 +187,6 @@ async def delete(self) -> None: def register_handlers(self, task_handlers: TaskHandlers) -> None: _event_consumer_map = [ (self.backend.progress_sub, task_handlers.task_progress_handler), - (self.backend.logs_sub, task_handlers.task_log_handler), ] self._subscribed_tasks = [ asyncio.create_task( diff --git a/services/director-v2/src/simcore_service_director_v2/utils/dask_client_utils.py b/services/director-v2/src/simcore_service_director_v2/utils/dask_client_utils.py index 0ec66eeabdd8..34e11952d314 100644 --- a/services/director-v2/src/simcore_service_director_v2/utils/dask_client_utils.py +++ b/services/director-v2/src/simcore_service_director_v2/utils/dask_client_utils.py @@ -6,7 +6,6 @@ import distributed from dask_task_models_library.container_tasks.events import ( - TaskLogEvent, TaskProgressEvent, ) from models_library.clusters import ClusterAuthentication, TLSAuthentication @@ -19,7 +18,6 @@ 
@dataclass class TaskHandlers: task_progress_handler: Callable[[str], Awaitable[None]] - task_log_handler: Callable[[str], Awaitable[None]] logger = logging.getLogger(__name__) @@ -30,13 +28,11 @@ class DaskSubSystem: client: distributed.Client scheduler_id: str progress_sub: distributed.Sub = field(init=False) - logs_sub: distributed.Sub = field(init=False) def __post_init__(self) -> None: self.progress_sub = distributed.Sub( TaskProgressEvent.topic_name(), client=self.client ) - self.logs_sub = distributed.Sub(TaskLogEvent.topic_name(), client=self.client) async def close(self) -> None: # NOTE: if the Sub are deleted before closing the connection, diff --git a/services/director-v2/tests/unit/test_modules_dask_client.py b/services/director-v2/tests/unit/test_modules_dask_client.py index 479efcd47b18..7180435d0255 100644 --- a/services/director-v2/tests/unit/test_modules_dask_client.py +++ b/services/director-v2/tests/unit/test_modules_dask_client.py @@ -1078,9 +1078,7 @@ def fake_remote_fct( @pytest.fixture async def fake_task_handlers(mocker: MockerFixture) -> TaskHandlers: - return TaskHandlers( - task_progress_handler=mocker.MagicMock(), task_log_handler=mocker.MagicMock() - ) + return TaskHandlers(task_progress_handler=mocker.MagicMock()) async def test_dask_sub_handlers( @@ -1154,7 +1152,6 @@ def fake_remote_fct( fake_task_handlers.task_progress_handler.assert_called_with( "my name is progress" ) - fake_task_handlers.task_log_handler.assert_called_with("my name is logs") await _assert_wait_for_cb_call(mocked_user_completed_cb) From 8257b7aaaa1e5af97d15af76d63f348875cd948b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 08:58:39 +0200 Subject: [PATCH 32/52] pylint --- .../simcore_service_director_v2/api/dependencies/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py 
b/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py index df91e1e85938..949ef83bbdf6 100644 --- a/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py +++ b/services/director-v2/src/simcore_service_director_v2/api/dependencies/database.py @@ -30,7 +30,7 @@ def get_base_repository(engine: AsyncEngine, repo_type: type[RepoType]) -> RepoT checkedout = engine.pool.checkedout() # type: ignore # connections in use total_size = engine.pool.size() # type: ignore # current total connections - if (checkedin <= 1) and (total_size > 1): + if (checkedin < 2) and (total_size > 1): # noqa: PLR2004 logger.warning( "Database connection pool near limits: total=%d, in_use=%d, available=%d", total_size, From 8180e5e1361ddbff64ebc2bd72897009c682207c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 09:03:11 +0200 Subject: [PATCH 33/52] sonar --- .../rabbitmq_plugin.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index eed7eabc330c..f0bffe31a22b 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -2,6 +2,7 @@ import logging from asyncio import AbstractEventLoop from collections.abc import Awaitable +from typing import Final import distributed from servicelib.logging_utils import log_catch, log_context @@ -12,6 +13,10 @@ _logger = logging.getLogger(__name__) +_RABBITMQ_CONFIGURATION_ERROR: Final[str] = ( + "RabbitMQ client is not available. Please check the configuration." 
+) + class RabbitMQPlugin(distributed.WorkerPlugin): """Dask Worker Plugin for RabbitMQ integration""" @@ -73,21 +78,15 @@ async def _() -> None: def get_client(self) -> RabbitMQClient: """Returns the RabbitMQ client or raises an error if not available""" if not self._client: - raise ConfigurationError( - msg="RabbitMQ client is not available. Please check the configuration." - ) + raise ConfigurationError(msg=_RABBITMQ_CONFIGURATION_ERROR) return self._client def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: """Returns the RabbitMQ client or raises an error if not available""" if not worker.plugins: - raise ConfigurationError( - msg="RabbitMQ client is not available. Please check the configuration." - ) + raise ConfigurationError(msg=_RABBITMQ_CONFIGURATION_ERROR) rabbitmq_plugin = worker.plugins.get(RabbitMQPlugin.name) if not isinstance(rabbitmq_plugin, RabbitMQPlugin): - raise ConfigurationError( - msg="RabbitMQ client is not available. Please check the configuration." 
- ) + raise ConfigurationError(msg=_RABBITMQ_CONFIGURATION_ERROR) return rabbitmq_plugin.get_client() From 2c221c144638b70251d12a37c9ece1c08b3b6989 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 10:20:52 +0200 Subject: [PATCH 34/52] if available_space is not filled set to 0 --- .../computational-clusters/autoscaled_monitor/ssh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/maintenance/computational-clusters/autoscaled_monitor/ssh.py b/scripts/maintenance/computational-clusters/autoscaled_monitor/ssh.py index 0d3159f818fc..229dab0c3f40 100644 --- a/scripts/maintenance/computational-clusters/autoscaled_monitor/ssh.py +++ b/scripts/maintenance/computational-clusters/autoscaled_monitor/ssh.py @@ -139,7 +139,7 @@ async def get_available_disk_space( # Available disk space will be captured here available_space = stdout.read().decode("utf-8").strip() - return ByteSize(available_space) + return ByteSize(available_space if available_space else 0) except ( paramiko.AuthenticationException, paramiko.SSHException, From 300d08d9659e24e8567c22d0c96204adbd66f8dc Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 10:45:31 +0200 Subject: [PATCH 35/52] rabbit mq must be in the network of the sidecar now --- services/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 828c75709b1d..3374ed032c44 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -1271,6 +1271,7 @@ services: - rabbit_data:/var/lib/rabbitmq networks: - default + - computational_services_subnet - interactive_services_subnet - autoscaling_subnet healthcheck: From 9c0f0197afb42843daf07cb5e3b552a50a6699c0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 10:46:52 +0200 Subject: [PATCH 36/52] ensure errors do not 
let the sidecar start --- .../src/simcore_service_dask_sidecar/worker.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index a080dcb92957..6d3f91d649b9 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -64,8 +64,6 @@ async def dask_setup(worker: distributed.Worker) -> None: with log_context(_logger, logging.INFO, "Launch dask worker"): _logger.info("app settings: %s", settings.model_dump_json(indent=1)) - print_dask_sidecar_banner() - if threading.current_thread() is threading.main_thread(): GracefulKiller(worker) @@ -73,7 +71,11 @@ async def dask_setup(worker: distributed.Worker) -> None: _logger.info("We do have a running loop in the main thread: %s", f"{loop=}") if settings.DASK_SIDECAR_RABBITMQ: - await worker.plugin_add(RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ)) + await worker.plugin_add( + RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ), catch_errors=False + ) + + print_dask_sidecar_banner() async def dask_teardown(worker: distributed.Worker) -> None: From 4303d78a49927ee97d553a2abc63e06a542ece2b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 10:51:35 +0200 Subject: [PATCH 37/52] allow queueing of messages from any thread --- .../rabbitmq_plugin.py | 107 +++++++++++++++--- .../utils/dask.py | 8 +- 2 files changed, 100 insertions(+), 15 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index f0bffe31a22b..275fd81a2207 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -1,12 +1,15 @@ import asyncio 
import logging +import threading from asyncio import AbstractEventLoop from collections.abc import Awaitable from typing import Final import distributed +from servicelib.async_utils import cancel_wait_task from servicelib.logging_utils import log_catch, log_context from servicelib.rabbitmq import RabbitMQClient, wait_till_rabbitmq_responsive +from servicelib.rabbitmq._models import RabbitMessage from settings_library.rabbit import RabbitSettings from .errors import ConfigurationError @@ -22,13 +25,40 @@ class RabbitMQPlugin(distributed.WorkerPlugin): """Dask Worker Plugin for RabbitMQ integration""" name = "rabbitmq_plugin" - _loop: AbstractEventLoop | None = None + _main_thread_loop: AbstractEventLoop | None = None _client: RabbitMQClient | None = None _settings: RabbitSettings | None = None + _message_queue: asyncio.Queue | None = None + _message_processor: asyncio.Task | None = None def __init__(self, settings: RabbitSettings): self._settings = settings + async def _process_messages(self) -> None: + """Process messages from worker threads in the main thread""" + assert self._message_queue is not None # nosec + assert self._client is not None # nosec + + _logger.info("Starting message processor for RabbitMQ") + try: + while True: + # Get message from queue + exchange_name, message_data = await self._message_queue.get() + + try: + # Publish to RabbitMQ + await self._client.publish(exchange_name, message_data) + except Exception as e: + _logger.exception("Failed to publish message: %s", str(e)) + finally: + # Mark task as done + self._message_queue.task_done() + except asyncio.CancelledError: + _logger.info("RabbitMQ message processor shutting down") + raise + except Exception: + _logger.exception("Unexpected error in RabbitMQ message processor") + def setup(self, worker: distributed.Worker) -> Awaitable[None]: """Called when the plugin is attached to a worker""" @@ -39,17 +69,30 @@ async def _() -> None: ) return + if threading.current_thread() is 
threading.main_thread(): + _logger.info( + "RabbitMQ client plugin setup is in the main thread! That is good." + ) + else: + msg = "RabbitMQ client plugin setup is not the main thread!" + raise ConfigurationError(msg=msg) + with log_context( _logger, logging.INFO, f"RabbitMQ client initialization for worker {worker.address}", ): - self._loop = asyncio.get_event_loop() + self._main_thread_loop = asyncio.get_event_loop() await wait_till_rabbitmq_responsive(self._settings.dsn) self._client = RabbitMQClient( client_name="dask-sidecar", settings=self._settings ) + self._message_queue = asyncio.Queue() + self._message_processor = asyncio.create_task( + self._process_messages(), name="rabbit_message_processor" + ) + return _() def teardown(self, worker: distributed.Worker) -> Awaitable[None]: @@ -61,17 +104,32 @@ async def _() -> None: logging.INFO, f"RabbitMQ client teardown for worker {worker.address}", ): - if self._client: - current_loop = asyncio.get_event_loop() - if self._loop != current_loop: - _logger.warning( - "RabbitMQ client is de-activated (loop mismatch)" - ) - assert self._loop # nosec + if not self._client: + return + if threading.current_thread() is threading.main_thread(): + _logger.info( + "RabbitMQ client plugin setup is in the main thread! That is good." + ) + else: + _logger.warning( + "RabbitMQ client plugin setup is not the main thread!" 
+ ) + + # Cancel the message processor task + if self._message_processor: with log_catch(_logger, reraise=False): - await asyncio.wait_for(self._client.close(), timeout=5.0) + await cancel_wait_task(self._message_processor, max_delay=5) + self._message_processor = None + + # close client + current_loop = asyncio.get_event_loop() + if self._main_thread_loop != current_loop: + _logger.warning("RabbitMQ client is de-activated (loop mismatch)") + assert self._main_thread_loop # nosec + with log_catch(_logger, reraise=False): + await asyncio.wait_for(self._client.close(), timeout=5.0) - self._client = None + self._client = None return _() @@ -81,12 +139,35 @@ def get_client(self) -> RabbitMQClient: raise ConfigurationError(msg=_RABBITMQ_CONFIGURATION_ERROR) return self._client + async def publish_message_from_any_thread( + self, exchange_name: str, message_data: RabbitMessage + ) -> None: + """Enqueue a message to be published to RabbitMQ from any thread""" + assert self._message_queue # nosec + + if threading.current_thread() is threading.main_thread(): + # If we're in the main thread, add directly to the queue + await self._message_queue.put((exchange_name, message_data)) + return + + # If we're in a worker thread, we need to use a different approach + assert self._main_thread_loop # nosec + + # Create a Future in the main thread's event loop + future = asyncio.run_coroutine_threadsafe( + self._message_queue.put((exchange_name, message_data)), + self._main_thread_loop, + ) + + # waiting here is quick, just queueing + future.result() + -def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQClient: +def get_rabbitmq_client(worker: distributed.Worker) -> RabbitMQPlugin: """Returns the RabbitMQ client or raises an error if not available""" if not worker.plugins: raise ConfigurationError(msg=_RABBITMQ_CONFIGURATION_ERROR) rabbitmq_plugin = worker.plugins.get(RabbitMQPlugin.name) if not isinstance(rabbitmq_plugin, RabbitMQPlugin): raise 
ConfigurationError(msg=_RABBITMQ_CONFIGURATION_ERROR) - return rabbitmq_plugin.get_client() + return rabbitmq_plugin diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py index 92296a2407b6..a4476a16306c 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py @@ -97,7 +97,9 @@ async def publish_logs( messages=[message], log_level=log_level, ) - await rabbitmq_client.publish(base_message.channel_name, base_message) + await rabbitmq_client.publish_message_from_any_thread( + base_message.channel_name, base_message + ) if self.task_owner.has_parent: assert self.task_owner.parent_project_id # nosec assert self.task_owner.parent_node_id # nosec @@ -108,7 +110,9 @@ async def publish_logs( messages=[message], log_level=log_level, ) - await rabbitmq_client.publish(parent_message.channel_name, base_message) + await rabbitmq_client.publish_message_from_any_thread( + parent_message.channel_name, base_message + ) _logger.log(log_level, message) From f278199d0370c6c0d8f64902b2840ae1fed5cea1 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 10:51:56 +0200 Subject: [PATCH 38/52] cleanup --- services/dask-sidecar/tests/unit/test_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 62fab3633acb..212b1990efc3 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -713,7 +713,7 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub task_owner: TaskOwner, ): mocked_get_image_labels.assert_not_called() - NUMBER_OF_LOGS = 200 + NUMBER_OF_LOGS = 20000 future = dask_client.submit( run_computational_sidecar, **sidecar_task( 
From 90af8e71c1016609ba764905009e4be0910482d8 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 7 May 2025 12:04:21 +0200 Subject: [PATCH 39/52] ongoing --- services/dask-sidecar/tests/unit/test_tasks.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 212b1990efc3..3f6a585450f3 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -114,11 +114,16 @@ def dask_subsystem_mock( return_value=False, ) # mock dask rabbitmq plugin + mock_dask_rabbitmq_plugin = mocker.patch( + "simcore_service_dask_sidecar.rabbitmq_plugin.RabbitMQPlugin", autospec=True + ) mock_rabbitmq_client = create_rabbitmq_client("pytest_dask_sidecar_logs_publisher") + mock_dask_rabbitmq_plugin.get_client.return_value = mock_rabbitmq_client + mocker.patch( "simcore_service_dask_sidecar.utils.dask.get_rabbitmq_client", autospec=True, - return_value=mock_rabbitmq_client, + return_value=mock_dask_rabbitmq_plugin, ) return { From d0b1c4f1e50895e82ffd3b8455a44ca52463531a Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 11:26:26 +0200 Subject: [PATCH 40/52] ensure we close the worker if the plugin cannot start --- .../src/simcore_service_dask_sidecar/worker.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py index 6d3f91d649b9..de3ecd6d66cc 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/worker.py @@ -71,9 +71,13 @@ async def dask_setup(worker: distributed.Worker) -> None: _logger.info("We do have a running loop in the main thread: %s", f"{loop=}") if 
settings.DASK_SIDECAR_RABBITMQ: - await worker.plugin_add( - RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ), catch_errors=False - ) + try: + await worker.plugin_add( + RabbitMQPlugin(settings.DASK_SIDECAR_RABBITMQ), catch_errors=False + ) + except Exception: + await worker.close() + raise print_dask_sidecar_banner() From c3b8bba7dc180d5d0858f0a141882a8e98eeb977 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 11:26:36 +0200 Subject: [PATCH 41/52] add some checks --- services/dask-sidecar/tests/unit/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/dask-sidecar/tests/unit/conftest.py b/services/dask-sidecar/tests/unit/conftest.py index edc61b84db6e..49d61fb1bd26 100644 --- a/services/dask-sidecar/tests/unit/conftest.py +++ b/services/dask-sidecar/tests/unit/conftest.py @@ -121,6 +121,7 @@ def local_cluster(app_environment: EnvVarsDict) -> Iterator[distributed.LocalClu ) as cluster: assert cluster assert isinstance(cluster, distributed.LocalCluster) + print(cluster.workers) yield cluster @@ -129,6 +130,7 @@ def dask_client( local_cluster: distributed.LocalCluster, ) -> Iterator[distributed.Client]: with distributed.Client(local_cluster) as client: + client.wait_for_workers(1, timeout=10) yield client From 713c1553eda30f8ae6400fb142befacca93cbef7 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 11:59:00 +0200 Subject: [PATCH 42/52] fixed tests --- .../rabbitmq_plugin.py | 9 +++------ services/dask-sidecar/tests/unit/test_tasks.py | 14 +++++++------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index 275fd81a2207..65efa1d227e4 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ 
b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -69,13 +69,10 @@ async def _() -> None: ) return - if threading.current_thread() is threading.main_thread(): - _logger.info( - "RabbitMQ client plugin setup is in the main thread! That is good." + if threading.current_thread() is not threading.main_thread(): + _logger.warning( + "RabbitMQ client plugin setup is not in the main thread! Beware! if in pytest it's ok." ) - else: - msg = "RabbitMQ client plugin setup is not the main thread!" - raise ConfigurationError(msg=msg) with log_context( _logger, diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 3f6a585450f3..6a8f4b095f7b 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -119,6 +119,9 @@ def dask_subsystem_mock( ) mock_rabbitmq_client = create_rabbitmq_client("pytest_dask_sidecar_logs_publisher") mock_dask_rabbitmq_plugin.get_client.return_value = mock_rabbitmq_client + mock_dask_rabbitmq_plugin.publish_message_from_any_thread = ( + mock_rabbitmq_client.publish + ) mocker.patch( "simcore_service_dask_sidecar.utils.dask.get_rabbitmq_client", @@ -707,7 +710,9 @@ async def test_run_computational_sidecar_dask( @pytest.mark.parametrize( - "integration_version, boot_mode", [("1.0.0", BootMode.CPU)], indirect=True + "integration_version, boot_mode, task_owner", + [("1.0.0", BootMode.CPU, "no_parent_node")], + indirect=True, ) async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub( dask_client: distributed.Client, @@ -715,7 +720,6 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub progress_sub: distributed.Sub, mocked_get_image_labels: mock.Mock, log_rabbit_client_parser: mock.AsyncMock, - task_owner: TaskOwner, ): mocked_get_image_labels.assert_not_called() NUMBER_OF_LOGS = 20000 @@ -761,11 +765,7 @@ async def 
test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub ] # check all the awaited logs are in there filtered_worker_logs = filter(lambda log: "This is iteration" in log, worker_logs) - assert ( - len(list(filtered_worker_logs)) == (2 * NUMBER_OF_LOGS) - if task_owner.has_parent - else NUMBER_OF_LOGS - ) + assert len(list(filtered_worker_logs)) == NUMBER_OF_LOGS mocked_get_image_labels.assert_called() From f9e0896af362f134560fda29e0fa4254f7747892 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:01:36 +0200 Subject: [PATCH 43/52] mypy --- .../src/dask_task_models_library/container_tasks/protocol.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py index 27fe4e97995b..f7179be78c0a 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/protocol.py @@ -93,9 +93,7 @@ class ContainerTaskParameters(BaseModel): "input_data": TaskInputData.model_json_schema()["examples"][0], "output_data_keys": TaskOutputDataSchema.model_json_schema()[ "examples" - ][ - 0 - ], # type: ignore[index] + ][0], "command": ["sleep 10", "echo hello"], "envs": {"MYENV": "is an env"}, "labels": {"io.simcore.thelabel": "is amazing"}, From c8a3901e4a3c7f3e501c4884f71e519c803b4988 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:02:41 +0200 Subject: [PATCH 44/52] pylint --- .../tests/unit/with_dbs/comp_scheduler/test_scheduler_dask.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/director-v2/tests/unit/with_dbs/comp_scheduler/test_scheduler_dask.py 
b/services/director-v2/tests/unit/with_dbs/comp_scheduler/test_scheduler_dask.py index bb4adba21357..5b1cbf64aa3f 100644 --- a/services/director-v2/tests/unit/with_dbs/comp_scheduler/test_scheduler_dask.py +++ b/services/director-v2/tests/unit/with_dbs/comp_scheduler/test_scheduler_dask.py @@ -108,7 +108,6 @@ def _assert_dask_client_correctly_initialized( cast( # noqa: SLF001 DaskScheduler, scheduler )._task_progress_change_handler, - cast(DaskScheduler, scheduler)._task_log_change_handler, # noqa: SLF001 ) ) From 82659c37441767ebfba3b1fcca1f9f0080d82a61 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:06:15 +0200 Subject: [PATCH 45/52] fixed test --- services/director-v2/tests/unit/test_modules_dask_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/director-v2/tests/unit/test_modules_dask_client.py b/services/director-v2/tests/unit/test_modules_dask_client.py index 7180435d0255..c52647b70479 100644 --- a/services/director-v2/tests/unit/test_modules_dask_client.py +++ b/services/director-v2/tests/unit/test_modules_dask_client.py @@ -20,7 +20,6 @@ from dask_task_models_library.container_tasks.docker import DockerBasicAuth from dask_task_models_library.container_tasks.errors import TaskCancelledError from dask_task_models_library.container_tasks.events import ( - TaskLogEvent, TaskProgressEvent, ) from dask_task_models_library.container_tasks.io import ( @@ -1104,9 +1103,7 @@ def fake_remote_fct( s3_settings: S3Settings | None, ) -> TaskOutputData: progress_pub = distributed.Pub(TaskProgressEvent.topic_name()) - logs_pub = distributed.Pub(TaskLogEvent.topic_name()) progress_pub.put("my name is progress") - logs_pub.put("my name is logs") # tell the client we are done published_event = Event(name=_DASK_START_EVENT) published_event.set() From c720845ea491aa2513d1c70c0fc472521ccec78c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 
2025 13:18:31 +0200 Subject: [PATCH 46/52] use tenacity instead of sleeps --- .../dask-sidecar/tests/unit/test_tasks.py | 78 ++++++++++++------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_tasks.py index 6a8f4b095f7b..d826270fca4e 100644 --- a/services/dask-sidecar/tests/unit/test_tasks.py +++ b/services/dask-sidecar/tests/unit/test_tasks.py @@ -62,6 +62,12 @@ _s3fs_settings_from_s3_settings, ) from simcore_service_dask_sidecar.worker import run_computational_sidecar +from tenacity import ( + AsyncRetrying, + retry_if_exception_type, + stop_after_delay, + wait_fixed, +) _logger = logging.getLogger(__name__) @@ -674,22 +680,30 @@ async def test_run_computational_sidecar_dask( ), "ordering of progress values incorrectly sorted!" assert worker_progresses[0] == 0, "missing/incorrect initial progress value" assert worker_progresses[-1] == 1, "missing/incorrect final progress value" - await asyncio.sleep(5) - assert log_rabbit_client_parser.called - worker_logs = [ - message - for msg in log_rabbit_client_parser.call_args_list - for message in LoggerRabbitMessage.model_validate_json(msg.args[0]).messages - ] - - print(f"<-- we got {len(worker_logs)} lines of logs") - - for log in sleeper_task.expected_logs: - r = re.compile(rf"^({log}).*") - search_results = list(filter(r.search, worker_logs)) - assert ( - len(search_results) > 0 - ), f"Could not find {log} in worker_logs:\n {pformat(worker_logs, width=240)}" + async for attempt in AsyncRetrying( + wait=wait_fixed(1), + stop=stop_after_delay(30), + reraise=True, + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert log_rabbit_client_parser.called + worker_logs = [ + message + for msg in log_rabbit_client_parser.call_args_list + for message in LoggerRabbitMessage.model_validate_json( + msg.args[0] + ).messages + ] + + print(f"<-- we got {len(worker_logs)} lines of logs") + + for log in 
sleeper_task.expected_logs: + r = re.compile(rf"^({log}).*") + search_results = list(filter(r.search, worker_logs)) + assert ( + len(search_results) > 0 + ), f"Could not find {log} in worker_logs:\n {pformat(worker_logs, width=240)}" # check that the task produce the expected data, not less not more assert isinstance(output_data, TaskOutputData) @@ -755,17 +769,27 @@ async def test_run_computational_sidecar_dask_does_not_lose_messages_with_pubsub assert worker_progresses[0] == 0, "missing/incorrect initial progress value" assert worker_progresses[-1] == 1, "missing/incorrect final progress value" - await asyncio.sleep(5) - assert log_rabbit_client_parser.called - - worker_logs = [ - message - for msg in log_rabbit_client_parser.call_args_list - for message in LoggerRabbitMessage.model_validate_json(msg.args[0]).messages - ] - # check all the awaited logs are in there - filtered_worker_logs = filter(lambda log: "This is iteration" in log, worker_logs) - assert len(list(filtered_worker_logs)) == NUMBER_OF_LOGS + async for attempt in AsyncRetrying( + wait=wait_fixed(1), + stop=stop_after_delay(30), + reraise=True, + retry=retry_if_exception_type(AssertionError), + ): + with attempt: + assert log_rabbit_client_parser.called + + worker_logs = [ + message + for msg in log_rabbit_client_parser.call_args_list + for message in LoggerRabbitMessage.model_validate_json( + msg.args[0] + ).messages + ] + # check all the awaited logs are in there + filtered_worker_logs = filter( + lambda log: "This is iteration" in log, worker_logs + ) + assert len(list(filtered_worker_logs)) == NUMBER_OF_LOGS mocked_get_image_labels.assert_called() From 5d577516341d8fd2961072345a37f1fe80a30938 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:31:16 +0200 Subject: [PATCH 47/52] move things around --- .../{test_docker_utils.py => test_computational_docker_utils.py} | 0 .../tests/unit/{test_models.py => test_computational_models.py} | 0 
...volume.py => test_computational_sidecar_task_shared_volume.py} | 0 .../tests/unit/{test_dask_utils.py => test_utils_dask.py} | 0 .../dask-sidecar/tests/unit/{test_tasks.py => test_worker.py} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename services/dask-sidecar/tests/unit/{test_docker_utils.py => test_computational_docker_utils.py} (100%) rename services/dask-sidecar/tests/unit/{test_models.py => test_computational_models.py} (100%) rename services/dask-sidecar/tests/unit/{test_task_shared_volume.py => test_computational_sidecar_task_shared_volume.py} (100%) rename services/dask-sidecar/tests/unit/{test_dask_utils.py => test_utils_dask.py} (100%) rename services/dask-sidecar/tests/unit/{test_tasks.py => test_worker.py} (100%) diff --git a/services/dask-sidecar/tests/unit/test_docker_utils.py b/services/dask-sidecar/tests/unit/test_computational_docker_utils.py similarity index 100% rename from services/dask-sidecar/tests/unit/test_docker_utils.py rename to services/dask-sidecar/tests/unit/test_computational_docker_utils.py diff --git a/services/dask-sidecar/tests/unit/test_models.py b/services/dask-sidecar/tests/unit/test_computational_models.py similarity index 100% rename from services/dask-sidecar/tests/unit/test_models.py rename to services/dask-sidecar/tests/unit/test_computational_models.py diff --git a/services/dask-sidecar/tests/unit/test_task_shared_volume.py b/services/dask-sidecar/tests/unit/test_computational_sidecar_task_shared_volume.py similarity index 100% rename from services/dask-sidecar/tests/unit/test_task_shared_volume.py rename to services/dask-sidecar/tests/unit/test_computational_sidecar_task_shared_volume.py diff --git a/services/dask-sidecar/tests/unit/test_dask_utils.py b/services/dask-sidecar/tests/unit/test_utils_dask.py similarity index 100% rename from services/dask-sidecar/tests/unit/test_dask_utils.py rename to services/dask-sidecar/tests/unit/test_utils_dask.py diff --git 
a/services/dask-sidecar/tests/unit/test_tasks.py b/services/dask-sidecar/tests/unit/test_worker.py similarity index 100% rename from services/dask-sidecar/tests/unit/test_tasks.py rename to services/dask-sidecar/tests/unit/test_worker.py From b261d5fad32de6ab8597843bc0c0ab7d9866a54d Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:35:25 +0200 Subject: [PATCH 48/52] add rabbitmq for private clusters --- .../data/docker-compose.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml index 87f4ef94560f..8d8bc77b58e7 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml @@ -13,6 +13,11 @@ services: DASK_START_AS_SCHEDULER: 1 DASK_WORKER_SATURATION: ${DASK_WORKER_SATURATION} LOG_LEVEL: ${LOG_LEVEL} + RABBIT_HOST: ${RABBIT_HOST} + RABBIT_PASSWORD: ${RABBIT_PASSWORD} + RABBIT_PORT: ${RABBIT_PORT} + RABBIT_SECURE: ${RABBIT_SECURE} + RABBIT_USER: ${RABBIT_USER} ports: - 8786:8786 # dask-scheduler access - 8787:8787 # dashboard @@ -59,6 +64,11 @@ services: DASK_TLS_KEY: ${DASK_TLS_KEY} DASK_WORKER_SATURATION: ${DASK_WORKER_SATURATION} LOG_LEVEL: ${LOG_LEVEL} + RABBIT_HOST: ${RABBIT_HOST} + RABBIT_PASSWORD: ${RABBIT_PASSWORD} + RABBIT_PORT: ${RABBIT_PORT} + RABBIT_SECURE: ${RABBIT_SECURE} + RABBIT_USER: ${RABBIT_USER} SIDECAR_COMP_SERVICES_SHARED_FOLDER: /home/scu/computational_shared_data SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME: computational_shared_data deploy: From e4f279388c3900034440887bf7f3b642b2387403 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:39:54 +0200 Subject: [PATCH 49/52] use single env --- .../data/docker-compose.yml | 
14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml index 8d8bc77b58e7..f0901f1093cd 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml @@ -9,15 +9,12 @@ services: DASK_TLS_CA_FILE: ${DASK_TLS_CA_FILE} DASK_TLS_CERT: ${DASK_TLS_CERT} DASK_TLS_KEY: ${DASK_TLS_KEY} + DASK_SIDECAR_RABBITMQ: ${AUTOSCALING_RABBITMQ} DASK_SCHEDULER_URL: tls://dask-scheduler:8786 DASK_START_AS_SCHEDULER: 1 DASK_WORKER_SATURATION: ${DASK_WORKER_SATURATION} LOG_LEVEL: ${LOG_LEVEL} - RABBIT_HOST: ${RABBIT_HOST} - RABBIT_PASSWORD: ${RABBIT_PASSWORD} - RABBIT_PORT: ${RABBIT_PORT} - RABBIT_SECURE: ${RABBIT_SECURE} - RABBIT_USER: ${RABBIT_USER} + ports: - 8786:8786 # dask-scheduler access - 8787:8787 # dashboard @@ -59,16 +56,13 @@ services: DASK_SCHEDULER_URL: tls://dask-scheduler:8786 DASK_SIDECAR_NON_USABLE_RAM: 0 DASK_SIDECAR_NUM_NON_USABLE_CPUS: 0 + DASK_SIDECAR_RABBITMQ: ${AUTOSCALING_RABBITMQ} DASK_TLS_CA_FILE: ${DASK_TLS_CA_FILE} DASK_TLS_CERT: ${DASK_TLS_CERT} DASK_TLS_KEY: ${DASK_TLS_KEY} DASK_WORKER_SATURATION: ${DASK_WORKER_SATURATION} LOG_LEVEL: ${LOG_LEVEL} - RABBIT_HOST: ${RABBIT_HOST} - RABBIT_PASSWORD: ${RABBIT_PASSWORD} - RABBIT_PORT: ${RABBIT_PORT} - RABBIT_SECURE: ${RABBIT_SECURE} - RABBIT_USER: ${RABBIT_USER} + SIDECAR_COMP_SERVICES_SHARED_FOLDER: /home/scu/computational_shared_data SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME: computational_shared_data deploy: From 7805c17468045c251768bd53f185b0632e9a2682 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 13:45:57 +0200 Subject: [PATCH 50/52] @copilot review: fix --- .../dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py index a4476a16306c..feab5e4d632a 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/utils/dask.py @@ -111,7 +111,7 @@ async def publish_logs( log_level=log_level, ) await rabbitmq_client.publish_message_from_any_thread( - parent_message.channel_name, base_message + parent_message.channel_name, parent_message ) _logger.log(log_level, message) From b72f7198f100a26a78f34d4cd89989b7ebe891f4 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 15:35:25 +0200 Subject: [PATCH 51/52] pylint --- .../rabbitmq_plugin.py | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py index 65efa1d227e4..554988aa0b2f 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/rabbitmq_plugin.py @@ -39,25 +39,14 @@ async def _process_messages(self) -> None: assert self._message_queue is not None # nosec assert self._client is not None # nosec - _logger.info("Starting message processor for RabbitMQ") - try: + with log_context(_logger, logging.INFO, "RabbitMQ message processor"): while True: - # Get message from queue - exchange_name, message_data = await self._message_queue.get() - - try: - # Publish to RabbitMQ - await self._client.publish(exchange_name, message_data) - except Exception as e: - _logger.exception("Failed to publish message: %s", str(e)) - finally: - # Mark task as done - self._message_queue.task_done() - except asyncio.CancelledError: - _logger.info("RabbitMQ message 
processor shutting down") - raise - except Exception: - _logger.exception("Unexpected error in RabbitMQ message processor") + with log_catch(_logger, reraise=False): + exchange_name, message_data = await self._message_queue.get() + try: + await self._client.publish(exchange_name, message_data) + finally: + self._message_queue.task_done() def setup(self, worker: distributed.Worker) -> Awaitable[None]: """Called when the plugin is attached to a worker""" From 6bec045d5b45b349447febee3b28c451744b2b99 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 8 May 2025 15:36:02 +0200 Subject: [PATCH 52/52] pylint --- services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py b/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py index 1510cc99e079..de632c818ec8 100644 --- a/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py +++ b/services/dask-sidecar/tests/unit/test_rabbitmq_plugin.py @@ -1,3 +1,8 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable +# pylint: disable=no-member + import distributed # Selection of core and tool services started in this swarm fixture (integration)