diff --git a/.env-devel b/.env-devel index 6996637ad3f4..ee5f91a2783c 100644 --- a/.env-devel +++ b/.env-devel @@ -53,6 +53,7 @@ CELERY_RESULT_EXPIRES=P7D CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH='{"type":"tls","tls_ca_file":"/home/scu/.dask/dask-crt.pem","tls_client_cert":"/home/scu/.dask/dask-crt.pem","tls_client_key":"/home/scu/.dask/dask-key.pem"}' CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG=master-github-latest +CLUSTERS_KEEPER_DASK_NPROCS=1 CLUSTERS_KEEPER_DASK_NTHREADS=0 CLUSTERS_KEEPER_DASK_WORKER_SATURATION=inf CLUSTERS_KEEPER_EC2_ACCESS=null diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py index 612abeabf771..aef3bd798e3a 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/core/settings.py @@ -427,10 +427,17 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): ), ] + CLUSTERS_KEEPER_DASK_NPROCS: Annotated[ + int, + Field( + description="overrides the default number of worker processes in the dask-sidecars, setting it to negative values will use dask defaults (see description in 'dask worker --help')", + ), + ] + CLUSTERS_KEEPER_DASK_NTHREADS: Annotated[ NonNegativeInt, Field( - description="overrides the default number of threads in the dask-sidecars, setting it to 0 will use the default (see description in dask-sidecar)", + description="overrides the default number of threads per process in the dask-sidecars, setting it to 0 will use the default (see description in dask-sidecar)", ), ] diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml index 8ddbc592f19b..1a9bc6b65526 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml @@ -56,7 +56,7 @@ services: - cluster environment: DASK_LOG_FORMAT_LOCAL_DEV_ENABLED: 1 - DASK_NPROCS: 1 + DASK_NPROCS: ${DASK_NPROCS} DASK_NTHREADS: ${DASK_NTHREADS} DASK_SCHEDULER_URL: tls://dask-scheduler:8786 DASK_SIDECAR_NON_USABLE_RAM: 0 diff --git a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py index d4424b19f15e..8cbd057fce32 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/utils/clusters.py @@ -89,7 +89,8 @@ def _convert_to_env_dict(entries: dict[str, Any]) -> str: f"CLUSTERS_KEEPER_EC2_ENDPOINT={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_ENDPOINT or 'null'}", f"CLUSTERS_KEEPER_EC2_REGION_NAME={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_REGION_NAME}", f"CLUSTERS_KEEPER_EC2_SECRET_ACCESS_KEY={app_settings.CLUSTERS_KEEPER_EC2_ACCESS.EC2_SECRET_ACCESS_KEY}", - f"DASK_NTHREADS={app_settings.CLUSTERS_KEEPER_DASK_NTHREADS or ''}", + f"DASK_NPROCS={app_settings.CLUSTERS_KEEPER_DASK_NPROCS}", + f"DASK_NTHREADS={app_settings.CLUSTERS_KEEPER_DASK_NTHREADS}", f"DASK_TLS_CA_FILE={_HOST_TLS_CA_FILE_PATH}", f"DASK_TLS_CERT={_HOST_TLS_CERT_FILE_PATH}", f"DASK_TLS_KEY={_HOST_TLS_KEY_FILE_PATH}", diff --git a/services/clusters-keeper/tests/unit/conftest.py b/services/clusters-keeper/tests/unit/conftest.py index cbcbd09ee5a3..b464bd38da48 100644 --- a/services/clusters-keeper/tests/unit/conftest.py +++ b/services/clusters-keeper/tests/unit/conftest.py @@ -130,6 +130,7 @@ def app_environment( "CLUSTERS_KEEPER_SSM_SECRET_ACCESS_KEY": faker.pystr(), "CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES": "{}", "CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX": faker.pystr(), + "CLUSTERS_KEEPER_DASK_NPROCS": f"{faker.pyint()}", "CLUSTERS_KEEPER_DASK_NTHREADS": f"{faker.pyint(min_value=0)}", "CLUSTERS_KEEPER_DASK_WORKER_SATURATION": f"{faker.pyfloat(min_value=0.1)}", "CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH": "{}", diff --git a/services/dask-sidecar/docker/boot.sh b/services/dask-sidecar/docker/boot.sh index f26cdd9f8a3f..56ca3b6310e5 100755 --- a/services/dask-sidecar/docker/boot.sh +++ b/services/dask-sidecar/docker/boot.sh @@ -171,14 +171,14 @@ else # 'daemonic processes are not allowed to have children' arises when running the sidecar.cli # because multi-processing library is used by the sidecar and the nanny does not like it # setting --no-nanny fixes this: see https://github.com/dask/distributed/issues/2142 - print_info "Starting as a dask worker "${DASK_WORKER_VERSION}" -> "${DASK_SCHEDULER_URL}" ..." - print_info "Worker resources set as: "$resources"" + print_info "Starting as a dask worker ${DASK_WORKER_VERSION} -> ${DASK_SCHEDULER_URL} ..." + print_info "Worker resources set as: $resources" if [ "${SC_BOOT_MODE}" = "debug" ]; then exec watchmedo auto-restart --recursive --pattern="*.py;*/src/*" --ignore-patterns="*test*;pytest_simcore/*;setup.py;*ignore*" --ignore-directories -- \ dask worker "${DASK_SCHEDULER_URL}" \ --local-directory /tmp/dask-sidecar \ --preload simcore_service_dask_sidecar.worker \ - --nworkers ${DASK_NPROCS} \ + --nworkers "${DASK_NPROCS}" \ --nthreads "${DASK_NTHREADS}" \ --dashboard-address 8787 \ --memory-limit "${DASK_MEMORY_LIMIT}" \ @@ -188,7 +188,7 @@ else exec dask worker "${DASK_SCHEDULER_URL}" \ --local-directory /tmp/dask-sidecar \ --preload simcore_service_dask_sidecar.worker \ - --nworkers ${DASK_NPROCS} \ + --nworkers "${DASK_NPROCS}" \ --nthreads "${DASK_NTHREADS}" \ --dashboard-address 8787 \ --memory-limit "${DASK_MEMORY_LIMIT}" \ diff --git a/services/docker-compose.yml b/services/docker-compose.yml index e8f7fba296a2..b77fabd82acb 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -221,6 +221,7 @@ services: <<: *tracing_open_telemetry_environs CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG: ${CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG} CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH: ${CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH} + CLUSTERS_KEEPER_DASK_NPROCS: ${CLUSTERS_KEEPER_DASK_NPROCS} CLUSTERS_KEEPER_DASK_NTHREADS: ${CLUSTERS_KEEPER_DASK_NTHREADS} CLUSTERS_KEEPER_DASK_WORKER_SATURATION: ${CLUSTERS_KEEPER_DASK_WORKER_SATURATION} CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION: ${CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION}