From 395f12560344a371eab3d53fcaaead55b65eb263 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 6 Jun 2025 08:14:50 +0200 Subject: [PATCH 1/7] only require stickyness on specific endpoints --- services/docker-compose.yml | 99 ++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 46 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 8d59f487263..5ceff3d888b 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -14,7 +14,7 @@ services: environment: API_SERVER_DEV_FEATURES_ENABLED: ${API_SERVER_DEV_FEATURES_ENABLED} API_SERVER_LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - API_SERVER_LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + API_SERVER_LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} API_SERVER_LOGLEVEL: ${API_SERVER_LOGLEVEL} API_SERVER_PROFILING: ${API_SERVER_PROFILING} @@ -112,7 +112,7 @@ services: SSM_REGION_NAME: ${SSM_REGION_NAME} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} RABBIT_HOST: ${RABBIT_HOST} RABBIT_PASSWORD: ${RABBIT_PASSWORD} RABBIT_PORT: ${RABBIT_PORT} @@ -161,7 +161,7 @@ services: DIRECTOR_HOST: ${DIRECTOR_HOST:-director} DIRECTOR_PORT: ${DIRECTOR_PORT:-8080} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} POSTGRES_DB: ${POSTGRES_DB} POSTGRES_HOST: ${POSTGRES_HOST} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} @@ -204,7 +204,7 @@ services: CLUSTERS_KEEPER_SSM_SECRET_ACCESS_KEY: ${CLUSTERS_KEEPER_SSM_SECRET_ACCESS_KEY} CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX: ${CLUSTERS_KEEPER_EC2_INSTANCES_PREFIX} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES: 
${CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES} PRIMARY_EC2_INSTANCES_ALLOWED_TYPES: ${PRIMARY_EC2_INSTANCES_ALLOWED_TYPES} PRIMARY_EC2_INSTANCES_KEY_NAME: ${PRIMARY_EC2_INSTANCES_KEY_NAME} @@ -330,7 +330,7 @@ services: DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT: ${DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} DIRECTOR_V2_LOGLEVEL: ${DIRECTOR_V2_LOGLEVEL} MONITORING_ENABLED: ${MONITORING_ENABLED} @@ -411,7 +411,7 @@ services: - default environment: LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} RABBIT_HOST: ${RABBIT_HOST} RABBIT_PASSWORD: ${RABBIT_PASSWORD} RABBIT_PORT: ${RABBIT_PORT} @@ -455,7 +455,7 @@ services: INVITATIONS_SWAGGER_API_DOC_ENABLED: ${INVITATIONS_SWAGGER_API_DOC_ENABLED} INVITATIONS_USERNAME: ${INVITATIONS_USERNAME} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} INVITATIONS_TRACING: ${INVITATIONS_TRACING} TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} @@ -467,7 +467,7 @@ services: - default environment: LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} PAYMENTS_ACCESS_TOKEN_EXPIRE_MINUTES: ${PAYMENTS_ACCESS_TOKEN_EXPIRE_MINUTES} PAYMENTS_ACCESS_TOKEN_SECRET_KEY: ${PAYMENTS_ACCESS_TOKEN_SECRET_KEY} PAYMENTS_AUTORECHARGE_DEFAULT_MONTHLY_LIMIT: ${PAYMENTS_AUTORECHARGE_DEFAULT_MONTHLY_LIMIT} @@ -513,7 +513,7 @@ services: - default environment: LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: 
${LOG_FILTER_MAPPING} POSTGRES_DB: ${POSTGRES_DB} POSTGRES_ENDPOINT: ${POSTGRES_ENDPOINT} POSTGRES_HOST: ${POSTGRES_HOST} @@ -570,7 +570,7 @@ services: DYNAMIC_SCHEDULER_USE_INTERNAL_SCHEDULER: ${DYNAMIC_SCHEDULER_USE_INTERNAL_SCHEDULER} DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT: ${DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} POSTGRES_DB: ${POSTGRES_DB} @@ -598,7 +598,7 @@ services: init: true environment: DOCKER_API_PROXY_PASSWORD: ${DOCKER_API_PROXY_PASSWORD} - DOCKER_API_PROXY_USER : ${DOCKER_API_PROXY_USER} + DOCKER_API_PROXY_USER: ${DOCKER_API_PROXY_USER} deploy: placement: constraints: @@ -607,7 +607,7 @@ services: volumes: - /var/run/docker.sock:/var/run/docker.sock networks: - - docker-api-network + - docker-api-network static-webserver: image: ${DOCKER_REGISTRY:-itisfoundation}/static-webserver:${DOCKER_IMAGE_TAG:-latest} @@ -676,7 +676,7 @@ services: WEBSERVER_PROFILING: ${WEBSERVER_PROFILING} WEBSERVER_LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - WEBSERVER_LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + WEBSERVER_LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} # WEBSERVER_SERVER_HOST @@ -741,14 +741,13 @@ services: INVITATIONS_USERNAME: ${INVITATIONS_USERNAME} WEBSERVER_LICENSES: ${WEBSERVER_LICENSES} - LICENSES_ITIS_VIP_SYNCER_ENABLED : ${LICENSES_ITIS_VIP_SYNCER_ENABLED} + LICENSES_ITIS_VIP_SYNCER_ENABLED: ${LICENSES_ITIS_VIP_SYNCER_ENABLED} LICENSES_ITIS_VIP_SYNCER_PERIODICITY: ${LICENSES_ITIS_VIP_SYNCER_PERIODICITY} LICENSES_ITIS_VIP_API_URL: ${LICENSES_ITIS_VIP_API_URL} LICENSES_ITIS_VIP_CATEGORIES: ${LICENSES_ITIS_VIP_CATEGORIES} LICENSES_SPEAG_PHANTOMS_API_URL: ${LICENSES_SPEAG_PHANTOMS_API_URL} LICENSES_SPEAG_PHANTOMS_CATEGORIES: ${LICENSES_SPEAG_PHANTOMS_CATEGORIES} - WEBSERVER_LOGIN: ${WEBSERVER_LOGIN} LOGIN_ACCOUNT_DELETION_RETENTION_DAYS: 
${LOGIN_ACCOUNT_DELETION_RETENTION_DAYS} LOGIN_REGISTRATION_CONFIRMATION_REQUIRED: ${LOGIN_REGISTRATION_CONFIRMATION_REQUIRED} @@ -826,7 +825,6 @@ services: PROJECTS_MAX_COPY_SIZE_BYTES: ${PROJECTS_MAX_COPY_SIZE_BYTES} PROJECTS_MAX_NUM_RUNNING_DYNAMIC_NODES: ${PROJECTS_MAX_NUM_RUNNING_DYNAMIC_NODES} - # WEBSERVER_RABBITMQ RABBIT_HOST: ${RABBIT_HOST} RABBIT_PASSWORD: ${RABBIT_PASSWORD} @@ -837,7 +835,6 @@ services: # WEBSERVER_TRASH TRASH_RETENTION_DAYS: ${TRASH_RETENTION_DAYS} - # ARBITRARY ENV VARS # see [https://docs.gunicorn.org/en/stable/settings.html#timeout], @@ -868,18 +865,43 @@ services: - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.healthcheck.path=/v0/ - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.healthcheck.interval=2000ms - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.healthcheck.timeout=1000ms - # NOTE: stickyness must remain until the long running tasks in the webserver are removed - # and also https://github.com/ITISFoundation/osparc-simcore/pull/4180 is resolved. 
- - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.sticky.cookie=true - - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.sticky.cookie.samesite=lax - - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.sticky.cookie.httponly=true - - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.sticky.cookie.secure=true + # NOTE: stickyness must remain only for specific endpoints, see https://github.com/ITISFoundation/osparc-simcore/pull/4180 - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_retry.retry.attempts=2 - traefik.http.routers.${SWARM_STACK_NAME}_webserver.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P<study_uuid>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`)) - traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver.priority=6 - traefik.http.routers.${SWARM_STACK_NAME}_webserver.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_retry + # Sticky cookie only for specific endpoints + - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie=true + - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie.samesite=lax + - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie.httponly=true + - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie.secure=true + # Per-endpoint routers for sticky cookie + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.rule=Path(`/v0/projects`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.entrypoints=http + -
traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.service=${SWARM_STACK_NAME}_webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.rule=Path(`/v0/projects:clone`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.service=${SWARM_STACK_NAME}_webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.rule=PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.service=${SWARM_STACK_NAME}_webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.service=${SWARM_STACK_NAME}_webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.middlewares=${SWARM_STACK_NAME}_gzip@swarm, 
${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.service=${SWARM_STACK_NAME}_webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/export-data$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.service=${SWARM_STACK_NAME}_webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry networks: &webserver_networks - default - interactive_services_subnet @@ -895,7 +917,6 @@ services: WEBSERVER_STATICWEB: "null" WEBSERVER_FUNCTIONS: ${WEBSERVER_FUNCTIONS} # needed for api-server - networks: *webserver_networks wb-db-event-listener: @@ -927,7 +948,7 @@ services: GUNICORN_CMD_ARGS: ${WEBSERVER_GUNICORN_CMD_ARGS} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} SWARM_STACK_NAME: ${SWARM_STACK_NAME} SESSION_SECRET_KEY: ${WEBSERVER_SESSION_SECRET_KEY} WEBSERVER_ACTIVITY: ${WB_DB_EL_ACTIVITY} @@ -997,7 
+1018,7 @@ services: GUNICORN_CMD_ARGS: ${WEBSERVER_GUNICORN_CMD_ARGS} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} # WEBSERVER_DB @@ -1076,7 +1097,6 @@ services: WEBSERVER_USERS: ${WB_GC_USERS} WEBSERVER_WALLETS: ${WB_GC_WALLETS} - networks: - default - interactive_services_subnet @@ -1097,7 +1117,7 @@ services: environment: AGENT_LOGLEVEL: ${AGENT_LOGLEVEL} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} AGENT_VOLUMES_CLEANUP_S3_ENDPOINT: ${AGENT_VOLUMES_CLEANUP_S3_ENDPOINT} AGENT_VOLUMES_CLEANUP_S3_REGION: ${AGENT_VOLUMES_CLEANUP_S3_REGION} AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY: ${AGENT_VOLUMES_CLEANUP_S3_ACCESS_KEY} @@ -1121,7 +1141,7 @@ services: hostname: "{{.Node.Hostname}}-{{.Task.Slot}}" environment: - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} NOTIFICATIONS_LOGLEVEL: ${NOTIFICATIONS_LOGLEVEL} @@ -1143,7 +1163,6 @@ services: TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} - dask-sidecar: image: ${DOCKER_REGISTRY:-itisfoundation}/dask-sidecar:${DOCKER_IMAGE_TAG:-latest} init: true @@ -1164,7 +1183,7 @@ services: DASK_TLS_CERT: ${DASK_TLS_CERT} DASK_SCHEDULER_HOST: ${DASK_SCHEDULER_HOST:-dask-scheduler} DASK_LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - DASK_LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + DASK_LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} DASK_SIDECAR_LOGLEVEL: ${DASK_SIDECAR_LOGLEVEL} SIDECAR_COMP_SERVICES_SHARED_VOLUME_NAME: ${SWARM_STACK_NAME}_computational_shared_data SIDECAR_COMP_SERVICES_SHARED_FOLDER: ${SIDECAR_COMP_SERVICES_SHARED_FOLDER:-/home/scu/computational_shared_data} @@ -1196,7 +1215,7 @@ 
services: networks: - storage_subnet environment: - DATCORE_ADAPTER_LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + DATCORE_ADAPTER_LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} DATCORE_ADAPTER_LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} DATCORE_ADAPTER_TRACING: ${DATCORE_ADAPTER_TRACING} TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} @@ -1209,7 +1228,7 @@ services: environment: &storage_environment DATCORE_ADAPTER_HOST: ${DATCORE_ADAPTER_HOST:-datcore-adapter} LOG_FORMAT_LOCAL_DEV_ENABLED: ${LOG_FORMAT_LOCAL_DEV_ENABLED} - LOG_FILTER_MAPPING : ${LOG_FILTER_MAPPING} + LOG_FILTER_MAPPING: ${LOG_FILTER_MAPPING} POSTGRES_DB: ${POSTGRES_DB} POSTGRES_ENDPOINT: ${POSTGRES_ENDPOINT} POSTGRES_HOST: ${POSTGRES_HOST} @@ -1358,19 +1377,7 @@ services: # also aof (append only) is also enabled such that we get full durability at the expense # of backup size. The backup is written into /data. # https://redis.io/topics/persistence - [ - "redis-server", - "--save", - "60 1", - "--loglevel", - "verbose", - "--databases", - "10", - "--appendonly", - "yes", - "--requirepass", - "${REDIS_PASSWORD}" - ] + [ "redis-server", "--save", "60 1", "--loglevel", "verbose", "--databases", "10", "--appendonly", "yes", "--requirepass", "${REDIS_PASSWORD}" ] networks: - default - autoscaling_subnet From aba93303f9f17f57a6267e44ed0b2d55dd6a3d83 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 6 Jun 2025 10:36:53 +0200 Subject: [PATCH 2/7] remove special character at the end --- services/docker-compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 5ceff3d888b..b91953b734b 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -886,19 +886,19 @@ services: - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.entrypoints=http - 
traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.rule=PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.rule=PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop`) - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size`) - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete$`) 
+ - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete`) - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/export-data$`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/export-data`) - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry From d1b600969c90b4ae3fe491462771a9e0b04d438d Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 6 Jun 2025 15:04:08 +0200 Subject: [PATCH 3/7] sticky --- services/docker-compose.yml | 40 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index b91953b734b..fce54051c10 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -872,36 +872,16 @@ services: - traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http - 
traefik.http.routers.${SWARM_STACK_NAME}_webserver.priority=6 - traefik.http.routers.${SWARM_STACK_NAME}_webserver.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_retry - # Sticky cookie only for specific endpoints - - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie=true - - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie.samesite=lax - - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie.httponly=true - - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_sticky_cookie.sticky.cookie.secure=true - # Per-endpoint routers for sticky cookie - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.rule=Path(`/v0/projects`) - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.entrypoints=http - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.service=${SWARM_STACK_NAME}_webserver - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.rule=Path(`/v0/projects:clone`) - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.entrypoints=http - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.service=${SWARM_STACK_NAME}_webserver - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_projects_clone.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.rule=PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop`) - - 
traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.entrypoints=http - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.service=${SWARM_STACK_NAME}_webserver - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_project_node_stop.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size`) - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.entrypoints=http - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.service=${SWARM_STACK_NAME}_webserver - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_size.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete`) - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.entrypoints=http - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.service=${SWARM_STACK_NAME}_webserver - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_batch_delete.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.rule=PathRegexp(`^/v0/storage/locations/[0-9]+/export-data`) - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.entrypoints=http - - 
traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.service=${SWARM_STACK_NAME}_webserver - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky_storage_export_data.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_sticky_cookie, ${SWARM_STACK_NAME}_webserver_retry + # Create a dedicated sticky service for specific endpoints + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.server.port=8080 + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.secure=true + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.httpOnly=true + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.sameSite=lax + # Single consolidated router for all sticky endpoints + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.rule=Path(`/v0/projects`) || Path(`/v0/projects:clone`) || PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop`) || PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size`) || PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete`) || PathRegexp(`^/v0/storage/locations/[0-9]+/export-data`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.service=${SWARM_STACK_NAME}_webserver_sticky + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_retry networks: &webserver_networks - default - interactive_services_subnet From c0b894d679169d706911a7a1b50caa5f4ca3a3bd Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 10 Jun 2025 16:29:44 +0200 Subject: [PATCH 4/7] add missing libraries --- .vscode/settings.template.json | 3 ++- 1 file changed, 2 insertions(+), 
1 deletion(-) diff --git a/.vscode/settings.template.json b/.vscode/settings.template.json index 4ebda848845..8112185b430 100644 --- a/.vscode/settings.template.json +++ b/.vscode/settings.template.json @@ -1,7 +1,6 @@ // This is a template. Clone and replace extension ".template.json" by ".json" { "autoDocstring.docstringFormat": "pep257", - "editor.tabSize": 2, "editor.insertSpaces": true, "editor.detectIndentation": false, @@ -34,6 +33,8 @@ "python.analysis.typeCheckingMode": "basic", "python.analysis.extraPaths": [ "./packages/aws-library/src", + "./packages/common-library/src", + "./packages/dask-task-models-library/src", "./packages/models-library/src", "./packages/postgres-database/src", "./packages/postgres-database/tests", From 2c67ccca48e437514dce9f9041a5a5e9dcd40538 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 10 Jun 2025 17:39:57 +0200 Subject: [PATCH 5/7] locally sticky is http --- services/docker-compose.local.yml | 3 ++- services/docker-compose.yml | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/services/docker-compose.local.yml b/services/docker-compose.local.yml index 5be8ddc4917..4f5d11e6b90 100644 --- a/services/docker-compose.local.yml +++ b/services/docker-compose.local.yml @@ -158,7 +158,8 @@ services: - "3001:3000" deploy: labels: - - traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.sticky.cookie.secure=false + # locally webserver is accessible through http:// + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.secure=false - traefik.http.routers.${SWARM_STACK_NAME}_webserver_local.service=${SWARM_STACK_NAME}_webserver - traefik.http.routers.${SWARM_STACK_NAME}_webserver_local.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver_local.rule=PathPrefix(`/dev/`) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index fce54051c10..e0ddd4646f3 100644 --- 
a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -854,6 +854,7 @@ services: WEBSERVER_FOLDERS: ${WEBSERVER_FOLDERS} deploy: + replicas: 5 labels: - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} # gzip compression @@ -874,11 +875,18 @@ services: - traefik.http.routers.${SWARM_STACK_NAME}_webserver.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_retry # Create a dedicated sticky service for specific endpoints - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.server.port=8080 + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie=true - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.secure=true - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.httpOnly=true - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.sameSite=lax # Single consolidated router for all sticky endpoints - - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.rule=Path(`/v0/projects`) || Path(`/v0/projects:clone`) || PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop`) || PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size`) || PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete`) || PathRegexp(`^/v0/storage/locations/[0-9]+/export-data`) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.rule=Path(`/v0/projects`) || + Path(`/v0/projects:clone`) || + PathRegexp(`^/v0/projects/[0-9a-fA-F-]+/nodes/[0-9a-fA-F-]+:stop`) || + PathRegexp(`^/v0/storage/locations/[0-9]+/paths/.+:size`) || + PathRegexp(`^/v0/storage/locations/[0-9]+/-/paths:batchDelete`) || + PathRegexp(`^/v0/storage/locations/[0-9]+/export-data`) || + PathRegexp(`^/v0/tasks-legacy/.+`) - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.entrypoints=http - 
traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.service=${SWARM_STACK_NAME}_webserver_sticky - traefik.http.routers.${SWARM_STACK_NAME}_webserver_sticky.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_retry From a2c61e5507462c7435ba3d9c9c116882f4b488cb Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 10 Jun 2025 17:47:41 +0200 Subject: [PATCH 6/7] added documentation --- services/docker-compose.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index e0ddd4646f3..37a5def1145 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -854,7 +854,9 @@ services: WEBSERVER_FOLDERS: ${WEBSERVER_FOLDERS} deploy: - replicas: 5 + # NOTE: having 2 replicas is necessary to detect early on if in-process tasks are mistakenly added to the webserver + # in case this cannot be done otherwise, the sticky rule below will need to be adapted + replicas: 2 labels: - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} # gzip compression From 72c3fb2cd8e08caa0a130ebad0e45579a34712ce Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 10 Jun 2025 17:55:18 +0200 Subject: [PATCH 7/7] missing healthcheck --- services/docker-compose.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 37a5def1145..510e44cce4e 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -877,6 +877,9 @@ services: - traefik.http.routers.${SWARM_STACK_NAME}_webserver.middlewares=${SWARM_STACK_NAME}_gzip@swarm, ${SWARM_STACK_NAME_NO_HYPHEN}_sslheader@swarm, ${SWARM_STACK_NAME}_webserver_retry # Create a dedicated sticky service for specific endpoints - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.server.port=8080 + - 
traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.healthcheck.path=/v0/ + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.healthcheck.interval=2000ms + - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.healthcheck.timeout=1000ms - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie=true - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.secure=true - traefik.http.services.${SWARM_STACK_NAME}_webserver_sticky.loadbalancer.sticky.cookie.httpOnly=true