diff --git a/packages/pytest-simcore/src/pytest_simcore/simcore_services.py b/packages/pytest-simcore/src/pytest_simcore/simcore_services.py index 2a4f6d2ff4dc..d0c62e33010d 100644 --- a/packages/pytest-simcore/src/pytest_simcore/simcore_services.py +++ b/packages/pytest-simcore/src/pytest_simcore/simcore_services.py @@ -40,6 +40,7 @@ "whoami", "sto-worker", "sto-worker-cpu-bound", + "traefik-configuration-placeholder", } # TODO: unify healthcheck policies see https://github.com/ITISFoundation/osparc-simcore/pull/2281 DEFAULT_SERVICE_HEALTHCHECK_ENTRYPOINT: Final[str] = "/v0/" diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 4d3a4d7da584..cb92ec8b43c5 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -57,6 +57,7 @@ services: - traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.path=/ - traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.interval=2000ms - traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.timeout=1000ms + # NOTE: keep in sync with fallback router (rule and entrypoint) - traefik.http.routers.${SWARM_STACK_NAME}_api-server.rule=(Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`)) - traefik.http.routers.${SWARM_STACK_NAME}_api-server.entrypoints=simcore_api - traefik.http.routers.${SWARM_STACK_NAME}_api-server.priority=3 @@ -628,6 +629,7 @@ services: - traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.interval=2000ms - traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.timeout=1000ms - traefik.http.middlewares.${SWARM_STACK_NAME}_static_webserver_retry.retry.attempts=2 + # NOTE: keep in sync with fallback router (rule and entrypoint) - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`)) - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.service=${SWARM_STACK_NAME}_static_webserver - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.entrypoints=http @@ -871,6 +873,7 @@ services: # NOTE: stickyness must remain only for specific endpoints, see https://github.com/ITISFoundation/osparc-simcore/pull/4180 - traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_retry.retry.attempts=2 - traefik.http.routers.${SWARM_STACK_NAME}_webserver.service=${SWARM_STACK_NAME}_webserver + # NOTE: keep in sync with fallback router (rule and entrypoint) - traefik.http.routers.${SWARM_STACK_NAME}_webserver.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`)) - traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http - traefik.http.routers.${SWARM_STACK_NAME}_webserver.priority=6 @@ -1456,6 +1459,62 @@ services: - default - interactive_services_subnet # for legacy dynamic services + # use to define fallback routes for simcore services + # if docker healthcheck fails, container's traefik configuration is removed + # leading to 404 https://github.com/traefik/traefik/issues/7842 + # + # use fallback routes to return proper 503 (instead of 404) + # this service must be running at all times + traefik-configuration-placeholder: + image: busybox:1.35.0 + command: sleep infinity + networks: + - default + deploy: + labels: + # route to internal traefik + - traefik.enable=true + - io.simcore.zone=${TRAEFIK_SIMCORE_ZONE} + + ### Fallback for api-server + - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.rule=(Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`)) + - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.service=${SWARM_STACK_NAME}_api-server_fallback + - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.entrypoints=simcore_api + - traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.priority=1 + # always fail and return 503 via unhealthy loadbalancer healthcheck + - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.server.port=0 # port is required (otherwise traefik service is not created) + - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503 + - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.interval=10s + - traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.timeout=1ms + + ### Fallback for webserver + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.service=${SWARM_STACK_NAME}_webserver_fallback + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`)) + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.priority=1 + # always fail and return 503 via unhealthy loadbalancer healthcheck + - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.server.port=0 + - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.path=/v0/ + - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.interval=10s + - traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.timeout=1ms + + ### Fallback for static-webserver + - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`)) + - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.service=${SWARM_STACK_NAME}_static_webserver_fallback + - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.entrypoints=http + - traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.priority=1 + # always fail and return 503 via unhealthy loadbalancer healthcheck + - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.server.port=0 + - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503 + - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.interval=10s + - traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.timeout=1ms + healthcheck: + test: command -v sleep + interval: 10s + timeout: 1s + start_period: 1s + retries: 3 + volumes: postgres_data: name: ${SWARM_STACK_NAME}_postgres_data