Skip to content

Commit bfb683d

Browse files
🎨 Add fallback traefik 503 routes 🚨⚠️ DEVOPS (#7899)
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent f98ad3a commit bfb683d

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

packages/pytest-simcore/src/pytest_simcore/simcore_services.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"whoami",
4141
"sto-worker",
4242
"sto-worker-cpu-bound",
43+
"traefik-configuration-placeholder",
4344
}
4445
# TODO: unify healthcheck policies see https://github.com/ITISFoundation/osparc-simcore/pull/2281
4546
DEFAULT_SERVICE_HEALTHCHECK_ENTRYPOINT: Final[str] = "/v0/"

services/docker-compose.yml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ services:
5757
- traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.path=/
5858
- traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.interval=2000ms
5959
- traefik.http.services.${SWARM_STACK_NAME}_api-server.loadbalancer.healthcheck.timeout=1000ms
60+
# NOTE: keep rule and entrypoint in sync with the `api-server_fallback` router defined in the `traefik-configuration-placeholder` service
6061
- traefik.http.routers.${SWARM_STACK_NAME}_api-server.rule=(Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`))
6162
- traefik.http.routers.${SWARM_STACK_NAME}_api-server.entrypoints=simcore_api
6263
- traefik.http.routers.${SWARM_STACK_NAME}_api-server.priority=3
@@ -628,6 +629,7 @@ services:
628629
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.interval=2000ms
629630
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver.loadbalancer.healthcheck.timeout=1000ms
630631
- traefik.http.middlewares.${SWARM_STACK_NAME}_static_webserver_retry.retry.attempts=2
632+
# NOTE: keep rule and entrypoint in sync with the `static_webserver_fallback` router defined in the `traefik-configuration-placeholder` service
631633
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`))
632634
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.service=${SWARM_STACK_NAME}_static_webserver
633635
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver.entrypoints=http
@@ -871,6 +873,7 @@ services:
871873
# NOTE: stickiness must remain only for specific endpoints, see https://github.com/ITISFoundation/osparc-simcore/pull/4180
872874
- traefik.http.middlewares.${SWARM_STACK_NAME}_webserver_retry.retry.attempts=2
873875
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.service=${SWARM_STACK_NAME}_webserver
876+
# NOTE: keep rule and entrypoint in sync with the `webserver_fallback` router defined in the `traefik-configuration-placeholder` service
874877
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P<study_uuid>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`))
875878
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.entrypoints=http
876879
- traefik.http.routers.${SWARM_STACK_NAME}_webserver.priority=6
@@ -1456,6 +1459,62 @@ services:
14561459
- default
14571460
- interactive_services_subnet # for legacy dynamic services
14581461

1462+
# used to define fallback routes for simcore services
1463+
# if a docker healthcheck fails, the container's traefik configuration is removed
1464+
# leading to a 404, see https://github.com/traefik/traefik/issues/7842
1465+
#
1466+
# the fallback routes return a proper 503 (instead of a 404)
1467+
# this service must be running at all times
1468+
traefik-configuration-placeholder:
1469+
image: busybox:1.35.0
1470+
command: sleep infinity
1471+
networks:
1472+
- default
1473+
deploy:
1474+
labels:
1475+
# route to internal traefik
1476+
- traefik.enable=true
1477+
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
1478+
1479+
### Fallback for api-server
1480+
- traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.rule=(Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`))
1481+
- traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.service=${SWARM_STACK_NAME}_api-server_fallback
1482+
- traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.entrypoints=simcore_api
1483+
- traefik.http.routers.${SWARM_STACK_NAME}_api-server_fallback.priority=1
1484+
# always fail and return 503 via unhealthy loadbalancer healthcheck
1485+
- traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.server.port=0 # port is required (otherwise traefik service is not created)
1486+
- traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
1487+
- traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.interval=10s
1488+
- traefik.http.services.${SWARM_STACK_NAME}_api-server_fallback.loadbalancer.healthcheck.timeout=1ms
1489+
1490+
### Fallback for webserver
1491+
- traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.service=${SWARM_STACK_NAME}_webserver_fallback
1492+
- traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P<study_uuid>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`))
1493+
- traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.entrypoints=http
1494+
- traefik.http.routers.${SWARM_STACK_NAME}_webserver_fallback.priority=1
1495+
# always fail and return 503 via unhealthy loadbalancer healthcheck
1496+
- traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.server.port=0
1497+
- traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.path=/v0/
1498+
- traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.interval=10s
1499+
- traefik.http.services.${SWARM_STACK_NAME}_webserver_fallback.loadbalancer.healthcheck.timeout=1ms
1500+
1501+
### Fallback for static-webserver
1502+
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.rule=(Path(`/osparc`) || Path(`/s4l`) || Path(`/s4llite`) || Path(`/s4lacad`) || Path(`/s4lengine`) || Path(`/s4ldesktop`) || Path(`/s4ldesktopacad`) || Path(`/tis`) || Path(`/tiplite`) || Path(`/transpiled`) || Path(`/resource`) || PathPrefix(`/osparc/`) || PathPrefix(`/s4l/`) || PathPrefix(`/s4llite/`) || PathPrefix(`/s4lacad/`) || PathPrefix(`/s4lengine/`) || PathPrefix(`/s4ldesktop/`) || PathPrefix(`/s4ldesktopacad/`) || PathPrefix(`/tis/`) || PathPrefix(`/tiplite/`) || PathPrefix(`/transpiled/`) || PathPrefix(`/resource/`))
1503+
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.service=${SWARM_STACK_NAME}_static_webserver_fallback
1504+
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.entrypoints=http
1505+
- traefik.http.routers.${SWARM_STACK_NAME}_static_webserver_fallback.priority=1
1506+
# always fail and return 503 via unhealthy loadbalancer healthcheck
1507+
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.server.port=0
1508+
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
1509+
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.interval=10s
1510+
- traefik.http.services.${SWARM_STACK_NAME}_static_webserver_fallback.loadbalancer.healthcheck.timeout=1ms
1511+
healthcheck:
1512+
test: command -v sleep
1513+
interval: 10s
1514+
timeout: 1s
1515+
start_period: 1s
1516+
retries: 3
1517+
14591518
volumes:
14601519
postgres_data:
14611520
name: ${SWARM_STACK_NAME}_postgres_data

0 commit comments

Comments
 (0)