Skip to content

Commit cd71c84

Browse files
committed
Add traefik placeholder service and fallback routers
The Traefik placeholder is used to define low-priority Traefik configuration that kicks in once the main docker service is not healthy (i.e. its configuration has been removed)
1 parent c41f329 commit cd71c84

File tree

8 files changed

+178
-125
lines changed

8 files changed

+178
-125
lines changed

services/simcore/configs/fallback-service-nginx/api/default.conf

Lines changed: 0 additions & 19 deletions
This file was deleted.

services/simcore/configs/fallback-service-nginx/web/default.conf

Lines changed: 0 additions & 19 deletions
This file was deleted.

services/simcore/docker-compose.deploy.local.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,7 @@ services:
109109
- traefik.tcp.services.${SWARM_STACK_NAME}_postgresRoute.loadbalancer.server.port=5432
110110
- "traefik.tcp.routers.${SWARM_STACK_NAME}_postgresRoute.rule=ClientIP(`195.176.8.0/24`) || ClientIP(`10.0.0.0/8`) || ClientIP(`172.16.0.0/12`) || ClientIP(`192.168.0.0/16`)"
111111
replicas: 1
112-
traefik_api:
113-
deploy:
114-
replicas: 1
112+
115113
webserver:
116114
deploy:
117115
replicas: 1

services/simcore/docker-compose.yml.j2

Lines changed: 75 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ services:
186186
# internal traefik
187187
- traefik.enable=true
188188
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
189+
# NOTE: keep in sync with fallback router (rule and entrypoint)
189190
- traefik.http.routers.${SWARM_STACK_NAME}_invitations.rule=(${DEPLOYMENT_FQDNS_CAPTURE_INVITATIONS})
190191
- traefik.http.routers.${SWARM_STACK_NAME}_invitations.entrypoints=http
191192
- traefik.http.services.${SWARM_STACK_NAME}_invitations.loadbalancer.server.port=8000
@@ -769,9 +770,11 @@ services:
769770
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.tls=true
770771
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.middlewares=ops_gzip@swarm, ops_sslheader@swarm, ops_ratelimit@swarm
771772
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.service=${SWARM_STACK_NAME}_simcore_http
773+
# Note: keep in sync with fallback router (rule and entrypoint)
772774
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.rule=((${DEPLOYMENT_FQDNS_CAPTURE_TRAEFIK_RULE_CATCHALL}) && PathPrefix(`/`)) || ( (PathPrefix(`/dashboard`) || PathPrefix(`/api`) || PathPrefix(`/doc`) ) && Host(`traefikdashboard.${MACHINE_FQDN}`))
773775
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.priority=3
774776
# oSparc publicAPI
777+
# Note: keep in sync with fallback router (rule and entrypoint)
775778
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api.rule=(${DEPLOYMENT_API_DOMAIN_CAPTURE_TRAEFIK_RULE}) && PathPrefix(`/`)
776779
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api.entrypoints=https
777780
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api.loadbalancer.server.port=10081
@@ -813,31 +816,96 @@ services:
813816
memory: 128M
814817
cpus: '0.1'
815818

816-
traefik_api:
817-
# NOTE: this is a trick to allow to access the internal traefik REST API
818-
# A comment
819-
# list router like so: curl https://domain/api/http/routers | jq
819+
# used to define fallback routes for simcore traefik
820+
# if docker healthcheck fails, simcore traefik configuration is
821+
# removed from ops traefik https://github.com/traefik/traefik/issues/7842
822+
#
823+
# use fallback routes to return proper 503 (instead of 404)
824+
# this service must be running at all times
825+
ops-traefik-configuration-placeholder: # TODO reuse same skeleton
820826
image: busybox:1.35.0
821-
command: sleep 900000d
827+
command: sleep infinity
828+
networks:
829+
- public
830+
deploy:
831+
placement:
832+
constraints:
833+
- node.labels.traefik==true
834+
resources:
835+
limits:
836+
memory: 16M
837+
cpus: '0.1'
838+
reservations:
839+
memory: 8M
840+
cpus: '0.1'
841+
842+
labels:
843+
# external traefik
844+
- traefik.enable=true
845+
846+
### oSparc web (fallback low priority rule)
847+
# TODO: remove copy / paste
848+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.entrypoints=https
849+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.tls=true
850+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.middlewares=ops_gzip@swarm, ops_sslheader@swarm, ops_ratelimit@swarm
851+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.service=${SWARM_STACK_NAME}_simcore_http_fallback
852+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.rule=((${DEPLOYMENT_FQDNS_CAPTURE_TRAEFIK_RULE_CATCHALL}) && PathPrefix(`/`)) || ( (PathPrefix(`/dashboard`) || PathPrefix(`/api`) || PathPrefix(`/doc`) ) && Host(`traefikdashboard.${MACHINE_FQDN}`))
853+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.priority=1
854+
# always return 503
855+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.server.port=0
856+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
857+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.healthcheck.interval=10s
858+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.healthcheck.timeout=1ms
859+
860+
### oSparc publicAPI (fallback low priority rule)
861+
# TODO: remove copy / paste
862+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.rule=(${DEPLOYMENT_API_DOMAIN_CAPTURE_TRAEFIK_RULE}) && PathPrefix(`/`)
863+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.priority=1
864+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.entrypoints=https
865+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.tls=true
866+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.middlewares=ops_gzip@swarm, ops_ratelimit@swarm
867+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.service=${SWARM_STACK_NAME}_simcore_api_fallback
868+
# always return 503
869+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.server.port=0
870+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
871+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.healthcheck.interval=10s
872+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.healthcheck.timeout=1ms
873+
874+
traefik-configuration-placeholder: # simcore traefik with `io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}` label
875+
image: busybox:1.35.0
876+
command: sleep infinity
822877
networks:
823878
- default
824879
deploy:
825880
labels:
826881
# route to internal traefik
882+
- traefik.enable=true
827883
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
884+
828885
# traefik UI
829-
- traefik.enable=true
830886
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.service=api@internal
831887
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.rule=(PathPrefix(`/dashboard`) || PathPrefix(`/api`) ) && Host(`traefikdashboard.${MACHINE_FQDN}`)
832888
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.entrypoints=http
833-
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.priority=6
834889
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.middlewares=${SWARM_STACK_NAME}_auth@swarm, ${SWARM_STACK_NAME}_whitelist_ips@swarm
835890
- traefik.http.services.${SWARM_STACK_NAME}_traefik_api.loadbalancer.server.port=8080
891+
836892
# Middlewares
837893
# basic authentication
838894
- traefik.http.middlewares.${SWARM_STACK_NAME}_auth.basicauth.users=${TRAEFIK_USER}:${TRAEFIK_PASSWORD}
839895
# OPS IP Whitelist
840896
- traefik.http.middlewares.${SWARM_STACK_NAME}_whitelist_ips.ipallowlist.sourcerange=${TRAEFIK_IPWHITELIST_SOURCERANGE}
897+
898+
### Fallback for invitations
899+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.rule=${DEPLOYMENT_FQDNS_CAPTURE_INVITATIONS}
900+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.service=${SWARM_STACK_NAME}_invitations_fallback
901+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.entrypoints=http
902+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.priority=1
903+
# always fail and return 503 via unhealthy loadbalancer healthcheck
904+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.server.port=0
905+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
906+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.healthcheck.interval=10s
907+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.healthcheck.timeout=1ms
908+
841909
update_config:
842910
parallelism: 2
843911
order: start-first
@@ -992,78 +1060,6 @@ services:
9921060
cpus: "0.5"
9931061
memory: "512M"
9941062

995-
fallback-service-web:
996-
image: nginx:1.25.1
997-
configs:
998-
- source: {{ SWARM_STACK_NAME }}_web_html
999-
target: /usr/share/nginx/html/503.html
1000-
- source: {{ SWARM_STACK_NAME }}_web_nginx_config
1001-
target: /etc/nginx/conf.d/default.conf
1002-
networks:
1003-
- public
1004-
- monitored
1005-
deploy:
1006-
placement:
1007-
constraints:
1008-
- node.labels.simcore==true
1009-
update_config:
1010-
order: start-first
1011-
labels:
1012-
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
1013-
- traefik.enable=true
1014-
1015-
# webserver
1016-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_html.priority=1
1017-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_html.rule=(Path(`/`) || Path(`/v0`) || Path(`/socket.io/`) || Path(`/static-frontend-data.json`) || PathRegexp(`^/study/(?P<study_uuid>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)`) || Path(`/view`) || Path(`/#/view`) || Path(`/#/error`) || PathPrefix(`/v0/`))
1018-
- traefik.http.services.${PREFIX_STACK_NAME}_fallback_html.loadbalancer.server.port=80
1019-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_html.entrypoints=http
1020-
1021-
fallback-service-api:
1022-
image: nginx:1.25.1
1023-
configs:
1024-
- source: {{ SWARM_STACK_NAME }}_api_json
1025-
target: /usr/share/nginx/api/503.json
1026-
- source: {{ SWARM_STACK_NAME }}_api_nginx_config
1027-
target: /etc/nginx/conf.d/default.conf
1028-
networks:
1029-
- public
1030-
- monitored
1031-
deploy:
1032-
placement:
1033-
constraints:
1034-
- node.labels.simcore==true
1035-
update_config:
1036-
order: start-first
1037-
labels:
1038-
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
1039-
- traefik.enable=true
1040-
# api-server
1041-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_api.priority=1
1042-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_api.rule=Path(`/`) || Path(`/v0`) || PathPrefix(`/v0/`) || Path(`/api/v0/openapi.json`)
1043-
- traefik.http.services.${PREFIX_STACK_NAME}_fallback_api.loadbalancer.server.port=80
1044-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_api.entrypoints=simcore_api
1045-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_api.service=${PREFIX_STACK_NAME}_fallback_api
1046-
# invitations
1047-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_invitations_api.priority=1
1048-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_invitations_api.rule=(${DEPLOYMENT_FQDNS_CAPTURE_INVITATIONS})
1049-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_invitations_api.entrypoints=http
1050-
- traefik.http.services.${PREFIX_STACK_NAME}_fallback_invitations_api.loadbalancer.server.port=80
1051-
- traefik.http.routers.${PREFIX_STACK_NAME}_fallback_invitations_api.service=${PREFIX_STACK_NAME}_fallback_invitations_api
1052-
1053-
configs:
1054-
{{ SWARM_STACK_NAME }}_web_html:
1055-
file: ./configs/fallback-service-nginx/web/503.html
1056-
name: {{ SWARM_STACK_NAME }}_web_html_{{ "./configs/fallback-service-nginx/web/503.html" | sha256file | substring(0,10) }}
1057-
{{ SWARM_STACK_NAME }}_api_json:
1058-
file: ./configs/fallback-service-nginx/api/503.json
1059-
name: {{ SWARM_STACK_NAME }}_api_json_{{ "./configs/fallback-service-nginx/api/503.json" | sha256file | substring(0,10) }}
1060-
{{ SWARM_STACK_NAME }}_web_nginx_config:
1061-
file: ./configs/fallback-service-nginx/web/default.conf
1062-
name: {{ SWARM_STACK_NAME }}_web_nginx_config_{{ "./configs/fallback-service-nginx/web/default.conf" | sha256file | substring(0,10) }}
1063-
{{ SWARM_STACK_NAME }}_api_nginx_config:
1064-
file: ./configs/fallback-service-nginx/api/default.conf
1065-
name: {{ SWARM_STACK_NAME }}_api_nginx_config_{{ "./configs/fallback-service-nginx/api/default.conf" | sha256file | substring(0,10) }}
1066-
10671063
volumes:
10681064
rabbit_data:
10691065
name: ${SWARM_STACK_NAME}_rabbit_data
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# api (json) fallback
2+
# once simcore needs special error pages different from OPS services,
3+
# it can be customized via url: /simcore/503.html /simcore/503.json
4+
server {
5+
listen 8000;
6+
listen [::]:8000;
7+
server_name localhost;
8+
9+
error_page 503 /503.json;
10+
11+
location / {
12+
return 503;
13+
}
14+
15+
location = /503.json {
16+
default_type application/json;
17+
18+
add_header Retry-After "10" always; # https://serverfault.com/a/647552
19+
20+
root /usr/share/nginx/api;
21+
}
22+
}
23+
24+
# web (html) fallback
25+
server {
26+
listen 80;
27+
listen [::]:80;
28+
server_name localhost;
29+
30+
error_page 503 /503.html;
31+
32+
location / {
33+
return 503;
34+
}
35+
36+
location = /503.html {
37+
default_type text/html;
38+
39+
add_header Retry-After "10" always;
40+
41+
root /usr/share/nginx/html;
42+
}
43+
}

services/traefik/docker-compose.yml.j2

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ services:
194194
networks:
195195
public: null
196196
monitored: null
197+
197198
whoami:
198199
image: "containous/whoami"
199200
deploy:
@@ -220,15 +221,68 @@ services:
220221
networks:
221222
- public
222223

224+
error-pages-static-webserver:
225+
image: nginx:1.25.1
226+
networks:
227+
- public
228+
configs:
229+
- source: ${STACK_NAME}_503_api_json
230+
target: /usr/share/nginx/api/503.json
231+
- source: ${STACK_NAME}_503_web_html
232+
target: /usr/share/nginx/html/503.html
233+
- source: ${STACK_NAME}_nginx_error_pages_config
234+
target: /etc/nginx/conf.d/default.conf
235+
deploy:
236+
labels:
237+
# defining labels as list has advantage in interpolation
238+
# https://docs.docker.com/reference/compose-file/interpolation/
239+
# disadvantage: we cannot reuse values with yaml `& and *` syntax
240+
- traefik.enable=true
241+
242+
# middleware for ops traefik. Use for custom 503 (json) api response
243+
- traefik.http.middlewares.custom-50x-json-errors.errors.status=500,501,503,505-599
244+
- traefik.http.middlewares.custom-50x-json-errors.errors.service=traefik_error_page_json
245+
- traefik.http.middlewares.custom-50x-json-errors.errors.query=/503.json
246+
247+
# middleware for ops traefik. Use for custom 503 html response
248+
- traefik.http.middlewares.custom-50x-html-errors.errors.status=500,501,503,505-599
249+
- traefik.http.middlewares.custom-50x-html-errors.errors.service=traefik_error_page_html
250+
- traefik.http.middlewares.custom-50x-html-errors.errors.query=/503.html # html middleware must request the html page (was /503.json, copy/paste from the json middleware)
251+
252+
# rule to serve custom 503 error html page
253+
- traefik.http.routers.traefik_error_page_html.entrypoints=http
254+
- traefik.http.routers.traefik_error_page_html.priority=1
255+
- traefik.http.routers.traefik_error_page_html.rule=Path(`/503.html`)
256+
- traefik.http.routers.traefik_error_page_html.tls=false
257+
- traefik.http.routers.traefik_error_page_html.service=traefik_error_page_html
258+
- traefik.http.services.traefik_error_page_html.loadbalancer.server.port=80 # nginx port for (non-api) http
259+
260+
# rule to serve custom 503 error json page
261+
- traefik.http.routers.traefik_error_page_json.entrypoints=http
262+
- traefik.http.routers.traefik_error_page_json.priority=1
263+
- traefik.http.routers.traefik_error_page_json.rule=Path(`/503.json`)
264+
- traefik.http.routers.traefik_error_page_json.tls=false
265+
- traefik.http.routers.traefik_error_page_json.service=traefik_error_page_json
266+
- traefik.http.services.traefik_error_page_json.loadbalancer.server.port=8000 # nginx port for the api (json) error server
267+
268+
configs:
269+
traefik_dynamic_config.yml:
270+
name: {{ STACK_NAME }}_traefik_dynamic_config_{{ "./traefik_dynamic_config.yml" | sha256file | substring(0,10) }}
271+
file: ./traefik_dynamic_config.yml
272+
{{ STACK_NAME }}_503_web_html:
273+
file: ./config/error_pages/503.html
274+
name: {{ STACK_NAME }}_web_html_{{ "./config/error_pages/503.html" | sha256file | substring(0,10) }}
275+
{{ STACK_NAME }}_503_api_json:
276+
file: ./config/error_pages/503.json
277+
name: {{ STACK_NAME }}_api_json_{{ "./config/error_pages/503.json" | sha256file | substring(0,10) }}
278+
{{ STACK_NAME }}_nginx_error_pages_config:
279+
file: ./config/error_pages/default.conf
280+
name: {{ STACK_NAME }}_web_nginx_config_{{ "./config/error_pages/default.conf" | sha256file | substring(0,10) }}
281+
223282
networks:
224283
public:
225284
external: true
226285
name: ${PUBLIC_NETWORK}
227286
monitored:
228287
name: ${MONITORED_NETWORK}
229288
external: true
230-
231-
configs:
232-
traefik_dynamic_config.yml:
233-
name: ${STACK_NAME}_traefik_dynamic_config_{{ "./traefik_dynamic_config.yml" | sha256file | substring(0,10) }}
234-
file: ./traefik_dynamic_config.yml

0 commit comments

Comments
 (0)