Skip to content

Commit 7a94f0d

Browse files
authored
Add fallback simcore traefik routes (#950)
* Draft files * update * minor fixes * Add traefik placeholder service and fallback routers Traefik placeholder will be used to define low priority traefik configuration that will kick in once main docker service is not healthy (its configuration is removed) * Remove nginx config This shall be done in separate PR. First we introduce proper 503 status codes and then we sync about serving some content * Remove nginx config leftovers Check first commit for explanations * Remove stale comments * Invitations: add traefik healthcheck * ops traefik placeholder: add healthcheck * Healthcheck retries 3 * Define replicas via ENV
1 parent ee52b2f commit 7a94f0d

File tree

4 files changed

+95
-16
lines changed

4 files changed

+95
-16
lines changed

services/simcore/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
.env
22
docker-compose.deploy.yml
3+
docker-compose.yml
34
dask-sidecar/**
45
assets/
56
docker-compose.yml

services/simcore/docker-compose.deploy.local.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,7 @@ services:
109109
- traefik.tcp.services.${SWARM_STACK_NAME}_postgresRoute.loadbalancer.server.port=5432
110110
- "traefik.tcp.routers.${SWARM_STACK_NAME}_postgresRoute.rule=ClientIP(`195.176.8.0/24`) || ClientIP(`10.0.0.0/8`) || ClientIP(`172.16.0.0/12`) || ClientIP(`192.168.0.0/16`)"
111111
replicas: 1
112-
traefik_api:
113-
deploy:
114-
replicas: 1
112+
115113
webserver:
116114
deploy:
117115
replicas: 1

services/simcore/docker-compose.yml.j2

Lines changed: 87 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,13 @@ services:
186186
# internal traefik
187187
- traefik.enable=true
188188
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
189+
# NOTE: keep in sync with fallback router (rule and entrypoint)
189190
- traefik.http.routers.${SWARM_STACK_NAME}_invitations.rule=(${DEPLOYMENT_FQDNS_CAPTURE_INVITATIONS})
190191
- traefik.http.routers.${SWARM_STACK_NAME}_invitations.entrypoints=http
191192
- traefik.http.services.${SWARM_STACK_NAME}_invitations.loadbalancer.server.port=8000
193+
- traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.healthcheck.path=/
194+
- traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.healthcheck.interval=2000ms
195+
- traefik.http.services.${SWARM_STACK_NAME}_webserver.loadbalancer.healthcheck.timeout=1000ms
192196
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_swagger.rule=(${DEPLOYMENT_FQDNS_CAPTURE_INVITATIONS}) && PathPrefix(`/dev/doc`)
193197
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_swagger.entrypoints=http
194198
- traefik.http.middlewares.${SWARM_STACK_NAME_NO_HYPHEN}_invitations_swagger_auth.basicauth.users=${TRAEFIK_USER}:${TRAEFIK_PASSWORD}
@@ -270,7 +274,6 @@ services:
270274
<<: *webserver_resources
271275
extra_hosts: []
272276

273-
274277
wb-db-event-listener:
275278
hostname: "{% raw %}{{.Service.Name}}{% endraw %}"
276279
environment:
@@ -770,9 +773,11 @@ services:
770773
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.tls=true
771774
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.middlewares=ops_gzip@swarm, ops_sslheader@swarm, ops_ratelimit@swarm
772775
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.service=${SWARM_STACK_NAME}_simcore_http
776+
# Note: keep in sync with fallback router (rule and entrypoint)
773777
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.rule=((${DEPLOYMENT_FQDNS_CAPTURE_TRAEFIK_RULE_CATCHALL}) && PathPrefix(`/`)) || ( (PathPrefix(`/dashboard`) || PathPrefix(`/api`) || PathPrefix(`/doc`) ) && Host(`traefikdashboard.${MACHINE_FQDN}`))
774778
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http.priority=3
775779
# oSparc publicAPI
780+
# Note: keep in sync with fallback router (rule and entrypoint)
776781
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api.rule=(${DEPLOYMENT_API_DOMAIN_CAPTURE_TRAEFIK_RULE}) && PathPrefix(`/`)
777782
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api.entrypoints=https
778783
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api.loadbalancer.server.port=10081
@@ -813,31 +818,103 @@ services:
813818
reservations:
814819
memory: 128M
815820
cpus: '0.1'
816-
traefik_api:
817-
# NOTE: this is a trick to allow to access the internal traefik REST API
818-
# A comment
819-
# list router like so: curl https://domain/api/http/routers | jq
821+
822+
# use to define fallback routes for simcore traefik
823+
# if docker healthcheck fails, simcore traefik configuration is
824+
# removed from ops traefik https://github.com/traefik/traefik/issues/7842
825+
#
826+
# use fallback routes to return proper 503 (instead of 404)
827+
# this service must be running at all times
828+
ops-traefik-configuration-placeholder:
820829
image: busybox:1.35.0
821-
command: sleep 900000d
830+
command: sleep infinity
831+
networks:
832+
- public
833+
deploy:
834+
replicas: ${OPS_TRAEFIK_CONFIGURATION_PLACEHOLDER_REPLICAS}
835+
placement:
836+
constraints:
837+
- node.labels.traefik==true
838+
resources:
839+
limits:
840+
memory: 16M
841+
cpus: '0.1'
842+
reservations:
843+
memory: 8M
844+
cpus: '0.1'
845+
846+
labels:
847+
# external traefik
848+
- traefik.enable=true
849+
850+
### oSparc web (fallback low priority rule)
851+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.entrypoints=https
852+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.tls=true
853+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.middlewares=ops_gzip@swarm, ops_sslheader@swarm, ops_ratelimit@swarm
854+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.service=${SWARM_STACK_NAME}_simcore_http_fallback
855+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.rule=((${DEPLOYMENT_FQDNS_CAPTURE_TRAEFIK_RULE_CATCHALL}) && PathPrefix(`/`)) || ( (PathPrefix(`/dashboard`) || PathPrefix(`/api`) || PathPrefix(`/doc`) ) && Host(`traefikdashboard.${MACHINE_FQDN}`))
856+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_http_fallback.priority=1
857+
# always return 503
858+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.server.port=0
859+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
860+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.healthcheck.interval=10s
861+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_http_fallback.loadbalancer.healthcheck.timeout=1ms
862+
863+
### oSparc publicAPI (fallback low priority rule)
864+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.rule=(${DEPLOYMENT_API_DOMAIN_CAPTURE_TRAEFIK_RULE}) && PathPrefix(`/`)
865+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.priority=1
866+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.entrypoints=https
867+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.tls=true
868+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.middlewares=ops_gzip@swarm, ops_ratelimit@swarm
869+
- traefik.http.routers.${SWARM_STACK_NAME}_simcore_api_fallback.service=${SWARM_STACK_NAME}_simcore_api_fallback
870+
# always return 503
871+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.server.port=0
872+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
873+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.healthcheck.interval=10s
874+
- traefik.http.services.${SWARM_STACK_NAME}_simcore_api_fallback.loadbalancer.healthcheck.timeout=1ms
875+
healthcheck:
876+
test: command -v sleep
877+
interval: 10s
878+
timeout: 1s
879+
start_period: 1s
880+
retries: 3
881+
882+
traefik-configuration-placeholder: # simcore traefik with `io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}` label
883+
image: busybox:1.35.0
884+
command: sleep infinity
822885
networks:
823886
- default
824887
deploy:
888+
replicas: ${SIMCORE_TRAEFIK_CONFIGURATION_PLACEHOLDER_REPLICAS}
825889
labels:
826890
# route to internal traefik
891+
- traefik.enable=true
827892
- io.simcore.zone=${TRAEFIK_SIMCORE_ZONE}
893+
828894
# traefik UI
829-
- traefik.enable=true
830895
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.service=api@internal
831896
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.rule=(PathPrefix(`/dashboard`) || PathPrefix(`/api`) ) && Host(`traefikdashboard.${MACHINE_FQDN}`)
832897
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.entrypoints=http
833-
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.priority=6
834898
- traefik.http.routers.${SWARM_STACK_NAME}_traefik_api.middlewares=${SWARM_STACK_NAME}_auth@swarm, ${SWARM_STACK_NAME}_whitelist_ips@swarm
835899
- traefik.http.services.${SWARM_STACK_NAME}_traefik_api.loadbalancer.server.port=8080
900+
836901
# Middlewares
837902
# basic authentication
838903
- traefik.http.middlewares.${SWARM_STACK_NAME}_auth.basicauth.users=${TRAEFIK_USER}:${TRAEFIK_PASSWORD}
839904
# OPS IP Whitelist
840905
- traefik.http.middlewares.${SWARM_STACK_NAME}_whitelist_ips.ipallowlist.sourcerange=${TRAEFIK_IPWHITELIST_SOURCERANGE}
906+
907+
### Fallback for invitations
908+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.rule=${DEPLOYMENT_FQDNS_CAPTURE_INVITATIONS}
909+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.service=${SWARM_STACK_NAME}_invitations_fallback
910+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.entrypoints=http
911+
- traefik.http.routers.${SWARM_STACK_NAME}_invitations_fallback.priority=1
912+
# always fail and return 503 via unhealthy loadbalancer healthcheck
913+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.server.port=0
914+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.healthcheck.path=/some/invalid/path/to/generate/a/503
915+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.healthcheck.interval=10s
916+
- traefik.http.services.${SWARM_STACK_NAME}_invitations_fallback.loadbalancer.healthcheck.timeout=1ms
917+
841918
update_config:
842919
parallelism: 2
843920
order: start-first
@@ -858,6 +935,7 @@ services:
858935
reservations:
859936
memory: 8M
860937
cpus: '0.1'
938+
861939
whoami:
862940
image: "containous/whoami:v1.5.0"
863941
networks:
@@ -896,6 +974,7 @@ services:
896974
reservations:
897975
memory: 8M
898976
cpus: '0.1'
977+
899978
payments:
900979
deploy:
901980
placement:

services/traefik/docker-compose.yml.j2

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ services:
194194
networks:
195195
public: null
196196
monitored: null
197+
197198
whoami:
198199
image: "containous/whoami"
199200
deploy:
@@ -220,15 +221,15 @@ services:
220221
networks:
221222
- public
222223

224+
configs:
225+
traefik_dynamic_config.yml:
226+
name: {{ STACK_NAME }}_traefik_dynamic_config_{{ "./traefik_dynamic_config.yml" | sha256file | substring(0,10) }}
227+
file: ./traefik_dynamic_config.yml
228+
223229
networks:
224230
public:
225231
external: true
226232
name: ${PUBLIC_NETWORK}
227233
monitored:
228234
name: ${MONITORED_NETWORK}
229235
external: true
230-
231-
configs:
232-
traefik_dynamic_config.yml:
233-
name: ${STACK_NAME}_traefik_dynamic_config_{{ "./traefik_dynamic_config.yml" | sha256file | substring(0,10) }}
234-
file: ./traefik_dynamic_config.yml

0 commit comments

Comments
 (0)