Skip to content

Commit e42a00e

Browse files
committed
Robust volume clean up
+ haproxy extra configuration
1 parent 51cd721 commit e42a00e

File tree

7 files changed

+124
-87
lines changed

7 files changed

+124
-87
lines changed

services/rabbit/Makefile

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ include ${REPO_BASE_DIR}/scripts/common-services.Makefile
55
# relies on STACK_NAME var which is defined in common-services.Makefile
66
include ${REPO_BASE_DIR}/scripts/common.Makefile
77

8+
#
9+
# Running cluster
10+
#
11+
812
.PHONY: up
913
up: ${TEMP_COMPOSE} prune-docker-stack-configs prune-docker-stack-secrets ## Deploys rabbit stack
1014
@docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE} ${STACK_NAME}
@@ -22,6 +26,10 @@ up-public: up
2226
${TEMP_COMPOSE}: docker-compose.yml .env
2327
@${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< > $@
2428

29+
#
30+
# Config / Secret files
31+
#
32+
2533
erlang.cookie.secret: erlang.cookie.secret.template .env
2634
@set -a; source .env; set +a; \
2735
envsubst < $< > $@
@@ -41,15 +49,27 @@ docker-compose.yml: docker-compose.yml.j2 .env rabbitmq.conf erlang.cookie.secre
4149
# Deleting volumes (data)
4250
#
4351

44-
TEMP_COMPOSE_OPERATOR := .stack.${STACK_NAME}.operator.yaml
45-
STACK_NAME_OPERATOR := ${STACK_NAME}-operator
52+
CLEAN_VOLUMES_TEMP_COMPOSE := .stack.${STACK_NAME}.clean-volumes.yaml
53+
CLEAN_VOLUMES_STACK_NAME := ${STACK_NAME}-clean-volumes
4654

47-
docker-compose.operator.yaml: docker-compose.operator.yaml.j2 .env
55+
docker-compose.clean-volumes.yaml: docker-compose.clean-volumes.yaml.j2 .env
4856
@$(call jinja, $<, .env, $@)
4957

50-
${TEMP_COMPOSE_OPERATOR}: docker-compose.operator.yaml .env
58+
${CLEAN_VOLUMES_TEMP_COMPOSE}: docker-compose.clean-volumes.yaml .env
5159
@${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< > $@
5260

53-
destroy-cluster-with-data: ${TEMP_COMPOSE_OPERATOR} .env down
54-
@docker stack rm ${STACK_NAME_OPERATOR}
55-
@docker stack deploy --with-registry-auth --prune --compose-file ${TEMP_COMPOSE_OPERATOR} ${STACK_NAME_OPERATOR}
61+
TIMEOUT = 2m
62+
delete-volumes: ${CLEAN_VOLUMES_TEMP_COMPOSE}
63+
## avoid accumulating tasks (in case they fail): always start from a fresh stack
64+
@docker stack rm --detach=false ${CLEAN_VOLUMES_STACK_NAME}
65+
## use --detach=false to wait until all jobs complete successfully
66+
@timeout ${TIMEOUT} \
67+
docker stack deploy --detach=false --with-registry-auth --prune --compose-file ${CLEAN_VOLUMES_TEMP_COMPOSE} ${CLEAN_VOLUMES_STACK_NAME}
68+
@docker stack rm ${CLEAN_VOLUMES_STACK_NAME}
69+
70+
#
71+
# Destroy cluster with data (to start afresh)
72+
#
73+
74+
destroy-cluster-with-data: down delete-volumes
75+
@echo "Cluster has been destroyed. Data has been deleted"
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{% set NODE_IXS = range(1, (RABBIT_CLUSTER_NODE_COUNT | int) + 1) -%}
2+
3+
services:
4+
5+
{% for ix in NODE_IXS %}
6+
clean_rabbit0{{ ix }}_volume:
7+
image: docker:25.0.3-cli
8+
volumes:
9+
- /var/run/docker.sock:/var/run/docker.sock
10+
init: true
11+
deploy:
12+
mode: replicated-job
13+
replicas: 1
14+
restart_policy:
15+
# do not restart jobs: a restarted job could keep running unnoticed in the
16+
# background and unexpectedly remove the volume at a later time, when the
17+
# rabbit cluster is not running
18+
condition: none
19+
placement:
20+
constraints:
21+
- node.labels.rabbit0{{ ix }} == true
22+
resources:
23+
limits:
24+
cpus: "0.5"
25+
memory: "512M"
26+
reservations:
27+
cpus: "0.1"
28+
memory: "128M"
29+
configs:
30+
- source: delete_rabbit_docker_volume_on_node_script
31+
target: /app/delete_rabbit_docker_volume_on_node.sh
32+
mode: 0755
33+
environment:
34+
VOLUME: ${STACK_NAME}0{{ ix }}_data
35+
TIMEOUT_MINUTES: 1
36+
INTERVAL_SECONDS: 10
37+
entrypoint: ["/app/delete_rabbit_docker_volume_on_node.sh"]
38+
{% endfor %}
39+
40+
configs:
41+
delete_rabbit_docker_volume_on_node_script:
42+
file: ./scripts/delete_rabbit_docker_volume_on_node.sh
43+
name: ${STACK_NAME}_delete_rabbit_docker_volume_on_node_script_{{ "./scripts/delete_rabbit_docker_volume_on_node.sh" | sha256file | substring(0,10) }}

services/rabbit/docker-compose.operator.yaml.j2

Lines changed: 0 additions & 38 deletions
This file was deleted.

services/rabbit/docker-compose.yml.j2

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ services:
44
loadbalancer:
55
image: haproxy:3.2
66
deploy:
7+
update_config:
8+
order: start-first
9+
parallelism: 1
10+
delay: 30s
11+
failure_action: rollback
712
# https://discourse.haproxy.org/t/haproxy-high-availability-configuration/11983
813
replicas: ${RABBIT_LB_REPLICAS}
914
# necessary to preserve client ip
@@ -26,11 +31,6 @@ services:
2631
timeout: 2s
2732
retries: 2
2833
interval: 10s
29-
ports:
30-
- target: 15672
31-
published: 15672
32-
protocol: tcp
33-
mode: host
3434
networks:
3535
- rabbit
3636
configs:
@@ -100,7 +100,7 @@ services:
100100
subscriber:
101101
image: python:3.11
102102
deploy:
103-
replicas: 0
103+
replicas: 1
104104
command: sh -c "pip install pika && python /app/sub.py"
105105
environment:
106106
- RABBIT_HOSTS=rabbit_loadbalancer
@@ -114,7 +114,7 @@ services:
114114
publisher:
115115
image: python:3.11
116116
deploy:
117-
replicas: 0
117+
replicas: 1
118118
command: sh -c "pip install pika && python /app/pub.py"
119119
environment:
120120
- RABBIT_HOSTS=rabbit_loadbalancer

services/rabbit/haproxy.cfg.j2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,4 @@ backend rabbit_dashboard_backends
6868
server rabbit0{{ ix }} rabbit_rabbit0{{ ix }}:{{ RABBIT_MANAGEMENT_PORT }} check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3
6969
{%- endfor %}
7070
# keep new line in the end to avoid "Missing LF on last line" error
71+
# random comment

services/rabbit/operator/delete_docker_volume_on_node.sh

Lines changed: 0 additions & 35 deletions
This file was deleted.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env sh
#
# Removes a RabbitMQ docker volume through the docker CLI, retrying until the
# removal succeeds or a timeout expires.
#
# Environment:
#   VOLUME            (required) name of the volume to delete; as a safeguard
#                     it must contain the substring 'rabbit'.
#   TIMEOUT_MINUTES   (default 2) total time budget for the retries.
#   INTERVAL_SECONDS  (default 5) pause between two removal attempts.
#
# Exit status:
#   0  the volume was removed, or it does not exist (nothing to do)
#   1  invalid input, docker could not be queried, or the timeout was reached
set -eu

VOLUME="${VOLUME:-}"
TIMEOUT_MINUTES="${TIMEOUT_MINUTES:-2}"
INTERVAL_SECONDS="${INTERVAL_SECONDS:-5}"

# Prints the given message on stderr and aborts the script with status 1.
fail() {
  echo "$*" >&2
  exit 1
}

# Succeeds when $1 is a non-negative decimal integer without leading zeros.
# Leading zeros are rejected because $(( ... )) would read them as octal.
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    0) return 0 ;;
    0*) return 1 ;;
  esac
  return 0
}

# Succeeds when a volume named exactly $VOLUME exists.
# Aborts the script when docker itself cannot be queried, so that an
# unreachable daemon is never mistaken for "volume does not exist".
volume_exists() {
  names=$(docker volume ls --quiet --filter "name=$VOLUME") \
    || fail "ERROR: cannot list docker volumes. Is the docker daemon reachable?"
  # the 'name' filter matches substrings, hence the exact whole-line comparison
  printf '%s\n' "$names" | grep -Fxq -- "$VOLUME"
}

if [ -z "$VOLUME" ]; then
  fail "ERROR: VOLUME not set. Usage: VOLUME=name [TIMEOUT_MINUTES=2] [INTERVAL_SECONDS=5] $0"
fi

case "$VOLUME" in
  *rabbit*) ;;
  *) fail "[SAFEGUARD] ERROR: VOLUME name must contain 'rabbit'." ;;
esac

if ! is_uint "$TIMEOUT_MINUTES"; then
  fail "ERROR: TIMEOUT_MINUTES must be a non-negative integer (got '$TIMEOUT_MINUTES')."
fi

# a zero interval would never advance the elapsed time and loop forever
if ! is_uint "$INTERVAL_SECONDS" || [ "$INTERVAL_SECONDS" -eq 0 ]; then
  fail "ERROR: INTERVAL_SECONDS must be a positive integer (got '$INTERVAL_SECONDS')."
fi

echo "Deleting volume '$VOLUME'"
echo "Timeout: ${TIMEOUT_MINUTES}m"
echo "Interval: ${INTERVAL_SECONDS}s"

if ! volume_exists; then
  echo "Volume '$VOLUME' does not exist. Nothing to do."
  exit 0
fi

elapsed=0
timeout_seconds=$(( TIMEOUT_MINUTES * 60 ))
while [ "$elapsed" -lt "$timeout_seconds" ]; do
  # keep docker's error text (stderr) so the real failure reason can be reported
  if rm_error=$(docker volume rm "$VOLUME" 2>&1 >/dev/null); then
    echo "Volume '$VOLUME' removed successfully."
    exit 0
  fi

  # the volume may have been removed by someone else in the meantime
  if ! volume_exists; then
    echo "Volume '$VOLUME' no longer exists. Nothing left to do."
    exit 0
  fi

  remaining=$(( timeout_seconds - elapsed ))
  echo "Could not remove volume '$VOLUME': ${rm_error}"
  echo "Retrying in ${INTERVAL_SECONDS}s. Time left: ${remaining}s."
  sleep "$INTERVAL_SECONDS"

  # only the sleep time is counted; time spent inside docker calls is ignored
  elapsed=$(( elapsed + INTERVAL_SECONDS ))
done

fail "Timeout reached. Exiting."

0 commit comments

Comments
 (0)