Commit 4fdde13

Add (ha) rabbit cluster (but not use it) ⚠️ (#1179)
* add-ha-rabbit
* Add ha rabbit
* Document erlang cookie rotation
* Add ha proxy
* Further configuration
* Document autoscaling (not supported)
* More configurable parameters
* minor improvements
* Add resource limits/reservations
* Add haproxy resources
* Document side effect of haproxy round robin
* Add healthcheck for haproxy
* Update readme
* Removing volumes
* Robust volume clean up + haproxy extra configuration
* Simplification
* Add confirmation dialogue
* Unification
* Minor clean up
* update gitignore
* fixes after clean up
* clean up
* clean up
* Deploy rabbit only if necessary
* clean up
* Document cluster update behaviour. Architecture must be changed
* Switch from services to stacks
* fixes
* improvements
* minor fixes
* update
* update
* Improvements
* improvements
* improvements
* fixes and improvements
* remove leftovers
* Improve doc
* fixes
* Improve README
* remove lines
* Clean up
* Fix readme header
* Improve node index validation
* remove TODOs from compose file
* remove unnecessary headers

1 parent 70840bd commit 4fdde13

File tree

10 files changed (+447 lines, -0 lines)
services/rabbit/.gitignore

Lines changed: 6 additions & 0 deletions

*.yml
*.env
!template*.env
!erlang.cookie.secret.template
rabbitmq.conf
haproxy.cfg
services/rabbit/.operations.Makefile

Lines changed: 105 additions & 0 deletions
#
# Variables
#

LOAD_BALANCER_STACK_NAME := rabbit-loadbalancer

MAKEFLAGS += --no-print-directory

#
# Helpers
#

define create_node_stack_name
rabbit-node0$(1)
endef

validate-NODE_COUNT: guard-NODE_COUNT
	@if ! echo "$(NODE_COUNT)" | grep --quiet --extended-regexp '^[1-9]$$'; then \
		echo NODE_COUNT must be a positive single digit integer; \
		exit 1; \
	fi

validate-node-ix0%: .env
	@if ! echo "$*" | grep --quiet --extended-regexp '^[1-9]$$'; then \
		echo "Node index $* must be a positive single digit integer"; \
		exit 1; \
	fi

	@set -o allexport; . $<; set +o allexport; \
	if [ "$*" -lt 1 ] || [ "$*" -gt "$$RABBIT_CLUSTER_NODE_COUNT" ]; then \
		echo "Node index $* is out of range 1..$$RABBIT_CLUSTER_NODE_COUNT"; \
		exit 1; \
	fi

#
# Cluster level
#

### Note: the up operation is called by CI automatically.
### It must NOT deploy stacks if they are already running,
### to avoid breaking the existing cluster (stopping all nodes at once).
up: start-cluster

down: stop-cluster

start-cluster: start-all-nodes start-loadbalancer

update-cluster stop-cluster:
	@$(error This operation may break the cluster. Check README for details.)

#
# Load Balancer
#

start-loadbalancer: .stack.loadbalancer.yml
	@docker stack deploy --with-registry-auth --prune --compose-file $< $(LOAD_BALANCER_STACK_NAME)

update-loadbalancer: start-loadbalancer

stop-loadbalancer:
	@docker stack rm $(LOAD_BALANCER_STACK_NAME)

#
# Rabbit all Nodes together
#

.start-all-nodes: validate-NODE_COUNT
	@i=1; \
	while [ $$i -le $(NODE_COUNT) ]; do \
		$(MAKE) start-node0$$i; \
		i=$$((i + 1)); \
	done

start-all-nodes: .env
	@source $<; \
	$(MAKE) .start-all-nodes NODE_COUNT=$$RABBIT_CLUSTER_NODE_COUNT

update-all-nodes:
	@$(error Updating all nodes at the same time may break the cluster \
		as it may restart (i.e. stop) all nodes at the same time. \
		Update one node at a time)

stop-all-nodes:
	@$(error Stopping all nodes at the same time breaks the cluster. \
		Update one node at a time. \
		Read more at https://groups.google.com/g/rabbitmq-users/c/owvanX2iSqA/m/ZAyRDhRfCQAJ)

#
# Rabbit Node level
#

start-node0%: validate-node-ix0% .stack.node0%.yml
	@STACK_NAME=$(call create_node_stack_name,$*); \
	if docker stack ls --format '{{.Name}}' | grep --silent "$$STACK_NAME"; then \
		echo "Rabbit Node $* is already running, skipping"; \
	else \
		echo "Starting Rabbit Node $* ..."; \
		docker stack deploy --with-registry-auth --prune --compose-file $(word 2,$^) $(call create_node_stack_name,$*); \
	fi

update-node0%: validate-node-ix0% .stack.node0%.yml
	@docker stack deploy --detach=false --with-registry-auth --prune --compose-file $(word 2,$^) $(call create_node_stack_name,$*)

stop-node0%: validate-node-ix0%
	@docker stack rm --detach=false $(call create_node_stack_name,$*)
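
A minimal usage sketch of these targets, assuming `.env` sets RABBIT_CLUSTER_NODE_COUNT=3 (the node count and ordering below are illustrative):

```shell
make up                  # deploys the node stacks + load balancer; skips nodes already running
make update-node01       # rolling update: one node at a time
make update-node02
make update-node03
make update-loadbalancer
make update-all-nodes    # refuses on purpose: it would restart all nodes at once
```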

services/rabbit/Makefile

Lines changed: 66 additions & 0 deletions

REPO_BASE_DIR := $(shell git rev-parse --show-toplevel)

include ${REPO_BASE_DIR}/scripts/common-services.Makefile
# common-services.Makefile should be included first as common.Makefile
# relies on STACK_NAME var which is defined in common-services.Makefile
include ${REPO_BASE_DIR}/scripts/common.Makefile

#
# Operations
#

include ${REPO_BASE_DIR}/services/rabbit/.operations.Makefile

#
# Docker compose files
#

### Load Balancer
docker-compose.loadbalancer.yml: docker-compose.loadbalancer.yml.j2 \
		.env \
		configs/rabbitmq.conf \
		configs/erlang.cookie.secret \
		configs/haproxy.cfg \
		venv \
		$(VENV_BIN)/j2
	@$(call jinja, $<, .env, $@)

.stack.loadbalancer.yml: docker-compose.loadbalancer.yml .env
	@${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e .env $< > $@

### Node

.PRECIOUS: node0%.env
node0%.env: .env
	envsubst < $< > $@; \
	echo NODE_INDEX=$* >> $@

.PRECIOUS: docker-compose.node0%.yml
docker-compose.node0%.yml: docker-compose.node0x.yml.j2 \
		node0%.env \
		configs/rabbitmq.conf \
		configs/erlang.cookie.secret \
		configs/haproxy.cfg \
		venv \
		$(VENV_BIN)/j2
	@$(call jinja, $<, node0$*.env, $@)

.PRECIOUS: .stack.node0%.yml
.stack.node0%.yml: docker-compose.node0%.yml node0%.env
	@${REPO_BASE_DIR}/scripts/docker-stack-config.bash -e node0$*.env $< > $@

#
# Config / Secret files
#

configs/erlang.cookie.secret: configs/erlang.cookie.secret.template .env
	@set -a; source .env; set +a; \
	envsubst < $< > $@

configs/rabbitmq.conf: configs/rabbitmq.conf.j2 .env venv
	# generate $@
	@$(call jinja, $<, .env, $@)

configs/haproxy.cfg: configs/haproxy.cfg.j2 .env venv
	# generate $@
	@$(call jinja, $<, .env, $@)
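
The per-node env file is plain `envsubst` over `.env` plus a pinned node index; a sketch of what `make node01.env` does under the hood:

```shell
# equivalent of the node0%.env recipe above, for node 1
envsubst < .env > node01.env
echo NODE_INDEX=1 >> node01.env
```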

services/rabbit/README.md

Lines changed: 49 additions & 0 deletions
## Starting a cluster

Make sure all nodes have joined the cluster before using it. Otherwise, the number of replicas in quorum queues might be affected. Say you have a cluster of 3 nodes and you connect to the cluster before the 3rd node joins it: your quorum queue would end up with only 2 replicas and will break once 1 node (of the 2 nodes holding replicas of the queue) goes down.
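One way to confirm that all nodes have joined before declaring queues (a sketch; the actual container name depends on how Swarm names the node tasks):

```shell
# run against any rabbit node's container
docker exec -it <container-id> rabbitmqctl cluster_status
# "Running Nodes" should list all RABBIT_CLUSTER_NODE_COUNT nodes
```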
## Updating a cluster

Perform the update one node at a time. Never update all nodes at the same time (this may break the cluster)! Follow the instructions from the official documentation: https://www.rabbitmq.com/docs/upgrade#rolling-upgrade.
## Graceful shutdown

Shut down nodes one by one, gracefully. Wait until the node is stopped and has left the cluster, then move on to the next node. When starting the cluster, start the nodes **in the reverse order**! For example, if you shut down node01, then node02 and lastly node03, first start node03, then node02 and finally node01.

If all nodes were shut down simultaneously, you will see Mnesia table errors in the nodes' logs. Restarting the node solves the issue. The documentation also mentions the force_boot CLI command for this case (see https://www.rabbitmq.com/docs/man/rabbitmqctl.8#force_boot).
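With the Make targets above, the ordered stop/start for a 3-node cluster would look roughly like this (a sketch; wait for each node to leave the cluster before stopping the next):

```shell
make stop-node01   # wait until node01 has left the cluster
make stop-node02   # wait again
make stop-node03   # the last node down holds the freshest cluster state
# start in reverse order: the last one down is the first one up
make start-node03
make start-node02
make start-node01
```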
## How to add / remove nodes

The only supported way is to completely shut down the cluster (the docker stacks and, most likely, the rabbit node volumes) and start a brand new one.

With manual effort this can be done on the running cluster, by adding 1 more rabbit node manually (as a separate docker stack or new service) and manually executing rabbitmqctl commands (some hints can be found here: https://www.rabbitmq.com/docs/clustering#creating).
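The manual join would look roughly like this (a sketch following the clustering guide; the seed node name is illustrative and must match an existing cluster member):

```shell
# inside the new node's container
rabbitmqctl stop_app
rabbitmqctl reset                                       # wipe the new node's own state
rabbitmqctl join_cluster rabbit@rabbit-node01_rabbit01
rabbitmqctl start_app
rabbitmqctl cluster_status                              # verify the node appears
```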
## Updating rabbitmq.conf / advanced.config (zero-downtime)

We do not support this in an automated way (except starting from scratch with empty volumes), but it can be achieved manually if needed. `rabbitmq.conf` and `advanced.config` changes take effect after a node restart. This can be performed with zero downtime when RabbitMQ is clustered (has multiple nodes), by stopping and starting the rabbitmq nodes one by one:
* `docker exec -it <container-id> bash`
* (inside container) `rabbitmqctl stop_app`, then wait some time until the node is stopped (can be seen in the management UI)
* (inside container) `rabbitmqctl start_app`

Source: https://www.rabbitmq.com/docs/next/configure#config-changes-effects
## Enable node Maintenance mode

1. Get inside the container's shell (`docker exec -it <container-id> bash`)
2. (Inside container) execute `rabbitmq-upgrade drain`

Source: https://www.rabbitmq.com/docs/upgrade#maintenance-mode
## Troubleshooting

Mnesia errors after all rabbit nodes (docker services) restart:
* https://stackoverflow.com/questions/60407082/rabbit-mq-error-while-waiting-for-mnesia-tables

Official documentation mentioning restart scenarios:
* https://www.rabbitmq.com/docs/clustering#restarting-schema-sync

All (3) cluster nodes go down simultaneously, cluster is broken:
* https://groups.google.com/g/rabbitmq-users/c/owvanX2iSqA
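If a node refuses to boot because it keeps waiting for peers that will not come back (the Mnesia errors above), force_boot is the escape hatch (a sketch; run it while the node's app is stopped, then restart the node):

```shell
# inside the affected node's container
rabbitmqctl force_boot   # the next boot proceeds without waiting for peers
```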
## Autoscaling

Not supported at the moment.
services/rabbit/configs/erlang.cookie.secret.template

Lines changed: 1 addition & 0 deletions

${RABBIT_ERLANG_COOKIE}
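
Rendering this template is plain `envsubst`; a sketch of what `make configs/erlang.cookie.secret` does, assuming `.env` defines RABBIT_ERLANG_COOKIE:

```shell
set -a; . .env; set +a   # export RABBIT_ERLANG_COOKIE from .env
envsubst < configs/erlang.cookie.secret.template > configs/erlang.cookie.secret
```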
services/rabbit/configs/haproxy.cfg.j2

Lines changed: 64 additions & 0 deletions

{% set NODE_IXS = range(1, (RABBIT_CLUSTER_NODE_COUNT | int) + 1) -%}

global
    log stdout format raw local0

# haproxy by default resolves server hostnames only once;
# this breaks if a container restarts. By using resolvers
# we tell haproxy to re-resolve the hostnames (so container
# restarts are handled properly)
resolvers dockerdns
    nameserver dns1 127.0.0.11:53
    resolve_retries 3
    timeout resolve 1s
    timeout retry 1s
    hold other 10s
    hold refused 10s
    hold nx 10s
    hold timeout 10s
    hold valid 10s
    hold obsolete 10s

defaults
    log global
    mode tcp
    option tcplog

    timeout connect 5s
    timeout client 30s
    timeout server 30s

frontend rabbit
    bind *:{{ RABBIT_PORT }}
    default_backend rabbit_backends

frontend rabbit_dashboard
    bind *:{{ RABBIT_MANAGEMENT_PORT }}
    default_backend rabbit_dashboard_backends

frontend health
    mode http
    bind 127.0.0.1:32087
    http-request return status 200 if { src 127.0.0.0/8 }

backend rabbit_backends
    # a side effect of roundrobin is that connections should be evenly distributed,
    # thus rabbit queue leader replicas shall also be evenly distributed
    # (https://www.rabbitmq.com/docs/4.0/clustering#replica-placement)
    # if the algorithm below is changed, consider adjusting the rabbit configuration
    # as stated in the documentation link above
    balance roundrobin

    # init-addr libc,none - start even if there aren't any backend servers running
    {% for ix in NODE_IXS %}
    server rabbit0{{ ix }} rabbit-node0{{ ix }}_rabbit0{{ ix }}:{{ RABBIT_PORT }} check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3 send-proxy
    {%- endfor %}

backend rabbit_dashboard_backends
    mode http
    balance roundrobin

    {% for ix in NODE_IXS %}
    server rabbit0{{ ix }} rabbit-node0{{ ix }}_rabbit0{{ ix }}:{{ RABBIT_MANAGEMENT_PORT }} check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3
    {%- endfor %}

# keep a new line at the end to avoid "Missing LF on last line" error
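
For example, with RABBIT_CLUSTER_NODE_COUNT=3 and RABBIT_PORT=5672 (illustrative values), the first loop renders to:

```
server rabbit01 rabbit-node01_rabbit01:5672 check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3 send-proxy
server rabbit02 rabbit-node02_rabbit02:5672 check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3 send-proxy
server rabbit03 rabbit-node03_rabbit03:5672 check resolvers dockerdns init-addr libc,none inter 5s rise 2 fall 3 send-proxy
```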
services/rabbit/configs/rabbitmq.conf.j2

Lines changed: 19 additions & 0 deletions

{% set NODE_IXS = range(1, (RABBIT_CLUSTER_NODE_COUNT | int) + 1) -%}

# https://www.rabbitmq.com/docs/cluster-formation#peer-discovery-configuring-mechanism
cluster_formation.peer_discovery_backend = classic_config

{% for ix in NODE_IXS %}
cluster_formation.classic_config.nodes.{{ ix }} = rabbit@rabbit-node0{{ ix }}_rabbit0{{ ix }}
{%- endfor %}

## Sets the initial quorum queue replica count for newly declared quorum queues.
## This value can be overridden using the 'x-quorum-initial-group-size' queue argument
## at declaration time.
# https://www.rabbitmq.com/docs/quorum-queues#quorum-requirements
quorum_queue.initial_cluster_size = {{ RABBIT_QUORUM_QUEUE_DEFAULT_REPLICA_COUNT }}

# Extract the proper client ip when behind a proxy (e.g. haproxy)
# https://www.rabbitmq.com/docs/networking#proxy-protocol
# WARNING: this forces clients to use a proxy (direct access to nodes does not work)
proxy_protocol = true
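
Rendered with RABBIT_CLUSTER_NODE_COUNT=3 (an illustrative node count), the peer discovery block becomes:

```
cluster_formation.peer_discovery_backend = classic_config
cluster_formation.classic_config.nodes.1 = rabbit@rabbit-node01_rabbit01
cluster_formation.classic_config.nodes.2 = rabbit@rabbit-node02_rabbit02
cluster_formation.classic_config.nodes.3 = rabbit@rabbit-node03_rabbit03
```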
services/rabbit/docker-compose.loadbalancer.yml.j2

Lines changed: 46 additions & 0 deletions

services:
  loadbalancer:
    image: haproxy:3.2
    deploy:
      update_config:
        order: start-first
        parallelism: 1
        delay: 30s
        failure_action: rollback
      # https://discourse.haproxy.org/t/haproxy-high-availability-configuration/11983
      replicas: ${RABBIT_LB_REPLICAS}
      # necessary to preserve client ip
      # otherwise we see overlay rabbit network lb ip
      # (rabbitmq management dashboard connection section)
      endpoint_mode: dnsrr
      resources:
        limits:
          # https://help.hcl-software.com/digital-experience/dx-95-doc-archive/CF203/platform/kubernetes/haproxy-migration/haproxy-configuration.html
          cpus: "1"
          memory: "2G"
        # according to local observations and the link below
        # https://github.com/haproxytech/helm-charts/blob/haproxy-1.24.0/haproxy/values.yaml#L403
        reservations:
          cpus: "0.1"
          memory: "128M"
    healthcheck: # https://stackoverflow.com/a/76513320/12124525
      test: bash -c 'echo "" > /dev/tcp/127.0.0.1/32087 || exit 1'
      start_period: 5s
      timeout: 2s
      retries: 2
      interval: 10s
    networks:
      - rabbit
    configs:
      - source: haproxy.cfg
        target: /usr/local/etc/haproxy/haproxy.cfg

networks:
  rabbit:
    name: ${RABBIT_NETWORK}
    external: true

configs:
  haproxy.cfg:
    file: ./configs/haproxy.cfg
    name: rabbit_haproxy_conf_{{ "./configs/haproxy.cfg" | sha256file | substring(0,10) }}
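
A quick manual probe of the same health endpoint the Swarm healthcheck uses (a sketch; the actual container name depends on Swarm's task naming):

```shell
# the `health` frontend binds 127.0.0.1:32087 inside the container
docker exec <haproxy-container> bash -c 'echo > /dev/tcp/127.0.0.1/32087 && echo healthy'
```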
