Skip to content

Commit dfe41f1

Browse files
committed
Add chaos testing setup with RabbitMQ cluster configuration and management scripts
1 parent 23a7fd0 commit dfe41f1

File tree

12 files changed

+562
-0
lines changed

12 files changed

+562
-0
lines changed

chaos-testing/README.md

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# RabbitMQ Chaos Testing
2+
3+
A 3-node RabbitMQ cluster with constrained resources for testing failure scenarios.
4+
5+
## Cluster Nodes
6+
7+
| Node | AMQP Port | Management UI | Disk Limit | Memory Limit |
8+
|------|-----------|----------------------------|------------|--------------|
9+
| 1 | 5672 | <http://localhost:15672> | 50MB | 384MB |
10+
| 2 | 5673 | <http://localhost:15673> | 75MB | 448MB |
11+
| 3 | 5674 | <http://localhost:15674> | 100MB | 512MB |
12+
13+
Node 1 has the tightest limits and will fail first under load.
14+
15+
**Credentials:** `guest` / `guest`
16+
17+
## Quick Start
18+
19+
```powershell
20+
./scripts/Start.ps1
21+
./scripts/Status.ps1
22+
./scripts/Stop.ps1
23+
```
24+
25+
## Scripts
26+
27+
```powershell
28+
./scripts/Start.ps1
29+
./scripts/Stop.ps1
30+
./scripts/Status.ps1
31+
./scripts/FillDisk.ps1
32+
./scripts/FillDisk.ps1 2
33+
./scripts/FillDisk.ps1 1 40
34+
./scripts/ClearDisk.ps1
35+
./scripts/ClearDisk.ps1 2
36+
./scripts/KillNode.ps1
37+
./scripts/KillNode.ps1 2
38+
./scripts/KillNode.ps1 1 -Restart
39+
./scripts/Reset.ps1
40+
```
41+
42+
---
43+
44+
## Test Scenarios
45+
46+
### Start the cluster first
47+
48+
```powershell
49+
cd chaos-testing
50+
./scripts/Start.ps1
51+
```
52+
53+
---
54+
55+
### Basic pub/sub
56+
57+
```powershell
58+
cd samples/Foundatio.RabbitMQ.Subscribe
59+
dotnet run
60+
```
61+
62+
```powershell
63+
cd samples/Foundatio.RabbitMQ.Publish
64+
dotnet run
65+
```
66+
67+
---
68+
69+
### Single node connection
70+
71+
```powershell
72+
cd samples/Foundatio.RabbitMQ.Subscribe
73+
dotnet run --connection-string "amqp://guest:guest@localhost:5672"
74+
```
75+
76+
```powershell
77+
cd samples/Foundatio.RabbitMQ.Publish
78+
dotnet run --connection-string "amqp://guest:guest@localhost:5672"
79+
```
80+
81+
---
82+
83+
### Failover connection (all 3 nodes)
84+
85+
```powershell
86+
cd samples/Foundatio.RabbitMQ.Subscribe
87+
dotnet run --connection-string "amqp://guest:guest@localhost:5672,localhost:5673,localhost:5674"
88+
```
89+
90+
```powershell
91+
cd samples/Foundatio.RabbitMQ.Publish
92+
dotnet run --connection-string "amqp://guest:guest@localhost:5672,localhost:5673,localhost:5674"
93+
```
94+
95+
---
96+
97+
### Durable queues (survive restarts)
98+
99+
```powershell
100+
cd samples/Foundatio.RabbitMQ.Subscribe
101+
dotnet run --connection-string "amqp://guest:guest@localhost:5672" --durable
102+
```
103+
104+
```powershell
105+
cd samples/Foundatio.RabbitMQ.Publish
106+
dotnet run --connection-string "amqp://guest:guest@localhost:5672" --durable
107+
```
108+
109+
---
110+
111+
### Large messages (1MB - memory pressure)
112+
113+
```powershell
114+
cd samples/Foundatio.RabbitMQ.Publish
115+
dotnet run --connection-string "amqp://guest:guest@localhost:5672" --message-size 1048576
116+
```
117+
118+
---
119+
120+
### Delayed messages
121+
122+
```powershell
123+
cd samples/Foundatio.RabbitMQ.Subscribe
124+
dotnet run --delayed
125+
```
126+
127+
```powershell
128+
cd samples/Foundatio.RabbitMQ.Publish
129+
dotnet run --delayed
130+
```
131+
132+
---
133+
134+
### Durable + failover
135+
136+
```powershell
137+
cd samples/Foundatio.RabbitMQ.Subscribe
138+
dotnet run --connection-string "amqp://guest:guest@localhost:5672,localhost:5673,localhost:5674" --durable
139+
```
140+
141+
```powershell
142+
cd samples/Foundatio.RabbitMQ.Publish
143+
dotnet run --connection-string "amqp://guest:guest@localhost:5672,localhost:5673,localhost:5674" --durable
144+
```
145+
146+
---
147+
148+
## Chaos Commands
149+
150+
Run these from the `chaos-testing` directory while your apps are connected:
151+
152+
```powershell
153+
./scripts/FillDisk.ps1
154+
```
155+
156+
```powershell
157+
./scripts/ClearDisk.ps1
158+
```
159+
160+
```powershell
161+
./scripts/KillNode.ps1
162+
```
163+
164+
```powershell
165+
./scripts/KillNode.ps1 1 -Restart
166+
```
167+
168+
```powershell
169+
./scripts/KillNode.ps1 1
170+
./scripts/KillNode.ps1 2
171+
```
172+
173+
```powershell
174+
./scripts/FillDisk.ps1 1
175+
./scripts/FillDisk.ps1 2
176+
./scripts/FillDisk.ps1 3
177+
```
178+
179+
```powershell
180+
./scripts/Status.ps1
181+
```
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# RabbitMQ configuration for chaos testing node 1
2+
# Most constrained: 50MB disk, 300MB memory watermark
3+
4+
vm_memory_high_watermark.absolute = 300MB
5+
disk_free_limit.absolute = 10MB
6+
7+
# Cluster formation - auto-join peers
8+
cluster_formation.peer_discovery_backend = classic_config
9+
cluster_formation.classic_config.nodes.1 = rabbit@rabbitmq-1
10+
cluster_formation.classic_config.nodes.2 = rabbit@rabbitmq-2
11+
cluster_formation.classic_config.nodes.3 = rabbit@rabbitmq-3
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# RabbitMQ configuration for chaos testing node 2
2+
# Medium constraints: 75MB disk, 350MB memory watermark
3+
4+
vm_memory_high_watermark.absolute = 350MB
5+
disk_free_limit.absolute = 10MB
6+
7+
# Cluster formation - auto-join peers
8+
cluster_formation.peer_discovery_backend = classic_config
9+
cluster_formation.classic_config.nodes.1 = rabbit@rabbitmq-1
10+
cluster_formation.classic_config.nodes.2 = rabbit@rabbitmq-2
11+
cluster_formation.classic_config.nodes.3 = rabbit@rabbitmq-3
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# RabbitMQ configuration for chaos testing node 3
2+
# Least constrained: 100MB disk, 400MB memory watermark
3+
4+
vm_memory_high_watermark.absolute = 400MB
5+
disk_free_limit.absolute = 10MB
6+
7+
# Cluster formation - auto-join peers
8+
cluster_formation.peer_discovery_backend = classic_config
9+
cluster_formation.classic_config.nodes.1 = rabbit@rabbitmq-1
10+
cluster_formation.classic_config.nodes.2 = rabbit@rabbitmq-2
11+
cluster_formation.classic_config.nodes.3 = rabbit@rabbitmq-3

chaos-testing/docker-compose.yml

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Chaos Testing Docker Compose Configuration
2+
#
3+
# 3-node RabbitMQ cluster with constrained resources for chaos testing.
4+
# Each node has slightly different limits so failures cascade predictably.
5+
#
6+
# Quick start:
7+
# ./scripts/Start.ps1 # Start all nodes; the cluster forms automatically
8+
# ./scripts/FillDisk.ps1 # Trigger disk alarm
9+
# ./scripts/KillNode.ps1 1 # Kill node 1
10+
11+
services:
12+
# Node 1 - Most constrained (50MB disk, 384MB memory) - fails first
13+
rabbitmq-1:
14+
image: rabbitmq:4.2.2-management
15+
container_name: chaos-rabbitmq-1
16+
hostname: rabbitmq-1
17+
ports:
18+
- "5672:5672" # Primary AMQP port
19+
- "15672:15672" # Primary Management UI
20+
deploy:
21+
resources:
22+
limits:
23+
memory: 384M
24+
reservations:
25+
memory: 192M
26+
environment:
27+
RABBITMQ_DEFAULT_USER: guest
28+
RABBITMQ_DEFAULT_PASS: guest
29+
RABBITMQ_ERLANG_COOKIE: "chaos-testing-cookie"
30+
RABBITMQ_NODENAME: rabbit@rabbitmq-1
31+
volumes:
32+
- ./config/rabbitmq-1.conf:/etc/rabbitmq/conf.d/99-limits.conf:ro
33+
tmpfs:
34+
- /var/lib/rabbitmq:size=50M,mode=1777
35+
healthcheck:
36+
test: ["CMD", "rabbitmq-diagnostics", "-q", "ping"]
37+
interval: 10s
38+
timeout: 5s
39+
retries: 5
40+
start_period: 30s
41+
networks:
42+
- chaos-network
43+
44+
# Node 2 - Medium constraints (75MB disk, 448MB memory)
45+
rabbitmq-2:
46+
image: rabbitmq:4.2.2-management
47+
container_name: chaos-rabbitmq-2
48+
hostname: rabbitmq-2
49+
ports:
50+
- "5673:5672"
51+
- "15673:15672"
52+
deploy:
53+
resources:
54+
limits:
55+
memory: 448M
56+
reservations:
57+
memory: 224M
58+
environment:
59+
RABBITMQ_DEFAULT_USER: guest
60+
RABBITMQ_DEFAULT_PASS: guest
61+
RABBITMQ_ERLANG_COOKIE: "chaos-testing-cookie"
62+
RABBITMQ_NODENAME: rabbit@rabbitmq-2
63+
volumes:
64+
- ./config/rabbitmq-2.conf:/etc/rabbitmq/conf.d/99-limits.conf:ro
65+
tmpfs:
66+
- /var/lib/rabbitmq:size=75M,mode=1777
67+
depends_on:
68+
rabbitmq-1:
69+
condition: service_healthy
70+
healthcheck:
71+
test: ["CMD", "rabbitmq-diagnostics", "-q", "ping"]
72+
interval: 10s
73+
timeout: 5s
74+
retries: 5
75+
start_period: 30s
76+
networks:
77+
- chaos-network
78+
79+
# Node 3 - Least constrained (100MB disk, 512MB memory) - fails last
80+
rabbitmq-3:
81+
image: rabbitmq:4.2.2-management
82+
container_name: chaos-rabbitmq-3
83+
hostname: rabbitmq-3
84+
ports:
85+
- "5674:5672"
86+
- "15674:15672"
87+
deploy:
88+
resources:
89+
limits:
90+
memory: 512M
91+
reservations:
92+
memory: 256M
93+
environment:
94+
RABBITMQ_DEFAULT_USER: guest
95+
RABBITMQ_DEFAULT_PASS: guest
96+
RABBITMQ_ERLANG_COOKIE: "chaos-testing-cookie"
97+
RABBITMQ_NODENAME: rabbit@rabbitmq-3
98+
volumes:
99+
- ./config/rabbitmq-3.conf:/etc/rabbitmq/conf.d/99-limits.conf:ro
100+
tmpfs:
101+
- /var/lib/rabbitmq:size=100M,mode=1777
102+
depends_on:
103+
rabbitmq-1:
104+
condition: service_healthy
105+
healthcheck:
106+
test: ["CMD", "rabbitmq-diagnostics", "-q", "ping"]
107+
interval: 10s
108+
timeout: 5s
109+
retries: 5
110+
start_period: 30s
111+
networks:
112+
- chaos-network
113+
114+
ready:
115+
image: andrewlock/wait-for-dependencies
116+
command: rabbitmq-1:15672 rabbitmq-2:15672 rabbitmq-3:15672
117+
depends_on:
118+
- rabbitmq-1
119+
- rabbitmq-2
120+
- rabbitmq-3
121+
networks:
122+
- chaos-network
123+
124+
networks:
125+
chaos-network:
126+
driver: bridge
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Clear the disk-fill test file from one or all chaos-testing nodes.
# Removes the dummy file created by FillDisk.ps1 (does NOT affect RabbitMQ data).
#
# Usage: ./ClearDisk.ps1 [node]
#   node  Node number 1-3. If omitted (or 0), all nodes are cleared.
param(
    # Rejects node numbers that have no matching chaos-rabbitmq-<n> container.
    [ValidateRange(0, 3)]
    [int]$Node = 0
)

# 0 is the "all nodes" sentinel; otherwise only the requested node is targeted.
if ($Node -eq 0) {
    $targets = 1..3
    Write-Host " Clearing test files on all nodes..." -ForegroundColor Cyan
} else {
    $targets = @($Node)
    Write-Host " Clearing test file on node $Node..." -ForegroundColor Cyan
}

$cleared = 0
foreach ($i in $targets) {
    # docker's own stderr stays suppressed, but a non-zero exit code (for example
    # a stopped or missing container) is surfaced as a warning instead of being
    # silently lost. The loop continues so the operation remains best-effort.
    docker exec "chaos-rabbitmq-$i" rm -f /var/lib/rabbitmq/fill-test 2>$null | Out-Null
    if ($LASTEXITCODE -eq 0) {
        $cleared++
    } else {
        Write-Warning "Could not clear test file on node $i (docker exec exit code $LASTEXITCODE)."
    }
}

# Only claim success when at least one node was actually cleared.
if ($cleared -gt 0) {
    Write-Host " Done. Disk alarm should clear shortly." -ForegroundColor Green
} else {
    Write-Warning "No test files were cleared."
}
Write-Host ""

# Show status
& "$PSScriptRoot/Status.ps1"

0 commit comments

Comments
 (0)