|
1 | 1 | # Hitless Upgrades |
2 | 2 |
|
3 | | -This package provides hitless upgrade functionality for Redis clients, enabling seamless connection migration during Redis server upgrades, failovers, or cluster topology changes without dropping active connections. |
| 3 | +Seamless Redis connection handoffs during topology changes without interrupting operations. |
4 | 4 |
|
5 | | -## How It Works |
| 5 | +## Quick Start |
6 | 6 |
|
7 | | -The hitless upgrade system integrates with go-redis through pool hooks: |
8 | | - |
9 | | -1. **Push Notifications**: Redis sends RESP3 push notifications for topology changes |
10 | | -2. **Connection Marking**: Affected connections are marked for handoff |
11 | | -3. **Pool Integration**: Marked connections are queued for handoff when returned to pool |
12 | | -4. **Async Migration**: Worker goroutines perform connection handoffs in background |
13 | | -5. **Connection Replacement**: Old connections are replaced with new ones to target endpoints |
14 | | - |
15 | | -### Push Notification Types |
16 | | -- `MOVING` - Connection handoff to new endpoint |
17 | | -- `MIGRATING` - Apply relaxed timeouts during migration |
18 | | -- `MIGRATED` - Clear relaxed timeouts after migration |
19 | | -- `FAILING_OVER` - Apply relaxed timeouts during failover |
20 | | -- `FAILED_OVER` - Clear relaxed timeouts after failover |
21 | | - |
22 | | -### Worker Management |
23 | | -The hitless upgrade system uses **on-demand workers** for processing connection handoffs: |
24 | | - |
25 | | -- **No minimum workers**: Workers are created only when needed |
26 | | -- **Automatic scaling**: Workers scale up to `MaxWorkers` under load |
27 | | -- **Automatic cleanup**: Idle workers are automatically terminated |
28 | | -- **Efficient resource usage**: No resources consumed when idle |
29 | | -- **Burst handling**: Can quickly scale to handle traffic spikes |
30 | | -- **Smart defaults**: Auto-calculated as `min(10, PoolSize/3)` to prevent over-allocation |
31 | | -- **Performance guarantee**: Explicit values enforced to minimum of 10 workers |
32 | | - |
33 | | -#### MaxWorkers Configuration Logic |
34 | | -- **When not set (0)**: `min(10, PoolSize/3)` - scales with pool size but caps at 10 |
35 | | -- **When explicitly set**: `max(10, set_value)` - ensures minimum 10 workers for performance |
36 | | - |
37 | | -This design ensures optimal resource utilization while maintaining responsiveness during Redis topology changes. |
38 | | - |
39 | | -## Configuration Examples |
40 | | - |
41 | | -### Basic Setup |
42 | 7 | ```go |
43 | | -import "github.com/redis/go-redis/v9" |
| 8 | +import "github.com/redis/go-redis/v9/hitless" |
44 | 9 |
|
45 | 10 | opt := &redis.Options{ |
46 | 11 | Addr: "localhost:6379", |
47 | | - Protocol: 3, // RESP3 required for push notifications |
| 12 | + Protocol: 3, // RESP3 required |
48 | 13 | HitlessUpgrades: &redis.HitlessUpgradeConfig{ |
49 | | - Enabled: true, |
| 14 | + Mode: hitless.MaintNotificationsEnabled, // or MaintNotificationsAuto |
50 | 15 | }, |
51 | 16 | } |
52 | | - |
53 | 17 | client := redis.NewClient(opt) |
54 | | -defer client.Close() |
55 | 18 | ``` |
56 | 19 |
|
57 | | -### Advanced Configuration |
58 | | -```go |
59 | | -import "github.com/redis/go-redis/v9/hitless" |
| 20 | +## Modes |
60 | 21 |
|
61 | | -opt := &redis.Options{ |
62 | | - Addr: "localhost:6379", |
63 | | - Protocol: 3, |
64 | | - HitlessUpgrades: &redis.HitlessUpgradeConfig{ |
65 | | - Enabled: true, |
66 | | - Config: &hitless.Config{ |
67 | | - MaxHandoffRetries: 3, // Retry failed handoffs up to 3 times |
68 | | - HandoffTimeout: 15 * time.Second, // Timeout for individual handoff operations |
69 | | - RelaxedTimeout: 10 * time.Second, // Extended timeout during migrations |
70 | | - PostHandoffRelaxedDuration: 20 * time.Second, // Keep relaxed timeout after handoff |
71 | | - LogLevel: 1, // Warning level logging |
72 | | - MaxWorkers: 15, // On-demand workers (default: min(10, PoolSize/3), enforced min: 10) |
73 | | - HandoffQueueSize: 50, // Queue size for handoff requests |
74 | | - }, |
75 | | - }, |
76 | | -} |
| 22 | +- **`MaintNotificationsDisabled`**: Hitless upgrades are completely disabled |
| 23 | +- **`MaintNotificationsEnabled`**: Hitless upgrades are forcibly enabled (fails if the server doesn't support them) |
| 24 | +- **`MaintNotificationsAuto`**: Hitless upgrades are enabled if the server supports them (default) |
77 | 25 |
|
78 | | -client := redis.NewClient(opt) |
79 | | -defer client.Close() |
80 | | -``` |
| 26 | +## Configuration |
81 | 27 |
|
82 | | -### Configuration Examples |
83 | | - |
84 | | -#### MaxWorkers Auto-Calculation |
85 | 28 | ```go |
86 | | -// Small pool - conservative worker allocation |
87 | | -opt := &redis.Options{ |
88 | | - PoolSize: 6, // MaxWorkers will be min(10, 6/3) = 2 |
89 | | - HitlessUpgrades: &redis.HitlessUpgradeConfig{Enabled: true}, |
90 | | -} |
91 | | - |
92 | | -// Medium pool - proportional worker allocation |
93 | | -opt := &redis.Options{ |
94 | | - PoolSize: 30, // MaxWorkers will be min(10, 30/3) = 10 |
95 | | - HitlessUpgrades: &redis.HitlessUpgradeConfig{Enabled: true}, |
96 | | -} |
| 29 | +import "github.com/redis/go-redis/v9/hitless" |
97 | 30 |
|
98 | | -// Large pool - capped worker allocation |
99 | | -opt := &redis.Options{ |
100 | | - PoolSize: 120, // MaxWorkers will be min(10, 120/3) = 10 (capped) |
101 | | - HitlessUpgrades: &redis.HitlessUpgradeConfig{Enabled: true}, |
| 31 | +Config: &hitless.Config{ |
| 32 | + Mode: hitless.MaintNotificationsAuto, // Notification mode |
| 33 | + MaxHandoffRetries: 3, // Retry failed handoffs |
| 34 | + HandoffTimeout: 15 * time.Second, // Handoff operation timeout |
| 35 | + RelaxedTimeout: 10 * time.Second, // Extended timeout during migrations |
| 36 | + PostHandoffRelaxedDuration: 20 * time.Second, // Keep relaxed timeout after handoff |
| 37 | + LogLevel: 1, // 0=errors, 1=warnings, 2=info, 3=debug |
| 38 | + MaxWorkers: 15, // Concurrent handoff workers |
| 39 | + HandoffQueueSize: 50, // Handoff request queue size |
102 | 40 | } |
103 | 41 | ``` |
104 | 42 |
|
105 | | -#### Explicit MaxWorkers Configuration |
106 | | -```go |
107 | | -// Small explicit value - enforced minimum |
108 | | -opt := &redis.Options{ |
109 | | - HitlessUpgrades: &redis.HitlessUpgradeConfig{ |
110 | | - Enabled: true, |
111 | | - Config: &hitless.Config{ |
112 | | - MaxWorkers: 5, // Will be enforced to 10 for performance |
113 | | - }, |
114 | | - }, |
115 | | -} |
| 43 | +### Worker Scaling |
| 44 | +- **Auto-calculated**: `min(10, PoolSize/3)` - scales with pool size, capped at 10 |
| 45 | +- **Explicit values**: `max(10, set_value)` - enforces a minimum of 10 workers |
| 46 | +- **On-demand**: Workers created when needed, cleaned up when idle |
116 | 47 |
|
117 | | -// Large explicit value - respected as-is |
118 | | -opt := &redis.Options{ |
119 | | - HitlessUpgrades: &redis.HitlessUpgradeConfig{ |
120 | | - Enabled: true, |
121 | | - Config: &hitless.Config{ |
122 | | - MaxWorkers: 20, // Will be kept as 20 |
123 | | - }, |
124 | | - }, |
125 | | -} |
126 | | -``` |
| 48 | +### Queue Sizing |
| 49 | +- **Auto-calculated**: `10 × MaxWorkers`, capped by pool size |
| 50 | +- **Always capped**: Queue size never exceeds pool size |
| 51 | + |
| 52 | +## Cluster Support |
127 | 53 |
|
128 | | -### Cluster Client |
129 | 54 | ```go |
130 | | -cluster := redis.NewClusterClient(&redis.ClusterOptions{ |
131 | | - Addrs: []string{"localhost:7000", "localhost:7001", "localhost:7002"}, |
| 55 | +opt := &redis.ClusterOptions{ |
| 56 | + Addrs: []string{"localhost:7000", "localhost:7001"}, |
132 | 57 | Protocol: 3, |
133 | 58 | HitlessUpgrades: &redis.HitlessUpgradeConfig{ |
134 | | - Enabled: true, |
| 59 | + Mode: hitless.MaintNotificationsEnabled, |
135 | 60 | }, |
136 | | -}) |
137 | | -defer cluster.Close() |
138 | | -``` |
139 | | - |
140 | | -## Hook Examples |
141 | | - |
142 | | -### Custom Pool Hook |
143 | | -```go |
144 | | -package main |
145 | | - |
146 | | -import ( |
147 | | - "context" |
148 | | - "log" |
149 | | - |
150 | | - "github.com/redis/go-redis/v9/internal/pool" |
151 | | -) |
152 | | - |
153 | | -// Custom hook that logs connection events |
154 | | -type LoggingHook struct { |
155 | | - name string |
156 | | -} |
157 | | - |
158 | | -func (lh *LoggingHook) OnGet(ctx context.Context, conn *pool.Conn, isNewConn bool) error { |
159 | | - log.Printf("Hook %s: Getting connection %d (new: %v)", lh.name, conn.GetID(), isNewConn) |
160 | | - return nil |
161 | | -} |
162 | | - |
163 | | -func (lh *LoggingHook) OnPut(ctx context.Context, conn *pool.Conn) (shouldPool bool, shouldRemove bool, err error) { |
164 | | - log.Printf("Hook %s: Putting connection %d back to pool", lh.name, conn.GetID()) |
165 | | - return true, false, nil // Pool the connection, don't remove it |
166 | | -} |
167 | | - |
168 | | -func main() { |
169 | | - // Create pool with custom hook |
170 | | - opt := &pool.Options{ |
171 | | - Dialer: func(ctx context.Context) (net.Conn, error) { |
172 | | - return net.Dial("tcp", "localhost:6379") |
173 | | - }, |
174 | | - PoolSize: 10, |
175 | | - } |
176 | | - |
177 | | - connPool := pool.NewConnPool(opt) |
178 | | - defer connPool.Close() |
179 | | - |
180 | | - // Add custom hook |
181 | | - loggingHook := &LoggingHook{name: "MyLogger"} |
182 | | - connPool.AddPoolHook(loggingHook) |
183 | | - |
184 | | - // Use the pool - hooks will be called automatically |
185 | | - ctx := context.Background() |
186 | | - conn, err := connPool.Get(ctx) |
187 | | - if err != nil { |
188 | | - log.Fatal(err) |
189 | | - } |
190 | | - |
191 | | - // Do something with connection... |
192 | | - |
193 | | - connPool.Put(ctx, conn) |
194 | 61 | } |
| 62 | +client := redis.NewClusterClient(opt) |
195 | 63 | ``` |
196 | 64 |
|
197 | | -### Multiple Hooks |
198 | | -```go |
199 | | -package main |
200 | | - |
201 | | -import ( |
202 | | - "context" |
203 | | - "log" |
204 | | - "time" |
205 | | - |
206 | | - "github.com/redis/go-redis/v9/internal/pool" |
207 | | -) |
208 | | - |
209 | | -// Metrics hook |
210 | | -type MetricsHook struct { |
211 | | - getCount int64 |
212 | | - putCount int64 |
213 | | -} |
214 | | - |
215 | | -func (mh *MetricsHook) OnGet(ctx context.Context, conn *pool.Conn, isNewConn bool) error { |
216 | | - mh.getCount++ |
217 | | - return nil |
218 | | -} |
219 | | - |
220 | | -func (mh *MetricsHook) OnPut(ctx context.Context, conn *pool.Conn) (shouldPool bool, shouldRemove bool, err error) { |
221 | | - mh.putCount++ |
222 | | - return true, false, nil |
223 | | -} |
| 65 | +## Metrics Hook Example |
224 | 66 |
|
225 | | -// Validation hook |
226 | | -type ValidationHook struct{} |
227 | | - |
228 | | -func (vh *ValidationHook) OnGet(ctx context.Context, conn *pool.Conn, isNewConn bool) error { |
229 | | - if !conn.IsUsable() { |
230 | | - return errors.New("connection not usable") |
231 | | - } |
232 | | - return nil |
233 | | -} |
| 67 | +`example_hooks.go` provides a metrics collection hook that demonstrates how to monitor hitless upgrade operations: |
234 | 68 |
|
235 | | -func (vh *ValidationHook) OnPut(ctx context.Context, conn *pool.Conn) (shouldPool bool, shouldRemove bool, err error) { |
236 | | - // Check if connection has errors |
237 | | - if conn.HasBufferedData() { |
238 | | - return false, true, nil // Don't pool, remove connection |
239 | | - } |
240 | | - return true, false, nil |
241 | | -} |
242 | | - |
243 | | -func main() { |
244 | | - opt := &pool.Options{ |
245 | | - Dialer: func(ctx context.Context) (net.Conn, error) { |
246 | | - return net.Dial("tcp", "localhost:6379") |
247 | | - }, |
248 | | - PoolSize: 10, |
249 | | - } |
250 | | - |
251 | | - connPool := pool.NewConnPool(opt) |
252 | | - defer connPool.Close() |
253 | | - |
254 | | - // Add multiple hooks - they execute in order |
255 | | - metricsHook := &MetricsHook{} |
256 | | - validationHook := &ValidationHook{} |
| 69 | +```go |
| 70 | +import "github.com/redis/go-redis/v9/hitless" |
257 | 71 |
|
258 | | - connPool.AddPoolHook(metricsHook) |
259 | | - connPool.AddPoolHook(validationHook) |
| 72 | +metricsHook := hitless.NewMetricsHook() |
| 73 | +// Use with your monitoring system |
| 74 | +``` |
260 | 75 |
|
261 | | - // Hooks will be called in the order they were added |
262 | | - ctx := context.Background() |
263 | | - conn, err := connPool.Get(ctx) // Both OnGet methods called |
264 | | - if err != nil { |
265 | | - log.Fatal(err) |
266 | | - } |
| 76 | +The metrics hook tracks: |
| 77 | +- Handoff success/failure rates |
| 78 | +- Handoff duration |
| 79 | +- Queue depth |
| 80 | +- Worker utilization |
| 81 | +- Connection lifecycle events |
267 | 82 |
|
268 | | - connPool.Put(ctx, conn) // Both OnPut methods called |
| 83 | +## Requirements |
269 | 84 |
|
270 | | - log.Printf("Metrics: Get=%d, Put=%d", metricsHook.getCount, metricsHook.putCount) |
271 | | -} |
272 | | -``` |
| 85 | +- **RESP3 Protocol**: Required for push notifications |
| 86 | +- **Redis 7.0+**: For maintenance notification support |
0 commit comments