@@ -53,20 +53,32 @@ def down
   end

   def failover
-    master, slave = take_a_replication_pair(@clients)
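+    # Pick a primary and one of its replicas from the parsed CLUSTER NODES rows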
+    rows = associate_with_clients_and_nodes(@clients)
+    primary_info = rows.find { |row| row[:role] == 'master' }
+    replica_info = rows.find { |row| row[:primary_id] == primary_info[:id] }
+
     wait_replication_delay(@clients, replica_size: @replica_size, timeout: @timeout)
-    slave.call('CLUSTER', 'FAILOVER', 'TAKEOVER')
-    wait_failover(@clients, master_key: to_node_key(master), slave_key: to_node_key(slave), max_attempts: @max_attempts)
+    replica_info.fetch(:client).call('CLUSTER', 'FAILOVER', 'TAKEOVER')
+    wait_failover(
+      @clients,
+      primary_node_key: primary_info.fetch(:node_key),
+      replica_node_key: replica_info.fetch(:node_key),
+      max_attempts: @max_attempts
+    )
     wait_replication_delay(@clients, replica_size: @replica_size, timeout: @timeout)
     wait_cluster_recovering(@clients, max_attempts: @max_attempts)
   end

-  def start_resharding(slot:, src_node_key:, dest_node_key:)
-    src_node_id = fetch_internal_id_by_natted_node_key(@clients.first, src_node_key)
-    src_client = find_client_by_natted_node_key(@clients, src_node_key)
-    dest_node_id = fetch_internal_id_by_natted_node_key(@clients.first, dest_node_key)
-    dest_client = find_client_by_natted_node_key(@clients, dest_node_key)
-    dest_host, dest_port = dest_node_key.split(':')
+  def start_resharding(slot:, src_node_key:, dest_node_key:) # rubocop:disable Metrics/CyclomaticComplexity
+    rows = associate_with_clients_and_nodes(@clients)
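+    # src/dest may be given as either the advertised node key or the client's own host:port (these can differ behind NAT)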
+    src_info = rows.find { |r| r[:node_key] == src_node_key || r[:client_node_key] == src_node_key }
+    dest_info = rows.find { |r| r[:node_key] == dest_node_key || r[:client_node_key] == dest_node_key }
+
+    src_node_id = src_info.fetch(:id)
+    src_client = src_info.fetch(:client)
+    dest_node_id = dest_info.fetch(:id)
+    dest_client = dest_info.fetch(:client)
+    dest_host, dest_port = dest_info.fetch(:node_key).split(':')

     # @see https://redis.io/commands/cluster-setslot/#redis-cluster-live-resharding-explained
     dest_client.call('CLUSTER', 'SETSLOT', slot, 'IMPORTING', src_node_id)
@@ -91,11 +103,16 @@ def start_resharding(slot:, src_node_key:, dest_node_key:)
     wait_replication_delay(@clients, replica_size: @replica_size, timeout: @timeout)
   end

-  def finish_resharding(slot:, src_node_key:, dest_node_key:)
-    id = fetch_internal_id_by_natted_node_key(@clients.first, dest_node_key)
-    dest = find_client_by_natted_node_key(@clients, dest_node_key)
-    src = find_client_by_natted_node_key(@clients, src_node_key)
-    rest = take_masters(@clients, shard_size: @shard_size).reject { |c| c.equal?(dest) || c.equal?(src) }
+  def finish_resharding(slot:, src_node_key:, dest_node_key:) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    rows = associate_with_clients_and_nodes(@clients)
+    src_info = rows.find { |r| r[:node_key] == src_node_key || r[:client_node_key] == src_node_key }
+    dest_info = rows.find { |r| r[:node_key] == dest_node_key || r[:client_node_key] == dest_node_key }
+
+    src = src_info.fetch(:client)
+    dest = dest_info.fetch(:client)
+    id = dest_info.fetch(:id)
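+    # The remaining primaries also need to be told the slot's new owner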
+    rest = rows.reject { |r| r[:role] == 'slave' || r[:client].equal?(src) || r[:client].equal?(dest) }.map { |r| r[:client] }
+
     ([dest, src] + rest).each do |cli|
       cli.call('CLUSTER', 'SETSLOT', slot, 'NODE', id)
     rescue ::RedisClient::CommandError => e
@@ -104,9 +121,9 @@ def finish_resharding(slot:, src_node_key:, dest_node_key:)
     end
   end

-  def scale_out(primary_url:, replica_url:) # rubocop:disable Metrics/CyclomaticComplexity
+  def scale_out(primary_url:, replica_url:)
     # @see https://redis.io/docs/manual/scaling/
-    rows = fetch_and_parse_cluster_nodes(@clients)
+    rows = associate_with_clients_and_nodes(@clients)
     target_host, target_port = rows.find { |row| row[:role] == 'master' }.fetch(:node_key).split(':')

     primary = ::RedisClient.new(url: primary_url, **@kwargs)
@@ -126,38 +143,32 @@ def scale_out(primary_url:, replica_url:) # rubocop:disable Metrics/CyclomaticCo
     save_config(@clients)
     wait_for_cluster_to_be_ready

-    rows = fetch_and_parse_cluster_nodes(@clients)
+    rows = associate_with_clients_and_nodes(@clients)

     SLOT_SIZE.times.to_a.sample(100).sort.each do |slot|
-      src = rows.find do |row|
-        next if row[:slots].empty?
-
-        row[:slots].any? { |first, last| first <= slot && slot <= last }
-      end.fetch(:node_key)
+      src = rows.find { |row| row[:slots].include?(slot) }.fetch(:node_key)
       dest = rows.find { |row| row[:id] == primary_id }.fetch(:node_key)
       start_resharding(slot: slot, src_node_key: src, dest_node_key: dest)
       finish_resharding(slot: slot, src_node_key: src, dest_node_key: dest)
     end
   end

   def scale_in # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
-    rows = fetch_and_parse_cluster_nodes(@clients)
-    primary_info = rows.reject { |r| r[:slots].empty? }.min_by { |r| r[:slots].flat_map { |start, last| (start..last).to_a }.size }
+    rows = associate_with_clients_and_nodes(@clients)
+
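+    # Retire the primary that owns the fewest slots, together with its replica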
+    primary_info = rows.reject { |r| r[:slots].empty? }.min_by { |r| r[:slots].size }
     replica_info = rows.find { |r| r[:primary_id] == primary_info[:id] }
     rest_primary_node_keys = rows.reject { |r| r[:id] == primary_info[:id] || r[:role] == 'slave' }.map { |r| r[:node_key] }

-    primary_info[:slots].each do |start, last|
-      (start..last).each do |slot|
-        src = primary_info.fetch(:node_key)
-        dest = rest_primary_node_keys.sample
-        start_resharding(slot: slot, src_node_key: src, dest_node_key: dest)
-        finish_resharding(slot: slot, src_node_key: src, dest_node_key: dest)
-      end
+    primary_info[:slots].each do |slot|
+      src = primary_info.fetch(:node_key)
+      dest = rest_primary_node_keys.sample
+      start_resharding(slot: slot, src_node_key: src, dest_node_key: dest)
+      finish_resharding(slot: slot, src_node_key: src, dest_node_key: dest)
     end

-    id2cli = fetch_internal_id_to_client_mappings(@clients)
-    replica = id2cli.fetch(replica_info[:id])
-    primary = id2cli.fetch(primary_info[:id])
+    replica = replica_info.fetch(:client)
+    primary = primary_info.fetch(:client)
     threads = @clients.map do |cli|
       Thread.new(cli) do |c|
         Thread.pass
@@ -184,8 +195,31 @@ def scale_in # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedCo
     end
   end

+  def select_resharding_target(slot)
+    rows = associate_with_clients_and_nodes(@clients)
+    src = rows.find { |r| r[:role] == 'master' && r[:slots].include?(slot) }
+    dest = rows.reject { |r| r[:role] == 'slave' || r[:id] == src[:id] }.sample
+    [src.fetch(:node_key), dest.fetch(:node_key)]
+  end
+
+  def select_sacrifice_of_primary
+    rows = associate_with_clients_and_nodes(@clients)
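+    # Only primaries that still have at least one replica are eligible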
+    rows.select { |r| r[:role] == 'master' }
+        .reject { |primary| rows.none? { |r| r[:primary_id] == primary[:id] } }
+        .sample.fetch(:client)
+  end
+
+  def select_sacrifice_of_replica
+    rows = associate_with_clients_and_nodes(@clients)
+    rows.select { |r| r[:role] == 'slave' }.sample.fetch(:client)
+  end
+
   def close
-    @clients.each(&:close)
+    @clients.each do |client|
+      client.close
+    rescue ::RedisClient::ConnectionError
+      # ignore
+    end
   end

   private
@@ -194,7 +228,7 @@ def flush_all_data(clients)
     clients.each do |c|
       c.call('FLUSHALL')
     rescue ::RedisClient::CommandError
-      # READONLY You can't write against a read only slave.
+      # READONLY You can't write against a read only replica.
       nil
     end
   end
@@ -204,14 +238,14 @@ def reset_cluster(clients)
   end

   def assign_slots(clients, shard_size:)
-    masters = take_masters(clients, shard_size: shard_size)
-    slot_slice = SLOT_SIZE / masters.size
-    mod = SLOT_SIZE % masters.size
-    slot_sizes = Array.new(masters.size, slot_slice)
+    primaries = take_primaries(clients, shard_size: shard_size)
+    slot_slice = SLOT_SIZE / primaries.size
+    mod = SLOT_SIZE % primaries.size
+    slot_sizes = Array.new(primaries.size, slot_slice)
     mod.downto(1) { |i| slot_sizes[i] += 1 }

     slot_idx = 0
-    masters.zip(slot_sizes).each do |c, s|
+    primaries.zip(slot_sizes).each do |c, s|
       slot_range = slot_idx..slot_idx + s - 1
       c.call('CLUSTER', 'ADDSLOTS', *slot_range.to_a)
       slot_idx += s
@@ -228,7 +262,9 @@ def save_config_epoch(clients)
   end

   def meet_each_other(clients)
-    target_host, target_port = fetch_cluster_nodes(clients.first).first[1].split('@').first.split(':')
+    rows = fetch_cluster_nodes(clients.first)
+    rows = parse_cluster_nodes(rows)
+    target_host, target_port = rows.first.fetch(:node_key).split(':')
     clients.drop(1).each { |c| c.call('CLUSTER', 'MEET', target_host, target_port) }
   end

@@ -242,21 +278,19 @@ def wait_meeting(clients, max_attempts:)
   end

   def replicate(clients, shard_size:, replica_size:)
-    node_map = hashify_node_map(clients)
-    masters = take_masters(clients, shard_size: shard_size)
+    primaries = take_primaries(clients, shard_size: shard_size)
+    replicas = take_replicas(clients, shard_size: shard_size)

-    take_slaves(clients, shard_size: shard_size).each_slice(replica_size).each_with_index do |slaves, i|
-      master_host = masters[i].config.host
-      master_port = masters[i].config.port
+    replicas.each_slice(replica_size).each_with_index do |subset, i|
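+      # Ask the primary for its node ID directly via CLUSTER MYID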
+      primary_id = primaries[i].call('CLUSTER', 'MYID')

       loop do
         begin
-          master_node_id = node_map.fetch(to_node_key_by_host_port(master_host, master_port))
-          slaves.each { |slave| slave.call('CLUSTER', 'REPLICATE', master_node_id) }
+          subset.each { |replica| replica.call('CLUSTER', 'REPLICATE', primary_id) }
         rescue ::RedisClient::CommandError
           # ERR Unknown node [key]
           sleep 0.1
-          node_map = hashify_node_map(clients)
+          primary_id = primaries[i].call('CLUSTER', 'MYID')
           next
         end

@@ -280,17 +314,21 @@ def wait_cluster_building(clients, max_attempts:)

   def wait_replication(clients, number_of_replicas:, max_attempts:)
     wait_for_state(clients, max_attempts: max_attempts) do |client|
-      flags = hashify_cluster_node_flags(clients, client: client)
-      flags.values.count { |f| f == 'slave' } == number_of_replicas
+      rows = fetch_cluster_nodes(client)
+      rows = parse_cluster_nodes(rows)
+      rows.count { |r| r[:role] == 'slave' } == number_of_replicas
     rescue ::RedisClient::ConnectionError
       true
     end
   end

-  def wait_failover(clients, master_key:, slave_key:, max_attempts:)
+  def wait_failover(clients, primary_node_key:, replica_node_key:, max_attempts:)
     wait_for_state(clients, max_attempts: max_attempts) do |client|
-      flags = hashify_cluster_node_flags(clients, client: client)
-      flags[master_key] == 'slave' && flags[slave_key] == 'master'
+      rows = fetch_cluster_nodes(client)
+      rows = parse_cluster_nodes(rows)
+      primary_info = rows.find { |r| r[:node_key] == primary_node_key || r[:client_node_key] == primary_node_key }
+      replica_info = rows.find { |r| r[:node_key] == replica_node_key || r[:client_node_key] == replica_node_key }
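+      # Done once the old primary reports as a replica and the promoted replica reports as a primary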
+      primary_info[:role] == 'slave' && replica_info[:role] == 'master'
     rescue ::RedisClient::ConnectionError
       true
     end
@@ -343,87 +381,53 @@ def hashify_cluster_info(client)
     client.call('CLUSTER', 'INFO').split("\r\n").to_h { |v| v.split(':') }
   end

-  def hashify_cluster_node_flags(clients, client: nil)
-    id2key = fetch_internal_id_to_node_key_mappings(clients)
-    fetch_cluster_nodes(client || clients.first)
-      .to_h { |arr| [id2key[arr[0]], (arr[2].split(',') & %w[master slave]).first] }
+  def fetch_cluster_nodes(client)
+    client.call('CLUSTER', 'NODES').split("\n").map(&:split)
   end

-  def hashify_node_map(clients)
-    id2key = fetch_internal_id_to_node_key_mappings(clients)
-    clients.each do |client|
-      return fetch_cluster_nodes(client).to_h { |arr| [id2key[arr[0]], arr[0]] }
+  def associate_with_clients_and_nodes(clients)
+    clients.filter_map do |client|
+      rows = fetch_cluster_nodes(client)
+      rows = parse_cluster_nodes(rows)
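+      # Keep only the row flagged "myself" from each client's own view, tagged with the client and its connection address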
+      row = rows.find { |r| r[:flags].include?('myself') }
+      row.merge(client: client, client_node_key: "#{client.config.host}:#{client.config.port}")
     rescue ::RedisClient::ConnectionError
       next
     end
   end

-  def fetch_internal_id_by_natted_node_key(client, node_key)
-    fetch_cluster_nodes(client).find { |info| info[1].split('@').first == node_key }.first
-  end
-
-  def find_client_by_natted_node_key(clients, node_key)
-    id = fetch_internal_id_by_natted_node_key(clients.first, node_key)
-    id2key = fetch_internal_id_to_node_key_mappings(clients)
-    key = id2key[id]
-    clients.find { |cli| key == to_node_key(cli) }
-  end
-
-  def fetch_cluster_nodes(client)
-    client.call('CLUSTER', 'NODES').split("\n").map(&:split)
-  end
-
-  def fetch_internal_id_to_node_key_mappings(clients)
-    fetch_internal_id_to_client_mappings(clients).transform_values { |c| to_node_key(c) }
-  end
-
-  def fetch_internal_id_to_client_mappings(clients)
-    clients.to_h { |c| [c.call('CLUSTER', 'MYID'), c] }
-  end
-
-  def fetch_and_parse_cluster_nodes(clients) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
-    rows = fetch_cluster_nodes(clients.first)
-    rows.each { |arr| arr[2] = arr[2].split(',') }
-    rows.select! { |arr| arr[7] == 'connected' && (arr[2] & %w[fail? fail handshake noaddr noflags]).empty? }
-    rows.each do |arr|
-      arr[1] = arr[1].split('@').first
-      arr[2] = (arr[2] & %w[master slave]).first
-      if arr[8].nil?
-        arr[8] = []
-        next
-      end
-      arr[8] = arr[8..].filter_map { |str| str.start_with?('[') ? nil : str.split('-').map { |s| Integer(s) } }
-               .map { |a| a.size == 1 ? a << a.first : a }.map(&:sort)
-    end
-
-    rows.map do |arr|
-      { id: arr[0], node_key: arr[1], role: arr[2], primary_id: arr[3], ping_sent: arr[4],
-        pong_recv: arr[5], config_epoch: arr[6], link_state: arr[7], slots: arr[8] }
+  def parse_cluster_nodes(rows) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+    rows.map do |row|
+      flags = row[2].split(',')
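+      # Expand slot ranges such as "0-5460" into a flat sorted list of slot numbers, skipping "[...]" entries for migrating slots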
+      slots = if row[8].nil?
+                []
+              else
+                row[8..].filter_map { |str| str.start_with?('[') ? nil : str.split('-').map { |s| Integer(s) } }
+                        .map { |a| a.size == 1 ? a << a.first : a }.map(&:sort)
+                        .flat_map { |first, last| (first..last).to_a }.sort
+              end
+
+      {
+        id: row[0],
+        node_key: row[1].split('@').first,
+        flags: flags,
+        role: (flags & %w[master slave]).first,
+        primary_id: row[3],
+        ping_sent: row[4],
+        pong_recv: row[5],
+        config_epoch: row[6],
+        link_state: row[7],
+        slots: slots
+      }
     end
   end

-  def take_masters(clients, shard_size:)
+  def take_primaries(clients, shard_size:)
     clients.select { |cli| cli.call('ROLE').first == 'master' }.take(shard_size)
   end

-  def take_slaves(clients, shard_size:)
+  def take_replicas(clients, shard_size:)
     replicas = clients.select { |cli| cli.call('ROLE').first == 'slave' }
     replicas.size.zero? ? clients[shard_size..] : replicas
   end
-
-  def take_a_replication_pair(clients)
-    rows = fetch_and_parse_cluster_nodes(clients)
-    primary = rows.find { |row| row[:role] == 'master' }
-    replica = rows.find { |row| row[:primary_id] == primary[:id] }
-    id2cli = fetch_internal_id_to_client_mappings(clients)
-    [id2cli[primary[:id]], id2cli[replica[:id]]]
-  end
-
-  def to_node_key(client)
-    to_node_key_by_host_port(client.config.host, client.config.port)
-  end
-
-  def to_node_key_by_host_port(host, port)
-    "#{host}:#{port}"
-  end
 end