5
5
class ClusterOrchestrator
6
6
SLOT_SIZE = 16384
7
7
8
# Connects one Redis client to every given node address.
#
# @param node_addrs [Array<String>] node URLs, each passed to Redis.new as `url:`
# @param timeout [Numeric] per-client timeout handed to Redis.new; also kept in
#   @timeout so later waits (see the calls in #failover) can reuse it
# @raise [RuntimeError] when fewer than 3 addresses are supplied
def initialize(node_addrs, timeout: 30.0)
  raise 'Redis Cluster requires at least 3 master nodes.' if node_addrs.size < 3

  @clients = node_addrs.map { |addr| Redis.new(url: addr, timeout: timeout) }
  @timeout = timeout
end
13
13
14
14
def rebuild
@@ -21,6 +21,8 @@ def rebuild
21
21
replicate ( @clients )
22
22
save_config ( @clients )
23
23
wait_cluster_building ( @clients )
24
+ wait_replication ( @clients )
25
+ wait_cluster_recovering ( @clients )
24
26
end
25
27
26
28
def down
@@ -30,8 +32,11 @@ def down
30
32
31
33
# Forces a takeover failover on one replication pair and waits for the
# cluster to settle.
#
# The order matters: replication delay is awaited both before the takeover
# and after the role swap has been observed, and only then is the cluster
# probed for recovery.
def failover
  master, slave = take_replication_pairs(@clients)
  wait_replication_delay(@clients, @timeout)
  slave.cluster(:failover, :takeover)
  wait_failover(to_node_key(master), to_node_key(slave), @clients)
  wait_replication_delay(@clients, @timeout)
  wait_cluster_recovering(@clients)
end
36
41
37
42
def start_resharding ( slot , src_node_key , dest_node_key )
@@ -117,14 +122,12 @@ def meet_each_other(clients)
117
122
end
118
123
end
119
124
120
# Waits until every client reports that it knows as many nodes as there are
# clients.
#
# @param clients [Array] node clients, each passed to hashify_cluster_info
# @param max_attempts [Integer] attempt budget forwarded to wait_for_state
def wait_meeting(clients, max_attempts: 600)
  # Compared as a String: the info value is matched against `size.to_s`
  # without numeric conversion.
  expected_known_nodes = clients.size.to_s

  wait_for_state(clients, max_attempts) do |client|
    info = hashify_cluster_info(client)
    info['cluster_known_nodes'] == expected_known_nodes
  end
end
130
133
@@ -157,27 +160,61 @@ def save_config(clients)
157
160
clients . each { |c | c . cluster ( :saveconfig ) }
158
161
end
159
162
160
# Waits until each client reports 'cluster_state' equal to 'ok'.
#
# @param clients [Array] node clients, each passed to hashify_cluster_info
# @param max_attempts [Integer] attempt budget forwarded to wait_for_state
def wait_cluster_building(clients, max_attempts: 600)
  wait_for_state(clients, max_attempts) do |client|
    info = hashify_cluster_info(client)
    info['cluster_state'] == 'ok'
  end
end
170
# Waits until each client's node-flag table lists exactly three 'slave'
# entries.
#
# @param clients [Array] node clients, each passed to hashify_cluster_node_flags
# @param max_attempts [Integer] attempt budget forwarded to wait_for_state
def wait_replication(clients, max_attempts: 600)
  # NOTE(review): the replica count is fixed at 3, which presumably matches
  # the 3-master/3-replica layout this class sets up; confirm before reuse.
  expected_slaves = 3

  wait_for_state(clients, max_attempts) do |client|
    flags = hashify_cluster_node_flags(client)
    flags.values.count('slave') == expected_slaves
  end
end
176
+
177
# Waits until each client sees the two nodes with swapped roles: the former
# master flagged 'slave' and the former slave flagged 'master'.
#
# @param master_key [Object] key of the old master in the node-flag table
# @param slave_key [Object] key of the old slave in the node-flag table
# @param clients [Array] node clients, each passed to hashify_cluster_node_flags
# @param max_attempts [Integer] attempt budget forwarded to wait_for_state
def wait_failover(master_key, slave_key, clients, max_attempts: 600)
  wait_for_state(clients, max_attempts) do |client|
    flags = hashify_cluster_node_flags(client)
    flags[master_key] == 'slave' && flags[slave_key] == 'master'
  end
end
172
183
173
- def wait_failover ( master_key , slave_key , clients , max_attempts : 200 )
174
- attempt_count = 0
184
# Issues WAIT on every client whose role is 'master', asking for one replica
# acknowledgement within the given timeout. Non-master clients are skipped.
#
# @param clients [Array] node clients responding to #role and #wait
# @param timeout_sec [Numeric] timeout in seconds; fractions are honoured
def wait_replication_delay(clients, timeout_sec)
  # Convert to milliseconds BEFORE dropping the fraction. Truncating the
  # seconds first turned e.g. 0.5 into 0 ms, and the Redis WAIT documentation
  # defines a timeout of 0 as "block forever".
  timeout_msec = (timeout_sec.to_f * 1000).round

  # The block always yields true, so each client is visited once; the
  # attempt budget only needs to exceed the number of clients.
  wait_for_state(clients, clients.size + 1) do |client|
    client.wait(1, timeout_msec) if client.role.first == 'master'
    true
  end
end
175
191
192
# Probes every master client with a GET until the cluster answers again.
#
# A client counts as recovered when the GET succeeds, when the client is not
# a master (no probe is sent), or when the GET fails with a command error
# other than CLUSTERDOWN / MOVED.
#
# @param clients [Array] node clients responding to #role and #get
# @param max_attempts [Integer] attempt budget forwarded to wait_for_state
def wait_cluster_recovering(clients, max_attempts: 600)
  # Shared across all clients and attempts: it only ever moves forward.
  probe_key = 0

  wait_for_state(clients, max_attempts) do |client|
    begin
      client.get(probe_key) if client.role.first == 'master'
      true
    rescue Redis::CommandError => err
      if err.message.start_with?('CLUSTERDOWN')
        # Cluster not serving yet; retry with the same key.
        false
      elsif err.message.start_with?('MOVED')
        # This key is redirected away from the probed node; try the next key.
        probe_key += 1
        false
      else
        # Any other command error is deliberately treated as "recovered".
        true
      end
    end
  end
end
210
+
211
+ def wait_for_state ( clients , max_attempts )
212
+ attempt_count = 1
176
213
clients . each do |client |
177
- loop do
178
- flags = hashify_cluster_node_flags ( client )
214
+ attempt_count . step ( max_attempts ) do | i |
215
+ break if i >= max_attempts
179
216
attempt_count += 1
180
- break if ( flags [ master_key ] == 'slave' && flags [ slave_key ] == 'master' ) || attempt_count > max_attempts
217
+ break if yield ( client )
181
218
sleep 0.1
182
219
end
183
220
end
0 commit comments