@@ -43,9 +43,8 @@ def warm_up
43
43
44
44
it 'some connections survive' do
45
45
threads = [ ]
46
- errors = 0
46
+ errors = Concurrent :: AtomicFixnum . new ( 0 )
47
47
sem = Concurrent ::Semaphore . new ( 0 )
48
- ( 5.0 / 25 * 25.0 ) . ceil
49
48
25 . times do
50
49
t = Thread . new do
51
50
c = 1
@@ -54,13 +53,13 @@ def warm_up
54
53
c = conn
55
54
break
56
55
rescue StandardError
57
- errors += 1
56
+ errors . increment
58
57
end
59
58
25 . times do
60
59
c . exec 'SELECT 1'
61
60
rescue PG ::SystemError
62
61
c = conn # reconnect
63
- errors += 1
62
+ errors . increment
64
63
end
65
64
end
66
65
threads << t
@@ -69,7 +68,7 @@ def warm_up
69
68
sem . release ( 25 )
70
69
threads . each ( &:join )
71
70
end
72
- expect ( errors ) . to be < 25 # 5% error rate (instead of 100%)
71
+ expect ( errors . value ) . to be < 25 # 5% error rate (instead of 100%)
73
72
end
74
73
75
74
it 'active record works' do
@@ -78,22 +77,93 @@ def warm_up
78
77
# Connect (the pool is lazy)
79
78
Sharded . where ( id : 1 ) . first
80
79
errors = 0
80
+ ok = 0
81
81
# Can't ban primary because it issues SET queries
82
82
# that we currently route to primary.
83
83
Toxiproxy [ role ] . toxic ( toxic ) . apply do
84
84
25 . times do
85
85
Sharded . where ( id : 1 ) . first
86
+ ok += 1
86
87
rescue StandardError
87
88
errors += 1
88
89
end
89
90
end
90
- expect ( errors ) . to eq ( 1 )
91
+ expect ( errors ) . to be <= 1
92
+ expect ( 25 - ok ) . to eq ( errors )
93
+ end
94
+ end
95
+
96
+ describe 'healthcheck' do
97
# Setup hook for the 'healthcheck' group (declared with `before :each`).
# Over a single admin connection it sends RECONNECT, then sets
# read_write_split to 'exclude_primary' and ban_timeout to 1.
# NOTE(review): the unit of ban_timeout is not visible here; confirm.
+ before :each do
98
+ admin_conn = admin
99
+ admin_conn . exec 'RECONNECT'
100
+ admin_conn . exec "SET read_write_split TO 'exclude_primary'"
101
+ admin_conn . exec 'SET ban_timeout TO 1'
102
+ end
103
+
104
+ describe 'will heal itself' do
105
# Helper: runs SHOW POOLS on the admin connection, keeps the rows whose
# 'database' is 'failover' and whose 'role' equals +role+, and returns an
# array holding each kept row's +field+ value.
#
# role  - role string compared against the row's 'role' column
#         (the examples pass 'replica' and 'primary').
# field - column to read from each row; defaults to 'healthy'
#         (the examples also pass 'banned').
#
# The examples compare the returned values against 't' / 'f'.
+ def health ( role , field = 'healthy' )
106
+ admin . exec ( 'SHOW POOLS' ) . select do |pool |
107
+ pool [ 'database' ] == 'failover' && pool [ 'role' ] == role
108
+ end . map { |pool | pool [ field ] }
109
+ end
110
+
111
# The `it 'replica'` declaration sits inside `10.times`, so the same example
# is registered ten times.
# NOTE(review): presumably repeated to surface flakiness; confirm intent.
+ 10 . times do
112
+ it 'replica' do
113
+ # Cache connect params.
114
+ conn . exec 'SELECT 1'
115
+
116
# Phase 1: while the :reset_peer toxic is applied to the :replica proxy,
# issue 4 queries and count the ones that raise PG::Error.
+ Toxiproxy [ :replica ] . toxic ( :reset_peer ) . apply do
117
+ errors = 0
118
+ 4 . times do
119
+ conn . exec 'SELECT 1'
120
+ rescue PG ::Error
121
+ errors += 1
122
+ end
123
# At least one query must have failed, at least one replica row must report
# healthy == 'f', and after the pause at least one must report banned == 't'.
+ expect ( errors ) . to be >= 1
124
+ expect ( health ( 'replica' ) ) . to include ( 'f' )
125
+ sleep ( 0.4 ) # ban maintenance runs every 333ms
126
+ expect ( health ( 'replica' , 'banned' ) ) . to include ( 't' )
127
+ end
128
+
129
# Phase 2: the toxic is no longer applied. These 4 queries are not rescued,
# so any error here fails the example.
+ 4 . times do
130
+ conn . exec 'SELECT 1'
131
+ end
132
+
133
# Send HEALTHCHECK on the admin connection, pause, then expect exactly three
# replica rows, all healthy ('t') and none banned ('f').
+ admin . exec 'HEALTHCHECK'
134
+ sleep ( 0.4 )
135
+
136
+ expect ( health ( 'replica' ) ) . to eq ( %w[ t t t ] )
137
+ expect ( health ( 'replica' , 'banned' ) ) . to eq ( %w[ f f f ] )
138
+ end
139
+ end
140
+
141
# Example: the primary pool is reported unhealthy while its proxy resets
# connections, and healthy again after a later successful statement.
+ it 'primary' do
142
+ # Cache connect params.
143
+ conn . exec 'DELETE FROM sharded'
144
+
145
# With the :reset_peer toxic on the :primary proxy, attempt one DELETE and
# ignore a PG::Error from it; the single primary row must then report
# healthy == 'f'.
+ Toxiproxy [ :primary ] . toxic ( :reset_peer ) . apply do
146
+ begin
147
+ conn . exec 'DELETE FROM sharded'
148
+ rescue PG ::Error
149
+ end
150
+ expect ( health ( 'primary' ) ) . to eq ( [ 'f' ] )
151
+ end
152
+
153
# Toxic no longer applied: this DELETE is not rescued, so it must succeed,
# and the primary row must report healthy == 't' afterwards.
+ conn . exec 'DELETE FROM sharded'
154
+
155
+ expect ( health ( 'primary' ) ) . to eq ( %w[ t ] )
156
+ end
157
+ end
158
+
159
# Teardown hook: sends RELOAD on the admin connection.
# NOTE(review): presumably this undoes the SET commands issued in the
# group's before hook; confirm what RELOAD resets.
+ after do
160
+ admin . exec 'RELOAD'
91
161
end
92
162
end
93
163
94
164
describe 'tcp' do
95
165
# Wraps every example of the 'tcp' group in Timeout.timeout, so an example
# that runs longer than the limit is interrupted with a timeout error.
# This diff raises the limit from 10 to 30 (Timeout.timeout takes seconds).
around :each do |example |
96
- Timeout . timeout ( 10 ) do
166
+ Timeout . timeout ( 30 ) do
97
167
example . run
98
168
end
99
169
end
@@ -138,8 +208,23 @@ def warm_up
138
208
end
139
209
140
210
describe 'both down' do
141
- it 'unbans all pools' do
142
- 25 . times do
211
+ 10 . times do
212
+ it 'unbans all pools' do
213
+ rw_config = admin . exec ( 'SHOW CONFIG' ) . select do |config |
214
+ config [ 'name' ] == 'read_write_split'
215
+ end [ 0 ] [ 'value' ]
216
+ expect ( rw_config ) . to eq ( 'include_primary' )
217
+
218
# Helper: runs SHOW POOLS on the admin connection, keeps the rows whose
# 'database' is 'failover', and returns how many of them have
# row[field] == value.
#
# field - column name to inspect (called below with 'healthy').
# value - value the column is compared against (called below with 'f').
#
# NOTE(review): this `def` sits inside the body of an `it` block that is
# itself registered inside `10.times`; consider defining it at group level,
# as the `health` helper in the 'healthcheck' group is.
+ def pool_stat ( field , value )
219
+ failover = admin . exec ( 'SHOW POOLS' ) . select do |pool |
220
+ pool [ 'database' ] == 'failover'
221
+ end
222
+ entries = failover . select { |item | item [ field ] == value }
223
+ entries . size
224
+ end
225
+
226
+ admin . exec 'SET checkout_timeout TO 100'
227
+
143
228
Toxiproxy [ :primary ] . toxic ( :reset_peer ) . apply do
144
229
Toxiproxy [ :replica ] . toxic ( :reset_peer ) . apply do
145
230
Toxiproxy [ :replica2 ] . toxic ( :reset_peer ) . apply do
@@ -148,19 +233,16 @@ def warm_up
148
233
conn . exec_params 'SELECT $1::bigint' , [ 1 ]
149
234
rescue StandardError
150
235
end
151
- banned = admin . exec ( 'SHOW POOLS' ) . select do |pool |
152
- pool [ 'database' ] == 'failover'
153
- end . select { |item | item [ 'banned' ] == 't' }
154
- expect ( banned . size ) . to eq ( 4 )
236
+
237
+ expect ( pool_stat ( 'healthy' , 'f' ) ) . to eq ( 4 )
155
238
end
156
239
end
157
240
end
158
241
end
159
- conn . exec 'SELECT $1::bigint' , [ 25 ]
160
- banned = admin . exec ( 'SHOW POOLS' ) . select do |pool |
161
- pool [ 'database' ] == 'failover'
162
- end . select { |item | item [ 'banned' ] == 't' }
163
- expect ( banned . size ) . to eq ( 0 )
242
+
243
+ 4 . times do
244
+ conn . exec 'SELECT $1::bigint' , [ 25 ]
245
+ end
164
246
end
165
247
end
166
248
end
@@ -172,25 +254,21 @@ def warm_up
172
254
Toxiproxy [ :primary ] . toxic ( :reset_peer ) . apply do
173
255
c = conn
174
256
c . exec 'BEGIN'
175
- c . exec 'CREATE TABLE test(id BIGINT)'
257
+ c . exec 'CREATE TABLE IF NOT EXISTS test(id BIGINT)'
176
258
c . exec 'ROLLBACK'
177
259
rescue StandardError
178
260
end
261
+
179
262
banned = admin . exec ( 'SHOW POOLS' ) . select do |pool |
180
263
pool [ 'database' ] == 'failover' && pool [ 'role' ] == 'primary'
181
264
end
182
- expect ( banned [ 0 ] [ 'banned ' ] ) . to eq ( 't ' )
265
+ expect ( banned [ 0 ] [ 'healthy ' ] ) . to eq ( 'f ' )
183
266
184
267
c = conn
185
268
c . exec 'BEGIN'
186
- c . exec 'CREATE TABLE test(id BIGINT)'
269
+ c . exec 'CREATE TABLE IF NOT EXISTS test(id BIGINT)'
187
270
c . exec 'SELECT * FROM test'
188
271
c . exec 'ROLLBACK'
189
-
190
- banned = admin . exec ( 'SHOW POOLS' ) . select do |pool |
191
- pool [ 'database' ] == 'failover' && pool [ 'role' ] == 'primary'
192
- end
193
- expect ( banned [ 0 ] [ 'banned' ] ) . to eq ( 'f' )
194
272
end
195
273
196
274
it 'active record works' do
@@ -199,16 +277,58 @@ def warm_up
199
277
# Connect (the pool is lazy)
200
278
Sharded . where ( id : 1 ) . first
201
279
errors = 0
280
+ ok = 0
202
281
# Can't ban primary because it issues SET queries
203
282
# that we currently route to primary.
204
283
Toxiproxy [ :primary ] . toxic ( :reset_peer ) . apply do
205
284
25 . times do
206
285
Sharded . where ( id : 1 ) . first
286
+ ok += 1
207
287
rescue StandardError
208
288
errors += 1
209
289
end
210
290
end
211
- expect ( errors ) . to eq ( 1 )
291
+ expect ( errors ) . to be <= 1
292
+ expect ( 25 - ok ) . to eq ( errors )
293
+ end
294
+
295
# Example: after one successful query, new client connections can still be
# opened while :reset_peer toxics are applied to all four proxies
# (:primary, :replica, :replica2, :replica3).
+ it 'clients can connect when all servers are down after caching connection params' do
296
+ # First, establish a connection to cache connection parameters
297
+ c = conn
298
+ c . exec 'SELECT 1'
299
+ c . close
300
+
301
+ # Verify initial state - all pools should be healthy before toxics
302
+ pools = admin . exec ( 'SHOW POOLS' ) . select do |pool |
303
+ pool [ 'database' ] == 'failover'
304
+ end
305
+ expect ( pools . all? { |p | p [ 'healthy' ] == 't' } ) . to be true
306
+
307
+ # Now bring down all servers
308
+ Toxiproxy [ :primary ] . toxic ( :reset_peer ) . apply do
309
+ Toxiproxy [ :replica ] . toxic ( :reset_peer ) . apply do
310
+ Toxiproxy [ :replica2 ] . toxic ( :reset_peer ) . apply do
311
+ Toxiproxy [ :replica3 ] . toxic ( :reset_peer ) . apply do
312
+ # Try to establish many connections
313
+ connections = [ ]
# Open 50 connections; a `conn` call that raises fails the example because
# nothing here rescues it.
314
+ 50 . times do
315
+ c = conn
316
+ expect ( c ) . not_to be_nil
317
+ connections << c
318
+ end
319
+
320
+ # Check internal state - verify we have active client connections
321
+ clients = admin . exec ( 'SHOW CLIENTS' ) . select do |client |
322
+ client [ 'database' ] == 'failover'
323
+ end
324
+ expect ( clients . size ) . to be >= 50
325
+
326
+ # Clean up connections without executing queries to avoid timeouts
327
# NOTE(review): the inline `rescue nil` silently discards any StandardError
# raised by close, and the block parameter `c` shadows the outer local `c`.
+ connections . each { |c | c . close rescue nil }
328
+ end
329
+ end
330
+ end
331
+ end
212
332
end
213
333
end
214
334
end
0 commit comments