Skip to content

Commit 57566f6

Browse files
danolivomason-sharp
authored andcommitted
One more Z0DAN SYNC test for the 2+1 node configuration.
Here, we add a test that modifies an identity column (aid). To demonstrate that this test is correct we check 2n-configuration beforehand and see no difference in the data.
1 parent 325be0d commit 57566f6

File tree

5 files changed

+268
-22
lines changed

5 files changed

+268
-22
lines changed

samples/Z0DAN/n1.pgb

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
\set aid random(1, 50000)
22

3-
UPDATE pgbench_accounts SET abalance = abalance + :aid WHERE aid = :aid;
4-
UPDATE pgbench_accounts SET abalance = abalance - :aid WHERE aid = :aid;
3+
UPDATE pgbench_accounts SET abalance = abalance + 1 WHERE aid = :aid;
4+
UPDATE pgbench_accounts SET abalance = abalance - 1 WHERE aid = :aid;
5+
6+
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
7+
UPDATE pgbench_accounts SET abalance = abalance + 2 WHERE aid = :aid;
8+
UPDATE pgbench_accounts SET abalance = abalance - 2 WHERE aid = :aid;
9+
END;

samples/Z0DAN/n1_1.pgb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
\set aid random(1, 50000)
2+
3+
UPDATE pgbench_accounts SET abalance = abalance + 1, aid = - :aid
4+
WHERE abs(aid) = :aid;

samples/Z0DAN/n2.pgb

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
\set aid random(50001, 100000)
22

3-
UPDATE pgbench_accounts SET abalance = abalance + :aid WHERE aid = :aid;
4-
UPDATE pgbench_accounts SET abalance = abalance - :aid WHERE aid = :aid;
3+
UPDATE pgbench_accounts SET abalance = abalance + 1 WHERE aid = :aid;
4+
UPDATE pgbench_accounts SET abalance = abalance - 1 WHERE aid = :aid;
5+
6+
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
7+
UPDATE pgbench_accounts SET abalance = abalance + 2 WHERE aid = :aid;
8+
UPDATE pgbench_accounts SET abalance = abalance - 2 WHERE aid = :aid;
9+
END;

samples/Z0DAN/n2_1.pgb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
\set aid random(50001, 100000)
2+
3+
UPDATE pgbench_accounts SET abalance = abalance + 1, aid = - :aid
4+
WHERE abs(aid) = :aid;

tests/tap/t/011_zodan_sync_third.pl

Lines changed: 246 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use strict;
22
use warnings;
3-
use Test::More tests => 20;
3+
use Test::More tests => 34;
44
use IPC::Run;
55
use lib '.';
66
use lib 't';
@@ -38,7 +38,7 @@
3838

3939
cross_wire(2, ['n1', 'n2'], 'Cross-wire nodes N1 and N2');
4040

41-
note "Install the helper functions and do other preparatory stuff";
41+
print STDERR "Install the helper functions and do other preparatory stuff\n";
4242
my $helper_sql = '../../samples/Z0DAN/wait_subscription.sql';
4343
my $zodan_sql = '../../samples/Z0DAN/zodan.sql';
4444
psql_or_bail(1, "\\i $helper_sql");
@@ -47,12 +47,35 @@
4747
psql_or_bail(3, "CREATE EXTENSION dblink");
4848
psql_or_bail(3, "SELECT spock.node_drop('n3')");
4949

50+
# Reduce the logfile size
5051
psql_or_bail(1, "ALTER SYSTEM SET log_min_messages TO LOG");
52+
psql_or_bail(1, "ALTER SYSTEM SET log_statement TO none");
53+
psql_or_bail(1, "ALTER SYSTEM SET log_checkpoints TO off");
54+
psql_or_bail(1, "ALTER SYSTEM SET log_connections TO off");
55+
psql_or_bail(1, "ALTER SYSTEM SET log_disconnections TO off");
56+
psql_or_bail(1, "ALTER SYSTEM SET log_lock_waits TO off");
57+
psql_or_bail(1, "ALTER SYSTEM SET log_statement_stats TO off");
5158
psql_or_bail(1, "SELECT pg_reload_conf()");
59+
5260
psql_or_bail(2, "ALTER SYSTEM SET log_min_messages TO LOG");
61+
psql_or_bail(2, "ALTER SYSTEM SET log_statement TO none");
62+
psql_or_bail(2, "ALTER SYSTEM SET log_checkpoints TO off");
63+
psql_or_bail(2, "ALTER SYSTEM SET log_connections TO off");
64+
psql_or_bail(2, "ALTER SYSTEM SET log_disconnections TO off");
65+
psql_or_bail(2, "ALTER SYSTEM SET log_lock_waits TO off");
66+
psql_or_bail(2, "ALTER SYSTEM SET log_statement_stats TO off");
5367
psql_or_bail(2, "SELECT pg_reload_conf()");
5468

55-
note "Initialize pgbench database and wait for initial sync on N1 and N2 ...";
69+
psql_or_bail(3, "ALTER SYSTEM SET log_min_messages TO LOG");
70+
psql_or_bail(3, "ALTER SYSTEM SET log_statement TO none");
71+
psql_or_bail(3, "ALTER SYSTEM SET log_checkpoints TO off");
72+
psql_or_bail(3, "ALTER SYSTEM SET log_connections TO off");
73+
psql_or_bail(3, "ALTER SYSTEM SET log_disconnections TO off");
74+
psql_or_bail(3, "ALTER SYSTEM SET log_lock_waits TO off");
75+
psql_or_bail(3, "ALTER SYSTEM SET log_statement_stats TO off");
76+
psql_or_bail(3, "SELECT pg_reload_conf()");
77+
78+
print STDERR "Initialize pgbench database and wait for initial sync on N1 and N2 ...\n";
5679
system_or_bail "$pg_bin/pgbench", '-i', '-s', 1, '-h', $host,
5780
'-p', $node_ports->[0], '-U', $db_user, $dbname;
5881
# Wait until tables and data will be sent to N2
@@ -87,11 +110,11 @@
87110
$pgbench_handle2->pump();
88111

89112
# Warming up ...
90-
note "warming up pgbench for 5s";
113+
print STDERR "warming up pgbench for 5s\n";
91114
sleep(5);
92-
note "done warmup";
115+
print STDERR "done warmup\n";
93116

94-
note "Add N3 into highly loaded configuration of N1 and N2 ...";
117+
print STDERR "Add N3 into highly loaded configuration of N1 and N2 ...\n";
95118
psql_or_bail(3,
96119
"CALL spock.add_node(src_node_name := 'n1',
97120
src_dsn := 'host=$host dbname=$dbname port=$node_ports->[0] user=$db_user',
@@ -107,24 +130,24 @@
107130
$alive = kill 0, $pid;
108131
ok($alive eq 1, "pgbench load to N2 still exists");
109132

110-
note "Kill pgbench process to reduce test time";
133+
print STDERR "Kill pgbench process to reduce test time\n";
111134
$pgbench_handle1->pump();
112135
$pgbench_handle2->pump();
113136
$pgbench_handle1->kill_kill;
114137
$pgbench_handle2->kill_kill;
115138

116-
note "Check if pgbench finalised correctly";
139+
print STDERR "Check if pgbench finalised correctly\n";
117140
$pgbench_handle1->finish;
118141
$pgbench_handle2->finish;
119-
note "##### output of pgbench #####";
120-
note $pgbench_stdout1;
121-
note $pgbench_stdout2;
122-
note "##### end of output #####";
142+
print STDERR "##### output of pgbench #####\n";
143+
print STDERR $pgbench_stdout1;
144+
print STDERR $pgbench_stdout2;
145+
print STDERR "##### end of output #####\n";
123146

124147
psql_or_bail(1, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
125148
psql_or_bail(2, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
126149

127-
note "Wait until the end of replication ..";
150+
print STDERR "Wait until the end of replication ..\n";
128151
$lag = scalar_query(1, "SELECT * FROM wait_subscription(remote_node_name := 'n2',
129152
report_it := true,
130153
timeout := '10 minutes',
@@ -146,14 +169,14 @@
146169
delay := 1.)");
147170
ok($lag <= 0, "Replication N2 => N3 has been finished successfully");
148171

149-
note "Check the data consistency.";
172+
print STDERR "Check the data consistency.\n";
150173
$ret1 = scalar_query(1, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
151-
note "The N1's pgbench_accounts aggregates: $ret1";
174+
print STDERR "The N1's pgbench_accounts aggregates: $ret1\n";
152175
$ret2 = scalar_query(2, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
153-
note "The N2's pgbench_accounts aggregates: $ret2";
176+
print STDERR "The N2's pgbench_accounts aggregates: $ret2\n";
154177
$ret3 = scalar_query(3, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
155-
note "The N3's pgbench_accounts aggregates: $ret3";
156178

179+
print STDERR "The N3's pgbench_accounts aggregates: $ret3\n";
157180
ok($ret1 eq $ret3, "Equality of the data on N1 and N3 is confirmed");
158181
ok($ret2 eq $ret3, "Equality of the data on N2 and N3 is confirmed");
159182

@@ -166,7 +189,7 @@
166189
# a 'keepalive' message. Hence, we can't clearly detect the end of the process.
167190
# So, nudge it, employing the sync_event machinery.
168191
psql_or_bail(3, "SELECT spock.sync_event()");
169-
note "Wait for the end of N3->N1, N3->N2 decoding process that means the actual start of LR";
192+
print STDERR "Wait for the end of N3->N1, N3->N2 decoding process that means the actual start of LR\n";
170193
psql_or_bail(3, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
171194

172195
# Nothing sensitive should be awaited here, just to be sure.
@@ -181,5 +204,210 @@
181204
delay := 1.)");
182205
ok($lag <= 0, "Replication N1 => N2 has been finished successfully");
183206

207+
# ##############################################################################
208+
#
209+
# Try to update an IDENTITY column (pgbench_accounts.aid). This is the case of
210+
# 2n congiguration. With non-intersecting load we don't anticipate any issues
211+
# with this test. It is written to prepare infrastructure and for demonstration
212+
# purposes.
213+
#
214+
# ##############################################################################
215+
216+
$zodan_sql = '../../samples/Z0DAN/zodremove.sql';
217+
psql_or_bail(3, "\\i $zodan_sql");
218+
psql_or_bail(3, "CALL spock.remove_node(
219+
target_node_name := 'n3',
220+
target_node_dsn := 'host=$host dbname=$dbname port=$node_ports->[2] user=$db_user',
221+
verbose_mode := true)");
222+
system_or_bail "$pg_bin/pgbench", '-i', '-I', 'd', '-h', $host, '-p', $node_ports->[2], '-U', $db_user, $dbname;
223+
psql_or_bail(3, 'DROP FUNCTION wait_subscription');
224+
psql_or_bail(3, 'VACUUM FULL');
225+
226+
# To improve TPS
227+
psql_or_bail(1, "CREATE UNIQUE INDEX ON pgbench_accounts(abs(aid))");
228+
$lag = scalar_query(2, "SELECT * FROM wait_subscription(remote_node_name := 'n1',
229+
report_it := true,
230+
timeout := '10 minutes',
231+
delay := 1.)");
232+
ok($lag <= 0, "Wait replication of the CREATE INDEX");
233+
234+
# Create non-intersecting load for nodes N1 and N2.
235+
# Test duration should be enough to cover all the Z0DAN stages. We will kill
236+
# pgbench immediately after the N3 is attached.
237+
$load1 = '../../samples/Z0DAN/n1_1.pgb';
238+
$load2 = '../../samples/Z0DAN/n2_1.pgb';
239+
$pgbench_stdout1='';
240+
$pgbench_stderr1='';
241+
$pgbench_stdout2='';
242+
$pgbench_stderr2='';
243+
$pgbench_handle1 = IPC::Run::start(
244+
[ "$pg_bin/pgbench", '-n', '-f', $load1, '-T', 80, '-j', 3, '-c', 3,
245+
'-h', $host, '-p', $node_ports->[0], '-U', $db_user, $dbname],
246+
'>', \$pgbench_stdout1, '2>', \$pgbench_stderr1);
247+
$pgbench_handle2 = IPC::Run::start(
248+
[ "$pg_bin/pgbench", '-n', '-f', $load2, '-T', 80, '-j', 3, '-c', 3,
249+
'-h', $host, '-p', $node_ports->[1], '-U', $db_user, $dbname],
250+
'>', \$pgbench_stdout2, '2>', \$pgbench_stderr2);
251+
$pgbench_handle1->pump();
252+
$pgbench_handle2->pump();
253+
254+
# Warming up ...
255+
print STDERR "warming up pgbench for 20s\n";
256+
sleep(20);
257+
print STDERR "done warmup\n";
258+
259+
# Ensure that pgbench load lasts longer than the Z0DAN protocol.
260+
$pid = $pgbench_handle1->{KIDS}[0]{PID};
261+
$alive = kill 0, $pid;
262+
ok($alive eq 1, "pgbench load to N1 still exists");
263+
$pid = $pgbench_handle2->{KIDS}[0]{PID};
264+
$alive = kill 0, $pid;
265+
ok($alive eq 1, "pgbench load to N2 still exists");
266+
267+
print STDERR "Kill pgbench process to reduce test time";
268+
$pgbench_handle1->pump();
269+
$pgbench_handle2->pump();
270+
$pgbench_handle1->kill_kill;
271+
$pgbench_handle2->kill_kill;
272+
273+
print STDERR "Check if pgbench finalised correctly\n";
274+
$pgbench_handle1->finish;
275+
$pgbench_handle2->finish;
276+
print STDERR "##### output of pgbench #####\n";
277+
print STDERR "$pgbench_stdout1";
278+
print STDERR "$pgbench_stdout2";
279+
print STDERR "##### end of output #####\n";
280+
281+
psql_or_bail(1, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
282+
psql_or_bail(2, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
283+
284+
print STDERR "Wait until the end of replication ..\n";
285+
$lag = scalar_query(1, "SELECT * FROM wait_subscription(remote_node_name := 'n2',
286+
report_it := true,
287+
timeout := '10 minutes',
288+
delay := 1.)");
289+
ok($lag <= 0, "Replication N2 => N1 has been finished successfully");
290+
$lag = scalar_query(2, "SELECT * FROM wait_subscription(remote_node_name := 'n1',
291+
report_it := true,
292+
timeout := '10 minutes',
293+
delay := 1.)");
294+
ok($lag <= 0, "Replication N1 => N2 has been finished successfully");
295+
296+
print STDERR "Check the data consistency.\n";
297+
$ret1 = scalar_query(1, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
298+
print STDERR "The N1's pgbench_accounts aggregates: $ret1\n";
299+
$ret2 = scalar_query(2, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
300+
print STDERR "The N2's pgbench_accounts aggregates: $ret2\n";
301+
302+
ok($ret1 eq $ret2, "Equality of the data on N1 and N2 is confirmed");
303+
304+
# ##############################################################################
305+
#
306+
# Try to update an IDENTITY column in case of 3n configuration.
307+
# It works precisely like the previous one, but node 3 should sync its state
308+
# with loaded nodes in real time under the pgbench load.
309+
# Here, we also inderectly test how the Z0DAN add/remove protocol works in case
310+
# of multiple adding cycles.
311+
#
312+
# ##############################################################################
313+
314+
$pgbench_stdout1='';
315+
$pgbench_stderr1='';
316+
$pgbench_stdout2='';
317+
$pgbench_stderr2='';
318+
$pgbench_handle1 = IPC::Run::start(
319+
[ "$pg_bin/pgbench", '-n', '-f', $load1, '-T', 80, '-j', 3, '-c', 3,
320+
'-h', $host, '-p', $node_ports->[0], '-U', $db_user, $dbname],
321+
'>', \$pgbench_stdout1, '2>', \$pgbench_stderr1);
322+
$pgbench_handle2 = IPC::Run::start(
323+
[ "$pg_bin/pgbench", '-n', '-f', $load2, '-T', 80, '-j', 3, '-c', 3,
324+
'-h', $host, '-p', $node_ports->[1], '-U', $db_user, $dbname],
325+
'>', \$pgbench_stdout2, '2>', \$pgbench_stderr2);
326+
$pgbench_handle1->pump();
327+
$pgbench_handle2->pump();
328+
329+
# Warming up ...
330+
print STDERR "warming up pgbench for 5s\n";
331+
sleep(5);
332+
print STDERR "done warmup\n";
333+
334+
print STDERR "Add N3 into highly loaded configuration of N1 and N2 ...";
335+
psql_or_bail(3,
336+
"CALL spock.add_node(src_node_name := 'n1',
337+
src_dsn := 'host=$host dbname=$dbname port=$node_ports->[0] user=$db_user',
338+
new_node_name := 'n3',
339+
new_node_dsn := 'host=$host dbname=$dbname port=$node_ports->[2] user=$db_user',
340+
verb := false);");
341+
342+
# ...
343+
344+
# Ensure that pgbench load lasts longer than the Z0DAN protocol.
345+
$pid = $pgbench_handle1->{KIDS}[0]{PID};
346+
$alive = kill 0, $pid;
347+
ok($alive eq 1, "pgbench load to N1 still exists");
348+
$pid = $pgbench_handle2->{KIDS}[0]{PID};
349+
$alive = kill 0, $pid;
350+
ok($alive eq 1, "pgbench load to N2 still exists");
351+
352+
print STDERR "Kill pgbench process to reduce test time\n";
353+
$pgbench_handle1->pump();
354+
$pgbench_handle2->pump();
355+
$pgbench_handle1->kill_kill;
356+
$pgbench_handle2->kill_kill;
357+
358+
print STDERR "Check if pgbench finalised correctly\n";
359+
$pgbench_handle1->finish;
360+
$pgbench_handle2->finish;
361+
print STDERR "##### output of pgbench #####\n";
362+
print STDERR $pgbench_stdout1;
363+
print STDERR $pgbench_stdout2;
364+
print STDERR "##### end of output #####\n";
365+
366+
# We need such a trick: the wait_slot_confirm_lsn routine gets Last Committed
367+
# LSN position and waits for the confirmations on the remote side. But if there
368+
# a conflict has happened, feedback will not be sent and we will wait forever.
369+
psql_or_bail(1, "SELECT spock.sync_event()");
370+
psql_or_bail(2, "SELECT spock.sync_event()");
371+
psql_or_bail(3, "SELECT spock.sync_event()");
372+
373+
psql_or_bail(1, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
374+
psql_or_bail(2, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
375+
376+
print STDERR "Wait for the end of N3->N1, N3->N2 decoding process that means the actual start of LR\n";
377+
psql_or_bail(3, 'SELECT spock.wait_slot_confirm_lsn(NULL, NULL)');
378+
379+
print STDERR "Wait until the end of replication ..\n";
380+
$lag = scalar_query(1, "SELECT * FROM wait_subscription(remote_node_name := 'n2',
381+
report_it := true,
382+
timeout := '10 minutes',
383+
delay := 1.)");
384+
ok($lag <= 0, "Replication N2 => N1 has been finished successfully");
385+
$lag = scalar_query(2, "SELECT * FROM wait_subscription(remote_node_name := 'n1',
386+
report_it := true,
387+
timeout := '10 minutes',
388+
delay := 1.)");
389+
ok($lag <= 0, "Replication N1 => N2 has been finished successfully");
390+
$lag = scalar_query(3, "SELECT * FROM wait_subscription(remote_node_name := 'n1',
391+
report_it := true,
392+
timeout := '10 minutes',
393+
delay := 1.)");
394+
ok($lag <= 0, "Replication N1 => N3 has been finished successfully");
395+
$lag = scalar_query(3, "SELECT * FROM wait_subscription(remote_node_name := 'n2',
396+
report_it := true,
397+
timeout := '10 minutes',
398+
delay := 1.)");
399+
ok($lag <= 0, "Replication N2 => N3 has been finished successfully");
400+
401+
print STDERR "Check the data consistency.\n";
402+
$ret1 = scalar_query(1, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
403+
print STDERR "The N1's pgbench_accounts aggregates: $ret1\n";
404+
$ret2 = scalar_query(2, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
405+
print STDERR "The N2's pgbench_accounts aggregates: $ret2\n";
406+
$ret3 = scalar_query(3, "SELECT sum(abalance), sum(aid), count(*) FROM pgbench_accounts");
407+
print STDERR "The N3's pgbench_accounts aggregates: $ret3\n";
408+
409+
ok($ret1 eq $ret2, "Equality of the data on N1 and N2 is confirmed");
410+
ok($ret1 eq $ret3, "Equality of the data on N1 and N3 is confirmed");
411+
184412
# Cleanup will be handled by SpockTest.pm END block
185413
# No need for done_testing() when using a test plan

0 commit comments

Comments
 (0)