Skip to content

Commit 89bfb42

Browse files
committed
zodan.sql: add subscriptions pre-check.
With this commit, we require that each node of the Spock cluster has only enabled subscriptions. There is no algorithmic reason to do that, because when adding the node, we are concerned only about the donor's state and the consistency of its progress table. However, such a strict procedure ensures that users can be sure they add a node to a healthy cluster, take action beforehand if it is not healthy, and that replication doesn't delay WAL cutting. Introduce a TAP test that could be used to check Z0DAN checks. XXX: add_node finishes its job with subscriptions in the 'initialising' state. Do we need to take any action here? Also, fix the annoying WARNING on the 'exception_replay_queue_size'.
1 parent 43d3ec8 commit 89bfb42

File tree

5 files changed

+121
-4
lines changed

5 files changed

+121
-4
lines changed

samples/Z0DAN/zodan.sql

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,6 +1531,40 @@ BEGIN
15311531
END IF;
15321532
END;
15331533

1534+
-- Check: all nodes, included in the cluster, have only enabled subscriptions.
1535+
--
1536+
-- Connect to each node in the cluster and pass through the spock.subscription
1537+
-- table to check subscriptions statuses. Using it we try to avoid cases
1538+
-- when somewhere in the middle a crash or disconnection happens that may
1539+
-- be aggravated by add_node.
1540+
DECLARE
1541+
status_rec record;
1542+
dsn_rec record;
1543+
dsns_sql text;
1544+
sub_status_sql text;
1545+
BEGIN
1546+
dsns_sql := 'SELECT if_dsn,node_name
1547+
FROM spock.node JOIN spock.node_interface
1548+
ON (if_nodeid = node_id)
1549+
WHERE node_id NOT IN (SELECT node_id FROM spock.local_node)';
1550+
sub_status_sql := 'SELECT sub_name, sub_enabled FROM spock.subscription';
1551+
1552+
FOR dsn_rec IN SELECT * FROM dblink(src_dsn, dsns_sql)
1553+
AS t(dsn text, node name)
1554+
LOOP
1555+
FOR status_rec IN SELECT * FROM dblink(dsn_rec.dsn, sub_status_sql)
1556+
AS t(name text, status text)
1557+
LOOP
1558+
IF status_rec.status != 't' THEN
1559+
RAISE EXCEPTION ' [FAILED] %', rpad('Node ' || dsn_rec.node || ' has disabled subscription ' || status_rec.name, 60, ' ');
1560+
ELSIF verb THEN
1561+
RAISE NOTICE ' OK: %', rpad('Node with DSN ' || dsn_rec.dsn || ' has enabled subscription ' || status_rec.name, 120, ' ');
1562+
END IF;
1563+
END LOOP;
1564+
END LOOP;
1565+
RAISE NOTICE ' OK: %', rpad('Checking each Spock node has only active subscriptions', 120, ' ');
1566+
END;
1567+
15341568
-- Validating new node prerequisites
15351569
SELECT count(*) INTO new_exists FROM spock.node WHERE node_name = new_node_name;
15361570
IF new_exists > 0 THEN

src/spock.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,11 +1106,11 @@ _PG_init(void)
11061106
"This setting is deprecated and has no effect. "
11071107
"The replay queue now dynamically allocates memory as needed.",
11081108
&spock_replay_queue_size,
1109-
4194304,
1109+
4,
11101110
0,
1111-
INT_MAX,
1111+
MAX_KILOBYTES / 1024,
11121112
PGC_SIGHUP,
1113-
0,
1113+
GUC_UNIT_MB,
11141114
NULL,
11151115
NULL,
11161116
NULL);

tests/tap/schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ test: 004_non_default_repset
1515
test: 008_rmgr
1616
test: 009_zodan_add_remove_nodes
1717
test: 010_zodan_add_remove_python
18+
test: 012_zodan_basics
1819

1920
# Tests, consuming too much time to be launched on each check:
2021
#test: 011_zodan_sync_third

tests/tap/t/012_zodan_basics.pl

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
use strict;
2+
use warnings;
3+
use Test::More;
4+
use lib '.';
5+
use lib 't';
6+
use SpockTest qw(create_cluster destroy_cluster get_test_config psql_or_bail scalar_query);
7+
8+
my ($result);
9+
10+
create_cluster(3, 'Create basic Spock test cluster');
11+
12+
# Get cluster configuration
13+
my $config = get_test_config();
14+
my $node_count = $config->{node_count};
15+
my $node_ports = $config->{node_ports};
16+
my $host = $config->{host};
17+
my $dbname = $config->{db_name};
18+
my $db_user = $config->{db_user};
19+
my $db_password = $config->{db_password};
20+
my $pg_bin = $config->{pg_bin};
21+
22+
psql_or_bail(2, "SELECT spock.node_drop('n2')");
23+
psql_or_bail(3, "SELECT spock.node_drop('n3')");
24+
psql_or_bail(1, "CREATE EXTENSION snowflake");
25+
psql_or_bail(1, "CREATE EXTENSION lolor");
26+
psql_or_bail(1, "CREATE EXTENSION amcheck");
27+
psql_or_bail(2, "CREATE EXTENSION dblink");
28+
psql_or_bail(3, "CREATE EXTENSION dblink");
29+
psql_or_bail(2, "\\i ../../samples/Z0DAN/zodan.sql");
30+
psql_or_bail(3, "\\i ../../samples/Z0DAN/zodan.sql");
31+
psql_or_bail(1, "CREATE TABLE test(x serial PRIMARY KEY)");
32+
psql_or_bail(1, "INSERT INTO test DEFAULT VALUES");
33+
34+
print STDERR "All supporting stuff has been installed\n";
35+
36+
print STDERR "Call Z0DAN: n2 => n1";
37+
psql_or_bail(2, "
38+
CALL spock.add_node(
39+
src_node_name := 'n1',
40+
src_dsn := 'host=$host dbname=$dbname port=$node_ports->[0] user=$db_user password=$db_password',
41+
new_node_name := 'n2',
42+
new_node_dsn := 'host=$host dbname=$dbname port=$node_ports->[1] user=$db_user password=$db_password',
43+
verb := false
44+
)");
45+
print STDERR "Z0DAN (n2 => n1) has finished the attach process\n";
46+
$result = scalar_query(2, "SELECT x FROM test");
47+
print STDERR "Check result: $result\n";
48+
ok($result eq '1', "Check state of the test table after the attachment");
49+
50+
psql_or_bail(1, "SELECT spock.sub_disable('sub_n1_n2')");
51+
52+
print STDERR "Call Z0DAN: n3 => n2\n";
53+
54+
scalar_query(3, "
55+
CALL spock.add_node(
56+
src_node_name := 'n2',
57+
src_dsn := 'host=$host dbname=$dbname port=$node_ports->[1] user=$db_user password=$db_password',
58+
new_node_name := 'n3', new_node_dsn := 'host=$host dbname=$dbname port=$node_ports->[2] user=$db_user password=$db_password',
59+
verb := false)");
60+
61+
$result = scalar_query(3, "SELECT count(*) FROM spock.local_node");
62+
ok($result eq '0', "N3 is not in the cluster yet");
63+
print STDERR "Z0DAN should fail because of a disabled subscription\n";
64+
65+
psql_or_bail(1, "SELECT spock.sub_enable('sub_n1_n2')");
66+
scalar_query(3, "
67+
CALL spock.add_node(
68+
src_node_name := 'n2',
69+
src_dsn := 'host=$host dbname=$dbname port=$node_ports->[1] user=$db_user password=$db_password',
70+
new_node_name := 'n3', new_node_dsn := 'host=$host dbname=$dbname port=$node_ports->[2] user=$db_user password=$db_password',
71+
verb := true)");
72+
73+
$result = scalar_query(3, "SELECT count(*) FROM spock.local_node");
74+
ok($result eq '1', "N3 is in the cluster");
75+
$result = scalar_query(3, "SELECT x FROM test");
76+
print STDERR "Check result: $result\n";
77+
ok($result eq '1', "Check state of the test table on N3 after the attachment");
78+
print STDERR "Z0DAN should add N3 to the cluster\n";
79+
80+
# Clean up
81+
destroy_cluster('Destroy test cluster');
82+
done_testing();

tests/tap/t/SpockTest.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ sub create_postgresql_conf {
150150
print $conf "spock.exception_behaviour=sub_disable\n";
151151
print $conf "spock.conflict_resolution=last_update_wins\n";
152152
print $conf "track_commit_timestamp=on\n";
153-
print $conf "spock.exception_replay_queue_size=1MB\n";
153+
print $conf "spock.exception_replay_queue_size='1MB'\n";
154154
print $conf "spock.enable_spill=on\n";
155155
print $conf "port=$port\n";
156156
print $conf "listen_addresses='*'\n";

0 commit comments

Comments
 (0)