Skip to content

Commit f3ad4da

Browse files
committed
Reduce physrep-sql against the metadb
Signed-off-by: Mark Hannum <mhannum@bloomberg.net>
1 parent 02c52b5 commit f3ad4da

File tree

16 files changed

+39
-86
lines changed

16 files changed

+39
-86
lines changed

db/db_tunables.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,7 @@ extern int gbl_physrep_update_registry_interval;
522522
extern int gbl_physrep_i_am_metadb;
523523
extern int gbl_physrep_keepalive_v2;
524524
extern int gbl_physrep_keepalive_freq_sec;
525+
extern int gbl_physrep_slow_replicant_check_freq_sec;
525526
extern int gbl_physrep_max_candidates;
526527
extern int gbl_physrep_reconnect_penalty;
527528
extern int gbl_physrep_reconnect_interval;

db/db_tunables.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,7 +1821,7 @@ REGISTER_TUNABLE("tranlog_incoherent_timeout", "Timeout in seconds for incoheren
18211821
TUNABLE_INTEGER, &gbl_tranlog_incoherent_timeout, 0, NULL, NULL, NULL, NULL);
18221822
REGISTER_TUNABLE("tranlog_maxpoll", "Tranlog timeout in seconds for blocking poll. (Default: 60)", TUNABLE_INTEGER,
18231823
&gbl_tranlog_maxpoll, 0, NULL, NULL, NULL, NULL);
1824-
REGISTER_TUNABLE("physrep_check_minlog_freq_sec", "Check the minimum log number to keep this often. (Default: 10)",
1824+
REGISTER_TUNABLE("physrep_check_minlog_freq_sec", "Check the minimum log number to keep this often. (Default: 60)",
18251825
TUNABLE_INTEGER, &gbl_physrep_check_minlog_freq_sec, 0, NULL, NULL, NULL, NULL);
18261826
REGISTER_TUNABLE("physrep_debug", "Print extended physrep trace. (Default: off)", TUNABLE_BOOLEAN, &gbl_physrep_debug,
18271827
0, NULL, NULL, NULL, NULL);
@@ -1830,10 +1830,8 @@ REGISTER_TUNABLE("physrep_exit_on_invalid_logstream", "Exit physreps on invalid
18301830
REGISTER_TUNABLE("physrep_fanout",
18311831
"Maximum number of physical replicants that a node can service (Default: 8)",
18321832
TUNABLE_INTEGER, &gbl_physrep_fanout, 0, NULL, NULL, NULL, NULL);
1833-
REGISTER_TUNABLE("physrep_hung_replicant_check_freq_sec",
1834-
"Check for hung physical replicant this often. (Default: 10)",
1835-
TUNABLE_INTEGER, &gbl_physrep_hung_replicant_check_freq_sec, 0, NULL,
1836-
NULL, NULL, NULL);
1833+
REGISTER_TUNABLE("physrep_hung_replicant_check_freq_sec", "Check for hung physical replicant this often. (Default: 60)",
1834+
TUNABLE_INTEGER, &gbl_physrep_hung_replicant_check_freq_sec, 0, NULL, NULL, NULL, NULL);
18371835
REGISTER_TUNABLE("physrep_hung_replicant_threshold",
18381836
"Report if the physical replicant has been inactive for this duration. (Default: 60)",
18391837
TUNABLE_INTEGER, &gbl_physrep_hung_replicant_threshold, 0, NULL,
@@ -1856,13 +1854,14 @@ REGISTER_TUNABLE("physrep_i_am_metadb", "I am physical replication metadb (Defau
18561854
REGISTER_TUNABLE("physrep_keepalive_v2", "Use version 2 of keepalive which includes first lsn. (Default: off)",
18571855
TUNABLE_BOOLEAN, &gbl_physrep_keepalive_v2, 0, NULL, NULL, NULL, NULL);
18581856
REGISTER_TUNABLE("physrep_keepalive_freq_sec",
1859-
"Periodically send lsn to source node after this interval. (Default: 10)", TUNABLE_INTEGER,
1857+
"Periodically send lsn to source node after this interval. (Default: 60)", TUNABLE_INTEGER,
18601858
&gbl_physrep_keepalive_freq_sec, 0, NULL, NULL, NULL, NULL);
1859+
REGISTER_TUNABLE("physrep_slow_replicant_check_freq_sec", "Check for slow physical replicant this often. (Default: 60)",
1860+
TUNABLE_INTEGER, &gbl_physrep_slow_replicant_check_freq_sec, 0, NULL, NULL, NULL, NULL);
18611861
REGISTER_TUNABLE("physrep_max_candidates",
18621862
"Maximum number of candidates that should be returned to a "
18631863
"new physical replicant during registration. (Default: 6)",
1864-
TUNABLE_INTEGER, &gbl_physrep_max_candidates, 0, NULL,
1865-
NULL, NULL, NULL);
1864+
TUNABLE_INTEGER, &gbl_physrep_max_candidates, 0, NULL, NULL, NULL, NULL);
18661865
REGISTER_TUNABLE("physrep_metadb_host", "List of physical replication metadb cluster hosts.", TUNABLE_STRING,
18671866
&gbl_physrep_metadb_host, READONLY, NULL, NULL, NULL, NULL);
18681867
REGISTER_TUNABLE("physrep_metadb_name", "Physical replication metadb cluster name.",
@@ -1923,7 +1922,7 @@ REGISTER_TUNABLE("revsql_debug",
19231922
TUNABLE_BOOLEAN, &gbl_revsql_debug, EXPERIMENTAL | INTERNAL,
19241923
NULL, NULL, NULL, NULL);
19251924
REGISTER_TUNABLE("revsql_host_refresh_freq_sec", "The frequency at which the "
1926-
"reverse connection host list will be refreshed (Default: 5secs)",
1925+
"reverse connection host list will be refreshed (Default: 60 secs)",
19271926
TUNABLE_INTEGER, &gbl_revsql_host_refresh_freq_sec, EXPERIMENTAL | INTERNAL,
19281927
NULL, NULL, NULL, NULL);
19291928
REGISTER_TUNABLE("admin_revsql", "Run revsql sessions as admin. (Default: Off)", TUNABLE_BOOLEAN, &gbl_admin_revsql, 0,

db/phys_rep.c

Lines changed: 10 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,10 @@ int gbl_physrep_fanout = 8;
6767
int gbl_physrep_max_candidates = 6;
6868
int gbl_physrep_max_pending_replicants = 10;
6969
int gbl_deferred_phys_flag = 0;
70-
int gbl_physrep_source_nodes_refresh_freq_sec = 10;
71-
int gbl_physrep_slow_replicant_check_freq_sec = 10;
72-
int gbl_physrep_keepalive_freq_sec = 10;
73-
int gbl_physrep_check_minlog_freq_sec = 10;
74-
int gbl_physrep_hung_replicant_check_freq_sec = 10;
70+
int gbl_physrep_slow_replicant_check_freq_sec = 60;
71+
int gbl_physrep_keepalive_freq_sec = 60;
72+
int gbl_physrep_hung_replicant_check_freq_sec = 60;
73+
int gbl_physrep_check_minlog_freq_sec = 600;
7574
int gbl_physrep_hung_replicant_threshold = 60;
7675
int gbl_physrep_revconn_check_interval = 60;
7776
int gbl_physrep_update_registry_interval = 60;
@@ -1107,27 +1106,10 @@ int gbl_physrep_keepalive_v2 = 0;
11071106

11081107
static int send_keepalive_int(cdb2_hndl_tp *metadb)
11091108
{
1110-
int rc = 0, use_v2 = 0;
1109+
int rc = 0, use_v2 = gbl_physrep_keepalive_v2;
11111110
char cmd[600];
11121111
LOG_INFO info;
11131112

1114-
/* TODO: remove after v2 enabled & new-schema is everywhere */
1115-
if (gbl_physrep_keepalive_v2) {
1116-
rc = snprintf(
1117-
cmd, sizeof(cmd),
1118-
"select count(*) from comdb2_columns where tablename='comdb2_physreps' and columnname='firstfile'");
1119-
ATOMIC_ADD64(gbl_physrep_metadb_sql_count, 1);
1120-
rc = cdb2_run_statement(metadb, cmd);
1121-
if (rc != CDB2_OK) {
1122-
physrep_logmsg(LOGMSG_ERROR, "%s:%d Failed to execute cmd %s (rc: %d)\n", __func__, __LINE__, cmd, rc);
1123-
return rc;
1124-
}
1125-
if (cdb2_next_record(metadb) == CDB2_OK) {
1126-
int64_t val = *(int64_t *)cdb2_column_value(metadb, 0);
1127-
use_v2 = (val != 0) ? 1 : 0;
1128-
}
1129-
}
1130-
11311113
info = get_last_lsn(thedb->bdb_env);
11321114
if (use_v2) {
11331115
LOG_INFO first_info;
@@ -1573,14 +1555,15 @@ static void *physrep_worker(void *args)
15731555
physrep_logmsg(LOGMSG_USER, "%s:%d Reverse connection check: do-revcon=%d, is-revcon=%d\n",
15741556
__func__, __LINE__, do_revconn, is_revconn);
15751557
}
1558+
1559+
/* The call might have failed. That's okay, don't hammer metadb */
1560+
last_revconn_check = comdb2_time_epoch();
1561+
15761562
if (do_revconn == -1) {
1577-
logmsg(LOGMSG_ERROR, "%s:%d Failed to contact physrep metadb- keeping do_revconn the same: %d\n",
1563+
logmsg(LOGMSG_DEBUG, "%s:%d Failed to contact physrep metadb- keeping do_revconn the same: %d\n",
15781564
__func__, __LINE__, is_revconn);
15791565
} else {
15801566

1581-
/* Only update timestamp on successful check */
1582-
last_revconn_check = comdb2_time_epoch();
1583-
15841567
if ((do_revconn && !is_revconn) || (!do_revconn && is_revconn)) {
15851568
logmsg(LOGMSG_USER, "Revconn changed, do_revconn=%d, is_revconn=%d\n", do_revconn, is_revconn);
15861569
close_repl_connection(repl_db_cnct, repl_db, __func__, __LINE__);

db/reverse_conn.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ int gbl_revsql_allow_command_exec;
6868
int gbl_revsql_debug = 0;
6969
int gbl_revsql_cdb2_debug;
7070
// 'reverse-connection host' list refresh frequency
71-
int gbl_revsql_host_refresh_freq_sec = 5;
71+
int gbl_revsql_host_refresh_freq_sec = 60;
7272
// 'reverse-connection' worker's new connection attempt frequency
7373
int gbl_revsql_connect_freq_sec = 5;
7474

docs/pages/operating/physical_replication.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ CREATE TABLE comdb2_physrep_sources(dbname CSTRING(60),
171171
## Tunables
172172

173173
* blocking_physrep: The `SELECT .. FROM comdb2_transaction_logs` query executed by physical replicants blocks for the next log record. (Default: `false`)
174-
* physrep_check_minlog_freq_sec: Check the minimum log number to keep this often. (Default: `10`)
174+
* physrep_check_minlog_freq_sec: Check the minimum log number to keep this often. (Default: `600`)
175175
* physrep_debug: Print extended physrep trace. (Default: `off`)
176176
* physrep_exit_on_invalid_logstream: Exit physreps on invalid logstream. (Default: off)
177177
* physrep_fanout: Maximum number of physical replicants that a node can service (Default: `8`)
@@ -189,9 +189,9 @@ CREATE TABLE comdb2_physrep_sources(dbname CSTRING(60),
189189
* physrep_source_host: List of physical replication source cluster hosts.
190190
* revsql_allow_command_execution: Allow processing and execution of commands that arrive over the `reverse connection` as part of the request. This is mostly intended for testing. (Default: off)
191191
* revsql_cdb2_debug: Print extended reverse-sql cdb2-related trace. (Default: off)
192-
* revsql_connect_freq_sec: This node will attempt to `reverse connect` to the remote host at this frequency. (Default: 5secs)
192+
* revsql_connect_freq_sec: This node will attempt to `reverse connect` to the remote host at this frequency. (Default: 5 secs)
193193
* revsql_debug: Print extended reverse-sql trace. (Default: off)
194-
* revsql_host_refresh_freq_sec: The frequency at which the reverse connection host list will be refreshed. (Default: 5secs)
194+
* revsql_host_refresh_freq_sec: The frequency at which the reverse connection host list will be refreshed. (Default: 60 secs)
195195
* revsql_force_rte: Force rte-mode for all reverse connections. (Default: on)
196196
* connect_remote_rte: Force rte-mode for both fdb and revsql connections. (Default: off)
197197

lua/lib/physrep_get_reverse_hosts.lua

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
local function main(dbname, hostname)
66
db:begin()
77

8-
-- Check whether 'comdb2_physrep_sources' table exists
9-
local rs, rc = db:exec("SELECT count(*)=1 AS cnt FROM comdb2_tables WHERE tablename = 'comdb2_physrep_sources'")
10-
local row = rs:fetch()
11-
if row.cnt == 0 then
12-
db:commit()
13-
return
14-
end
15-
168
local rs, rc = db:exec("SELECT dbname, host FROM comdb2_physrep_sources WHERE " ..
179
"source_dbname = '" .. dbname .. "' AND source_host = '" .. hostname .. "'")
1810
local row = rs:fetch()

lua/lib/physrep_get_reverse_hosts_v2.lua

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
local function main(dbname, hostname)
66
db:begin()
77

8-
-- Check whether 'comdb2_physrep_sources' table exists
9-
local rs, rc = db:exec("SELECT count(*)=1 AS cnt FROM comdb2_tables WHERE tablename = 'comdb2_physrep_sources'")
10-
local row = rs:fetch()
11-
if row.cnt == 0 then
12-
db:commit()
13-
return
14-
end
15-
168
local rs, rc = db:exec("SELECT dbname, host FROM comdb2_physrep_sources WHERE " ..
179
"source_dbname = '" .. dbname .. "' AND source_host = '" .. hostname .. "'")
1810
local row = rs:fetch()

lua/lib/physrep_get_revhosts_v2.lua

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
local function main(dbname, hostname, tier, cluster)
66
db:begin()
77

8-
-- Check whether 'comdb2_physrep_sources' table exists
9-
local rs, rc = db:exec("SELECT count(*)=1 AS cnt FROM comdb2_tables WHERE tablename = 'comdb2_physrep_sources'")
10-
local row = rs:fetch()
11-
if row.cnt == 0 then
12-
db:commit()
13-
return
14-
end
15-
168
local sql = ("SELECT dbname, host FROM comdb2_physrep_sources WHERE " ..
179
"source_dbname = '" .. dbname .. "' AND ( source_host = '" .. hostname ..
1810
"' OR source_host = '" .. tier ..

lua/lib/physrep_should_wait_for_con.lua

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,6 @@
1010
local function main(dbname, hostname)
1111
db:begin()
1212

13-
local rs, row = db:exec("SELECT count(*)=1 AS cnt FROM comdb2_tables WHERE tablename = 'comdb2_physrep_sources'")
14-
local row = rs:fetch()
15-
16-
if row.cnt == 0 then
17-
db:emit(row)
18-
db:commit()
19-
return
20-
end
21-
2213
local rs, row = db:exec("SELECT count(*) as cnt FROM comdb2_physrep_sources " ..
2314
" WHERE dbname = '" .. dbname .. "' AND " ..
2415
" host LIKE '" .. hostname .. "'")

lua/lib/physrep_shouldwait_v2.lua

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,6 @@
1010
local function main(dbname, hostname, tier, cluster)
1111
db:begin()
1212

13-
local rs, row = db:exec("SELECT count(*)=1 AS cnt FROM comdb2_tables WHERE tablename = 'comdb2_physrep_sources'")
14-
local row = rs:fetch()
15-
16-
if row.cnt == 0 then
17-
db:emit(row)
18-
db:commit()
19-
return
20-
end
21-
2213
local sql = ("SELECT count(*) as cnt from comdb2_physrep_sources " ..
2314
" WHERE dbname = '" .. dbname .. "' AND " ..
2415
" ( host LIKE '" .. hostname .. "' OR " ..

0 commit comments

Comments
 (0)