Skip to content

Commit 9c7ec27

Browse files
committed
When mostly idle, periodically ask for missed records
Signed-off-by: Michael Ponomarenko <mponomarenko@bloomberg.net>
1 parent 96912b8 commit 9c7ec27

File tree

5 files changed

+71
-4
lines changed

5 files changed

+71
-4
lines changed

bdb/rep.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5564,8 +5564,9 @@ void *watcher_thread(void *arg)
55645564
int nrecs;
55655565
// if we're not seeing records or seeing only very old records, poke replication
55665566
// to request records in the range we expect
5567-
if (gbl_nudge_replication_when_idle && bdb_state->dbenv->get_rep_lsns(bdb_state->dbenv, &next_lsn, &gap_lsn, &nrecs) == 0) {
5568-
if (nrecs == 0 && !IS_ZERO_LSN(gap_lsn)) {
5567+
if (gbl_nudge_replication_when_idle > 0 &&
5568+
bdb_state->dbenv->get_rep_lsns(bdb_state->dbenv, &next_lsn, &gap_lsn, &nrecs) == 0) {
5569+
if (nrecs < gbl_nudge_replication_when_idle && !IS_ZERO_LSN(gap_lsn)) {
55695570
DB_LSN tmp_lsn = {0};
55705571
DBT max_lsn_dbt = {0};
55715572
LOGCOPY_TOLSN(&tmp_lsn, &gap_lsn);

db/db_tunables.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ int64_t gbl_test_tunable_int64_limit = INT64_MAX;
599599
int64_t gbl_test_tunable_int64_signed_limit = INT64_MAX;
600600

601601
int gbl_always_request_log_req = 0;
602-
int gbl_nudge_replication_when_idle = 0;
602+
int gbl_nudge_replication_when_idle = 100;
603603

604604
int parse_int64(const char *value, int64_t *num);
605605

db/db_tunables.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2561,5 +2561,7 @@ REGISTER_TUNABLE("iam_dbname",
25612561
NULL, NULL, NULL);
25622562
REGISTER_TUNABLE("queue_nonodh_scan_limit", "For comdb2_queues, stop queue scan at this depth (Default: 10000)", TUNABLE_INTEGER, &gbl_nonodh_queue_scan_limit, 0, NULL, NULL, NULL, NULL);
25632563
REGISTER_TUNABLE("always_request_log_req", "Always request the next log record on replicant if there is a gap (default: off)", TUNABLE_BOOLEAN, &gbl_always_request_log_req, 0, NULL, NULL, NULL, NULL);
2564-
REGISTER_TUNABLE("nudge_replication_when_idle", "If we haven't seen any replication events in a while, request some (default: off)", TUNABLE_BOOLEAN, &gbl_nudge_replication_when_idle, 0, NULL, NULL, NULL, NULL);
2564+
REGISTER_TUNABLE("nudge_replication_when_idle",
2565+
"If we haven't seen any replication events in a while, request some (default: 100)", TUNABLE_INTEGER,
2566+
&gbl_nudge_replication_when_idle, 0, NULL, NULL, NULL, NULL);
25652567
#endif /* _DB_TUNABLES_H */

tests/catchup_idle.test/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Pull in the shared test-case rules: from the parent directory when
# TESTSROOTDIR is empty, otherwise from $(TESTSROOTDIR).
ifeq ($(TESTSROOTDIR),)
2+
include ../testcase.mk
3+
else
4+
include $(TESTSROOTDIR)/testcase.mk
5+
endif
6+
# Export a default TEST_TIMEOUT of 60m unless a non-empty value is already set.
ifeq ($(TEST_TIMEOUT),)
7+
export TEST_TIMEOUT=60m
8+
endif

tests/catchup_idle.test/runit

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env bash
# catchup_idle test driver.
#
# Usage: runit <dbname>
# Reads TESTSROOTDIR to locate the shared test helper scripts.

# Abort early if this script does not even parse. This must be `||`: with a
# single `|` the `exit 1` runs in a pipeline subshell and never stops the script.
bash -n "$0" || exit 1

# Load the shared helpers before anything else. failexit is not defined in this
# script, so it is presumably provided by runit_common.sh and has to be in scope
# before the argument check below can call it.
source "${TESTSROOTDIR}/tools/runit_common.sh"
source "${TESTSROOTDIR}/tools/waitmach.sh"

dbnm=$1

if [[ -z "$dbnm" ]]; then
    failexit "need a DB name"
fi
12+
13+
#######################################
# Poll comdb2_cluster through the master until a node reports the wanted
# coherent_state.
# Globals:
#   CDB2SQL_EXE  (read) client binary; falls back to `cdb2sql` on PATH if unset
#   CDB2_OPTIONS (read) extra client options, word-split on purpose
#   DBNAME       (read) database to query
# Arguments:
#   $1 - host whose state is being watched
#   $2 - desired coherent_state (compared exactly, case-sensitive)
#   $3 - host to send the query to (the master)
# Outputs:
#   One "Waiting for ..." line on stdout per unsuccessful poll.
# Returns:
#   0 once the state matches; loops indefinitely (1s between polls) otherwise.
#######################################
wait_for_coherent_state() {
    local host=$1
    local desired_state=$2
    local master=$3
    local state=""
    while :; do
        # Use the same client binary as the rest of the script instead of
        # whatever `cdb2sql` happens to be on PATH.
        # CDB2_OPTIONS stays unquoted: it may carry several separate options.
        state=$("${CDB2SQL_EXE:-cdb2sql}" --tabs --host "${master}" ${CDB2_OPTIONS} "${DBNAME}" default "select coherent_state from comdb2_cluster where host='$host'")
        if [[ "$state" == "$desired_state" ]]; then
            break
        fi
        echo "Waiting for $host to become $desired_state (currently $state)"
        sleep 1
    done
}
27+
28+
29+
30+
# Main test flow: fill a table with large blobs, artificially delay one
# replicant while a bulk delete runs through another, then check that the
# delayed replicant goes INCOHERENT and becomes coherent again.
# CDB2_OPTIONS is left unquoted throughout: it may carry several options.

# Start from an empty table.
"$CDB2SQL_EXE" ${CDB2_OPTIONS} "${DBNAME}" default "create table if not exists t(a int, b blob)"
"$CDB2SQL_EXE" ${CDB2_OPTIONS} "${DBNAME}" default "truncate t"

echo "Populating"
for ((i = 1; i <= 3000; i++)); do
    "$CDB2SQL_EXE" ${CDB2_OPTIONS} "${DBNAME}" default "insert into t values($i, randomblob(700000))"
done

# host    - coherent replicant that will be delayed
# delhost - a different coherent replicant that the delete is sent to
# master  - queried for the cluster's view of each node's coherent_state
host=$("$CDB2SQL_EXE" --tabs ${CDB2_OPTIONS} "${DBNAME}" default "select host from comdb2_cluster where is_master='N' and coherent_state='coherent' limit 1")
delhost=$("$CDB2SQL_EXE" --tabs ${CDB2_OPTIONS} "${DBNAME}" default "select host from comdb2_cluster where is_master='N' and coherent_state='coherent' and host != '$host' limit 1")
master=$("$CDB2SQL_EXE" --tabs ${CDB2_OPTIONS} "${DBNAME}" default "select host from comdb2_cluster where is_master='Y'")

# Without all three hosts the ssh calls below target an empty host and the
# wait loops would spin until the harness timeout, so fail fast instead.
if [[ -z "$host" || -z "$delhost" || -z "$master" ]]; then
    failexit "need a master and two coherent replicants (host='$host' delhost='$delhost' master='$master')"
fi

echo "Using node $host"
# Turn on the artificial delay on the chosen node, start the bulk delete in
# the background, and lift the delay again after 10 seconds.
ssh "$host" "${CDB2SQL_EXE} -admin ${CDB2_OPTIONS} ${DBNAME} @localhost 'put tunable rep_debug_delay 10'"
"$CDB2SQL_EXE" --host "$delhost" ${CDB2_OPTIONS} "${DBNAME}" default "delete from t limit 3000" &
delete_pid=$!
sleep 10
ssh "$host" "${CDB2SQL_EXE} -admin ${CDB2_OPTIONS} ${DBNAME} @localhost 'put tunable rep_debug_delay 0'"

# we expect the node we artificially delayed to become incoherent first, then coherent
wait_for_coherent_state "$host" "INCOHERENT" "$master"
wait_for_coherent_state "$host" "coherent" "$master"

# wait for the node to actually become available (which may happen after it becomes coherent)
# if we don't do this, then the test will fail with 'db unavailable at finish'.
waitmach "$host"

# Reap the background delete so it is not still in flight when the test ends.
# Its status is reported but does not decide the test result.
wait "$delete_pid" || echo "background delete exited non-zero" >&2

echo "Success"

0 commit comments

Comments
 (0)