Commit 4ae5744
MB-35332 [SR]: Set HPS::it correctly on ActiveDM -> PassiveDM conversion
An ActiveDurabilityMonitor can be converted to a PassiveDurabilityMonitor
during cluster topology changes - for example during a rebalance where a
node goes from active to replica. During this conversion the
highPreparedSeqno.iterator is not reset to the correct position - it is
left with the default value of trackedWrites.end(). This is ok if
trackedWrites is empty (as end() is the correct value), but if
trackedWrites is non-empty then it is incorrect, and the next seqno to be
considered for updating HPS will be trackedWrites.begin():

    ActiveDM:   | 1 | 2 |
                      ^   ^
                      |   |
                      |   next
                      hps.iterator

    PassiveDM:  | 1 | 2 |
              ^   ^
              |   |
              hps.iterator
                  next

As a consequence, if the PassiveDM subsequently receives another prepare
and attempts to update the Monotonic highPreparedSeqno.lastWriteSeqno, it
can essentially attempt to set it to a value less than its previous value
- 1 in the example above. This manifests in the following error in the
log:

    ERROR 71: exception occurred in runloop during packet execution.
    Cookie info: [...] - closing connection
    ([ 127.0.0.1:38045 - 127.0.0.1:11209 (<ud>@ns_server</ud>) ]):
    Monotonic<l> (unlabelled) invariant failed: new value (173) breaks
    invariant on current value (174)

Note this is similar to MB-35189 - where the ActiveDM didn't have the node
positions updated correctly on setReplicationTopology.

The fix is to ensure that the highPreparedSeqno (and highCompletedSeqno)
iterators are correctly positioned when creating a PassiveDM from existing
trackedWrites.

Change-Id: Ic4d94316d49f029012d4906e76f10714e48c9f23
Reviewed-on: http://review.couchbase.org/112697
Tested-by: Build Bot <[email protected]>
Reviewed-by: Dave Rigby <[email protected]>
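To illustrate the failure quoted above: the sketch below mimics a monotonic checked value that rejects assignments which go backwards. It is a minimal stand-in, not the actual kv_engine Monotonic<> template, but it reproduces the shape of the logged error when the HPS is asked to move from 174 back to 173.

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Illustrative stand-in for a monotonic checked value: assignment must
    // not go backwards, otherwise it throws - mirroring the "invariant
    // failed" message in the log above.
    template <typename T>
    class MonotonicSketch {
    public:
        explicit MonotonicSketch(T initial) : value(initial) {}

        MonotonicSketch& operator=(T newValue) {
            if (newValue < value) {
                throw std::logic_error(
                        "Monotonic invariant failed: new value (" +
                        std::to_string(newValue) +
                        ") breaks invariant on current value (" +
                        std::to_string(value) + ")");
            }
            value = newValue;
            return *this;
        }

        operator T() const { return value; }

    private:
        T value;
    };

    int main() {
        MonotonicSketch<int64_t> lastWriteSeqno{174};
        try {
            // With the HPS iterator left at the wrong position, the next
            // update re-offers an older seqno - which the check rejects.
            lastWriteSeqno = 173;
        } catch (const std::logic_error& e) {
            std::cerr << e.what() << '\n';
        }
        return 0;
    }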
1 parent 5360816 commit 4ae5744

File tree

3 files changed: +220 -10 lines changed

engines/ep/src/durability/passive_durability_monitor.cc

Lines changed: 20 additions & 0 deletions
@@ -63,6 +63,26 @@ PassiveDurabilityMonitor::PassiveDurabilityMonitor(
                                       nullptr,
                                       nullptr,
                                       SyncWrite::InfiniteTimeout{});
+
+        // Advance the highPreparedSeqno iterator to point to the highest
+        // SyncWrite which has been prepared.
+        auto lastIt = std::prev(s->trackedWrites.end());
+        if (lastIt->getBySeqno() <= highPreparedSeqno) {
+            s->highPreparedSeqno.it = lastIt;
+        }
+
+        // Advance the highCompletedSeqno iterator to point to the highest
+        // SyncWrite which has been completed.
+        //
+        // Note: One might assume that this would always point to
+        // trackedWrites.begin(), given that we are a newly minted PassiveDM and
+        // hence would only be tracking incomplete SyncWrites. However, we
+        // _could_ have been converted from an ActiveDM with null topology which
+        // itself was converted from a previous PassiveDM which _did_ have
+        // completed SyncWrites still in trackedWrites (because they haven't
+        // been persisted locally yet).
+        if (lastIt->getBySeqno() <= highCompletedSeqno) {
+            s->highCompletedSeqno.it = lastIt;
+        }
     }
 }
 
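Restating the new logic outside the diff: as each outstanding prepare is moved into the new PassiveDM's trackedWrites, the HPS/HCS iterators are pointed at the newest element whose seqno does not exceed the corresponding inherited value. A simplified, self-contained sketch of that positioning (a plain std::list of seqnos standing in for the real SyncWrite container; the helper name is illustrative only):

    #include <cstdint>
    #include <iostream>
    #include <list>
    #include <string>

    // Simplified sketch, not the actual PassiveDurabilityMonitor code:
    // position an iterator at the last tracked write whose seqno is <= the
    // given threshold, falling back to end() ("none") when nothing qualifies.
    std::list<int64_t>::iterator positionIterator(
            std::list<int64_t>& trackedWrites, int64_t thresholdSeqno) {
        auto it = trackedWrites.end();
        for (auto cur = trackedWrites.begin(); cur != trackedWrites.end();
             ++cur) {
            if (*cur <= thresholdSeqno) {
                it = cur;
            }
        }
        return it;
    }

    int main() {
        // Two prepares (seqnos 1 and 2) carried over from the ActiveDM, both
        // already locally prepared (HPS=2), none completed (HCS=0).
        std::list<int64_t> trackedWrites{1, 2};

        auto hpsIt = positionIterator(trackedWrites, 2 /*highPreparedSeqno*/);
        auto hcsIt = positionIterator(trackedWrites, 0 /*highCompletedSeqno*/);

        std::cout << "HPS iterator -> "
                  << (hpsIt == trackedWrites.end() ? std::string("end()")
                                                   : std::to_string(*hpsIt))
                  << ", HCS iterator -> "
                  << (hcsIt == trackedWrites.end() ? std::string("end()")
                                                   : std::to_string(*hcsIt))
                  << '\n';
        return 0;
    }

Run against the two-prepare example from the commit message (HPS=2, HCS=0), the HPS iterator lands on seqno 2 and the HCS iterator stays at end(), so the next candidate for each resumes from the correct place rather than wrapping back to seqno 1.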

engines/ep/tests/module_tests/evp_store_durability_test.cc

Lines changed: 17 additions & 9 deletions
@@ -1863,8 +1863,12 @@ TEST_P(DurabilityBucketTest, ActiveToReplicaAndCommit) {
     auto pending = makePendingItem(key, "pending");
 
     ASSERT_EQ(ENGINE_EWOULDBLOCK, store->set(*pending, cookie));
+    ASSERT_EQ(
+            ENGINE_EWOULDBLOCK,
+            store->set(*makePendingItem(makeStoredDocKey("crikey2"), "value2"),
+                       cookie));
 
-    flushVBucketToDiskIfPersistent(vbid);
+    flushVBucketToDiskIfPersistent(vbid, 2);
 
     // Now switch over to being a replica, via dead for realism
     setVBucketStateAndRunPersistTask(vbid, vbucket_state_dead, {});
@@ -1875,15 +1879,19 @@
     // Now drive the VB as if a passive stream is receiving data.
     vb.checkpointManager->createSnapshot(1, 3, CheckpointType::Memory);
 
-    // seqno:2 A new prepare
-    auto key1 = makeStoredDocKey("crikey2");
-    auto pending1 = makePendingItem(
+    // seqno:3 A new prepare
+    auto key1 = makeStoredDocKey("crikey3");
+    auto pending3 = makePendingItem(
             key1, "pending", {cb::durability::Level::Majority, {5000}});
-    pending1->setCas(1);
-    pending1->setBySeqno(2);
-    EXPECT_EQ(ENGINE_SUCCESS, store->prepare(*pending1, cookie));
-    // seqno:3 the prepare at seqno:1 is committed
-    ASSERT_EQ(ENGINE_SUCCESS, vb.commit(key, 1, 3, vb.lockCollections(key)));
+    pending3->setCas(1);
+    pending3->setBySeqno(3);
+    EXPECT_EQ(ENGINE_SUCCESS, store->prepare(*pending3, cookie));
+    // Trigger update of HPS (normally called by PassiveStream).
+    vb.notifyPassiveDMOfSnapEndReceived(3);
+
+    // seqno:4 the prepare at seqno:1 is committed
+    vb.checkpointManager->createSnapshot(4, 4, CheckpointType::Memory);
+    ASSERT_EQ(ENGINE_SUCCESS, vb.commit(key, 1, 4, vb.lockCollections(key)));
 }
 
 // Test cases which run against all persistent storage backends.

engines/ep/tests/module_tests/vbucket_durability_test.cc

Lines changed: 183 additions & 1 deletion
@@ -100,7 +100,7 @@ void VBucketDurabilityTest::storeSyncWrites(
100100

101101
void VBucketDurabilityTest::simulateLocalAck(uint64_t seqno) {
102102
vbucket->setPersistenceSeqno(seqno);
103-
vbucket->getActiveDM().notifyLocalPersistence();
103+
vbucket->notifyPersistenceToDurabilityMonitor();
104104
}
105105

106106
void VBucketDurabilityTest::testAddPrepare(
@@ -1896,6 +1896,188 @@ TEST_P(VBucketDurabilityTest,
18961896
testConvertPassiveDMToActiveDMNoPrepares(vbucket_state_replica);
18971897
}
18981898

1899+
// Test that conversion from ActiveDM to PassiveDM with in-flight trackedWrites
1900+
// calculated HPS correctly.
1901+
// MB-35332: During rebalance a node may go from being active to replica for
1902+
// a vBucket, resulting in ActiveDM -> PassiveDM conversion. If the ActiveDM
1903+
// had prepares which were already locally ack'd, after PassiveDM conversion
1904+
// the highPreparedSeqno iterator was in the wrong position; resulting in
1905+
// the highPreparedSeqno.lastWriteSeqno going backwards (breaking Monotonic
1906+
// invariant).
1907+
TEST_P(VBucketDurabilityTest, ConvertActiveDMToPassiveDMPreparedSyncWrites) {
1908+
// Setup: queue two Prepares into the ADM (needs >1 to expose the bug
1909+
// where HPS.iterator has incorrect position, resulting in attempting to
1910+
// set HPS.lastWriteSeqno to a lower value then it was (2 -> 1).
1911+
auto& adm = VBucketTestIntrospector::public_getActiveDM(*vbucket);
1912+
ASSERT_EQ(0, adm.getNumTracked());
1913+
const std::vector<SyncWriteSpec> seqnos{1, 2};
1914+
testAddPrepare(seqnos);
1915+
// checkForCommit will be called after every normal vBucket op and will
1916+
// set the HPS for us
1917+
adm.checkForCommit();
1918+
1919+
// Test: Convert to PassiveDM (via dead as ns_server can do).
1920+
vbucket->setState(vbucket_state_dead);
1921+
vbucket->setState(vbucket_state_replica);
1922+
1923+
// Check: seqnos on newly-created PassiveDM.
1924+
auto& pdm = VBucketTestIntrospector::public_getPassiveDM(*vbucket);
1925+
EXPECT_EQ(2, pdm.getHighPreparedSeqno());
1926+
EXPECT_EQ(0, pdm.getHighCompletedSeqno());
1927+
1928+
// Test(2): Simulate a new snapshot being received (i.e. from a DcpConsumer)
1929+
// and the snapshot end being reached which triggers
1930+
// updateHighPreparedSeqno()
1931+
// - which shouldn't move HPS backwards.
1932+
auto key = makeStoredDocKey("key3");
1933+
auto pending = makePendingItem(key, "replica_value"s);
1934+
VBQueueItemCtx ctx;
1935+
using namespace cb::durability;
1936+
ctx.durability = DurabilityItemCtx{
1937+
Requirements{Level::Majority, Timeout::Infinity()}, cookie};
1938+
ASSERT_EQ(MutationStatus::WasClean,
1939+
public_processSet(*pending, 0 /*cas*/, ctx));
1940+
1941+
pdm.notifySnapshotEndReceived(3);
1942+
EXPECT_EQ(3, pdm.getHighPreparedSeqno());
1943+
EXPECT_EQ(0, pdm.getHighCompletedSeqno());
1944+
1945+
// Test(3): Now commit the prepared items.
1946+
auto resolutionCommit = PassiveDurabilityMonitor::Resolution::Commit;
1947+
pdm.completeSyncWrite(makeStoredDocKey("key1"), resolutionCommit, 1);
1948+
EXPECT_EQ(1, pdm.getHighCompletedSeqno());
1949+
pdm.completeSyncWrite(makeStoredDocKey("key2"), resolutionCommit, 2);
1950+
EXPECT_EQ(2, pdm.getHighCompletedSeqno());
1951+
pdm.completeSyncWrite(makeStoredDocKey("key3"), resolutionCommit, 3);
1952+
EXPECT_EQ(3, pdm.getHighCompletedSeqno());
1953+
}
1954+
1955+
// Test that conversion from ActiveDM to PassiveDM with in-flight trackedWrites
1956+
// including at least one completed is handled correctly.
1957+
TEST_P(VBucketDurabilityTest, ConvertActiveDMToPassiveDMCompletedSyncWrites) {
1958+
// Setup: queue three Prepares into the ADM, then complete the seqno:1.
1959+
// (We want to end up with at least two SyncWrites in PDM - it only
1960+
// contains uncompleted SyncWrites (i.e. seqno 2 & 3).
1961+
auto& adm = VBucketTestIntrospector::public_getActiveDM(*vbucket);
1962+
ASSERT_EQ(0, adm.getNumTracked());
1963+
const std::vector<SyncWriteSpec> seqnos{1, 2, 3};
1964+
testAddPrepare(seqnos);
1965+
ASSERT_EQ(seqnos.size(), adm.getNumTracked());
1966+
// checkForCommit will be called after every normal vBucket op and will
1967+
// set the HPS for us
1968+
adm.checkForCommit();
1969+
1970+
// Setup: Commit the first Prepare (so we can advance HCS to non-zero and
1971+
// test it below).
1972+
adm.seqnoAckReceived(replica1, 1);
1973+
ASSERT_EQ(2, adm.getNumTracked());
1974+
ASSERT_EQ(3, adm.getHighPreparedSeqno());
1975+
ASSERT_EQ(1, adm.getHighCompletedSeqno());
1976+
1977+
// Test: Convert to PassiveDM (via dead as ns_server can do).
1978+
vbucket->setState(vbucket_state_dead);
1979+
vbucket->setState(vbucket_state_replica);
1980+
1981+
// Check: state on newly created PassiveDM.
1982+
auto& pdm = VBucketTestIntrospector::public_getPassiveDM(*vbucket);
1983+
EXPECT_EQ(2, pdm.getNumTracked());
1984+
EXPECT_EQ(3, pdm.getHighPreparedSeqno());
1985+
EXPECT_EQ(1, pdm.getHighCompletedSeqno());
1986+
1987+
// Test(2): Commit the remaining outstanding prepares.
1988+
auto resolutionCommit = PassiveDurabilityMonitor::Resolution::Commit;
1989+
pdm.completeSyncWrite(makeStoredDocKey("key2"), resolutionCommit, 2);
1990+
EXPECT_EQ(2, pdm.getHighCompletedSeqno());
1991+
pdm.completeSyncWrite(makeStoredDocKey("key3"), resolutionCommit, 3);
1992+
EXPECT_EQ(3, pdm.getHighCompletedSeqno());
1993+
EXPECT_EQ(0, pdm.getNumTracked());
1994+
}
1995+
1996+
/**
1997+
* Check that converting from Replica to Active back to Replica correctly
1998+
* preserves completed SyncWrites in trackedWrites which cannot yet be removed
1999+
* (if persistMajority and not locally persisted).
2000+
*
2001+
* Scenario:
2002+
* Replica (PassiveDM):
2003+
* 1:prepare(persistToMajority)
2004+
* 2:prepare(majority)
2005+
* 3:commit(1) -> cannot locally remove 1 as not persisted yet.
2006+
* 4:commit(2) -> cannot locally remove 2 as seqno:1 not persisted yet
2007+
* (in-order commit).
2008+
* -> trackedWrites=[1,2]
2009+
* HPS=0
2010+
* HCS=2 (want HCS higher than the first element in the trackedWrites)
2011+
*
2012+
* Convert to ADM (null topology):
2013+
* -> trackedWrites=[1,2]
2014+
* HPS=0
2015+
* HCS=2
2016+
* (i.e. same as previous PassiveDM)
2017+
*
2018+
* Convert to PDM:
2019+
* State should be same as it was:
2020+
* -> trackedWrites=[1,2]
2021+
* HPS=0
2022+
* HCS=2
2023+
* notifyLocalPersistence -> can remove 1 and 2:
2024+
* -> trackedWrites=[]
2025+
* HPS=2
2026+
* HCS=2
2027+
*/
2028+
TEST_P(EPVBucketDurabilityTest, ReplicaToActiveToReplica) {
2029+
// Setup: PassiveDM with
2030+
// 1:PRE(persistMajority), 2:PRE(majority), 3:COMMIT(1), 4:COMMIT(2)
2031+
vbucket->setState(vbucket_state_replica);
2032+
using namespace cb::durability;
2033+
std::vector<SyncWriteSpec> seqnos{{1, false, Level::PersistToMajority}, 2};
2034+
testAddPrepare(seqnos);
2035+
auto& pdm = VBucketTestIntrospector::public_getPassiveDM(*vbucket);
2036+
pdm.completeSyncWrite(makeStoredDocKey("key1"),
2037+
PassiveDurabilityMonitor::Resolution::Commit,
2038+
1);
2039+
pdm.completeSyncWrite(makeStoredDocKey("key2"),
2040+
PassiveDurabilityMonitor::Resolution::Commit,
2041+
2);
2042+
pdm.notifySnapshotEndReceived(4);
2043+
2044+
// Sanity: Check PassiveDM state as expected - HPS is still zero as haven't
2045+
// locally prepared the persistMajority, but globally that's been
2046+
// committed (HCS=1).
2047+
ASSERT_EQ(2, pdm.getNumTracked());
2048+
ASSERT_EQ(0, pdm.getHighPreparedSeqno());
2049+
ASSERT_EQ(2, pdm.getHighCompletedSeqno());
2050+
2051+
// Setup(2): Convert to ActiveDM (null topology).
2052+
vbucket->setState(vbucket_state_active);
2053+
auto& adm = VBucketTestIntrospector::public_getActiveDM(*vbucket);
2054+
EXPECT_EQ(2, adm.getNumTracked());
2055+
EXPECT_EQ(0, adm.getHighPreparedSeqno());
2056+
EXPECT_EQ(2, adm.getHighCompletedSeqno());
2057+
2058+
// Test: Convert back to PassiveDM.
2059+
vbucket->setState(vbucket_state_replica);
2060+
{
2061+
auto& pdm = VBucketTestIntrospector::public_getPassiveDM(*vbucket);
2062+
EXPECT_EQ(2, pdm.getNumTracked());
2063+
EXPECT_EQ(0, pdm.getHighPreparedSeqno());
2064+
EXPECT_EQ(2, pdm.getHighCompletedSeqno());
2065+
2066+
// Test(2): Check that notification of local persistence will remove
2067+
// the completed items and advance HPS.
2068+
2069+
// @todo MB-35366: This notifySnapshotEndReceived *shouldn't* be
2070+
// necessary; calling notifyPersistenceToDurabilityMonitor (via
2071+
// simulateLocalAck) should be sufficient to prepare.
2072+
pdm.notifySnapshotEndReceived(4);
2073+
simulateLocalAck(4);
2074+
2075+
EXPECT_EQ(0, pdm.getNumTracked());
2076+
EXPECT_EQ(2, pdm.getHighPreparedSeqno());
2077+
EXPECT_EQ(2, pdm.getHighCompletedSeqno());
2078+
}
2079+
}
2080+
18992081
// Test that a double set_vb_state with identical state & topology is handled
19002082
// correctly.
19012083
// MB-35189: ns_server can send such set_vb_state messages, and in the
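The Replica->Active->Replica scenario above relies on a specific removal rule: completed SyncWrites can only be dropped from the front of trackedWrites, in order, and a PersistToMajority prepare additionally has to wait for local persistence. The following small, self-contained sketch restates that rule under those assumptions (hypothetical struct and helper names, not kv_engine code):

    #include <cstdint>
    #include <iostream>
    #include <list>

    // Hypothetical sketch of the removal rule described in the test comment
    // above: a completed prepare may only be removed from the front of
    // trackedWrites (in-order), and one requiring local persistence (e.g.
    // Level::PersistToMajority) must also have been persisted locally.
    struct TrackedPrepare {
        int64_t seqno;
        bool completed;
        bool requiresLocalPersistence;
    };

    void removeCompletedPrefix(std::list<TrackedPrepare>& trackedWrites,
                               int64_t persistedSeqno) {
        while (!trackedWrites.empty() && trackedWrites.front().completed &&
               (!trackedWrites.front().requiresLocalPersistence ||
                trackedWrites.front().seqno <= persistedSeqno)) {
            trackedWrites.pop_front();
        }
    }

    int main() {
        // 1:prepare(persistToMajority), 2:prepare(majority); both committed
        // by seqnos 3 and 4, but nothing persisted locally yet.
        std::list<TrackedPrepare> trackedWrites{{1, true, true},
                                                {2, true, false}};

        removeCompletedPrefix(trackedWrites, 0 /*persistedSeqno*/);
        std::cout << "before local persistence: " << trackedWrites.size()
                  << " tracked\n"; // 2 - neither can be removed yet

        removeCompletedPrefix(trackedWrites, 4 /*persistedSeqno*/);
        std::cout << "after local persistence:  " << trackedWrites.size()
                  << " tracked\n"; // 0 - both removable, in order
        return 0;
    }

With both prepares committed but nothing persisted, neither can be removed; once local persistence catches up, both go - matching the trackedWrites=[], HPS=2, HCS=2 end state the test asserts.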
