Skip to content

Commit bcbf13e

Browse files
committed
MB-60046: Ephemeral: Don't auto-delete committed and pending keys.
Prior to this change the auto-delete path was able to generate a delete of the committed item whilst there is a pending write. The delete would be rejected by the consumer and disconnect DCP (causing a cycle of disconnect). This change updates EphemeralVBucket::eligibleToPageOut to check the current hash-table bucket for a pending key. This approach is deliberately simple and the inefficient extra search of the hash-chain is tolerated to provide a rapid solution to this issue. Change-Id: If83ecb927026a4ef4ca2dff813a8729ec67da211 Reviewed-on: https://review.couchbase.org/c/kv_engine/+/202456 Well-Formed: Restriction Checker Reviewed-by: Paolo Cocchi <[email protected]> Tested-by: Jim Walker <[email protected]>
1 parent 4128ca7 commit bcbf13e

File tree

6 files changed

+161
-1
lines changed

6 files changed

+161
-1
lines changed

engines/ep/src/ephemeral_vb.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,15 @@ bool EphemeralVBucket::eligibleToPageOut(const HashTable::HashBucketLock& lh,
156156
// The system event documents must not be paged-out
157157
return false;
158158
}
159+
160+
// Cannot auto-delete this v if it has a pending v. Simplest is to
161+
// look again for a pending v in the hash-table.
162+
// @todo MB-60105: Tracks that scanning the hash-chain again is not the most
163+
// efficient fix.
164+
if (ht.unlocked_find(lh, v.getKey()).pendingSV) {
165+
return false;
166+
}
167+
159168
return true;
160169
}
161170

engines/ep/src/hash_table.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,12 @@ HashTable::FindInnerResult HashTable::findInner(const DocKey& key) {
329329
"non-active object");
330330
}
331331
HashBucketLock hbl = getLockedBucket(key);
332+
auto rv = unlocked_find(hbl, key);
333+
return {std::move(hbl), rv.committedSV, rv.pendingSV};
334+
}
335+
336+
HashTable::UnlockedFindResult HashTable::unlocked_find(
337+
const HashBucketLock& hbl, const DocKey& key) const {
332338
// Scan through all elements in the hash bucket chain looking for Committed
333339
// and Pending items with the same key.
334340
StoredValue* foundCmt = nullptr;
@@ -346,7 +352,7 @@ HashTable::FindInnerResult HashTable::findInner(const DocKey& key) {
346352
}
347353
}
348354

349-
return {std::move(hbl), foundCmt, foundPend};
355+
return {foundCmt, foundPend};
350356
}
351357

352358
HashTable::RandomKeyVisitor::RandomKeyVisitor(size_t size, int random)

engines/ep/src/hash_table.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,6 +1199,26 @@ class HashTable {
11991199
StoredValue& value,
12001200
CommittedState state);
12011201

1202+
struct UnlockedFindResult {
1203+
/// The Committed StoredValue with this key if in HashTable, else
1204+
/// nullptr.
1205+
StoredValue* committedSV{nullptr};
1206+
/// The Committed StoredValue with this key if in HashTable, else
1207+
/// nullptr.
1208+
StoredValue* pendingSV{nullptr};
1209+
};
1210+
1211+
/**
1212+
* Attempt to find the pending and committed versions of key in the given
1213+
* hbl.getBucketNum hash bucket.
1214+
*
1215+
* @param hbl The structure which holds the lock and the bucket number
1216+
* @param key The key to find
1217+
* @return pair of pointers, null when nothing found.
1218+
*/
1219+
UnlockedFindResult unlocked_find(const HashBucketLock& hbl,
1220+
const DocKey& key) const;
1221+
12021222
/**
12031223
* Releases an item(StoredValue) in the hash table, but does not delete it.
12041224
* It will pass out the removed item to the caller who can decide whether to

engines/ep/tests/module_tests/ephemeral_vb_test.cc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,29 @@ TEST_F(EphemeralVBucketTest, SoftDeleteDuringBackfill) {
472472
mockEpheVB->public_getNumListItems());
473473
}
474474

475+
// MB-60046: The ephemeral auto-delete was able to delete a committed item which
476+
// has a pending write (disrupting DCP). Test checks eligibleToPageOut now
477+
// detects this case.
478+
TEST_F(EphemeralVBucketTest, CannotPageOutCommittedAndPendingKey) {
479+
auto meta = nlohmann::json{
480+
{"topology", nlohmann::json::array({{"active", "replica"}})}};
481+
vbucket->setState(vbucket_state_active, &meta);
482+
// Commit and item and then also make it pending.
483+
auto key = makeStoredDocKey("key");
484+
auto item = makeCommittedItem(key, "value");
485+
ASSERT_EQ(MutationStatus::WasClean,
486+
public_processSet(*item, item->getCas()));
487+
item = makePendingItem(key, "value");
488+
VBQueueItemCtx ctx;
489+
ctx.durability =
490+
DurabilityItemCtx{item->getDurabilityReqs(), nullptr /*cookie*/};
491+
ASSERT_EQ(MutationStatus::WasClean,
492+
public_processSet(*item, item->getCas(), ctx));
493+
auto result = vbucket->ht.findForRead(key);
494+
ASSERT_TRUE(result.storedValue);
495+
ASSERT_FALSE(vbucket->eligibleToPageOut(result.lock, *result.storedValue));
496+
}
497+
475498
// EphemeralVB Tombstone Purging //////////////////////////////////////////////
476499

477500
class EphTombstoneTest : public EphemeralVBucketTest {

engines/ep/tests/module_tests/evp_store_single_threaded_test.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,13 @@ class STParameterizedBucketTest
718718
"item_eviction_policy=full_eviction"s);
719719
}
720720

721+
static auto ephAutoDeleteConfigValues() {
722+
using namespace std::string_literals;
723+
return ::testing::Values(
724+
"bucket_type=ephemeral:"
725+
"ephemeral_full_policy=auto_delete"s);
726+
}
727+
721728
bool persistent() const {
722729
return GetParam().find("persistent") != std::string::npos;
723730
}

engines/ep/tests/module_tests/item_pager_test.cc

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,94 @@ TEST_P(STEphemeralItemPagerTest, ReplicaNotPaged) {
15731573
}
15741574
}
15751575

1576+
/**
1577+
* Test fixture for Ephemeral item pager tests with
1578+
* ephemeral_full_policy=auto_delete.
1579+
*/
1580+
class STEphemeralAutoDeleteItemPagerTest : public STItemPagerTest {};
1581+
1582+
// Test covers MB-60046. A pending and committed key must not find itself auto
1583+
// deleted, else the delete of the commit will be rejected by the DCP replica
1584+
// and cause disconnects.
1585+
TEST_P(STEphemeralAutoDeleteItemPagerTest, MB_60046) {
1586+
setVBucketState(
1587+
vbid,
1588+
vbucket_state_active,
1589+
{{"topology", nlohmann::json::array({{"active", "replica"}})}});
1590+
1591+
auto setActive = [this](int count) {
1592+
auto item =
1593+
make_item(vbid,
1594+
makeStoredDocKey("active_" + std::to_string(count)),
1595+
std::string(512, 'V'));
1596+
item.setFreqCounterValue(0);
1597+
storeItem(item);
1598+
};
1599+
1600+
int count = 0;
1601+
do {
1602+
setActive(++count);
1603+
} while (store->getPageableMemCurrent() <
1604+
store->getPageableMemHighWatermark());
1605+
1606+
auto key = makeStoredDocKey("active_" + std::to_string(count));
1607+
// Now make the last key written in the loop also pending.
1608+
ASSERT_EQ(cb::engine_errc::would_block,
1609+
storeItem(*makePendingItem(key, "pending-value")));
1610+
1611+
std::vector<queued_item> items;
1612+
auto& vb = *store->getVBucket(vbid);
1613+
auto cursor = vb.checkpointManager->registerCursorBySeqno(
1614+
"test", 1, CheckpointCursor::Droppable::No);
1615+
ASSERT_FALSE(cursor.tryBackfill) << *vb.checkpointManager;
1616+
vb.checkpointManager->getNextItemsForCursor(cursor.cursor.lock().get(),
1617+
items);
1618+
EXPECT_NE(0, items.size()) << *vb.checkpointManager;
1619+
bool pendingFound{false};
1620+
bool committedFound{false};
1621+
for (const auto& item : items) {
1622+
if (item->getKey() == key) {
1623+
if (item->isPending()) {
1624+
ASSERT_FALSE(pendingFound);
1625+
pendingFound = true;
1626+
} else {
1627+
ASSERT_FALSE(committedFound);
1628+
committedFound = true;
1629+
}
1630+
}
1631+
}
1632+
1633+
// Both pending and committed must be seen before we trigger auto-delete
1634+
ASSERT_TRUE(pendingFound);
1635+
ASSERT_TRUE(committedFound);
1636+
items.clear();
1637+
1638+
// Now trigger the auto-delete paging
1639+
runHighMemoryPager();
1640+
1641+
// Iterate through the checkpoint and check all deletes.
1642+
vb.checkpointManager->getNextItemsForCursor(cursor.cursor.lock().get(),
1643+
items);
1644+
// Should be some new mutations (deletes)
1645+
EXPECT_NE(0, items.size()) << *vb.checkpointManager;
1646+
1647+
int deleteCount{0};
1648+
for (const auto& item : items) {
1649+
if (item->getKey() == key) {
1650+
// The key should not be found in the new batch of items. The
1651+
// original commit+pending were in the first batch and not expected
1652+
// to see again. Prior to the fix, a delete(key) was seen here.
1653+
FAIL() << *item;
1654+
}
1655+
if (item->isDeleted()) {
1656+
++deleteCount;
1657+
}
1658+
}
1659+
1660+
// We certainly should see deletes.
1661+
ASSERT_NE(0, deleteCount);
1662+
}
1663+
15761664
/**
15771665
* Test fixture for expiry pager tests - enables the Expiry Pager (in addition
15781666
* to what the parent class does).
@@ -2685,6 +2773,11 @@ INSTANTIATE_TEST_SUITE_P(PersistentFullValue,
26852773
STParameterizedBucketTest::persistentConfigValues(),
26862774
STParameterizedBucketTest::PrintToStringParamName);
26872775

2776+
INSTANTIATE_TEST_SUITE_P(EphemeralAutoDelete,
2777+
STEphemeralAutoDeleteItemPagerTest,
2778+
STParameterizedBucketTest::ephAutoDeleteConfigValues(),
2779+
STParameterizedBucketTest::PrintToStringParamName);
2780+
26882781
#else
26892782
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(STItemPagerTest);
26902783
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MultiPagingVisitorTest);
@@ -2694,4 +2787,6 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(STValueEvictionExpiryPagerTest);
26942787
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MB_32669);
26952788
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(STEphemeralItemPagerTest);
26962789
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MB_36087);
2790+
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(
2791+
STEphemeralAutoDeleteItemPagerTest);
26972792
#endif

0 commit comments

Comments
 (0)