Skip to content

Commit 7b6cf23

Browse files
jameseh96daverigby
authored andcommitted
MB-43559: Ensure eviction doesn't stop at high watermark if no replicas
In order to keep active data in memory preferentially, PagingVisitor::visitBucket skipped evicting from active vbuckets if both: 1. active resident ratio < replica resident ratio 2. current memory usage < high watermark However, if there are _no_ replica vbuckets on the node store.getReplicaResidentRatio() defaults to 100%. As a result, once the active vbuckets became <100% resident, 1. is always true. Given this, the PagingVisitor would evict until `mem_used < mem_high_wat` (making 2. true), and would then skip any remaining vbuckets. This meant eviction would never reach the low watermark. Fixed by removing the replica residency check from PagingVisitor::visitBucket. This has negligible impact on eviction, as active data is _already_ kept in memory preferentially after changes for MB-40531. Change-Id: I08b3b3de9a9a35673f2e9596652e370484ab6267 Reviewed-on: http://review.couchbase.org/c/kv_engine/+/143661 Tested-by: Build Bot <[email protected]> Reviewed-by: Dave Rigby <[email protected]>
1 parent cde1939 commit 7b6cf23

File tree

3 files changed

+127
-7
lines changed

3 files changed

+127
-7
lines changed

engines/ep/src/paging_visitor.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,6 @@ void PagingVisitor::visitBucket(const VBucketPtr& vb) {
228228
// skip active vbuckets if active resident ratio is lower than replica
229229
auto current = static_cast<double>(stats.getEstimatedTotalMemoryUsed());
230230
auto lower = static_cast<double>(stats.mem_low_wat);
231-
auto high = static_cast<double>(stats.mem_high_wat);
232-
if (vb->getState() == vbucket_state_active && current < high &&
233-
store.getActiveResidentRatio() < store.getReplicaResidentRatio()) {
234-
return;
235-
}
236231

237232
if (current > lower) {
238233
if (vBucketFilter(vb->getId())) {

engines/ep/tests/module_tests/item_pager_test.cc

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
#include "../mock/mock_global_task.h"
2323
#include "../mock/mock_paging_visitor.h"
24+
#include "../mock/mock_stat_collector.h"
2425
#include "bgfetcher.h"
2526
#include "checkpoint_manager.h"
2627
#include "checkpoint_utils.h"
@@ -36,6 +37,7 @@
3637

3738
#include <folly/portability/GTest.h>
3839
#include <programs/engine_testapp/mock_server.h>
40+
#include <statistics/labelled_collector.h>
3941
#include <string_utilities.h>
4042
#include <xattr/blob.h>
4143
#include <xattr/utils.h>
@@ -198,7 +200,8 @@ class STBucketQuotaTest : public STParameterizedBucketTest {
198200
*/
199201
size_t populateVbsUntil(const std::vector<Vbid>& vbids,
200202
const std::function<bool()>& predicate,
201-
std::string_view keyPrefix = "key_") {
203+
std::string_view keyPrefix = "key_",
204+
size_t itemSize = 5000) {
202205
bool populate = true;
203206
int count = 0;
204207
while (populate) {
@@ -208,7 +211,8 @@ class STBucketQuotaTest : public STParameterizedBucketTest {
208211
{
209212
auto key = makeStoredDocKey(std::string(keyPrefix) +
210213
std::to_string(count++));
211-
auto item = make_item(vbid, key, std::string(5000, 'x'));
214+
auto item =
215+
make_item(vbid, key, std::string(itemSize, 'x'));
212216
// make the items have a range of MFU values
213217
// without this, no matter what percent of items should be
214218
// evicted, either all of them or none of them would be
@@ -1219,6 +1223,122 @@ TEST_P(STItemPagerTest, ItemPagerEvictionOrder) {
12191223
;
12201224
}
12211225

1226+
TEST_P(STItemPagerTest, MB43559_EvictionWithoutReplicasReachesLWM) {
1227+
// MB-43559: Test that eviction does not stop once memory usage drops
1228+
// below the high watermark if there are no replica vbuckets
1229+
1230+
if (!persistent()) {
1231+
// ephemeral buckets are never less than 100% resident and are not
1232+
// vulnerable to the bug (see MB)
1233+
return;
1234+
}
1235+
1236+
// Magma's memory usage makes calculations around recovering memory
1237+
// through eviction difficult. Increase the quota to allow eviction
1238+
// to actually reach the low watermark despite magma's memory usage
1239+
if (isMagma()) {
1240+
increaseQuota(2 * 1024 * 1024);
1241+
}
1242+
1243+
// issue involves eviction skipping vbuckets after mem_used < hwm,
1244+
// so multiple vbuckets are needed
1245+
std::vector<Vbid> vbids = {Vbid(0), Vbid(1), Vbid(2), Vbid(3)};
1246+
1247+
// set vbs active
1248+
for (const auto& vb : vbids) {
1249+
setVBucketStateAndRunPersistTask(vb, vbucket_state_active);
1250+
}
1251+
1252+
auto& stats = engine->getEpStats();
1253+
1254+
// check that baseline memory usage is definitely below the LWM
1255+
ASSERT_LT(stats.getPreciseTotalMemoryUsed(), stats.mem_low_wat.load());
1256+
1257+
auto aboveHWM = [&stats]() {
1258+
return stats.getPreciseTotalMemoryUsed() > stats.mem_high_wat.load();
1259+
};
1260+
1261+
auto itemCount = 0;
1262+
auto i = 0;
1263+
// items need to be persisted to be evicted, but flushing after each item
1264+
// would be slow. Load to the HWM, then flush, then repeat if we dropped
1265+
// below the HWM
1266+
while (!aboveHWM()) {
1267+
itemCount += populateVbsUntil(
1268+
vbids, aboveHWM, "keys_" + std::to_string(i++) + "_", 500);
1269+
for (const auto& vb : vbids) {
1270+
// flush and remove checkpoints as eviction will not touch
1271+
// dirty items
1272+
flushAndRemoveCheckpoints(vb);
1273+
}
1274+
}
1275+
1276+
EXPECT_GT(itemCount, 100);
1277+
1278+
// Note: prior to the fix for this MB, eviction relied upon cached
1279+
// residency ratios _which were only updated when gathering stats_.
1280+
// This is no longer the case post-fix, but to ensure the test _would_
1281+
// fail prior to the fix, this is done here.
1282+
auto refreshCachedResidentRatios = [&store=store, &engine=engine]() {
1283+
// call getAggregatedVBucketStats to updated cached resident ratios (as
1284+
// returned by `store->get*ResidentRatio()`)
1285+
testing::NiceMock<MockStatCollector> collector;
1286+
store->getAggregatedVBucketStats(collector.forBucket("foobar"));
1287+
};
1288+
refreshCachedResidentRatios();
1289+
1290+
// confirm we are above the high watermark, and can test the item pager
1291+
// behaviour
1292+
ASSERT_GT(stats.getEstimatedTotalMemoryUsed(), stats.mem_high_wat.load());
1293+
1294+
// there are no replica buckets
1295+
ASSERT_EQ(0, store->getNumOfVBucketsInState(vbucket_state_replica));
1296+
// the replica resident ratio defaults to 100 as there are no replica items
1297+
ASSERT_EQ(100, store->getReplicaResidentRatio());
1298+
1299+
// age is not relevant to this test, increase the mfu threshold under which
1300+
// age is ignored
1301+
auto& config = engine->getConfiguration();
1302+
config.setItemEvictionFreqCounterAgeThreshold(255);
1303+
1304+
// to make sure this isn't sensitive to small changes in memory usage,
1305+
// try to evict _everything_ (eviction ratio 1.0). If mem_used still hasn't
1306+
// gone below the lwm then something is definitely wrong.
1307+
auto available = std::make_shared<std::atomic<bool>>();
1308+
1309+
auto pv = std::make_unique<MockPagingVisitor>(
1310+
*store,
1311+
stats,
1312+
EvictionRatios{1.0 /* active&pending */, 1.0 /* replica */},
1313+
available,
1314+
ITEM_PAGER,
1315+
false,
1316+
VBucketFilter(vbids),
1317+
config.getItemEvictionAgePercentage(),
1318+
config.getItemEvictionFreqCounterAgeThreshold());
1319+
1320+
auto label = "Item pager";
1321+
auto taskid = TaskId::ItemPagerVisitor;
1322+
VBCBAdaptor task(store,
1323+
taskid,
1324+
std::move(pv),
1325+
label,
1326+
/*shutdown*/ false);
1327+
1328+
// call the run method repeatedly until it returns false, indicating the
1329+
// visitor is definitely done, rather than paused
1330+
while (task.run()) {
1331+
// need to refresh the resident ratio so it reflects active RR < 100
1332+
// after some items have been evicted. This wouldn't need to happen
1333+
// _during_ eviction in a full repro, just _once_ ever after active RR
1334+
// drops.
1335+
refreshCachedResidentRatios();
1336+
}
1337+
1338+
// confirm that memory usage is now below the low watermark
1339+
EXPECT_LT(stats.getPreciseTotalMemoryUsed(), stats.mem_low_wat.load());
1340+
}
1341+
12221342
/**
12231343
* Test fixture for Ephemeral-only item pager tests.
12241344
*/

engines/ep/tests/module_tests/kv_bucket_test.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ class KVBucketTest : virtual public ::testing::Test {
9595
*/
9696
void flush_vbucket_to_disk(Vbid vbid, size_t expected = 1);
9797

98+
/**
99+
* Check if the current bucket is a persistent bucket.
100+
*/
101+
bool persistent() const;
102+
98103
/**
99104
* Flush the given vBucket to disk if the bucket is peristent, otherwise
100105
* do nothing.

0 commit comments

Comments
 (0)