Skip to content

Commit 3999521

Browse files
committed
MB-40531: [BP] Prefer paging from replicas if possible
Backport of http://review.couchbase.org/c/kv_engine/+/134806 Prior to this change, the pager would try to page out X% of items in replica vbuckets, but if this did not bring memory usage below the low water mark, it would flip phase to ACTIVE_AND_PENDING_ONLY - meaning the next run of the pager would immediately try to page from active and pending vbuckets. Now, active/pending vbuckets will only be considered if paging _every_ possible item from replicas would not free enough memory to drop below the low water mark. Change-Id: I46e578bf163fa2e12834173595c8aa3f52f1593d Reviewed-on: http://review.couchbase.org/c/kv_engine/+/133197 Tested-by: Build Bot <[email protected]> Well-Formed: Build Bot <[email protected]> Reviewed-by: Jim Walker <[email protected]>
1 parent 607643c commit 3999521

20 files changed

+552
-151
lines changed

engines/ep/src/ep_bucket.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ class EPBucket : public KVBucket {
216216

217217
void warmupCompleted();
218218

219+
bool canEvictFromReplicas() override {
220+
return true;
221+
}
222+
219223
protected:
220224
// During the warmup phase we might want to enable external traffic
221225
// at a given point in time.. The LoadStorageKvPairCallback will be

engines/ep/src/ep_vb.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,14 @@ bool EPVBucket::eligibleToPageOut(const HashTable::HashBucketLock& lh,
490490
return v.eligibleForEviction(eviction);
491491
}
492492

493+
size_t EPVBucket::getPageableMemUsage() {
494+
if (eviction == EvictionPolicy::Full) {
495+
return ht.getItemMemory();
496+
} else {
497+
return ht.getItemMemory() - ht.getMetadataMemory();
498+
}
499+
}
500+
493501
size_t EPVBucket::queueBGFetchItem(const DocKey& key,
494502
std::unique_ptr<VBucketBGFetchItem> fetch,
495503
BgFetcher* bgFetcher) {

engines/ep/src/ep_vb.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ class EPVBucket : public VBucket {
108108
bool eligibleToPageOut(const HashTable::HashBucketLock& lh,
109109
const StoredValue& v) const override;
110110

111+
size_t getPageableMemUsage() override;
112+
111113
bool areDeletedItemsAlwaysResident() const override;
112114

113115
void addStats(VBucketStatsDetailLevel detail,

engines/ep/src/ephemeral_bucket.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ class EphemeralBucket : public KVBucket {
136136
*/
137137
static void reconfigureForEphemeral(Configuration& config);
138138

139+
bool canEvictFromReplicas() override {
140+
return false;
141+
}
142+
139143
protected:
140144
std::unique_ptr<VBucketCountVisitor> makeVBCountVisitor(
141145
vbucket_state_t state) override;

engines/ep/src/ephemeral_vb.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,17 @@ bool EphemeralVBucket::eligibleToPageOut(const HashTable::HashBucketLock& lh,
153153
return true;
154154
}
155155

156+
size_t EphemeralVBucket::getPageableMemUsage() {
157+
if (getState() == vbucket_state_replica) {
158+
// Ephemeral buckets are not backed by disk - nothing can be evicted
159+
// from a replica as deleting from replicas would cause inconsistency
160+
// with the active. When the active vb evicts items deletions will be
161+
// streamed to the replica.
162+
return 0;
163+
}
164+
return ht.getItemMemory();
165+
}
166+
156167
bool EphemeralVBucket::areDeletedItemsAlwaysResident() const {
157168
// Ephemeral buckets do keep all deleted items resident in memory.
158169
// (We have nowhere else to store them, given there is no disk).

engines/ep/src/ephemeral_vb.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ class EphemeralVBucket : public VBucket {
110110
bool eligibleToPageOut(const HashTable::HashBucketLock& lh,
111111
const StoredValue& v) const override;
112112

113+
size_t getPageableMemUsage() override;
114+
113115
bool areDeletedItemsAlwaysResident() const override;
114116

115117
void addStats(VBucketStatsDetailLevel detail,

engines/ep/src/hash_table.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,15 @@ class HashTable {
12831283
*/
12841284
nlohmann::json dumpStoredValuesAsJson() const;
12851285

1286+
/**
1287+
* Generates a new value that is either the same or higher than the input
1288+
* value. It is intended to be used to increment the frequency counter of a
1289+
* storedValue.
1290+
* @param value The value counter to try to generate an increment for.
1291+
* @returns The new value that is the same or higher than value.
1292+
*/
1293+
uint8_t generateFreqValue(uint8_t value);
1294+
12861295
private:
12871296
// The container for actually holding the StoredValues.
12881297
using table_type = std::vector<StoredValue::UniquePtr>;
@@ -1452,15 +1461,6 @@ class HashTable {
14521461

14531462
void clear_UNLOCKED(bool deactivate);
14541463

1455-
/**
1456-
* Generates a new value that is either the same or higher than the input
1457-
* value. It is intended to be used to increment the frequency counter of a
1458-
* storedValue.
1459-
* @param value The value counter to try to generate an increment for.
1460-
* @returns The new value that is the same or higher than value.
1461-
*/
1462-
uint8_t generateFreqValue(uint8_t value);
1463-
14641464
/**
14651465
* Update the frequency counter of a given stored value.
14661466
* @param v reference to a value in the hash table whose frequency counter

engines/ep/src/item_pager.cc

Lines changed: 128 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "kv_bucket_iface.h"
3131
#include "paging_visitor.h"
3232

33+
#include <folly/lang/Assume.h>
3334
#include <platform/platform_time.h>
3435

3536
#include <cmath>
@@ -43,23 +44,44 @@
4344
#include <phosphor/phosphor.h>
4445
#include <memory>
4546

47+
double EvictionRatios::getForState(vbucket_state_t state) {
48+
switch (state) {
49+
case vbucket_state_replica:
50+
return replica;
51+
case vbucket_state_active:
52+
case vbucket_state_pending:
53+
return activeAndPending;
54+
case vbucket_state_dead:
55+
return 0;
56+
}
57+
folly::assume_unreachable();
58+
}
59+
60+
void EvictionRatios::setForState(vbucket_state_t state, double value) {
61+
switch (state) {
62+
case vbucket_state_replica:
63+
replica = value;
64+
return;
65+
case vbucket_state_active:
66+
case vbucket_state_pending:
67+
activeAndPending = value;
68+
return;
69+
case vbucket_state_dead:
70+
// no-op
71+
return;
72+
}
73+
folly::assume_unreachable();
74+
}
75+
4676
ItemPager::ItemPager(EventuallyPersistentEngine& e, EPStats& st)
4777
: GlobalTask(&e, TaskId::ItemPager, 10, false),
4878
engine(e),
4979
stats(st),
5080
available(new std::atomic<bool>(true)),
51-
phase(REPLICA_ONLY),
5281
doEvict(false),
5382
sleepTime(std::chrono::milliseconds(
5483
e.getConfiguration().getPagerSleepTimeMs())),
5584
notified(false) {
56-
// For the hifi_mfu algorithm if a couchbase/persistent bucket we
57-
// want to start visiting the replica vbucket first. However for
58-
// ephemeral we do not evict from replica vbuckets and therefore
59-
// we start with active and pending vbuckets.
60-
phase = (engine.getConfiguration().getBucketType() == "persistent")
61-
? REPLICA_ONLY
62-
: ACTIVE_AND_PENDING_ONLY;
6385
}
6486

6587
bool ItemPager::run(void) {
@@ -72,7 +94,7 @@ bool ItemPager::run(void) {
7294
// be that we've gone over HWM have been notified to run, then came back
7395
// down (e.g. 1 byte under HWM), we should still page in this scenario.
7496
// Notified would be false if we were woken by the periodic scheduler
75-
bool wasNotified = notified;
97+
const bool wasNotified = notified;
7698

7799
// Clear the notification flag before starting the task's actions
78100
notified.store(false);
@@ -95,53 +117,82 @@ bool ItemPager::run(void) {
95117

96118
++stats.pagerRuns;
97119

98-
double toKill = (current - static_cast<double>(lower)) / current;
120+
if (current <= lower) {
121+
// early exit - no need to run a paging visitor
122+
return true;
123+
}
99124

100-
EP_LOG_DEBUG("Using {} bytes of memory, paging out {} of items.",
101-
stats.getEstimatedTotalMemoryUsed(),
102-
(toKill * 100.0));
125+
VBucketFilter replicaFilter;
126+
VBucketFilter activePendingFilter;
103127

104-
// compute active vbuckets evicition bias factor
105-
Configuration& cfg = engine.getConfiguration();
106-
size_t activeEvictPerc = cfg.getPagerActiveVbPcnt();
107-
double bias = static_cast<double>(activeEvictPerc) / 50;
128+
for (auto vbid : kvBucket->getVBucketsInState(vbucket_state_replica)) {
129+
replicaFilter.addVBucket(vbid);
130+
}
131+
132+
for (auto vbid : kvBucket->getVBucketsInState(vbucket_state_active)) {
133+
activePendingFilter.addVBucket(vbid);
134+
}
135+
for (auto vbid : kvBucket->getVBucketsInState(vbucket_state_pending)) {
136+
activePendingFilter.addVBucket(vbid);
137+
}
138+
139+
ssize_t bytesToEvict = current - lower;
140+
141+
const double replicaEvictableMem = getEvictableBytes(replicaFilter);
142+
const double activePendingEvictableMem =
143+
getEvictableBytes(activePendingFilter);
144+
145+
double replicaEvictionRatio = 0.0;
146+
double activeAndPendingEvictionRatio = 0.0;
147+
148+
if (kvBucket->canEvictFromReplicas()) {
149+
// try to evict from replicas first if we can
150+
replicaEvictionRatio =
151+
std::min(1.0, bytesToEvict / replicaEvictableMem);
152+
153+
bytesToEvict -= replicaEvictableMem;
154+
}
155+
156+
if (bytesToEvict > 0) {
157+
// replicas are not sufficient (or are not eligible for eviction if
158+
// ephemeral). Not enough memory can be reclaimed from them to
159+
// reach the low watermark.
160+
// Consider active and pending vbuckets too.
161+
// active and pending share an eviction ratio, it need only be
162+
// set once
163+
activeAndPendingEvictionRatio =
164+
std::min(1.0, bytesToEvict / activePendingEvictableMem);
165+
}
166+
167+
EP_LOG_DEBUG(
168+
"Using {} bytes of memory, paging out {}% of active and "
169+
"pending items, {}% of replica items.",
170+
stats.getEstimatedTotalMemoryUsed(),
171+
(activeAndPendingEvictionRatio * 100.0),
172+
(replicaEvictionRatio * 100.0));
108173

109174
VBucketFilter filter;
110-
// For the hifi_mfu algorithm use the phase to filter which vbuckets
111-
// we want to visit (either replica or active/pending vbuckets).
112-
vbucket_state_t state;
113-
if (phase == REPLICA_ONLY) {
114-
state = vbucket_state_replica;
115-
} else if (phase == ACTIVE_AND_PENDING_ONLY) {
116-
state = vbucket_state_active;
117-
auto acceptableVBs = kvBucket->getVBucketsInState(state);
118-
for (auto vb : acceptableVBs) {
119-
filter.addVBucket(vb);
120-
}
121-
state = vbucket_state_pending;
122-
} else {
123-
throw std::invalid_argument(
124-
"ItemPager::run - "
125-
"phase is invalid for hifi_mfu eviction algorithm");
175+
176+
if (replicaEvictionRatio > 0.0) {
177+
filter = filter.filter_union(replicaFilter);
126178
}
127-
auto acceptableVBs = kvBucket->getVBucketsInState(state);
128-
for (auto vb : acceptableVBs) {
129-
filter.addVBucket(vb);
179+
180+
if (activeAndPendingEvictionRatio > 0.0) {
181+
filter = filter.filter_union(activePendingFilter);
130182
}
131183

132-
bool isEphemeral = (cfg.getBucketType() == "ephemeral");
184+
// fetch the configuration; it supplies the item eviction age percentage
// and freq counter age threshold passed to the paging visitor below
185+
const Configuration& cfg = engine.getConfiguration();
133186

134187
auto pv = std::make_unique<PagingVisitor>(
135188
*kvBucket,
136189
stats,
137-
toKill,
190+
EvictionRatios{activeAndPendingEvictionRatio,
191+
replicaEvictionRatio},
138192
available,
139193
ITEM_PAGER,
140194
false,
141-
bias,
142195
filter,
143-
&phase,
144-
isEphemeral,
145196
cfg.getItemEvictionAgePercentage(),
146197
cfg.getItemEvictionFreqCounterAgeThreshold());
147198

@@ -165,6 +216,40 @@ void ItemPager::scheduleNow() {
165216
}
166217
}
167218

219+
/**
220+
* Visitor used to aggregate how much memory could potentially be reclaimed
221+
* by evicting every eligible item from specified vbuckets
222+
*/
223+
class VBucketEvictableMemVisitor : public VBucketVisitor {
224+
public:
225+
explicit VBucketEvictableMemVisitor(const VBucketFilter& filter)
226+
: filter(filter) {
227+
}
228+
229+
void visitBucket(const VBucketPtr& vb) override {
230+
if (!filter.empty() && filter(vb->getId())) {
231+
totalEvictableMemory += vb->getPageableMemUsage();
232+
}
233+
}
234+
235+
size_t getTotalEvictableMemory() const {
236+
return totalEvictableMemory;
237+
}
238+
239+
private:
240+
const VBucketFilter& filter;
241+
size_t totalEvictableMemory = 0;
242+
};
243+
244+
size_t ItemPager::getEvictableBytes(const VBucketFilter& filter) const {
245+
KVBucket* kvBucket = engine.getKVBucket();
246+
247+
VBucketEvictableMemVisitor visitor(filter);
248+
kvBucket->visit(visitor);
249+
250+
return visitor.getTotalEvictableMemory();
251+
}
252+
168253
ExpiredItemPager::ExpiredItemPager(EventuallyPersistentEngine *e,
169254
EPStats &st, size_t stime,
170255
ssize_t taskTime) :
@@ -218,19 +303,15 @@ bool ExpiredItemPager::run(void) {
218303

219304
VBucketFilter filter;
220305
Configuration& cfg = engine->getConfiguration();
221-
bool isEphemeral =
222-
(engine->getConfiguration().getBucketType() == "ephemeral");
223306
auto pv = std::make_unique<PagingVisitor>(
224307
*kvBucket,
225308
stats,
226-
-1,
309+
EvictionRatios{0.0 /* active&pending */,
310+
0.0 /* replica */}, // evict nothing
227311
available,
228312
EXPIRY_PAGER,
229313
true,
230-
1,
231314
filter,
232-
/* pager_phase */ nullptr,
233-
isEphemeral,
234315
cfg.getItemEvictionAgePercentage(),
235316
cfg.getItemEvictionFreqCounterAgeThreshold());
236317

0 commit comments

Comments
 (0)