Skip to content

Commit e0b181e

Browse files
committed
MB-42306 [1/2]: Add onDiskPrepareBytes to vbucket_state
To allow a VBucket to track the space consumed by SyncWrite prepares, add a new onDiskPrepareBytes member variable to vbucket_state. * onDiskPrepareBytes: Total number of bytes that SyncWrite prepares take on-disk (including any compression the KVStore may have applied). This is updated in a similar way to onDiskPrepares (the number of prepares on disk). Change-Id: I836e837ba2d2ad049cfaf606ea2eca8f8bc4b93d Reviewed-on: http://review.couchbase.org/c/kv_engine/+/139272 Well-Formed: Build Bot <[email protected]> Tested-by: Dave Rigby <[email protected]> Reviewed-by: Jim Walker <[email protected]>
1 parent 30a1c9e commit e0b181e

File tree

11 files changed

+299
-44
lines changed

11 files changed

+299
-44
lines changed

engines/ep/src/couch-kvstore/couch-kvstore.cc

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,12 @@ struct kvstats_ctx {
311311
* the persisted VB state before commit
312312
*/
313313
size_t onDiskPrepareDelta = 0;
314+
315+
/**
316+
* Delta of onDiskPrepareBytes that we should add to the value tracked in
317+
* the persisted VB state before commit.
318+
*/
319+
ssize_t onDiskPrepareBytesDelta = 0;
314320
};
315321

316322
CouchKVStore::CouchKVStore(KVStoreConfig& config)
@@ -948,6 +954,7 @@ static int time_purge_hook(Db* d, DocInfo* info, sized_buf item, void* ctx_p) {
948954
}
949955
if (metadata->isPrepare()) {
950956
ctx->stats.preparesPurged++;
957+
ctx->stats.prepareBytesPurged += info->size;
951958
}
952959
return COUCHSTORE_COMPACT_DROP_ITEM;
953960
} else if (info->deleted) {
@@ -998,6 +1005,7 @@ static int time_purge_hook(Db* d, DocInfo* info, sized_buf item, void* ctx_p) {
9981005
if (metadata->isPrepare()) {
9991006
if (info->db_seq <= ctx->highCompletedSeqno) {
10001007
ctx->stats.preparesPurged++;
1008+
ctx->stats.prepareBytesPurged += info->size;
10011009
return COUCHSTORE_COMPACT_DROP_ITEM;
10021010
}
10031011

@@ -1232,6 +1240,8 @@ bool CouchKVStore::compactDBInternal(compaction_ctx* hook_ctx,
12321240
cachedDeleteCount[vbid.get()] = info.deleted_count;
12331241
cachedDocCount[vbid.get()] = info.doc_count;
12341242
state->onDiskPrepares -= hook_ctx->stats.preparesPurged;
1243+
state->updateOnDiskPrepareBytes(
1244+
-int64_t(hook_ctx->stats.prepareBytesPurged));
12351245
}
12361246

12371247
logger.debug("INFO: created new couch db file, name:{} rev:{}",
@@ -1269,7 +1279,8 @@ couchstore_error_t CouchKVStore::compactPrecommitCallback(Db& db,
12691279

12701280
// we need to update the _local/vbstate to update the number of
12711281
// on_disk_prepared to match whatever we purged
1272-
if (ctx.stats.preparesPurged != 0) {
1282+
if ((ctx.stats.preparesPurged != 0) ||
1283+
(ctx.stats.prepareBytesPurged != 0)) {
12731284
LocalDoc* ldoc = nullptr;
12741285
sized_buf id{(char*)"_local/vbstate", sizeof("_local/vbstate") - 1};
12751286
auto err = couchstore_open_local_document(
@@ -1296,12 +1307,27 @@ couchstore_error_t CouchKVStore::compactPrecommitCallback(Db& db,
12961307
}
12971308
couchstore_free_local_document(ldoc);
12981309

1299-
auto iter = json.find("on_disk_prepares");
1300-
// only update if it's there..
1301-
if (iter != json.end()) {
1302-
auto onDiskPrepares = std::stoull(iter->get<std::string>()) -
1310+
bool updateVbState = false;
1311+
1312+
auto prepares = json.find("on_disk_prepares");
1313+
if (prepares != json.end()) {
1314+
auto onDiskPrepares = std::stoull(prepares->get<std::string>()) -
13031315
ctx.stats.preparesPurged;
1304-
json["on_disk_prepares"] = std::to_string(onDiskPrepares);
1316+
*prepares = std::to_string(onDiskPrepares);
1317+
updateVbState = true;
1318+
}
1319+
1320+
auto prepareBytes = json.find("on_disk_prepare_bytes");
1321+
if (prepareBytes != json.end()) {
1322+
auto onDiskPrepareBytes =
1323+
std::stoull(prepareBytes->get<std::string>()) -
1324+
ctx.stats.prepareBytesPurged;
1325+
*prepareBytes = std::to_string(onDiskPrepareBytes);
1326+
updateVbState = true;
1327+
}
1328+
1329+
// only update if at least one of the prepare stats is already there
1330+
if (updateVbState) {
13051331
const auto doc = json.dump();
13061332
LocalDoc newDoc{};
13071333
newDoc.id = id;
@@ -2230,6 +2256,12 @@ static void saveDocsCallback(const DocInfo* oldInfo,
22302256
std::to_string(itemCountDelta));
22312257
}
22322258

2259+
if (newKey.isPrepared()) {
2260+
const ssize_t newSize = newInfo->size;
2261+
const ssize_t oldSize = oldInfo ? oldInfo->size : 0;
2262+
cbCtx->onDiskPrepareBytesDelta += (newSize - oldSize);
2263+
}
2264+
22332265
// Do not need to update high seqno if we are calling this for a prepare and
22342266
// it will error if we do so return early.
22352267
if (!newKey.isCommitted()) {
@@ -2326,6 +2358,7 @@ couchstore_error_t CouchKVStore::saveDocs(Vbid vbid,
23262358
std::placeholders::_2));
23272359

23282360
state->onDiskPrepares += kvctx.onDiskPrepareDelta;
2361+
state->updateOnDiskPrepareBytes(kvctx.onDiskPrepareBytesDelta);
23292362
errCode = saveVBState(db, *state);
23302363
if (errCode != COUCHSTORE_SUCCESS) {
23312364
logger.warn("CouchKVStore::saveDocs: saveVBState error:{} [{}]",

engines/ep/src/ep_bucket.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1111,14 +1111,15 @@ void EPBucket::compactInternal(const CompactionConfig& config,
11111111

11121112
EP_LOG_INFO(
11131113
"Compaction of {} done ({}). "
1114-
"purged tombstones:{}, prepares:{}, "
1114+
"purged tombstones:{}, prepares:{}, prepareBytes:{} "
11151115
"collection_items_erased:alive:{},deleted:{}, "
11161116
"size/items/tombstones/purge_seqno pre{{{}, {}, {}, {}}}, "
11171117
"post{{{}, {}, {}, {}}}",
11181118
config.db_file_id,
11191119
result ? "ok" : "failed",
11201120
ctx.stats.tombstonesPurged,
11211121
ctx.stats.preparesPurged,
1122+
ctx.stats.prepareBytesPurged,
11221123
ctx.stats.collectionsItemsPurged,
11231124
ctx.stats.collectionsDeletedItemsPurged,
11241125
ctx.stats.pre.size,

engines/ep/src/kvstore.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ bool KVStore::updateCachedVBState(Vbid vbid, const vbucket_state& newState) {
198198
newState.highPreparedSeqno;
199199
vbState->maxVisibleSeqno = newState.maxVisibleSeqno;
200200
vbState->onDiskPrepares = newState.onDiskPrepares;
201+
vbState->setOnDiskPrepareBytes(newState.getOnDiskPrepareBytes());
201202
} else {
202203
state_change_detected = false;
203204
}

engines/ep/src/kvstore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ struct CompactionStats {
103103
size_t collectionsDeletedItemsPurged = 0;
104104
uint64_t tombstonesPurged = 0;
105105
uint64_t preparesPurged = 0;
106+
uint64_t prepareBytesPurged = 0;
106107
FileInfo pre;
107108
FileInfo post;
108109
};

engines/ep/src/magma-kvstore/magma-kvstore.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,6 +1435,7 @@ int MagmaKVStore::saveDocs(Collections::VB::Flush& collectionsFlush,
14351435
if (vbstate) {
14361436
vbstate->highSeqno = lastSeqno;
14371437
vbstate->onDiskPrepares += kvctx.onDiskPrepareDelta;
1438+
vbstate->onDiskPrepareBytes += kvctx.onDiskPrepareBytesDelta;
14381439

14391440
auto magmaInfo = cachedMagmaInfo[vbid.get()].get();
14401441
magmaInfo->docCount += ninserts - ndeletes;
@@ -2162,6 +2163,8 @@ bool MagmaKVStore::compactDB(compaction_ctx* ctx) {
21622163
{
21632164
std::lock_guard<std::shared_timed_mutex> lock(kvHandle->vbstateMutex);
21642165
cachedVBStates[vbid.get()]->onDiskPrepares -= ctx->stats.preparesPurged;
2166+
cachedVBStates[vbid.get()]->onDiskPrepareBytes -=
2167+
ctx->stats.prepareBytesPurged;
21652168
cachedVBStates[vbid.get()]->purgeSeqno = ctx->max_purged_seq;
21662169
cachedMagmaInfo[vbid.get()]->docCount -=
21672170
ctx->stats.collectionsItemsPurged + ctx->stats.preparesPurged;

engines/ep/src/vbucket_state.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ void vbucket_state::reset() {
9999
highPreparedSeqno = 0;
100100
maxVisibleSeqno = 0;
101101
onDiskPrepares = 0;
102+
onDiskPrepareBytes = 0;
102103
transition = vbucket_transition_state{};
103104
}
104105

@@ -120,6 +121,7 @@ bool vbucket_state::operator==(const vbucket_state& other) const {
120121
other.highPreparedSeqno);
121122
rv = rv && (maxVisibleSeqno == other.maxVisibleSeqno);
122123
rv = rv && (onDiskPrepares == other.onDiskPrepares);
124+
rv = rv && (onDiskPrepareBytes == other.onDiskPrepareBytes);
123125
rv = rv && (checkpointType == other.checkpointType);
124126
rv = rv && (transition == other.transition);
125127
return rv;
@@ -129,6 +131,14 @@ bool vbucket_state::operator!=(const vbucket_state& other) const {
129131
return !(*this == other);
130132
}
131133

134+
void vbucket_state::updateOnDiskPrepareBytes(int64_t delta) {
135+
// Note: onDiskPrepareBytes was only added in 6.6.1 (where it is
136+
// initialized to zero). As such, if using files created before
137+
// then, we need to ensure that onDiskPrepareBytes is capped at zero
138+
// and doesn't underflow.
139+
onDiskPrepareBytes += std::max(-int64_t(onDiskPrepareBytes), delta);
140+
}
141+
132142
void to_json(nlohmann::json& json, const vbucket_state& vbs) {
133143
// First add all required fields.
134144
// Note that integers are stored as strings to avoid any undesired
@@ -152,6 +162,8 @@ void to_json(nlohmann::json& json, const vbucket_state& vbs) {
152162
{"high_prepared_seqno", std::to_string(vbs.highPreparedSeqno)},
153163
{"max_visible_seqno", std::to_string(vbs.maxVisibleSeqno)},
154164
{"on_disk_prepares", std::to_string(vbs.onDiskPrepares)},
165+
{"on_disk_prepare_bytes",
166+
std::to_string(vbs.getOnDiskPrepareBytes())},
155167
{"checkpoint_type", to_string(vbs.checkpointType)}};
156168

157169
to_json(json, vbs.transition);
@@ -238,6 +250,10 @@ void from_json(const nlohmann::json& j, vbucket_state& vbs) {
238250
// Note: We don't track on disk prepares pre-6.5
239251
vbs.onDiskPrepares = std::stoll(j.value("on_disk_prepares", "0"));
240252

253+
// Note: We don't track on disk prepare total size pre-6.6.1.
254+
vbs.setOnDiskPrepareBytes(
255+
std::stoll(j.value("on_disk_prepare_bytes", "0")));
256+
241257
// Note: We don't track checkpoint type pre-6.5
242258
auto checkpointType = j.find("checkpoint_type");
243259
if (checkpointType != j.end()) {

engines/ep/src/vbucket_state.h

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,26 @@ struct vbucket_state {
8989
* v3: Mad-Hatter. high_completed_seqno and high_prepared_seqno added along
9090
* with counter for number of prepares on disk. Checkpoint-ID no longer
9191
* stored (and ignored during upgrade)
92+
* v4: 6.6.1, added with MB-32670. Adds onDiskPrepareBytes.
9293
*/
93-
static constexpr int CurrentVersion = 3;
94+
static constexpr int CurrentVersion = 4;
9495

9596
bool needsToBePersisted(const vbucket_state& vbstate);
9697

98+
uint64_t getOnDiskPrepareBytes() const {
99+
return onDiskPrepareBytes;
100+
}
101+
102+
void setOnDiskPrepareBytes(int64_t value) {
103+
onDiskPrepareBytes = value;
104+
}
105+
106+
/**
107+
* Apply the given delta to the value of onDiskPrepareBytes. Clamps
108+
* onDiskPrepareBytes at zero (avoids underflow).
109+
*/
110+
void updateOnDiskPrepareBytes(int64_t delta);
111+
97112
void reset();
98113

99114
bool operator==(const vbucket_state& other) const;
@@ -186,6 +201,19 @@ struct vbucket_state {
186201
*/
187202
uint64_t onDiskPrepares = 0;
188203

204+
private:
205+
/**
206+
* Size in bytes of on disk prepares (Pending SyncWrites). Required to
207+
* estimate the size of completed prepares as part of calculating the
208+
* couchstore 'stale' data overhead.
209+
* Note this is private as care needs to be taken when modifying it to
210+
* ensure it doesn't underflow.
211+
*
212+
* Added for SyncReplication (MB-42306) in 6.6.1.
213+
*/
214+
uint64_t onDiskPrepareBytes = 0;
215+
216+
public:
189217
/**
190218
* The type of the most recently persisted snapshot disk. Required as we
191219
* cannot rely on the HCS in the middle of a snapshot to optimise warmup and

0 commit comments

Comments
 (0)