Skip to content

Commit 4cfa9e0

Browse files
jameseh96daverigby
authored andcommitted
RocksDB: Persist tombstones in RocksDBKVStore
We cannot map a KVStore level `del` to a RocksDB `Delete` because the deleted item actually needs to remain as a tombstone. This is resolved by implementing RocksDBKVStore::del as a RocksDB `Put`. NB: Until expiration is implemented, tombstones will never be purged. Change-Id: I2cff6f21cde3625f9bd36f06828702432b56b37d Reviewed-on: http://review.couchbase.org/82321 Tested-by: Build Bot <[email protected]> Reviewed-by: Dave Rigby <[email protected]>
1 parent f7b2222 commit 4cfa9e0

File tree

5 files changed

+85
-119
lines changed

5 files changed

+85
-119
lines changed

engines/ep/src/rocksdb-kvstore/rocksdb-kvstore.cc

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,11 @@ void RocksDBKVStore::rollback() {
9999

100100
void RocksDBKVStore::adjustValBuffer(const size_t to) {
101101
// Save room for the flags, exp, etc...
102-
size_t needed((sizeof(uint32_t) * 2) + to);
102+
size_t needed(sizeof(ItemMetaData) +
103+
/* deleted flag */ sizeof(uint8_t) +
104+
/* bySeqno */ sizeof(uint64_t) +
105+
/* datatype */ sizeof(uint8_t) +
106+
/* value len */ sizeof(uint32_t) + to);
103107

104108
if (valBuffer == NULL || valSize < needed) {
105109
void* buf = realloc(valBuffer, needed);
@@ -118,19 +122,26 @@ std::vector<vbucket_state*> RocksDBKVStore::listPersistedVbuckets() {
118122
}
119123

120124
void RocksDBKVStore::set(const Item& itm, Callback<mutation_result>& cb) {
121-
// TODO RDB: Consider using SliceParts to avoid copying if
122-
// possible.
123-
auto k = mkKeyStr(itm.getVBucketId(), itm.getKey());
124-
auto v = mkValSlice(itm);
125-
126-
batch->Put(k, v);
125+
storeItem(itm);
127126

128127
// TODO RDB: This callback should not really be called until
129128
// after the batch is committed.
130129
std::pair<int, bool> p(1, true);
131130
cb.callback(p);
132131
}
133132

133+
void RocksDBKVStore::storeItem(const Item& itm) {
134+
// TODO RDB: Consider using SliceParts to avoid copying if
135+
// possible.
136+
uint16_t vbid = itm.getVBucketId();
137+
auto k = mkKeyStr(vbid, itm.getKey());
138+
auto v = mkValSlice(itm);
139+
140+
// TODO RDB: check status.
141+
rocksdb::Status s = batch->Put(k, v);
142+
cb_assert(s.ok());
143+
}
144+
134145
GetValue RocksDBKVStore::get(const DocKey& key, uint16_t vb, bool fetchDelete) {
135146
return getWithHeader(nullptr, key, vb, GetMetaOnly::No, fetchDelete);
136147
}
@@ -180,8 +191,10 @@ void RocksDBKVStore::reset(uint16_t vbucketId) {
180191
}
181192

182193
void RocksDBKVStore::del(const Item& itm, Callback<int>& cb) {
183-
auto k(mkKeyStr(itm.getVBucketId(), itm.getKey()));
184-
batch->Delete(k);
194+
// TODO RDB: Deleted items remain as tombstones, but are not yet
195+
// expired - they will accumuate forever.
196+
storeItem(itm);
197+
185198
int rv(1);
186199
cb.callback(rv);
187200
}
@@ -262,16 +275,22 @@ rocksdb::Slice RocksDBKVStore::mkValSlice(const Item& item) {
262275
// uint64_t revSeqno ] ItemMetaData
263276
// uint32_t flags ]
264277
// uint32_t exptime ]
278+
// TODO RDB: Wasting a whole byte for deleted
279+
// uint8_t deleted
265280
// uint64_t bySeqno
266281
// uint8_t datatype
267282
// uint32_t value_len
268283
// uint8_t[value_len] value
269-
//
270-
adjustValBuffer(item.size());
284+
285+
adjustValBuffer(item.getNBytes());
271286
char* dest = valBuffer;
272287
std::memcpy(dest, &item.getMetaData(), sizeof(ItemMetaData));
273288
dest += sizeof(ItemMetaData);
274289

290+
uint8_t deleted = item.isDeleted();
291+
std::memcpy(dest, &deleted, sizeof(deleted));
292+
dest += sizeof(deleted);
293+
275294
const int64_t bySeqno{item.getBySeqno()};
276295
std::memcpy(dest, &bySeqno, sizeof(bySeqno));
277296
dest += sizeof(uint64_t);
@@ -283,7 +302,9 @@ rocksdb::Slice RocksDBKVStore::mkValSlice(const Item& item) {
283302
const uint32_t valueLen = item.getNBytes();
284303
std::memcpy(dest, &valueLen, sizeof(valueLen));
285304
dest += sizeof(valueLen);
286-
std::memcpy(dest, item.getValue()->getData(), valueLen);
305+
if (valueLen) {
306+
std::memcpy(dest, item.getValue()->getData(), valueLen);
307+
}
287308
dest += valueLen;
288309

289310
return rocksdb::Slice(valBuffer, dest - valBuffer);
@@ -302,6 +323,10 @@ std::unique_ptr<Item> RocksDBKVStore::grokValSlice(uint16_t vb,
302323
std::memcpy(&meta, src, sizeof(meta));
303324
src += sizeof(meta);
304325

326+
uint8_t deleted;
327+
std::memcpy(&deleted, src, sizeof(deleted));
328+
src += sizeof(deleted);
329+
305330
int64_t bySeqno;
306331
std::memcpy(&bySeqno, src, sizeof(bySeqno));
307332
src += sizeof(bySeqno);
@@ -317,17 +342,23 @@ std::unique_ptr<Item> RocksDBKVStore::grokValSlice(uint16_t vb,
317342
uint8_t extMeta[EXT_META_LEN];
318343
extMeta[0] = datatype;
319344

320-
return std::make_unique<Item>(key,
321-
meta.flags,
322-
meta.exptime,
323-
src,
324-
valueLen,
325-
extMeta,
326-
EXT_META_LEN,
327-
meta.cas,
328-
bySeqno,
329-
vb,
330-
meta.revSeqno);
345+
auto item = std::make_unique<Item>(key,
346+
meta.flags,
347+
meta.exptime,
348+
src,
349+
valueLen,
350+
extMeta,
351+
EXT_META_LEN,
352+
meta.cas,
353+
bySeqno,
354+
vb,
355+
meta.revSeqno);
356+
357+
if (deleted) {
358+
item->setDeleted();
359+
}
360+
361+
return item;
331362
}
332363

333364
GetValue RocksDBKVStore::makeGetValue(uint16_t vb,

engines/ep/src/rocksdb-kvstore/rocksdb-kvstore.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ class RocksDBKVStore : public KVStore {
282282
const DocKey& key,
283283
const std::string& value);
284284

285+
void storeItem(const Item& item);
286+
285287
std::unique_ptr<rocksdb::WriteBatch> batch;
286288
rocksdb::WriteOptions writeOptions;
287289

engines/ep/tests/ep_testsuite.cc

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7420,14 +7420,8 @@ BaseTestCase testsuite_testcases[] = {
74207420
NULL, prepare, cleanup),
74217421
TestCase("test observe single key", test_observe_single_key, test_setup, teardown,
74227422
NULL, prepare, cleanup),
7423-
TestCase("test observe on temp item",
7424-
test_observe_temp_item,
7425-
test_setup,
7426-
teardown,
7427-
NULL,
7428-
/* TODO RDB: Enable after implementing persisted tombstones */
7429-
prepare_skip_broken_under_rocks,
7430-
cleanup),
7423+
TestCase("test observe on temp item", test_observe_temp_item, test_setup, teardown,
7424+
NULL, prepare, cleanup),
74317425
TestCase("test observe multi key", test_observe_multi_key, test_setup, teardown,
74327426
NULL, prepare, cleanup),
74337427
TestCase("test multiple observes", test_multiple_observes, test_setup, teardown,

engines/ep/tests/ep_testsuite_basic.cc

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,14 +2287,8 @@ BaseTestCase testsuite_testcases[] = {
22872287
/* TODO RDB: implement getItemCount */
22882288
prepare_skip_broken_under_rocks,
22892289
cleanup),
2290-
TestCase("delete with value CAS",
2291-
test_delete_with_value_cas,
2292-
test_setup,
2293-
teardown,
2294-
NULL,
2295-
/* TODO RDB: Enable after implementing persisted tombstones */
2296-
prepare_skip_broken_under_rocks,
2297-
cleanup),
2290+
TestCase("delete with value CAS", test_delete_with_value_cas,
2291+
test_setup, teardown, NULL, prepare, cleanup),
22982292
TestCase("set/delete", test_set_delete, test_setup,
22992293
teardown, NULL, prepare, cleanup),
23002294
TestCase("set/delete (invalid cas)", test_set_delete_invalid_cas,

engines/ep/tests/ep_testsuite_xdcr.cc

Lines changed: 25 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2522,24 +2522,12 @@ BaseTestCase testsuite_testcases[] = {
25222522
* isMetaOnly not return value */
25232523
prepare_skip_broken_under_rocks,
25242524
cleanup),
2525-
TestCase("get meta deleted",
2526-
test_get_meta_deleted,
2527-
test_setup,
2528-
teardown,
2529-
NULL,
2530-
/* TODO RDB: Enable after implementing persisted tombstones */
2531-
prepare_skip_broken_under_rocks,
2532-
cleanup),
2525+
TestCase("get meta deleted", test_get_meta_deleted,
2526+
test_setup, teardown, NULL, prepare, cleanup),
25332527
TestCase("get meta nonexistent", test_get_meta_nonexistent,
25342528
test_setup, teardown, NULL, prepare, cleanup),
2535-
TestCase("get meta followed by get",
2536-
test_get_meta_with_get,
2537-
test_setup,
2538-
teardown,
2539-
NULL,
2540-
/* TODO RDB: Enable after implementing persisted tombstones */
2541-
prepare_skip_broken_under_rocks,
2542-
cleanup),
2529+
TestCase("get meta followed by get", test_get_meta_with_get,
2530+
test_setup, teardown, NULL, prepare, cleanup),
25432531
TestCase("get meta followed by set",
25442532
test_get_meta_with_set,
25452533
test_setup,
@@ -2548,14 +2536,8 @@ BaseTestCase testsuite_testcases[] = {
25482536
/* TODO RDB: implement getItemCount */
25492537
prepare_skip_broken_under_rocks,
25502538
cleanup),
2551-
TestCase("get meta followed by delete",
2552-
test_get_meta_with_delete,
2553-
test_setup,
2554-
teardown,
2555-
NULL,
2556-
/* TODO RDB: Enable after implementing persisted tombstones */
2557-
prepare_skip_broken_under_rocks,
2558-
cleanup),
2539+
TestCase("get meta followed by delete", test_get_meta_with_delete,
2540+
test_setup, teardown, NULL, prepare, cleanup),
25592541
TestCase("get meta with xattr",
25602542
test_get_meta_with_xattr,
25612543
test_setup,
@@ -2573,16 +2555,14 @@ BaseTestCase testsuite_testcases[] = {
25732555
test_setup,
25742556
teardown,
25752557
nullptr,
2576-
/* TODO RDB: Enable after implementing persisted tombstones */
2577-
prepare_skip_broken_under_rocks,
2558+
prepare,
25782559
cleanup),
25792560
TestCase("delete with meta nonexistent",
25802561
test_delete_with_meta_nonexistent,
25812562
test_setup,
25822563
teardown,
25832564
nullptr,
2584-
/* TODO RDB: Enable after implementing persisted tombstones */
2585-
prepare_skip_broken_under_rocks,
2565+
prepare,
25862566
cleanup),
25872567
TestCase("delete with meta nonexistent no temp",
25882568
test_delete_with_meta_nonexistent_no_temp,
@@ -2592,21 +2572,11 @@ BaseTestCase testsuite_testcases[] = {
25922572
prepare,
25932573
cleanup),
25942574
TestCase("delete_with_meta race with concurrent delete",
2595-
test_delete_with_meta_race_with_delete,
2596-
test_setup,
2597-
teardown,
2598-
NULL,
2599-
/* TODO RDB: Enable after implementing persisted tombstones */
2600-
prepare_skip_broken_under_rocks,
2601-
cleanup),
2575+
test_delete_with_meta_race_with_delete, test_setup,
2576+
teardown, NULL, prepare, cleanup),
26022577
TestCase("delete_with_meta race with concurrent set",
2603-
test_delete_with_meta_race_with_set,
2604-
test_setup,
2605-
teardown,
2606-
NULL,
2607-
/* TODO RDB: Enable after implementing persisted tombstones */
2608-
prepare_skip_broken_under_rocks,
2609-
cleanup),
2578+
test_delete_with_meta_race_with_set, test_setup,
2579+
teardown, NULL, prepare, cleanup),
26102580
TestCase("set with meta",
26112581
test_set_with_meta,
26122582
test_setup,
@@ -2634,44 +2604,28 @@ BaseTestCase testsuite_testcases[] = {
26342604
prepare_skip_broken_under_rocks,
26352605
cleanup),
26362606
TestCase("set_with_meta race with concurrent set",
2637-
test_set_with_meta_race_with_set,
2638-
test_setup,
2639-
teardown,
2640-
NULL,
2641-
/* TODO RDB: Enable after implementing persisted tombstones */
2642-
prepare_skip_broken_under_rocks,
2643-
cleanup),
2607+
test_set_with_meta_race_with_set, test_setup,
2608+
teardown, NULL, prepare, cleanup),
26442609
TestCase("set_with_meta race with concurrent delete",
2645-
test_set_with_meta_race_with_delete,
2646-
test_setup,
2647-
teardown,
2648-
NULL,
2649-
/* TODO RDB: Enable after implementing persisted tombstones */
2650-
prepare_skip_broken_under_rocks,
2651-
cleanup),
2652-
TestCase("test set_with_meta exp persisted",
2653-
test_exp_persisted_set_del,
2654-
test_setup,
2655-
teardown,
2656-
"exp_pager_stime=3",
2657-
/* TODO RDB: Enable after implementing persisted tombstones */
2658-
prepare_ep_bucket_skip_broken_under_rocks, // Requires persistence
2610+
test_set_with_meta_race_with_delete, test_setup,
2611+
teardown, NULL, prepare, cleanup),
2612+
TestCase("test set_with_meta exp persisted", test_exp_persisted_set_del,
2613+
test_setup, teardown, "exp_pager_stime=3",
2614+
prepare_ep_bucket, // Requires persistence
26592615
cleanup),
26602616
TestCase("test del meta conflict resolution",
26612617
test_del_meta_conflict_resolution,
26622618
test_setup,
26632619
teardown,
26642620
nullptr,
2665-
/* TODO RDB: Enable after implementing persisted tombstones */
2666-
prepare_skip_broken_under_rocks,
2621+
prepare,
26672622
cleanup),
26682623
TestCase("test add meta conflict resolution",
26692624
test_add_meta_conflict_resolution,
26702625
test_setup,
26712626
teardown,
26722627
NULL,
2673-
/* TODO RDB: Enable after implementing persisted tombstones */
2674-
prepare_skip_broken_under_rocks,
2628+
prepare,
26752629
cleanup),
26762630
TestCase("test set meta conflict resolution",
26772631
test_set_meta_conflict_resolution, test_setup, teardown, NULL,
@@ -2681,8 +2635,7 @@ BaseTestCase testsuite_testcases[] = {
26812635
test_setup,
26822636
teardown,
26832637
"conflict_resolution_type=lww",
2684-
/* TODO RDB: Enable after implementing persisted tombstones */
2685-
prepare_skip_broken_under_rocks,
2638+
prepare,
26862639
cleanup),
26872640
TestCase("test set meta lww conflict resolution",
26882641
test_set_meta_lww_conflict_resolution, test_setup, teardown,
@@ -2712,9 +2665,7 @@ BaseTestCase testsuite_testcases[] = {
27122665
test_setup,
27132666
teardown,
27142667
"exp_pager_stime=1",
2715-
/* related to temp items in hash table */
2716-
/* TODO RDB: Enable after implementing persisted tombstones */
2717-
prepare_ep_bucket_skip_broken_under_rocks,
2668+
/* related to temp items in hash table */prepare_ep_bucket,
27182669
cleanup),
27192670
TestCase("test get_meta with item_eviction",
27202671
test_getMeta_with_item_eviction, test_setup, teardown,
@@ -2757,14 +2708,8 @@ BaseTestCase testsuite_testcases[] = {
27572708
test_cas_options_and_nmeta, test_setup, teardown,
27582709
"conflict_resolution_type=seqno",
27592710
prepare, cleanup),
2760-
TestCase("getMetaData mb23905",
2761-
test_get_meta_mb23905,
2762-
test_setup,
2763-
teardown,
2764-
nullptr,
2765-
/* TODO RDB: Enable after implementing persisted tombstones */
2766-
prepare_ep_bucket_skip_broken_under_rocks,
2767-
cleanup),
2711+
TestCase("getMetaData mb23905", test_get_meta_mb23905,
2712+
test_setup, teardown, nullptr, prepare_ep_bucket, cleanup),
27682713
TestCase(NULL, NULL, NULL, NULL, NULL, prepare, cleanup)
27692714
};
27702715

0 commit comments

Comments
 (0)