Skip to content

Commit 39e042f

Browse files
BenHuddlestondaverigby
authored andcommitted
MB-33332: Keep committed prepare in HashTable - Ephemeral
We need to keep the completed prepare in the HashTable so that any subsequent prepares can re-use (or chain if there is a range read) the same OSV to ensure that we have snapshot with unique keys. Change-Id: Idcdc3711fc4b183c28bab112e95285869d1041f1 Reviewed-on: http://review.couchbase.org/109841 Reviewed-by: Dave Rigby <[email protected]> Tested-by: Dave Rigby <[email protected]>
1 parent 7447307 commit 39e042f

13 files changed

+292
-190
lines changed

engines/ep/src/ephemeral_vb.cc

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -392,8 +392,11 @@ EphemeralVBucket::updateStoredValue(const HashTable::HashBucketLock& hbl,
392392
const Item& itm,
393393
const VBQueueItemCtx& queueItmCtx,
394394
bool justTouch) {
395-
// Don't make any update if we know that this is a pending sync write. We
396-
// need to abort before we make any modification to the seqList.
395+
// Don't make any update if we know that this is a pending sync write that
396+
// has not yet been completed. We need to return early before we make any
397+
// modification to the seqList. This is typically handled in the EP case by
398+
// the HashTable functions, but for ephemeral we update the seqList first so
399+
// we need an earlier check.
397400
if (v.isPending()) {
398401
return std::make_tuple(
399402
nullptr, MutationStatus::IsPendingSyncWrite, VBNotifyCtx{});
@@ -403,14 +406,10 @@ EphemeralVBucket::updateStoredValue(const HashTable::HashBucketLock& hbl,
403406
StoredValue* newSv = nullptr;
404407
MutationStatus status(MutationStatus::WasClean);
405408

406-
// If this is a new SyncWrite then we don't actually want to update an
407-
// existing StoredValue, we just want to add the new pending. What about
408-
// if we already have an existing (committed) pending I hear you ask? It
409-
// should be stale and the StaleItemDeleter should remove it when it next
410-
// runs. It has no affect on operations so we can ignore it. Do this before
411-
// we take any locks as addNewStoredValue will attempt to acquire the same
412-
// locks.
413-
if (itm.isPending()) {
409+
// If this is a new SyncWrite then we should just add a new prepare. If we
410+
// have a prepare already (that has been completed) we can replace the
411+
// existing one.
412+
if (itm.isPending() && !v.isCompleted()) {
414413
std::tie(newSv, notifyCtx) =
415414
addNewStoredValue(hbl, itm, queueItmCtx, GenerateRevSeqno::No);
416415
return {newSv, status, notifyCtx};
@@ -669,11 +668,9 @@ VBNotifyCtx EphemeralVBucket::commitStoredValue(
669668
// 1a. If an existing Committed exists, update that with the data from
670669
// the Prepare.
671670
// 1b. If no existing Committed OSV, then create a new Committed and
672-
// populate
671+
// populate
673672
// with data from the Prepare.
674-
// 2. Mark the Prepare as stale in the seqList and remove it from the
675-
// HashTable - this means that the key is no longer locked against
676-
// mutations (ESYNC_WRITE_IN_PROGRESS).
673+
// 2. Mark the prepare as completed
677674

678675
// Look for an existing Committed OSV under this key.
679676
StoredValue* newCommitted;
@@ -712,20 +709,7 @@ VBNotifyCtx EphemeralVBucket::commitStoredValue(
712709
GenerateRevSeqno::No);
713710
}
714711

715-
// Set the OSV to stale so that we can:
716-
// a) Allow the StaleItemPurger to delete the item
717-
// b) Ensure that we delete the OSV on destruction of the seqList as it does
718-
// not exist in the HashTable
719-
std::lock_guard<std::mutex> listWriteLg(seqList->getListWriteLock());
720-
// Remove the prepare from the hash table so that we can now allow mutations
721-
// against the same key
722-
auto release = ht.unlocked_release(values.pending.getHBL(),
723-
values.pending.getSV());
724-
seqList->markItemStale(listWriteLg, std::move(release), nullptr);
725-
// Manually release this ptr (not reset) as we don't want to destruct the
726-
// item in the seqList.
727-
release.release();
728-
712+
values.pending.setCommitted(CommittedState::PrepareCommitted);
729713
return notifyCtx;
730714
}
731715

engines/ep/src/hash_table.cc

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "hash_table.h"
1919

20+
#include "ep_time.h"
2021
#include "item.h"
2122
#include "stats.h"
2223
#include "stored_value_factories.h"
@@ -98,6 +99,11 @@ HashTable::StoredValueProxy::~StoredValueProxy() {
9899
valueStats.epilogue(pre, value);
99100
}
100101

102+
void HashTable::StoredValueProxy::setCommitted(CommittedState state) {
103+
value->setCommitted(state);
104+
value->setCompletedOrDeletedTime(ep_current_time());
105+
}
106+
101107
HashTable::HashTable(EPStats& st,
102108
std::unique_ptr<AbstractStoredValueFactory> svFactory,
103109
size_t initialSize,
@@ -286,7 +292,7 @@ HashTable::FindInnerResult HashTable::findInner(const DocKey& key) {
286292
for (StoredValue* v = values[hbl.getBucketNum()].get().get(); v;
287293
v = v->getNext().get().get()) {
288294
if (v->hasKey(key)) {
289-
if (v->isPending()) {
295+
if (v->isPending() || v->isCompleted()) {
290296
Expects(!foundPend);
291297
foundPend = v;
292298
} else {
@@ -340,42 +346,33 @@ HashTable::UpdateResult HashTable::unlocked_updateStoredValue(
340346
"call on a non-active HT object");
341347
}
342348

343-
switch (v.getCommitted()) {
344-
case CommittedState::Pending:
345-
case CommittedState::PreparedMaybeVisible:
346-
// Cannot update a SV if it's a Pending item.
349+
if (v.isPending()) {
347350
return {MutationStatus::IsPendingSyncWrite, nullptr};
348-
case CommittedState::PrepareAborted:
349-
case CommittedState::PrepareCommitted:
350-
// We shouldn't be trying to use PrepareCompleted states yet
351-
throw std::logic_error(
352-
"HashTable::unlocked_updateStoredValue"
353-
" attempting to update a completed prepare");
354-
case CommittedState::CommittedViaMutation:
355-
case CommittedState::CommittedViaPrepare:
356-
// Logically /can/ update a non-Pending StoredValue with a Pending Item;
357-
// however internally this is implemented as a separate (new)
358-
// StoredValue object for the Pending item.
359-
if (itm.isPending()) {
360-
auto* sv = HashTable::unlocked_addNewStoredValue(hbl, itm);
361-
return {MutationStatus::WasClean, sv};
362-
}
351+
}
363352

364-
// item is not Pending; can directly replace the existing SV.
365-
MutationStatus status = v.isDirty() ? MutationStatus::WasDirty
366-
: MutationStatus::WasClean;
353+
// Logically /can/ update a non-Pending StoredValue with a Pending Item;
354+
// however internally this is implemented as a separate (new)
355+
// StoredValue object for the Pending item. We can replace a completed
356+
// prepare with a new prepare though (so just drop through and update
357+
// normally).
358+
if (itm.isPending() && !v.isCompleted()) {
359+
auto* sv = HashTable::unlocked_addNewStoredValue(hbl, itm);
360+
return {MutationStatus::WasClean, sv};
361+
}
367362

368-
const auto preProps = valueStats.prologue(&v);
363+
// Can directly replace the existing SV.
364+
MutationStatus status =
365+
v.isDirty() ? MutationStatus::WasDirty : MutationStatus::WasClean;
369366

370-
/* setValue() will mark v as undeleted if required */
371-
v.setValue(itm);
372-
updateFreqCounter(v);
367+
const auto preProps = valueStats.prologue(&v);
373368

374-
valueStats.epilogue(preProps, &v);
369+
/* setValue() will mark v as undeleted if required */
370+
v.setValue(itm);
371+
updateFreqCounter(v);
375372

376-
return {status, &v};
377-
}
378-
folly::assume_unreachable();
373+
valueStats.epilogue(preProps, &v);
374+
375+
return {status, &v};
379376
}
380377

381378
StoredValue* HashTable::unlocked_addNewStoredValue(const HashBucketLock& hbl,
@@ -420,7 +417,7 @@ HashTable::Statistics::StoredValueProperties::StoredValueProperties(
420417
isDeleted = sv->isDeleted();
421418
isTempItem = sv->isTempItem();
422419
isSystemItem = sv->getKey().getCollectionID().isSystem();
423-
isPreparedSyncWrite = sv->isPending();
420+
isPreparedSyncWrite = sv->isPending() || sv->isCompleted();
424421
}
425422

426423
HashTable::Statistics::StoredValueProperties HashTable::Statistics::prologue(
@@ -634,28 +631,53 @@ HashTable::FindResult HashTable::findForWrite(const DocKey& key,
634631
WantsDeleted wantsDeleted) {
635632
auto result = findInner(key);
636633

637-
/// Writing using the Pending StoredValue (if found), else committed.
638-
auto* sv = result.pendingSV ? result.pendingSV : result.committedSV;
634+
// We found a prepare. It may have been completed (Ephemeral) though. If it
635+
// has been completed we will return the committed StoredValue.
636+
if (result.pendingSV && !result.pendingSV->isCompleted()) {
637+
// Early return if we found a prepare. We should always return prepares
638+
// regardless of whether or not they are deleted or the caller has asked
639+
// for deleted SVs. For example, consider searching for a SyncDelete, we
640+
// should always return the deleted prepare.
641+
return {result.pendingSV, std::move(result.lock)};
642+
}
639643

640-
if (!sv) {
644+
/// Writing using the Pending StoredValue (if found), else committed.
645+
if (!result.committedSV) {
641646
// No item found - return null.
642647
return {nullptr, std::move(result.lock)};
643648
}
644649

645-
// Early return if we found a prepare. We should always return prepares
646-
// regardless of whether or not they are deleted or the caller has asked for
647-
// deleted SVs. For example, consider searching for a SyncDelete, we should
648-
// always return the deleted prepare.
650+
if (result.committedSV->isDeleted() && wantsDeleted == WantsDeleted::No) {
651+
// Deleted items should only be returned if caller asked for them -
652+
// otherwise return null.
653+
return {nullptr, std::move(result.lock)};
654+
}
655+
return {result.committedSV, std::move(result.lock)};
656+
}
657+
658+
HashTable::FindResult HashTable::findForSyncWrite(const DocKey& key) {
659+
auto result = findInner(key);
660+
649661
if (result.pendingSV) {
650-
return {sv, std::move(result.lock)};
662+
// Early return if we found a prepare. We should always return
663+
// prepares regardless of whether or not they are deleted or the caller
664+
// has asked for deleted SVs. For example, consider searching for a
665+
// SyncDelete, we should always return the deleted prepare. Also,
666+
// we always return completed prepares.
667+
return {result.pendingSV, std::move(result.lock)};
651668
}
652669

653-
if (sv->isDeleted() && wantsDeleted == WantsDeleted::No) {
670+
if (!result.committedSV) {
671+
// No item found - return null.
672+
return {nullptr, std::move(result.lock)};
673+
}
674+
675+
if (result.committedSV->isDeleted()) {
654676
// Deleted items should only be returned if caller asked for them -
655677
// otherwise return null.
656678
return {nullptr, std::move(result.lock)};
657679
}
658-
return {sv, std::move(result.lock)};
680+
return {result.committedSV, std::move(result.lock)};
659681
}
660682

661683
HashTable::StoredValueProxy HashTable::findForWrite(StoredValueProxy::RetSVPTag,

engines/ep/src/hash_table.h

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -714,9 +714,7 @@ class HashTable {
714714
value->setBySeqno(newSeqno);
715715
}
716716

717-
void setCommitted(CommittedState state) {
718-
value->setCommitted(state);
719-
}
717+
void setCommitted(CommittedState state);
720718

721719
private:
722720
HashBucketLock lock;
@@ -746,8 +744,9 @@ class HashTable {
746744
* change shouldn't affect the (old) reference count; the reference count
747745
* will get updated later part of actually changing the item.
748746
*
749-
* Returns the Prepare StoredValue for the key if one exists, otherwise
750-
* returns the Committed StoredValue, or nullptr neither exist.
747+
* Returns the Prepare StoredValue for the key if one exists that has not
748+
* been completed, otherwise returns the Committed StoredValue, or nullptr
749+
* if neither exist.
751750
*
752751
* @param key The key of the item to find
753752
* @param wantsDeleted whether a deleted value should be returned
@@ -770,6 +769,28 @@ class HashTable {
770769
const DocKey& key,
771770
WantsDeleted wantsDeleted = WantsDeleted::Yes);
772771

772+
/**
773+
* Find an item with the specified key for write access.
774+
*
775+
* Does not modify referenced status, as typically the lookup of a SV to
776+
* change shouldn't affect the (old) reference count; the reference count
777+
* will get updated later part of actually changing the item.
778+
*
779+
* Returns the Prepare StoredValue for the key if one exists, otherwise
780+
* returns the Committed StoredValue, or nullptr neither exist. Will return
781+
* completed prepares.
782+
*
783+
* @param key The key of the item to find
784+
* @return A FindResult consisting of:
785+
* - a pointer to a StoredValue -- NULL if not found
786+
* - a (locked) HashBucketLock for the key's hash bucket. Note
787+
* this always returns a locked object; even if the requested key
788+
* doesn't exist. This is to facilitate use-cases where the caller
789+
* subsequently needs to insert a StoredValue for this key, to
790+
* avoid unlocking and re-locking the mutex.
791+
*/
792+
FindResult findForSyncWrite(const DocKey& key);
793+
773794
/**
774795
* @return A pair of StoredValues (pending and prepare) so that we can
775796
* update accordingly when committing a SyncWrite.

engines/ep/src/stored-value.cc

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ std::unique_ptr<Item> StoredValue::toItem(
233233
switch (getCommitted()) {
234234
case CommittedState::Pending:
235235
case CommittedState::PreparedMaybeVisible:
236+
case CommittedState::PrepareAborted:
237+
case CommittedState::PrepareCommitted:
236238
if (!durabilityReqs) {
237239
throw std::logic_error(
238240
"StoredValue::toItemImpl: attempted to create Item from "
@@ -249,13 +251,6 @@ std::unique_ptr<Item> StoredValue::toItem(
249251
case CommittedState::CommittedViaMutation:
250252
// nothing do to.
251253
break;
252-
case CommittedState::PrepareAborted:
253-
case CommittedState::PrepareCommitted:
254-
// We shouldn't be trying to create Item's out of these (for now)
255-
throw std::logic_error(
256-
"StoredValue::toItemImpl: attempt to create"
257-
"Item from completed prepare");
258-
break;
259254
}
260255

261256
return item;
@@ -471,6 +466,15 @@ void StoredValue::incrementAge() {
471466
}
472467
}
473468

469+
void StoredValue::setCompletedOrDeletedTime(rel_time_t time) {
470+
if (isOrdered()) {
471+
return static_cast<OrderedStoredValue*>(this)
472+
->setCompletedOrDeletedTime(time);
473+
} else {
474+
// Do nothing, only applicable to OSV
475+
}
476+
}
477+
474478
std::ostream& operator<<(std::ostream& os, const StoredValue& sv) {
475479

476480
// type, address
@@ -600,7 +604,7 @@ void OrderedStoredValue::setValueImpl(const Item& itm) {
600604
}
601605

602606
void OrderedStoredValue::setCompletedOrDeletedTime(rel_time_t time) {
603-
if (!isDeleted() && !isPending()) {
607+
if (!(isDeleted() || isCompleted())) {
604608
throw std::logic_error(
605609
"OrderedStoredValue::setCompletedOrDeletedTime: Called on "
606610
"Alive item");

engines/ep/src/stored-value.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ class StoredValue {
540540
* @return true if the item is locked
541541
*/
542542
bool isLocked(rel_time_t curtime) const {
543-
if (isDeleted()) {
543+
if (isDeleted() || isCompleted()) {
544544
// Deleted items cannot be locked.
545545
return false;
546546
}
@@ -827,6 +827,21 @@ class StoredValue {
827827
return !isPending();
828828
}
829829

830+
/**
831+
* Returns true if the stored value is Completed (by Abort or Commit).
832+
*/
833+
bool isCompleted() const {
834+
return (getCommitted() == CommittedState::PrepareAborted) ||
835+
(getCommitted() == CommittedState::PrepareCommitted);
836+
}
837+
838+
/**
839+
* Set the time the item was completed or deleted at to the specified time.
840+
*
841+
* Only applicable for an OSV so we do nothing for a normal SV.
842+
*/
843+
void setCompletedOrDeletedTime(rel_time_t time);
844+
830845
protected:
831846
/**
832847
* Constructor - protected as allocation needs to be done via

0 commit comments

Comments
 (0)