Skip to content

Commit 1c08303

Browse files
jimwwalkerdaverigby
authored andcommitted
Refactor: Capture vbucket id in DurabilityMonitor throw messages
It would be useful to capture the vbucket id in throws messages to assist debugging issues, for example the following exception is seen in MB-34956 "ActiveDurabilityMonitor::processSeqnoAck: seqno(20371) is greater than lastTrackedSeqno(0)" Having the vbucket of that ADM could be useful. This patch lifts throwException from the collection's VB::Manifest code and modifies it for use in ADM/PDM. The code attempts to avoid the use of any non-standard macros, so __func__ is used over __FUNCTION__ or __PRETTY_FUNCTION. With this change the above throw message now becomes (if vb was 67) "ActiveDurabilityMonitor::processSeqnoAck: vb:67 seqno(20371) is greater than lastTrackedSeqno(0)" Change-Id: I53268f82899a1db3f51db1efa45b7dcd8dc29b9e Reviewed-on: http://review.couchbase.org/111860 Reviewed-by: Dave Rigby <[email protected]> Tested-by: Build Bot <[email protected]>
1 parent 56f74b7 commit 1c08303

File tree

4 files changed

+107
-68
lines changed

4 files changed

+107
-68
lines changed

engines/ep/src/durability/active_durability_monitor.cc

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,18 @@ struct ActiveDurabilityMonitor::State {
206206
const std::string& node,
207207
bool shouldAck);
208208

209+
/**
210+
* throw exception with the following error string:
211+
* "ActiveDurabilityMonitor::State::<thrower>:<error> vb:x"
212+
*
213+
* @param thrower a string for who is throwing, typically __func__
214+
* @param error a string containing the error and any useful data
215+
* @throws exception
216+
*/
217+
template <class exception>
218+
[[noreturn]] void throwException(const std::string& thrower,
219+
const std::string& error) const;
220+
209221
public:
210222
/// The container of pending Prepares.
211223
Container trackedWrites;
@@ -383,16 +395,12 @@ void ActiveDurabilityMonitor::setReplicationTopology(
383395
Expects(!topology.is_null());
384396

385397
if (!topology.is_array()) {
386-
throw std::invalid_argument(
387-
"ActiveDurabilityMonitor::setReplicationTopology: Topology is "
388-
"not an "
389-
"array");
398+
throwException<std::invalid_argument>(__func__,
399+
"Topology is not an array");
390400
}
391401

392402
if (topology.size() == 0) {
393-
throw std::invalid_argument(
394-
"ActiveDurabilityMonitor::setReplicationTopology: Topology is "
395-
"empty");
403+
throwException<std::invalid_argument>(__func__, "Topology is empty");
396404
}
397405

398406
// Setting the replication topology also resets the topology in all
@@ -442,17 +450,15 @@ void ActiveDurabilityMonitor::addSyncWrite(const void* cookie,
442450
auto durReq = item->getDurabilityReqs();
443451

444452
if (durReq.getLevel() == cb::durability::Level::None) {
445-
throw std::invalid_argument(
446-
"ActiveDurabilityMonitor::addSyncWrite: Level::None");
453+
throwException<std::invalid_argument>(__func__, "Level::None");
447454
}
448455

449456
// The caller must have already checked this and returned a proper error
450457
// before executing down here. Here we enforce it again for defending from
451458
// unexpected races between VBucket::setState (which sets the replication
452459
// topology).
453460
if (!isDurabilityPossible()) {
454-
throw std::logic_error(
455-
"ActiveDurabilityMonitor::addSyncWrite: Impossible");
461+
throwException<std::logic_error>(__func__, "Impossible");
456462
}
457463

458464
state.wlock()->addSyncWrite(cookie, std::move(item));
@@ -504,9 +510,9 @@ void ActiveDurabilityMonitor::processTimeout(
504510
std::chrono::steady_clock::time_point asOf) {
505511
// @todo: Add support for DurabilityMonitor at Replica
506512
if (vb.getState() != vbucket_state_active) {
507-
throw std::logic_error("ActiveDurabilityMonitor::processTimeout: " +
508-
vb.getId().to_string() + " state is: " +
509-
VBucket::toString(vb.getState()));
513+
throwException<std::logic_error>(
514+
__func__,
515+
"state is: " + std::string(VBucket::toString(vb.getState())));
510516
}
511517

512518
// Identify SyncWrites which can be timed out as of this time point
@@ -688,10 +694,9 @@ ActiveDurabilityMonitor::State::advanceNodePosition(const std::string& node) {
688694
auto firstChainFound = firstChainItr != firstChain->positions.end();
689695
if (!firstChainFound && !secondChain) {
690696
// Attempting to advance for a node we don't know about, panic
691-
throw std::logic_error(
692-
"ActiveDurabilityMonitor::State::advanceNodePosition: "
693-
"Attempting to advance positions for an invalid node " +
694-
node);
697+
throwException<std::logic_error>(
698+
__func__,
699+
"Attempting to advance positions for an invalid node " + node);
695700
}
696701

697702
std::unordered_map<std::string, Position>::iterator secondChainItr;
@@ -700,10 +705,11 @@ ActiveDurabilityMonitor::State::advanceNodePosition(const std::string& node) {
700705
secondChainItr = secondChain->positions.find(node);
701706
secondChainFound = secondChainItr != secondChain->positions.end();
702707
if (!firstChainFound && !secondChainFound) {
703-
throw std::logic_error(
704-
"ActiveDurabilityMonitor::State::advanceNodePosition "
708+
throwException<std::logic_error>(
709+
__func__,
705710
"Attempting to advance positions for an invalid node " +
706-
node + ". Node is not in firstChain or secondChain");
711+
node +
712+
". Node is not in firstChain or secondChain");
707713
}
708714
}
709715

@@ -829,10 +835,8 @@ int64_t ActiveDurabilityMonitor::State::getNodeWriteSeqno(
829835
}
830836
}
831837

832-
throw std::invalid_argument(
833-
"ActiveDurabilityMonitor::State::getNodeWriteSeqno: "
834-
"Node " +
835-
node + " not found");
838+
throwException<std::invalid_argument>(__func__,
839+
"Node " + node + " not found");
836840
}
837841

838842
int64_t ActiveDurabilityMonitor::State::getNodeAckSeqno(
@@ -850,17 +854,14 @@ int64_t ActiveDurabilityMonitor::State::getNodeAckSeqno(
850854
}
851855
}
852856

853-
throw std::invalid_argument(
854-
"ActiveDurabilityMonitor::State::getNodeAckSeqno: "
855-
"Node " +
856-
node + " not found");
857+
throwException<std::invalid_argument>(__func__,
858+
"Node " + node + " not found");
857859
}
858860

859861
DurabilityMonitor::SyncWrite ActiveDurabilityMonitor::State::removeSyncWrite(
860862
Container::iterator it) {
861863
if (it == trackedWrites.end()) {
862-
throw std::logic_error(
863-
"ActiveDurabilityMonitor::commit: Position points to end");
864+
throwException<std::logic_error>(__func__, "Position points to end");
864865
}
865866

866867
Container::iterator prev;
@@ -923,10 +924,8 @@ void ActiveDurabilityMonitor::commit(const SyncWrite& sw) {
923924
vb.lockCollections(key),
924925
sw.getCookie());
925926
if (result != ENGINE_SUCCESS) {
926-
throw std::logic_error(
927-
"ActiveDurabilityMonitor::commit: VBucket::commit failed with "
928-
"status:" +
929-
std::to_string(result));
927+
throwException<std::logic_error>(
928+
__func__, "failed with status:" + std::to_string(result));
930929
}
931930

932931
// Record the duration of the SyncWrite in histogram.
@@ -966,10 +965,8 @@ void ActiveDurabilityMonitor::abort(const SyncWrite& sw) {
966965
vb.lockCollections(key),
967966
sw.getCookie());
968967
if (result != ENGINE_SUCCESS) {
969-
throw std::logic_error(
970-
"ActiveDurabilityMonitor::abort: VBucket::abort failed with "
971-
"status:" +
972-
std::to_string(result));
968+
throwException<std::logic_error>(
969+
__func__, "failed with status:" + std::to_string(result));
973970
}
974971
auto s = state.wlock();
975972
s->lastAbortedSeqno = sw.getBySeqno();
@@ -995,15 +992,14 @@ void ActiveDurabilityMonitor::State::processSeqnoAck(const std::string& node,
995992
int64_t seqno,
996993
CompletedQueue& toCommit) {
997994
if (!firstChain) {
998-
throw std::logic_error(
999-
"ActiveDurabilityMonitor::processSeqnoAck: FirstChain not "
1000-
"set");
995+
throwException<std::logic_error>(__func__, "FirstChain not set");
1001996
}
1002997
if (seqno > lastTrackedSeqno) {
1003-
throw std::invalid_argument(
1004-
"ActiveDurabilityMonitor::processSeqnoAck: seqno(" +
1005-
std::to_string(seqno) + ") is greater than lastTrackedSeqno(" +
1006-
std::to_string(lastTrackedSeqno) + ")");
998+
throwException<std::invalid_argument>(
999+
__func__,
1000+
"seqno(" + std::to_string(seqno) +
1001+
") is greater than lastTrackedSeqno(" +
1002+
std::to_string(lastTrackedSeqno) + ")");
10071003
}
10081004

10091005
// We should never ack for the active
@@ -1122,17 +1118,19 @@ ActiveDurabilityMonitor::State::makeChain(
11221118
auto firstChainItr = firstChain->positions.find(firstChain->active);
11231119
if (firstChainItr == firstChain->positions.end()) {
11241120
// Sanity - we should never make a chain in this state
1125-
throw std::logic_error(
1126-
"ADM::State::makeChain did not find the "
1121+
throwException<std::logic_error>(
1122+
__func__,
1123+
"did not find the "
11271124
"active node for the first chain in the "
11281125
"first chain.");
11291126
}
11301127

11311128
auto newChainItr = ptr->positions.find(ptr->active);
11321129
if (newChainItr == ptr->positions.end()) {
11331130
// Sanity - we should never make a chain in this state
1134-
throw std::logic_error(
1135-
"ADM::State::makeChain did not find the "
1131+
throwException<std::logic_error>(
1132+
__func__,
1133+
"did not find the "
11361134
"active node for the first chain in the "
11371135
"new chain.");
11381136
}
@@ -1163,9 +1161,8 @@ void ActiveDurabilityMonitor::State::setReplicationTopology(
11631161
if (topology.size() > 1) {
11641162
if (topology.size() > 2) {
11651163
// Too many chains specified
1166-
throw std::invalid_argument(
1167-
"ActiveDurabilityMonitor::State::setReplicationTopology: "
1168-
"Too many chains specified");
1164+
throwException<std::invalid_argument>(__func__,
1165+
"Too many chains specified");
11691166
}
11701167

11711168
auto& sChain = topology.at(1);
@@ -1383,3 +1380,17 @@ void ActiveDurabilityMonitor::checkForCommit() {
13831380

13841381
processCompletedSyncWriteQueue();
13851382
}
1383+
1384+
template <class exception>
1385+
[[noreturn]] void ActiveDurabilityMonitor::State::throwException(
1386+
const std::string& thrower, const std::string& error) const {
1387+
throw exception("ActiveDurabilityMonitor::State::" + thrower + " " +
1388+
adm.vb.getId().to_string() + " " + error);
1389+
}
1390+
1391+
template <class exception>
1392+
[[noreturn]] void ActiveDurabilityMonitor::throwException(
1393+
const std::string& thrower, const std::string& error) const {
1394+
throw exception("ActiveDurabilityMonitor::" + thrower + " " +
1395+
vb.getId().to_string() + " " + error);
1396+
}

engines/ep/src/durability/active_durability_monitor.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,18 @@ class ActiveDurabilityMonitor : public DurabilityMonitor {
218218
protected:
219219
void toOStream(std::ostream& os) const override;
220220

221+
/**
222+
* throw exception with the following error string:
223+
* "ActiveDurabilityMonitor::<thrower>:<error> vb:x"
224+
*
225+
* @param thrower a string for who is throwing, typically __func__
226+
* @param error a string containing the error and any useful data
227+
* @throws exception
228+
*/
229+
template <class exception>
230+
[[noreturn]] void throwException(const std::string& thrower,
231+
const std::string& error) const;
232+
221233
/**
222234
* Commit the given SyncWrite.
223235
*

engines/ep/src/durability/passive_durability_monitor.cc

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,13 @@ void PassiveDurabilityMonitor::addSyncWrite(queued_item item) {
9090
auto durReq = item->getDurabilityReqs();
9191

9292
if (durReq.getLevel() == cb::durability::Level::None) {
93-
throw std::invalid_argument(
94-
"PassiveDurabilityMonitor::addSyncWrite: Level::None");
93+
throwException<std::invalid_argument>(__func__, "Level::None");
9594
}
9695
if (durReq.getTimeout().isDefault()) {
97-
throw std::invalid_argument(
98-
"PassiveDurabilityMonitor::addSyncWrite: timeout is default "
99-
"(explicit value should have been specified by Active node)");
96+
throwException<std::invalid_argument>(
97+
__func__,
98+
"timeout is default (explicit value should have been specified "
99+
"by Active node)");
100100
}
101101

102102
// Need to specify defaultTimeout for SyncWrite ctor, but we've already
@@ -187,19 +187,19 @@ void PassiveDurabilityMonitor::completeSyncWrite(const StoredDocKey& key,
187187
auto s = state.wlock();
188188

189189
if (s->trackedWrites.empty()) {
190-
throw std::logic_error(
191-
"PassiveDurabilityMonitor::resolvePrepare: No tracked, but "
192-
"received " +
193-
to_string(res) + " for key " + key.to_string());
190+
throwException<std::logic_error>(__func__,
191+
"No tracked, but received " +
192+
to_string(res) + " for key " +
193+
key.to_string());
194194
}
195195

196196
const auto next = s->getIteratorNext(s->highCompletedSeqno.it);
197197

198198
if (next == s->trackedWrites.end()) {
199-
throw std::logic_error(
200-
"PassiveDurabilityMonitor::resolvePrepare: No Prepare waiting "
201-
"for completion, but received " +
202-
to_string(res) + " for key " + key.to_string());
199+
throwException<std::logic_error>(
200+
__func__,
201+
"No Prepare waiting for completion, but received " +
202+
to_string(res) + " for key " + key.to_string());
203203
}
204204

205205
// Sanity check for In-Order Commit
@@ -208,8 +208,7 @@ void PassiveDurabilityMonitor::completeSyncWrite(const StoredDocKey& key,
208208
ss << "Pending resolution for '" << *next
209209
<< "', but received unexpected " + to_string(res) + " for key "
210210
<< key;
211-
throw std::logic_error("PassiveDurabilityMonitor::resolvePrepare: " +
212-
ss.str());
211+
throwException<std::logic_error>(__func__, "" + ss.str());
213212
}
214213

215214
// Note: Update last-write-seqno first to enforce monotonicity and
@@ -439,3 +438,9 @@ void PassiveDurabilityMonitor::State::checkForAndRemovePrepares() {
439438
it = next;
440439
}
441440
}
441+
442+
template <class exception>
443+
[[noreturn]] void PassiveDurabilityMonitor::throwException(
444+
const std::string& thrower, const std::string& error) const {
445+
throw exception(thrower + error + vb.getId().to_string());
446+
}

engines/ep/src/durability/passive_durability_monitor.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,17 @@ class PassiveDurabilityMonitor : public DurabilityMonitor {
122122

123123
protected:
124124
void toOStream(std::ostream& os) const override;
125+
/**
126+
* throw exception with the following error string:
127+
* "<thrower>:<error> vb:x"
128+
*
129+
* @param thrower a string for who is throwing, typically __FUNCTION__
130+
* @param error a string containing the error and any useful data
131+
* @throws exception
132+
*/
133+
template <class exception>
134+
[[noreturn]] void throwException(const std::string& thrower,
135+
const std::string& error) const;
125136

126137
// The VBucket owning this DurabilityMonitor instance
127138
VBucket& vb;

0 commit comments

Comments
 (0)