Skip to content

Commit 2513928

Browse files
owend74daverigby
authored andcommitted
[BP] MB-25798: Re-register a dropped cursor if we don't backfill
After dropping a cursor, when scheduleBackfill_UNLOCKED is called but the backfill task does not need to be scheduled, it means the cursor is not re-registered in markDiskSnapshot. Therefore the cursor must therefore be re-registered from within scheduleBackfill_UNLOCKED. Change-Id: I4a643aed902316e0753555564b8bfd9b56291efe Reviewed-on: http://review.couchbase.org/83531 Well-Formed: Build Bot <[email protected]> Tested-by: Build Bot <[email protected]> Reviewed-by: Dave Rigby <[email protected]>
1 parent 0286542 commit 2513928

File tree

5 files changed

+144
-27
lines changed

5 files changed

+144
-27
lines changed

src/checkpoint.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -678,9 +678,9 @@ CursorRegResult CheckpointManager::registerCursorBySeqno(
678678
* number we are looking for is higher than anything currently assigned
679679
* and there is already an assert above for this case.
680680
*/
681-
LOG(EXTENSION_LOG_WARNING, "Cursor not registered into vb %d "
682-
" for stream '%s' because seqno %" PRIu64 " is too high",
683-
vbucketId, name.c_str(), startBySeqno);
681+
throw std::logic_error(
682+
"CheckpointManager::registerCursorBySeqno the sequences number "
683+
"is higher than anything currently assigned");
684684
}
685685
return result;
686686
}

src/dcp/stream.cc

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ Stream::~Stream() {
6262
clear_UNLOCKED();
6363
}
6464

65+
bool Stream::isBackfilling() const {
66+
return state_.load() == STREAM_BACKFILLING;
67+
}
68+
69+
bool Stream::isInMemory() const {
70+
return state_.load() == STREAM_IN_MEMORY;
71+
}
72+
6573
void Stream::clear_UNLOCKED() {
6674
while (!readyQ.empty()) {
6775
DcpResponse* resp = readyQ.front();
@@ -325,12 +333,21 @@ void ActiveStream::markDiskSnapshot(uint64_t startSeqno, uint64_t endSeqno) {
325333
lastSentSnapEndSeqno.store(endSeqno, std::memory_order_relaxed);
326334

327335
if (!(flags_ & DCP_ADD_STREAM_FLAG_DISKONLY)) {
328-
// Only re-register the cursor if we still need to get memory snapshots
329-
CursorRegResult result =
330-
vb->checkpointManager.registerCursorBySeqno(
331-
name_, chkCursorSeqno,
332-
MustSendCheckpointEnd::NO);
333-
curChkSeqno = result.first;
336+
// Only re-register the cursor if we still need to get memory
337+
// snapshots
338+
try {
339+
CursorRegResult result =
340+
vb->checkpointManager.registerCursorBySeqno(
341+
name_, chkCursorSeqno,
342+
MustSendCheckpointEnd::NO);
343+
344+
curChkSeqno = result.first;
345+
} catch(std::exception& error) {
346+
producer->getLogger().log(EXTENSION_LOG_WARNING,
347+
"(vb %" PRIu16 ") Failed to register cursor: %s",
348+
vb_, error.what());
349+
endStream(END_STREAM_STATE);
350+
}
334351
}
335352

336353
lh.unlock();
@@ -1024,13 +1041,17 @@ void ActiveStream::scheduleBackfill_UNLOCKED(bool reschedule) {
10241041
}
10251042
tryBackfill = true;
10261043
} else {
1027-
CursorRegResult result =
1028-
vbucket->checkpointManager.registerCursorBySeqno(
1029-
name_,
1030-
lastReadSeqno.load(),
1031-
MustSendCheckpointEnd::NO);
1032-
curChkSeqno = result.first;
1033-
tryBackfill = result.second;
1044+
try {
1045+
std::tie(curChkSeqno, tryBackfill) =
1046+
vbucket->checkpointManager.registerCursorBySeqno(
1047+
name_, lastReadSeqno.load(),
1048+
MustSendCheckpointEnd::NO);
1049+
} catch(std::exception& error) {
1050+
producer->getLogger().log(EXTENSION_LOG_WARNING,
1051+
"(vb %" PRIu16 ") Failed to register "
1052+
"cursor: %s", vb_, error.what());
1053+
endStream(END_STREAM_STATE);
1054+
}
10341055

10351056
if (lastReadSeqno.load() > curChkSeqno) {
10361057
throw std::logic_error("ActiveStream::scheduleBackfill_UNLOCKED: "
@@ -1088,6 +1109,30 @@ void ActiveStream::scheduleBackfill_UNLOCKED(bool reschedule) {
10881109
lastReadSeqno.load(),
10891110
lastSentSeqno.load(), curChkSeqno.load(),
10901111
itemsReady ? "True" : "False");
1112+
1113+
/* Cursor was dropped, but we will not do backfill.
1114+
* This may happen in a corner case where, the memory usage is high
1115+
* due to other vbuckets and persistence cursor moves ahead of
1116+
* replication cursor to new checkpoint open but does not persist
1117+
* items yet.
1118+
*
1119+
* Because we dropped the cursor but did not do a backfill (and
1120+
* therefore did not re-register a cursor in markDiskSnapshot) we
1121+
* must re-register the cursor here.
1122+
*/
1123+
try {
1124+
CursorRegResult result =
1125+
vbucket->checkpointManager.registerCursorBySeqno(
1126+
name_, lastReadSeqno.load(),
1127+
MustSendCheckpointEnd::NO);
1128+
1129+
curChkSeqno = result.first;
1130+
} catch (std::exception& error) {
1131+
producer->getLogger().log(EXTENSION_LOG_WARNING,
1132+
"(vb %" PRIu16 ") Failed to register "
1133+
"cursor: %s", vb_, error.what());
1134+
endStream(END_STREAM_STATE);
1135+
}
10911136
}
10921137
if (flags_ & DCP_ADD_STREAM_FLAG_DISKONLY) {
10931138
endStream(END_STREAM_OK);
@@ -1097,17 +1142,15 @@ void ActiveStream::scheduleBackfill_UNLOCKED(bool reschedule) {
10971142
transitionState(STREAM_IN_MEMORY);
10981143
}
10991144
if (reschedule) {
1100-
/* Cursor was dropped, but we will not do backfill.
1101-
This may happen in a corner case where, the memory
1102-
usage is high due to other vbuckets and persistence cursor moves
1103-
ahead of replication cursor to new checkpoint open but does not
1104-
persist items yet.
1105-
Note: (1) We must not notify when we schedule backfill for the
1106-
first time because the stream is not yet in producer
1107-
conn list of streams
1108-
(2) It is not absolutely necessary to notify immediately
1109-
as conn manager or an incoming items will cause a
1110-
notification eventually, but wouldn't hurt to do so */
1145+
/*
1146+
* It is not absolutely necessary to notify immediately as conn
1147+
* manager or an incoming item will cause a notification eventually,
1148+
* but wouldn't hurt to do so.
1149+
*
1150+
* Note: must not notify when we schedule a backfill for the first
1151+
* time (i.e. when reschedule is false) because the stream is not
1152+
* yet in producer conn list of streams.
1153+
*/
11111154
bool inverse = false;
11121155
if (itemsReady.compare_exchange_strong(inverse, true)) {
11131156
producer->notifyStreamReady(vb_);

src/dcp/stream.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@ class Stream : public RCValue {
133133
return state_ != STREAM_DEAD;
134134
}
135135

136+
/// @Returns true if state_ is Backfilling
137+
bool isBackfilling() const;
138+
139+
/// @Returns true if state_ is InMemory
140+
bool isInMemory() const;
141+
136142
void clear() {
137143
LockHolder lh(streamMutex);
138144
clear_UNLOCKED();

tests/mock/mock_stream.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,8 @@ class MockActiveStream : public ActiveStream {
8686
bool public_getPendingBackfill() const {
8787
return pendingBackfill;
8888
}
89+
90+
void transitionStateToBackfilling() {
91+
transitionState(STREAM_BACKFILLING);
92+
}
8993
};

tests/module_tests/evp_store_single_threaded_test.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,70 @@ class SingleThreadedEPStoreTest : public EventuallyPersistentStoreTest {
119119
SingleThreadedExecutorPool* task_executor;
120120
};
121121

122+
/**
123+
* The following test checks to see if a cursor is re-registered after it is
124+
* dropped in handleSlowStream. In particular the test is for when
125+
* scheduleBackfill_UNLOCKED is called however the backfill task does not need
126+
* to be scheduled and therefore the cursor is not re-registered in
127+
* markDiskSnapshot. The cursor must therefore be registered from within
128+
* scheduleBackfill_UNLOCKED.
129+
*
130+
* At the end of the test we should have 2 cursors: 1 persistence cursor and 1
131+
* DCP stream cursor.
132+
*/
133+
TEST_F(SingleThreadedEPStoreTest, MB22421_reregister_cursor) {
134+
// Make vbucket active.
135+
setVBucketStateAndRunPersistTask(vbid, vbucket_state_active);
136+
auto vb = store->getVBuckets().getBucket(vbid);
137+
auto& ckpt_mgr = vb->checkpointManager;
138+
139+
// Create a Mock Dcp producer
140+
dcp_producer_t producer = new MockDcpProducer(*engine,
141+
cookie,
142+
"test_producer",
143+
/*notifyOnly*/false);
144+
// Create a Mock Active Stream
145+
stream_t stream = new MockActiveStream(
146+
static_cast<EventuallyPersistentEngine*>(engine.get()),
147+
producer,
148+
producer->getName(),
149+
/*flags*/0,
150+
/*opaque*/0, vbid,
151+
/*st_seqno*/0,
152+
/*en_seqno*/~0,
153+
/*vb_uuid*/0xabcd,
154+
/*snap_start_seqno*/0,
155+
/*snap_end_seqno*/~0);
156+
157+
MockActiveStream* mock_stream =
158+
static_cast<MockActiveStream*>(stream.get());
159+
160+
mock_stream->transitionStateToBackfilling();
161+
EXPECT_TRUE(mock_stream->isInMemory())
162+
<< "stream state should have transitioned to StreamInMemory";
163+
// Have a persistence cursor and DCP cursor
164+
EXPECT_EQ(2, ckpt_mgr.getNumOfCursors());
165+
166+
mock_stream->public_setBackfillTaskRunning(true);
167+
mock_stream->transitionStateToBackfilling();
168+
EXPECT_TRUE(mock_stream->isBackfilling())
169+
<< "stream state should not have transitioned to StreamBackfilling";
170+
mock_stream->handleSlowStream();
171+
// The call to handleSlowStream should result in setting pendingBackfill
172+
// flag to true and the DCP cursor being dropped
173+
EXPECT_TRUE(mock_stream->public_getPendingBackfill());
174+
EXPECT_EQ(1, ckpt_mgr.getNumOfCursors());
175+
176+
mock_stream->public_setBackfillTaskRunning(false);
177+
178+
//schedule a backfill
179+
mock_stream->next();
180+
// Calling scheduleBackfill_UNLOCKED(reschedule == true) will not actually
181+
// schedule a backfill task because backfillStart (is lastReadSeqno + 1) is
182+
// 1 and backfillEnd is 0, however the cursor still needs to be
183+
// re-registered.
184+
EXPECT_EQ(2, ckpt_mgr.getNumOfCursors());
185+
}
122186

123187
/**
124188
* Regression test for MB-22451: When handleSlowStream is called and in

0 commit comments

Comments
 (0)