Skip to content

Commit 92f1f76

Browse files
jameseh96jimwwalker
authored andcommitted
[BP] MB-37321: Register cursor before backfilling after cursor dropping
Prior to this patch there was a small window of time after ActiveStream checked the persisted seqno to determine if a backfill could provide any items needed to catch up to the checkpoint manager _before_ the cursor is reregistered. In this window, closed checkpoints might be removed, or checkpoint expelling might remove some items from the oldest checkpoint. Either of these events would lead to the stream "skipping" some items; the cursor would be registered at a later seqno than expected, and the stream does not re-check whether a backfill is needed. This patch makes ActiveStream register the cursor before trying to schedule a backfill. If the backfill end extends later than the requested seqno (an expected situation) existing code in markDiskSnapshot "atomically" (under the checkpoint manager lock) reregisters a cursor at the correct seqno. Change-Id: I472eaf97537532fabb0eed323a4789342a547a98 Reviewed-on: http://review.couchbase.org/120038 Tested-by: Build Bot <[email protected]> Well-Formed: Build Bot <[email protected]> Reviewed-by: Jim Walker <[email protected]>
1 parent b1bbe30 commit 92f1f76

File tree

12 files changed

+304
-142
lines changed

12 files changed

+304
-142
lines changed

engines/ep/src/checkpoint.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ typedef std::list<std::pair<std::string, MustSendCheckpointEnd>>
9494
class Checkpoint;
9595
class CheckpointManager;
9696
class CheckpointConfig;
97+
class CheckpointCursorIntrospector;
9798
class PreLinkDocumentContext;
9899
class VBucket;
99100

@@ -130,6 +131,8 @@ using CheckpointList = std::list<std::unique_ptr<Checkpoint>>;
130131
class CheckpointCursor {
131132
friend class CheckpointManager;
132133
friend class Checkpoint;
134+
friend class CheckpointCursorIntrospector;
135+
133136
public:
134137

135138
CheckpointCursor() { }
@@ -1062,6 +1065,7 @@ class CheckpointManager {
10621065
FlusherCallback flusherCB;
10631066

10641067
friend std::ostream& operator<<(std::ostream& os, const CheckpointManager& m);
1068+
friend class CheckpointCursorIntrospector;
10651069
};
10661070

10671071
// Outputs a textual description of the CheckpointManager.

engines/ep/src/dcp/producer.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -929,11 +929,15 @@ void DcpProducer::recordBackfillManagerBytesSent(size_t bytes) {
929929
backfillMgr->bytesSent(bytes);
930930
}
931931

932-
void DcpProducer::scheduleBackfillManager(VBucket& vb,
932+
bool DcpProducer::scheduleBackfillManager(VBucket& vb,
933933
std::shared_ptr<ActiveStream> s,
934934
uint64_t start,
935935
uint64_t end) {
936-
backfillMgr->schedule(vb, s, start, end);
936+
if (start <= end) {
937+
backfillMgr->schedule(vb, s, start, end);
938+
return true;
939+
}
940+
return false;
937941
}
938942

939943
void DcpProducer::addStats(ADD_STAT add_stat, const void *c) {

engines/ep/src/dcp/producer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,10 @@ class DcpProducer : public ConnHandler,
143143
void notifyBackfillManager();
144144
bool recordBackfillManagerBytesRead(size_t bytes, bool force);
145145
void recordBackfillManagerBytesSent(size_t bytes);
146-
void scheduleBackfillManager(VBucket& vb,
147-
std::shared_ptr<ActiveStream> s,
148-
uint64_t start,
149-
uint64_t end);
146+
virtual bool scheduleBackfillManager(VBucket& vb,
147+
std::shared_ptr<ActiveStream> s,
148+
uint64_t start,
149+
uint64_t end);
150150

151151
bool isExtMetaDataEnabled () {
152152
return enableExtMetaData;

engines/ep/src/dcp/stream.cc

Lines changed: 56 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,49 +1444,61 @@ void ActiveStream::scheduleBackfill_UNLOCKED(bool reschedule) {
14441444
}
14451445

14461446
uint64_t backfillStart = lastReadSeqno.load() + 1;
1447-
uint64_t backfillEnd = 0;
1448-
bool tryBackfill = false;
1447+
uint64_t backfillEnd;
1448+
bool tryBackfill;
14491449

1450-
if ((flags_ & DCP_ADD_STREAM_FLAG_DISKONLY) || reschedule) {
1451-
uint64_t vbHighSeqno = static_cast<uint64_t>(vbucket->getHighSeqno());
1452-
if (lastReadSeqno.load() > vbHighSeqno) {
1453-
throw std::logic_error("ActiveStream::scheduleBackfill_UNLOCKED: "
1454-
"lastReadSeqno (which is " +
1455-
std::to_string(lastReadSeqno.load()) +
1456-
" ) is greater than vbHighSeqno (which is " +
1457-
std::to_string(vbHighSeqno) + " ). " +
1458-
"for stream " + producer->logHeader() +
1459-
"; vb " + std::to_string(vb_));
1460-
}
1461-
if (reschedule) {
1462-
/* We need to do this for reschedule because in case of
1463-
DCP_ADD_STREAM_FLAG_DISKONLY (the else part), end_seqno_ is
1464-
set to last persisted seqno befor calling
1465-
scheduleBackfill_UNLOCKED() */
1466-
backfillEnd = engine->getKVBucket()->getLastPersistedSeqno(vb_);
1467-
} else {
1468-
backfillEnd = end_seqno_;
1469-
}
1450+
if (flags_ & static_cast<uint64_t>(DCP_ADD_STREAM_FLAG_DISKONLY)) {
1451+
// if disk only, always backfill to the requested end seqno
1452+
backfillEnd = end_seqno_;
14701453
tryBackfill = true;
14711454
} else {
1455+
/* not disk only - stream may require backfill but will transition to
1456+
* in-memory afterward; register the cursor now.
1457+
* There are two expected cases:
1458+
* 1: registerResult.second=true, which means
1459+
* - Cursor at start of first checkpoint
1460+
* - CheckpointManager can't provide all the items needed
1461+
* so a backfill may be required before moving to
1462+
* in-memory streaming.
1463+
* 2: registerResult.second=false
1464+
* - The CheckpointManager contains the required items
1465+
* - No backfill needed
1466+
*/
1467+
1468+
CursorRegResult registerResult;
14721469
try {
1473-
std::tie(curChkSeqno, tryBackfill) =
1474-
vbucket->checkpointManager->registerCursorBySeqno(
1475-
cursorName,
1476-
lastReadSeqno.load(),
1477-
MustSendCheckpointEnd::NO);
1478-
cursorRegistered = true;
1479-
} catch(std::exception& error) {
1470+
registerResult = vbucket->checkpointManager->registerCursorBySeqno(
1471+
cursorName,
1472+
lastReadSeqno.load(),
1473+
MustSendCheckpointEnd::NO);
1474+
} catch (std::exception& error) {
14801475
log(EXTENSION_LOG_WARNING,
14811476
"(vb %" PRIu16
14821477
") Failed to register "
14831478
"cursor: %s",
14841479
vb_,
14851480
error.what());
14861481
endStream(END_STREAM_STATE);
1482+
return;
14871483
}
14881484

1485+
curChkSeqno = registerResult.first;
1486+
tryBackfill = registerResult.second;
1487+
cursorRegistered = true;
1488+
1489+
log(EXTENSION_LOG_INFO,
1490+
"(vb %" PRIu16
1491+
") ActiveStream::scheduleBackfill_UNLOCKED register cursor "
1492+
"with "
1493+
"name \"{}\" backfill:{}, seqno:{}",
1494+
vb_,
1495+
name_.c_str(),
1496+
tryBackfill,
1497+
curChkSeqno.load());
1498+
14891499
if (lastReadSeqno.load() > curChkSeqno) {
1500+
// something went wrong registering the cursor - it is too early
1501+
// and could read items this stream has already sent.
14901502
throw std::logic_error("ActiveStream::scheduleBackfill_UNLOCKED: "
14911503
"lastReadSeqno (which is " +
14921504
std::to_string(lastReadSeqno.load()) +
@@ -1496,23 +1508,18 @@ void ActiveStream::scheduleBackfill_UNLOCKED(bool reschedule) {
14961508
"; vb " + std::to_string(vb_));
14971509
}
14981510

1499-
/* We need to find the minimum seqno that needs to be backfilled in
1500-
* order to make sure that we don't miss anything when transitioning
1501-
* to a memory snapshot. The backfill task will always make sure that
1502-
* the backfill end seqno is contained in the backfill.
1503-
*/
1504-
if (backfillStart < curChkSeqno) {
1505-
if (curChkSeqno > end_seqno_) {
1506-
/* Backfill only is enough */
1507-
backfillEnd = end_seqno_;
1508-
} else {
1509-
/* Backfill + in-memory streaming */
1510-
backfillEnd = curChkSeqno - 1;
1511-
}
1512-
}
1511+
// _if_ a backfill is required, it should end either at the
1512+
// requested stream end seqno OR the seqno immediately
1513+
// before what the checkpoint manager can provide
1514+
// - whichever is lower.
1515+
backfillEnd = std::min(end_seqno_, curChkSeqno - 1);
15131516
}
15141517

1515-
if (backfillStart <= backfillEnd && tryBackfill) {
1518+
if (tryBackfill &&
1519+
producer->scheduleBackfillManager(
1520+
*vbucket, shared_from_this(), backfillStart, backfillEnd)) {
1521+
// backfill will be needed to catch up to the items in the
1522+
// CheckpointManager
15161523
log(EXTENSION_LOG_NOTICE,
15171524
"(vb %" PRIu16
15181525
") Scheduling backfill "
@@ -1523,79 +1530,17 @@ void ActiveStream::scheduleBackfill_UNLOCKED(bool reschedule) {
15231530
backfillStart,
15241531
backfillEnd,
15251532
reschedule ? "True" : "False");
1526-
producer->scheduleBackfillManager(
1527-
*vbucket, shared_from_this(), backfillStart, backfillEnd);
1533+
15281534
isBackfillTaskRunning.store(true);
15291535
/// Number of backfill items is unknown until the Backfill task
15301536
/// completes the scan phase - reset backfillRemaining counter.
15311537
backfillRemaining.reset();
15321538
} else {
1533-
if (reschedule) {
1534-
// Infrequent code path, see comment below.
1535-
log(EXTENSION_LOG_NOTICE,
1536-
"(vb %" PRIu16
1537-
") Did not schedule "
1538-
"backfill with reschedule : True, "
1539-
"tryBackfill : True; "
1540-
"backfillStart : %" PRIu64
1541-
", "
1542-
"backfillEnd : %" PRIu64
1543-
", "
1544-
"flags_ : %" PRIu32
1545-
", "
1546-
"start_seqno_ : %" PRIu64
1547-
", "
1548-
"end_seqno_ : %" PRIu64
1549-
", "
1550-
"lastReadSeqno : %" PRIu64
1551-
", "
1552-
"lastSentSeqno : %" PRIu64
1553-
", "
1554-
"curChkSeqno : %" PRIu64
1555-
", "
1556-
"itemsReady : %s",
1557-
vb_,
1558-
backfillStart,
1559-
backfillEnd,
1560-
flags_,
1561-
start_seqno_,
1562-
end_seqno_,
1563-
lastReadSeqno.load(),
1564-
lastSentSeqno.load(),
1565-
curChkSeqno.load(),
1566-
itemsReady ? "True" : "False");
1567-
1568-
/* Cursor was dropped, but we will not do backfill.
1569-
* This may happen in a corner case where, the memory usage is high
1570-
* due to other vbuckets and persistence cursor moves ahead of
1571-
* replication cursor to new checkpoint open but does not persist
1572-
* items yet.
1573-
*
1574-
* Because we dropped the cursor but did not do a backfill (and
1575-
* therefore did not re-register a cursor in markDiskSnapshot) we
1576-
* must re-register the cursor here.
1577-
*/
1578-
try {
1579-
CursorRegResult result =
1580-
vbucket->checkpointManager->registerCursorBySeqno(
1581-
cursorName,
1582-
lastReadSeqno.load(),
1583-
MustSendCheckpointEnd::NO);
1584-
cursorRegistered = true;
1585-
curChkSeqno = result.first;
1586-
} catch (std::exception& error) {
1587-
log(EXTENSION_LOG_WARNING,
1588-
"(vb %" PRIu16
1589-
") Failed to register "
1590-
"cursor: %s",
1591-
vb_,
1592-
error.what());
1593-
endStream(END_STREAM_STATE);
1594-
}
1595-
}
1596-
if (flags_ & DCP_ADD_STREAM_FLAG_DISKONLY) {
1539+
// backfill not needed
1540+
if (flags_ & static_cast<uint64_t>(DCP_ADD_STREAM_FLAG_DISKONLY)) {
15971541
endStream(END_STREAM_OK);
1598-
} else if (flags_ & DCP_ADD_STREAM_FLAG_TAKEOVER) {
1542+
} else if (flags_ &
1543+
static_cast<uint64_t>(DCP_ADD_STREAM_FLAG_TAKEOVER)) {
15991544
transitionState(StreamState::TakeoverSend);
16001545
} else {
16011546
transitionState(StreamState::InMemory);

engines/ep/src/dcp/stream.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ class Stream {
131131
clear_UNLOCKED();
132132
}
133133

134-
protected:
135134

136135
// The StreamState is protected as it needs to be accessed by sub-classes
137136
enum class StreamState {
@@ -148,6 +147,7 @@ class Stream {
148147

149148
StreamState getState() const { return state_; }
150149

150+
protected:
151151
void clear_UNLOCKED();
152152

153153
/* To be called after getting streamMutex lock */

engines/ep/tests/mock/mock_dcp_producer.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ std::shared_ptr<MockActiveStream> MockDcpProducer::mockActiveStreamRequest(
2727
uint64_t end_seqno,
2828
uint64_t vbucket_uuid,
2929
uint64_t snap_start_seqno,
30-
uint64_t snap_end_seqno) {
30+
uint64_t snap_end_seqno,
31+
IncludeValue includeValue,
32+
IncludeXattrs includeXattrs) {
3133
auto stream = std::make_shared<MockActiveStream>(
3234
static_cast<EventuallyPersistentEngine*>(&engine_),
3335
std::static_pointer_cast<MockDcpProducer>(shared_from_this()),
@@ -38,7 +40,9 @@ std::shared_ptr<MockActiveStream> MockDcpProducer::mockActiveStreamRequest(
3840
end_seqno,
3941
vbucket_uuid,
4042
snap_start_seqno,
41-
snap_end_seqno);
43+
snap_end_seqno,
44+
includeValue,
45+
includeXattrs);
4246
stream->setActive();
4347
if (!streams.insert(std::make_pair(vb.getId(), stream))) {
4448
throw std::logic_error(
@@ -47,4 +51,4 @@ std::shared_ptr<MockActiveStream> MockDcpProducer::mockActiveStreamRequest(
4751
}
4852
notifyStreamReady(vb.getId());
4953
return stream;
50-
}
54+
}

engines/ep/tests/mock/mock_dcp_producer.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,5 +161,22 @@ class MockDcpProducer : public DcpProducer {
161161
uint64_t end_seqno,
162162
uint64_t vbucket_uuid,
163163
uint64_t snap_start_seqno,
164-
uint64_t snap_end_seqno);
164+
uint64_t snap_end_seqno,
165+
IncludeValue includeValue = IncludeValue::Yes,
166+
IncludeXattrs includeXattrs = IncludeXattrs::Yes);
167+
168+
bool scheduleBackfillManager(VBucket& vb,
169+
std::shared_ptr<ActiveStream> s,
170+
uint64_t start,
171+
uint64_t end) override {
172+
beforeScheduleBackfillCB(end);
173+
return DcpProducer::scheduleBackfillManager(
174+
vb, std::move(s), start, end);
175+
}
176+
177+
void setBeforeScheduleBackfillCB(std::function<void(uint64_t)> backfillCB) {
178+
beforeScheduleBackfillCB = std::move(backfillCB);
179+
}
180+
181+
std::function<void(uint64_t)> beforeScheduleBackfillCB = [](uint64_t) {};
165182
};
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2+
/*
3+
* Copyright 2018 Couchbase, Inc
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
#pragma once
18+
19+
#include "checkpoint.h"
20+
21+
/**
22+
* Stateless class used to gain privileged access into CheckpointCursor for
23+
* testing purposes. This is a friend class of CheckpointCursor.
24+
*/
25+
class CheckpointCursorIntrospector {
26+
public:
27+
static const CheckpointQueue::iterator& getCurrentPos(
28+
const CheckpointManager& cm, const std::string& cursorName) {
29+
auto& cursor = cm.connCursors.find(cursorName)->second;
30+
return cursor.currentPos;
31+
}
32+
};

0 commit comments

Comments
 (0)