Skip to content

Commit 7899e80

Browse files
authored
fix: skip full sync during partial sync (#5580)
Signed-off-by: kostas <[email protected]>
1 parent 53c678a commit 7899e80

File tree

11 files changed

+180
-139
lines changed

11 files changed

+180
-139
lines changed

.pre-commit-config.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,3 @@ repos:
3131
rev: 25.1.0
3232
hooks:
3333
- id: black
34-
35-
- repo: https://github.com/gitleaks/gitleaks
36-
rev: v8.16.3
37-
hooks:
38-
- id: gitleaks

src/server/dflycmd.cc

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,9 @@ void DflyCmd::Flow(CmdArgList args, RedisReplyBuilder* rb, ConnectionContext* cn
273273
{
274274
util::fb2::LockGuard lk{replica_ptr->shared_mu};
275275

276-
if (replica_ptr->replica_state != SyncState::PREPARATION)
276+
if (replica_ptr->replica_state != SyncState::PREPARATION) {
277277
return rb->SendError(kInvalidState);
278+
}
278279

279280
// Set meta info on connection.
280281
cntx->conn()->SetName(absl::StrCat("repl_flow_", sync_id));
@@ -380,8 +381,20 @@ void DflyCmd::StartStable(CmdArgList args, Transaction* tx, RedisReplyBuilder* r
380381
return;
381382

382383
util::fb2::LockGuard lk{replica_ptr->shared_mu};
383-
if (!CheckReplicaStateOrReply(*replica_ptr, SyncState::FULL_SYNC, rb))
384+
auto repl_state = replica_ptr->replica_state;
385+
if (repl_state != SyncState::FULL_SYNC && repl_state != SyncState::PREPARATION) {
386+
rb->SendError(kInvalidState);
384387
return;
388+
}
389+
390+
// Check all flows are connected.
391+
// This might happen if a flow abruptly disconnected before sending the SYNC request.
392+
for (const FlowInfo& flow : replica_ptr->flows) {
393+
if (!flow.conn) {
394+
rb->SendError(kInvalidState);
395+
return;
396+
}
397+
}
385398

386399
{
387400
Transaction::Guard tg{tx};
@@ -390,10 +403,15 @@ void DflyCmd::StartStable(CmdArgList args, Transaction* tx, RedisReplyBuilder* r
390403
auto cb = [this, &status, replica_ptr = replica_ptr](EngineShard* shard) {
391404
FlowInfo* flow = &replica_ptr->flows[shard->shard_id()];
392405

393-
status = StopFullSyncInThread(flow, &replica_ptr->exec_st, shard);
394-
if (*status != OpStatus::OK) {
395-
return;
406+
// We are doing partial sync. We never started FullSync so we don't need to stop it.
407+
bool is_partial = flow->start_partial_sync_at.has_value();
408+
if (!is_partial) {
409+
status = StopFullSyncInThread(flow, &replica_ptr->exec_st, shard);
410+
if (*status != OpStatus::OK) {
411+
return;
412+
}
396413
}
414+
397415
StartStableSyncInThread(flow, &replica_ptr->exec_st, shard);
398416
};
399417
shard_set->RunBlockingInParallel(std::move(cb));
@@ -665,10 +683,7 @@ OpStatus DflyCmd::StartFullSyncInThread(FlowInfo* flow, ExecutionState* exec_st,
665683
return OpStatus::CANCELLED;
666684
}
667685

668-
if (flow->start_partial_sync_at.has_value())
669-
saver->StartIncrementalSnapshotInShard(*flow->start_partial_sync_at, exec_st, shard);
670-
else
671-
saver->StartSnapshotInShard(true, exec_st, shard);
686+
saver->StartSnapshotInShard(true, exec_st, shard);
672687

673688
return OpStatus::OK;
674689
}
@@ -700,8 +715,10 @@ void DflyCmd::StartStableSyncInThread(FlowInfo* flow, ExecutionState* exec_st, E
700715
DCHECK(shard);
701716
DCHECK(flow->conn);
702717

703-
flow->streamer.reset(
704-
new JournalStreamer(sf_->journal(), exec_st, JournalStreamer::SendLsn::YES, true));
718+
LSN partial_lsn = flow->start_partial_sync_at.value_or(0);
719+
JournalStreamer::Config config{
720+
.should_sent_lsn = true, .init_from_stable_sync = true, .start_partial_sync_at = partial_lsn};
721+
flow->streamer.reset(new JournalStreamer(sf_->journal(), exec_st, config));
705722
flow->streamer->Start(flow->conn->socket());
706723

707724
// Register cleanup.

src/server/journal/streamer.cc

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,9 @@ void LogTcpSocketDiagnostics(util::FiberSocketBase* dest) {
109109

110110
} // namespace
111111

112-
JournalStreamer::JournalStreamer(journal::Journal* journal, ExecutionState* cntx, SendLsn send_lsn,
113-
bool is_stable_sync)
114-
: cntx_(cntx), journal_(journal), is_stable_sync_(is_stable_sync), send_lsn_(send_lsn) {
112+
JournalStreamer::JournalStreamer(journal::Journal* journal, ExecutionState* cntx,
113+
JournalStreamer::Config config)
114+
: cntx_(cntx), journal_(journal), config_(config) {
115115
// cache the flag to avoid accessing it later.
116116
replication_stream_output_limit_cached = absl::GetFlag(FLAGS_replication_stream_output_limit);
117117
migration_buckets_sleep_usec_cached = absl::GetFlag(FLAGS_migration_buckets_sleep_usec);
@@ -136,7 +136,7 @@ void JournalStreamer::ConsumeJournalChange(const JournalChangeItem& item) {
136136
time_t now = time(nullptr);
137137
last_lsn_writen_ = item.journal_item.lsn;
138138
// TODO: to chain it to the previous Write call.
139-
if (send_lsn_ == SendLsn::YES && now - last_lsn_time_ > 3) {
139+
if (config_.should_sent_lsn && now - last_lsn_time_ > 3) {
140140
last_lsn_time_ = now;
141141
io::StringSink sink;
142142
JournalWriter writer(&sink);
@@ -148,14 +148,19 @@ void JournalStreamer::ConsumeJournalChange(const JournalChangeItem& item) {
148148
void JournalStreamer::Start(util::FiberSocketBase* dest) {
149149
CHECK(dest_ == nullptr && dest != nullptr);
150150
dest_ = dest;
151-
journal_cb_id_ = journal_->RegisterOnChange(this);
151+
// For partial sync we first catch up from journal replication buffer and only then register.
152+
if (config_.start_partial_sync_at == 0) {
153+
journal_cb_id_ = journal_->RegisterOnChange(this);
154+
}
152155
StartStalledDataWriterFiber();
153156
}
154157

155158
void JournalStreamer::Cancel() {
156159
VLOG(1) << "JournalStreamer::Cancel " << cntx_->IsCancelled();
157160
waker_.notifyAll();
158-
journal_->UnregisterOnChange(journal_cb_id_);
161+
if (journal_cb_id_) {
162+
journal_->UnregisterOnChange(journal_cb_id_);
163+
}
159164
StopStalledDataWriterFiber();
160165
WaitForInflightToComplete();
161166
}
@@ -182,7 +187,7 @@ void JournalStreamer::Write(std::string str) {
182187
}
183188

184189
void JournalStreamer::StartStalledDataWriterFiber() {
185-
if (is_stable_sync_ && !stalled_data_writer_.IsJoinable()) {
190+
if (config_.init_from_stable_sync && !stalled_data_writer_.IsJoinable()) {
186191
auto pb = fb2::ProactorBase::me();
187192
std::chrono::milliseconds period_us(stalled_writer_base_period_ms);
188193
stalled_data_writer_ = MakeFiber([this, index = pb->GetPoolIndex(), period_us]() mutable {
@@ -192,8 +197,55 @@ void JournalStreamer::StartStalledDataWriterFiber() {
192197
}
193198
}
194199

200+
bool JournalStreamer::MaybePartialStreamLSNs() {
201+
// Same algorithm as SwitchIncrementalFb. The only difference is that we don't sent
202+
// the old LSN"s via a snapshot but rather as journal changes.
203+
if (config_.start_partial_sync_at > 0) {
204+
LSN lsn = config_.start_partial_sync_at;
205+
DCHECK_LE(lsn, journal_->GetLsn()) << "The replica tried to sync from the future.";
206+
207+
LOG(INFO) << "Starting partial sync from lsn: " << lsn;
208+
// The replica sends the LSN of the next entry is wants to receive.
209+
while (cntx_->IsRunning() && journal_->IsLSNInBuffer(lsn)) {
210+
JournalChangeItem item;
211+
item.journal_item.data = journal_->GetEntry(lsn);
212+
item.journal_item.lsn = lsn;
213+
ConsumeJournalChange(item);
214+
lsn++;
215+
}
216+
217+
if (!cntx_->IsRunning()) {
218+
return false;
219+
}
220+
221+
if (journal_->GetLsn() != lsn) {
222+
// We stopped but we didn't manage to send the whole stream.
223+
cntx_->ReportError(
224+
std::make_error_code(errc::state_not_recoverable),
225+
absl::StrCat("Partial sync was unsuccessful because entry #", lsn,
226+
" was dropped from the buffer. Current lsn=", journal_->GetLsn()));
227+
return false;
228+
}
229+
230+
// We are done, register back to the journal so we don't miss any changes
231+
journal_cb_id_ = journal_->RegisterOnChange(this);
232+
233+
LOG(INFO) << "Last LSN sent in partial sync was " << (lsn - 1);
234+
// flush pending
235+
if (pending_buf_.Size() != 0) {
236+
AsyncWrite(true);
237+
}
238+
}
239+
return true;
240+
}
241+
195242
void JournalStreamer::StalledDataWriterFiber(std::chrono::milliseconds period_ms,
196243
util::fb2::Done* waiter) {
244+
if (!MaybePartialStreamLSNs()) {
245+
// Either context got cancelled, or partial sync failed because the lsn's stalled.
246+
return;
247+
}
248+
197249
while (cntx_->IsRunning()) {
198250
if (waiter->WaitFor(period_ms)) {
199251
if (!cntx_->IsRunning()) {
@@ -222,7 +274,7 @@ void JournalStreamer::AsyncWrite(bool force_send) {
222274

223275
// Writing in stable sync and outside of fiber needs to check
224276
// threshold before writing data.
225-
if (is_stable_sync_ && !force_send &&
277+
if (config_.init_from_stable_sync && !force_send &&
226278
pending_buf_.FrontBufSize() < replication_dispatch_threshold) {
227279
return;
228280
}
@@ -332,7 +384,7 @@ void JournalStreamer::WaitForInflightToComplete() {
332384
}
333385

334386
void JournalStreamer::StopStalledDataWriterFiber() {
335-
if (is_stable_sync_ && stalled_data_writer_.IsJoinable()) {
387+
if (config_.init_from_stable_sync && stalled_data_writer_.IsJoinable()) {
336388
stalled_data_writer_done_.Notify();
337389
if (stalled_data_writer_.IsJoinable()) {
338390
stalled_data_writer_.Join();
@@ -346,9 +398,7 @@ bool JournalStreamer::IsStalled() const {
346398

347399
RestoreStreamer::RestoreStreamer(DbSlice* slice, cluster::SlotSet slots, journal::Journal* journal,
348400
ExecutionState* cntx)
349-
: JournalStreamer(journal, cntx, JournalStreamer::SendLsn::NO, false),
350-
db_slice_(slice),
351-
my_slots_(std::move(slots)) {
401+
: JournalStreamer(journal, cntx, {}), db_slice_(slice), my_slots_(std::move(slots)) {
352402
DCHECK(slice != nullptr);
353403
migration_buckets_serialization_threshold_cached =
354404
absl::GetFlag(FLAGS_migration_buckets_serialization_threshold);

src/server/journal/streamer.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,14 @@ namespace dfly {
1616
// journal listener and writes them to a destination sink in a separate fiber.
1717
class JournalStreamer : public journal::JournalConsumerInterface {
1818
public:
19-
enum class SendLsn : uint8_t { NO = 0, YES = 1 };
20-
JournalStreamer(journal::Journal* journal, ExecutionState* cntx, SendLsn send_lsn,
21-
bool is_stable_sync);
19+
struct Config {
20+
bool should_sent_lsn = false;
21+
bool init_from_stable_sync = false;
22+
LSN start_partial_sync_at = 0;
23+
};
24+
25+
JournalStreamer(journal::Journal* journal, ExecutionState* cntx, Config config);
26+
2227
virtual ~JournalStreamer();
2328

2429
// Self referential.
@@ -69,6 +74,10 @@ class JournalStreamer : public journal::JournalConsumerInterface {
6974
PendingBuf pending_buf_;
7075

7176
private:
77+
// Return true if all lsn's from config_.start_partial_sync_at were sent (or if started from 0).
78+
// Return false if not all lsn's were sent (stalled) in time. Cancels the context with error.
79+
bool MaybePartialStreamLSNs();
80+
7281
void AsyncWrite(bool force_send);
7382
void OnCompletion(std::error_code ec, size_t len);
7483

@@ -82,16 +91,15 @@ class JournalStreamer : public journal::JournalConsumerInterface {
8291
void StopStalledDataWriterFiber();
8392
void StalledDataWriterFiber(std::chrono::milliseconds period_ms, util::fb2::Done* waiter);
8493

94+
const Config config_;
8595
// If we are replication in stable sync we can aggregate data before sending
86-
bool is_stable_sync_;
8796
size_t in_flight_bytes_ = 0, total_sent_ = 0;
8897
// Last time that send data in milliseconds
8998
uint64_t last_async_write_time_ = 0;
9099
time_t last_lsn_time_ = 0;
91100
LSN last_lsn_writen_ = 0;
92101
util::fb2::EventCount waker_;
93102
uint32_t journal_cb_id_{0};
94-
SendLsn send_lsn_;
95103
};
96104

97105
class CmdSerializer;

src/server/rdb_save.cc

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,6 @@ class RdbSaver::Impl final : public SliceSnapshot::SnapshotDataConsumerInterface
10661066
~Impl();
10671067

10681068
void StartSnapshotting(bool stream_journal, ExecutionState* cntx, EngineShard* shard);
1069-
void StartIncrementalSnapshotting(LSN start_lsn, ExecutionState* cntx, EngineShard* shard);
10701069

10711070
void StopSnapshotting(EngineShard* shard);
10721071
void WaitForSnapshottingFinish(EngineShard* shard);
@@ -1244,15 +1243,6 @@ void RdbSaver::Impl::StartSnapshotting(bool stream_journal, ExecutionState* cntx
12441243
s->Start(stream_journal, allow_flush);
12451244
}
12461245

1247-
void RdbSaver::Impl::StartIncrementalSnapshotting(LSN start_lsn, ExecutionState* cntx,
1248-
EngineShard* shard) {
1249-
auto& db_slice = namespaces->GetDefaultNamespace().GetDbSlice(shard->shard_id());
1250-
auto& s = GetSnapshot(shard);
1251-
1252-
s = CreateSliceSnapshot(shard, &db_slice, cntx);
1253-
s->StartIncremental(start_lsn);
1254-
}
1255-
12561246
SnapshotPtr RdbSaver::Impl::CreateSliceSnapshot(EngineShard* shard, DbSlice* db_slice,
12571247
ExecutionState* cntx) {
12581248
return SnapshotPtr(new SliceSnapshot(compression_mode_, db_slice, this, cntx),
@@ -1459,11 +1449,6 @@ void RdbSaver::StartSnapshotInShard(bool stream_journal, ExecutionState* cntx, E
14591449
impl_->StartSnapshotting(stream_journal, cntx, shard);
14601450
}
14611451

1462-
void RdbSaver::StartIncrementalSnapshotInShard(LSN start_lsn, ExecutionState* cntx,
1463-
EngineShard* shard) {
1464-
impl_->StartIncrementalSnapshotting(start_lsn, cntx, shard);
1465-
}
1466-
14671452
error_code RdbSaver::WaitSnapshotInShard(EngineShard* shard) {
14681453
impl_->WaitForSnapshottingFinish(shard);
14691454
return SaveEpilog();

src/server/rdb_save.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,6 @@ class RdbSaver {
9595
// cll allows breaking in the middle.
9696
void StartSnapshotInShard(bool stream_journal, ExecutionState* cntx, EngineShard* shard);
9797

98-
// Send only the incremental snapshot since start_lsn.
99-
void StartIncrementalSnapshotInShard(LSN start_lsn, ExecutionState* cntx, EngineShard* shard);
100-
10198
// Stops full-sync serialization for replication in the shard's thread.
10299
std::error_code StopFullSyncInShard(EngineShard* shard);
103100

0 commit comments

Comments
 (0)