Skip to content

Commit 8d0f743

Browse files
adiholdenmkaruza
authored andcommitted
add prints to investigte failure
Signed-off-by: adi_holden <[email protected]>
1 parent e4b4847 commit 8d0f743

File tree

4 files changed

+15
-8
lines changed

4 files changed

+15
-8
lines changed

src/server/engine_shard.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,8 @@ static void RunFPeriodically(std::function<void()> f, std::chrono::milliseconds
519519

520520
int64_t now_ms = fb2::ProactorBase::GetMonotonicTimeNs() / 1000000;
521521
if (now_ms - 5 * period_ms.count() > last_heartbeat_ms) {
522-
VLOG(1) << "This " << error_msg << " step took " << now_ms - last_heartbeat_ms << "ms";
522+
VLOG(1) << "This " << error_msg << " step was stalled for " << now_ms - last_heartbeat_ms
523+
<< "ms";
523524
}
524525
f();
525526
last_heartbeat_ms = fb2::ProactorBase::GetMonotonicTimeNs() / 1000000;

src/server/journal/streamer.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ void JournalStreamer::AsyncWrite(bool force_send) {
166166
v[i] = IoVec(io::Bytes(uptr, cur_buf.buf[i].size()));
167167
}
168168

169+
DVLOG(3) << "calling AsyncWrite with buff size:" << v.size();
169170
dest_->AsyncWrite(v.data(), v.size(),
170171
[this, len = in_flight_bytes_](std::error_code ec) { OnCompletion(ec, len); });
171172
}

src/server/replica.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,7 @@ void DflyShardReplica::StableSyncDflyReadFb(ExecutionState* cntx) {
934934

935935
std::optional<TransactionData> tx_data;
936936
while ((tx_data = tx_reader.NextTxData(&reader, cntx))) {
937-
DVLOG(3) << "Lsn: " << tx_data->lsn;
937+
DVLOG(3) << "Lsn: " << tx_data->lsn << " flowid: " << flow_id_;
938938

939939
last_io_time_ = Proactor()->GetMonotonicTimeNs();
940940
if (tx_data->opcode == journal::Op::LSN) {
@@ -953,6 +953,7 @@ void DflyShardReplica::StableSyncDflyReadFb(ExecutionState* cntx) {
953953
// inconsistent data because the replica will resume from the next
954954
// lsn of the master and this lsn entry will be lost.
955955
journal_rec_executed_.fetch_add(1, std::memory_order_relaxed);
956+
DVLOG(3) << "journal_rec_executed_: " << journal_rec_executed_ << " flowid: " << flow_id_;
956957
}
957958
}
958959
shard_replica_waker_.notifyAll();
@@ -996,7 +997,8 @@ void DflyShardReplica::StableSyncDflyAcksFb(ExecutionState* cntx) {
996997
// Handle ACKs with the master. PING opcodes from the master mean we should immediately
997998
// answer.
998999
current_offset = journal_rec_executed_.load(std::memory_order_relaxed);
999-
VLOG(1) << "Sending an ACK with offset=" << current_offset << " forced=" << force_ping_;
1000+
VLOG(1) << "Sending an ACK with offset=" << current_offset << " forced=" << force_ping_
1001+
<< " flowid=" << flow_id_;
10001002
ack_cmd = absl::StrCat("REPLCONF ACK ", current_offset);
10011003
force_ping_ = false;
10021004
next_ack_tp = std::chrono::steady_clock::now() + ack_time_max_interval;

tests/dragonfly/replication_test.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,13 +1263,16 @@ async def delayed_takeover():
12631263

12641264
@pytest.mark.exclude_epoll
12651265
@pytest.mark.parametrize("master_threads, replica_threads", take_over_cases)
1266-
async def test_take_over_seeder(
1267-
request, df_factory, df_seeder_factory, master_threads, replica_threads
1268-
):
1266+
async def test_take_over_seeder(df_factory, df_seeder_factory, master_threads, replica_threads):
12691267
master = df_factory.create(
1270-
proactor_threads=master_threads, dbfilename=f"dump_{tmp_file_name()}", admin_port=ADMIN_PORT
1268+
proactor_threads=master_threads,
1269+
dbfilename=f"dump_{tmp_file_name()}",
1270+
admin_port=ADMIN_PORT,
1271+
vmodule="rdb_save=1,dflycmd=1,snapshot=1,streamer=3",
1272+
)
1273+
replica = df_factory.create(
1274+
proactor_threads=replica_threads, vmodule="rdb_save=1,replica=3,dflycmd=1,snapshot=1"
12711275
)
1272-
replica = df_factory.create(proactor_threads=replica_threads)
12731276
df_factory.start_all([master, replica])
12741277

12751278
seeder = df_seeder_factory.create(port=master.port, keys=1000, dbcount=5, stop_on_failure=False)

0 commit comments

Comments
 (0)