@@ -24,6 +24,9 @@ ABSL_FLAG(uint32_t, replication_stream_output_limit, 64_KB,
24
24
ABSL_FLAG (uint32_t , migration_buckets_serialization_threshold, 100 ,
25
25
" The Number of buckets to serialize on each iteration before yielding" );
26
26
27
// Threshold flag for batching replication writes. Its value is read in the
// JournalStreamer constructor and cached in a file-local variable; AsyncWrite then
// skips a non-forced write in stable sync mode while the front pending buffer is
// smaller than the cached value.
+ ABSL_FLAG (uint32_t , replication_dispatch_threshold, 1500 ,
28
+ " Number of bytes to aggregate before replication" );
29
+
27
30
namespace dfly {
28
31
using namespace util ;
29
32
using namespace journal ;
@@ -36,13 +39,18 @@ iovec IoVec(io::Bytes src) {
36
39
37
40
// File-local values used by the streamer code below. The JournalStreamer constructor
// overwrites replication_stream_output_limit_cached and replication_dispatch_threshold
// from their flags on every construction.
uint32_t replication_stream_output_limit_cached = 64_KB;
38
41
uint32_t migration_buckets_serialization_threshold_cached = 100 ;
42
// Cached copy of FLAGS_replication_dispatch_threshold.
// NOTE(review): unlike its siblings this cache has no `_cached` suffix - consider
// renaming for consistency.
+ uint32_t replication_dispatch_threshold = 1500 ;
43
// Wait period, in milliseconds, handed to the stalled-data writer fiber when it starts.
+ uint32_t stalled_writer_base_period_ms = 10 ;
39
44
40
45
} // namespace
41
46
42
// Constructs a streamer bound to `journal` and `cntx`.
// `send_lsn` and `is_stable_sync` are stored as-is in the corresponding members;
// `is_stable_sync` is later consulted when starting/stopping the stalled-data writer
// fiber and when deciding whether AsyncWrite applies the dispatch threshold.
- JournalStreamer::JournalStreamer (journal::Journal* journal, ExecutionState* cntx, SendLsn send_lsn)
43
- : cntx_(cntx), journal_(journal), send_lsn_(send_lsn) {
47
+ JournalStreamer::JournalStreamer (journal::Journal* journal, ExecutionState* cntx, SendLsn send_lsn,
48
+ bool is_stable_sync)
49
+ : cntx_(cntx), journal_(journal), is_stable_sync_(is_stable_sync), send_lsn_(send_lsn) {
44
50
// cache the flag to avoid accessing it later.
// Both caches are file-level variables, so every construction refreshes them for
// all streamers defined in this file.
45
51
replication_stream_output_limit_cached = absl::GetFlag (FLAGS_replication_stream_output_limit);
52
+ replication_dispatch_threshold = absl::GetFlag (FLAGS_replication_dispatch_threshold);
53
// Monotonic clock converted from nanoseconds to milliseconds.
+ last_async_write_time_ = fb2::ProactorBase::GetMonotonicTimeNs () / 1000000 ;
46
54
}
47
55
48
56
JournalStreamer::~JournalStreamer () {
@@ -75,34 +83,80 @@ void JournalStreamer::Start(util::FiberSocketBase* dest) {
75
83
CHECK (dest_ == nullptr && dest != nullptr );
76
84
dest_ = dest;
77
85
journal_cb_id_ = journal_->RegisterOnChange (this );
86
+ StartStalledDataWriterFiber ();
78
87
}
79
88
80
89
// Shuts the streamer down. In order: wakes everything blocked on waker_,
// unregisters the journal change callback, stops the stalled-data writer fiber,
// and finally waits for in-flight writes to complete.
void JournalStreamer::Cancel () {
81
90
VLOG (1 ) << " JournalStreamer::Cancel" ;
82
91
waker_.notifyAll ();
83
92
journal_->UnregisterOnChange (journal_cb_id_);
84
// The previous version only waited for in-flight writes when the context had no
// error; the new version stops the writer fiber first and then waits unconditionally.
- if (!cntx_->IsError ()) {
85
- WaitForInflightToComplete ();
86
- }
93
+ StopStalledDataWriterFiber ();
94
+ WaitForInflightToComplete ();
87
95
}
88
96
89
97
size_t JournalStreamer::UsedBytes () const {
90
98
return pending_buf_.Size ();
91
99
}
92
100
93
- void JournalStreamer::AsyncWrite () {
94
- DCHECK (!pending_buf_.Empty ());
101
+ void JournalStreamer::Write (std::string str) {
102
+ DCHECK (!str.empty ());
103
+ DVLOG (3 ) << " Writing " << str.size () << " bytes" ;
104
+
105
+ pending_buf_.Push (std::move (str));
106
+ AsyncWrite (false );
107
+ }
108
+
109
+ void JournalStreamer::StartStalledDataWriterFiber () {
110
+ if (is_stable_sync_ && !stalled_data_writer_.IsJoinable ()) {
111
+ auto pb = fb2::ProactorBase::me ();
112
+ std::chrono::milliseconds period_us (stalled_writer_base_period_ms);
113
+ stalled_data_writer_ = MakeFiber ([this , index = pb->GetPoolIndex (), period_us]() mutable {
114
+ ThisFiber::SetName (absl::StrCat (" fiber_periodic_journal_writer_" , index));
115
+ this ->StalledDataWriterFiber (period_us, &stalled_data_writer_done_);
116
+ });
117
+ }
118
+ }
119
+
120
+ void JournalStreamer::StalledDataWriterFiber (std::chrono::milliseconds period_ms,
121
+ util::fb2::Done* waiter) {
122
+ while (cntx_->IsRunning ()) {
123
+ if (waiter->WaitFor (period_ms)) {
124
+ if (!cntx_->IsRunning ()) {
125
+ return ;
126
+ }
127
+ }
128
+
129
+ // We don't want to force async write to replicate if last data
130
+ // was written recent. Data needs to be stalled for period_ms duration.
131
+ if (!pending_buf_.Size () || in_flight_bytes_ > 0 ||
132
+ ((last_async_write_time_ + period_ms.count ()) >
133
+ (fb2::ProactorBase::GetMonotonicTimeNs () / 1000000 ))) {
134
+ continue ;
135
+ }
95
136
137
+ AsyncWrite (true );
138
+ }
139
+ }
140
+
141
+ void JournalStreamer::AsyncWrite (bool force_send) {
142
+ // Stable sync or RestoreStreamer replication can't write data until
143
+ // previous AsyncWriter finished.
96
144
if (in_flight_bytes_ > 0 ) {
97
- // We can not flush data while there are in flight requests because AsyncWrite
98
- // is not atomic. Therefore, we just aggregate.
145
+ return ;
146
+ }
147
+
148
+ // Writing in stable sync and outside of fiber needs to check
149
+ // threshold before writing data.
150
+ if (is_stable_sync_ && !force_send &&
151
+ pending_buf_.FrontBufSize () < replication_dispatch_threshold) {
99
152
return ;
100
153
}
101
154
102
155
const auto & cur_buf = pending_buf_.PrepareSendingBuf ();
103
156
104
157
in_flight_bytes_ = cur_buf.mem_size ;
105
158
total_sent_ += in_flight_bytes_;
159
+ last_async_write_time_ = fb2::ProactorBase::GetMonotonicTimeNs () / 1000000 ;
106
160
107
161
const auto v_size = cur_buf.buf .size ();
108
162
absl::InlinedVector<iovec, 8 > v (v_size);
@@ -112,18 +166,8 @@ void JournalStreamer::AsyncWrite() {
112
166
v[i] = IoVec (io::Bytes (uptr, cur_buf.buf [i].size ()));
113
167
}
114
168
115
- dest_->AsyncWrite (v.data (), v.size (), [this , len = in_flight_bytes_](std::error_code ec) {
116
- OnCompletion (std::move (ec), len);
117
- });
118
- }
119
-
120
- void JournalStreamer::Write (std::string str) {
121
- DCHECK (!str.empty ());
122
- DVLOG (3 ) << " Writing " << str.size () << " bytes" ;
123
-
124
- pending_buf_.Push (std::move (str));
125
-
126
- AsyncWrite ();
169
+ dest_->AsyncWrite (v.data (), v.size (),
170
+ [this , len = in_flight_bytes_](std::error_code ec) { OnCompletion (ec, len); });
127
171
}
128
172
129
173
void JournalStreamer::OnCompletion (std::error_code ec, size_t len) {
@@ -136,7 +180,7 @@ void JournalStreamer::OnCompletion(std::error_code ec, size_t len) {
136
180
if (ec) {
137
181
cntx_->ReportError (ec);
138
182
} else if (!pending_buf_.Empty ()) {
139
- AsyncWrite ();
183
+ AsyncWrite (false );
140
184
}
141
185
}
142
186
@@ -176,13 +220,22 @@ void JournalStreamer::WaitForInflightToComplete() {
176
220
}
177
221
}
178
222
223
+ void JournalStreamer::StopStalledDataWriterFiber () {
224
+ if (is_stable_sync_ && stalled_data_writer_.IsJoinable ()) {
225
+ stalled_data_writer_done_.Notify ();
226
+ if (stalled_data_writer_.IsJoinable ()) {
227
+ stalled_data_writer_.Join ();
228
+ }
229
+ }
230
+ }
231
+
179
232
bool JournalStreamer::IsStalled () const {
180
233
return pending_buf_.Size () >= replication_stream_output_limit_cached;
181
234
}
182
235
183
236
RestoreStreamer::RestoreStreamer (DbSlice* slice, cluster::SlotSet slots, journal::Journal* journal,
184
237
ExecutionState* cntx)
185
- : JournalStreamer(journal, cntx, JournalStreamer::SendLsn::NO),
238
+ : JournalStreamer(journal, cntx, JournalStreamer::SendLsn::NO, false ),
186
239
db_slice_ (slice),
187
240
my_slots_(std::move(slots)) {
188
241
DCHECK (slice != nullptr );
0 commit comments