Skip to content

Commit 4d66d10

Browse files
committed
[client] Introduce batchDeliveryTimeout in client to expire batches that have been stuck for a long time
1 parent b8d614a commit 4d66d10

File tree

8 files changed

+317
-48
lines changed

8 files changed

+317
-48
lines changed

fluss-client/src/main/java/com/alibaba/fluss/client/write/RecordAccumulator.java

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,15 +108,21 @@ public final class RecordAccumulator {
108108
private final IdempotenceManager idempotenceManager;
109109
private final Clock clock;
110110

111+
/**
112+
* An upper bound on the time to report success or failure on record delivery when append into
113+
* WriteBatch.
114+
*/
115+
private final long batchDeliveryTimeoutMs;
116+
111117
// TODO add retryBackoffMs to retry the produce request upon receiving an error.
112-
// TODO add deliveryTimeoutMs to report success or failure on record delivery.
113-
// TODO add nextBatchExpiryTimeMs
118+
// TODO add nextBatchExpiryTimeMs. Trace by https://github.com/alibaba/fluss/pull/300
114119

115120
RecordAccumulator(
116121
Configuration conf,
117122
IdempotenceManager idempotenceManager,
118123
WriterMetricGroup writerMetricGroup,
119-
Clock clock) {
124+
Clock clock,
125+
long batchDeliveryTimeoutMs) {
120126
this.closed = false;
121127
this.flushesInProgress = new AtomicInteger(0);
122128
this.appendsInProgress = new AtomicInteger(0);
@@ -136,6 +142,7 @@ public final class RecordAccumulator {
136142
this.nodesDrainIndex = new HashMap<>();
137143
this.idempotenceManager = idempotenceManager;
138144
this.clock = clock;
145+
this.batchDeliveryTimeoutMs = batchDeliveryTimeoutMs;
139146
registerMetrics(writerMetricGroup);
140147
}
141148

@@ -294,6 +301,31 @@ public void reEnqueue(WriteBatch batch) {
294301
}
295302
}
296303

304+
public List<WriteBatch> expiredBatches(long now) {
305+
List<WriteBatch> expiredBatches = new ArrayList<>();
306+
for (BucketAndWriteBatches bucketAndWriteBatch : writeBatches.values()) {
307+
for (Deque<WriteBatch> deque : bucketAndWriteBatch.batches.values()) {
308+
// expire the batches in the order of sending.
309+
synchronized (deque) {
310+
while (!deque.isEmpty()) {
311+
WriteBatch batch = deque.getFirst();
312+
if (batch.hasReachedDeliveryTimeout(batchDeliveryTimeoutMs, now)) {
313+
deque.poll();
314+
expiredBatches.add(batch);
315+
} else {
316+
break;
317+
}
318+
}
319+
}
320+
}
321+
}
322+
return expiredBatches;
323+
}
324+
325+
public long getDeliveryTimeoutMs() {
326+
return batchDeliveryTimeoutMs;
327+
}
328+
297329
/** Get the deque for the given table-bucket, creating it if necessary. */
298330
private Deque<WriteBatch> getOrCreateDeque(
299331
TableBucket tableBucket, PhysicalTablePath physicalTablePath) {

fluss-client/src/main/java/com/alibaba/fluss/client/write/Sender.java

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import com.alibaba.fluss.exception.InvalidMetadataException;
2525
import com.alibaba.fluss.exception.OutOfOrderSequenceException;
2626
import com.alibaba.fluss.exception.RetriableException;
27+
import com.alibaba.fluss.exception.TimeoutException;
2728
import com.alibaba.fluss.exception.UnknownTableOrBucketException;
2829
import com.alibaba.fluss.metadata.PhysicalTablePath;
2930
import com.alibaba.fluss.metadata.TableBucket;
@@ -37,6 +38,7 @@
3738
import com.alibaba.fluss.rpc.messages.PutKvResponse;
3839
import com.alibaba.fluss.rpc.protocol.ApiError;
3940
import com.alibaba.fluss.rpc.protocol.Errors;
41+
import com.alibaba.fluss.utils.clock.Clock;
4042

4143
import org.slf4j.Logger;
4244
import org.slf4j.LoggerFactory;
@@ -46,6 +48,7 @@
4648
import java.util.ArrayList;
4749
import java.util.HashMap;
4850
import java.util.HashSet;
51+
import java.util.Iterator;
4952
import java.util.List;
5053
import java.util.Map;
5154
import java.util.Set;
@@ -81,6 +84,9 @@ public class Sender implements Runnable {
8184
/** the number of times to retry a failed write batch before giving up. */
8285
private final int retries;
8386

87+
/* the clock instance used for getting the time */
88+
private final Clock clock;
89+
8490
/** true while the sender thread is still running. */
8591
private volatile boolean running;
8692

@@ -111,6 +117,7 @@ public Sender(
111117
int retries,
112118
MetadataUpdater metadataUpdater,
113119
IdempotenceManager idempotenceManager,
120+
Clock clock,
114121
WriterMetricGroup writerMetricGroup) {
115122
this.accumulator = accumulator;
116123
this.maxRequestSize = maxRequestSize;
@@ -124,6 +131,7 @@ public Sender(
124131
checkNotNull(metadataUpdater.getCoordinatorServer());
125132

126133
this.idempotenceManager = idempotenceManager;
134+
this.clock = clock;
127135
this.writerMetricGroup = writerMetricGroup;
128136

129137
// TODO add retry logic while send failed. See FLUSS-56364375
@@ -175,7 +183,8 @@ public void runOnce() throws Exception {
175183
}
176184

177185
// do send.
178-
sendWriteData();
186+
long currentTimeMillis = clock.milliseconds();
187+
sendWriteData(currentTimeMillis);
179188
}
180189

181190
public boolean isRunning() {
@@ -188,7 +197,7 @@ private void addToInflightBatches(Map<Integer, List<WriteBatch>> batches) {
188197
}
189198
}
190199

191-
private void sendWriteData() throws Exception {
200+
private void sendWriteData(long now) throws Exception {
192201
// get the list of buckets with data ready to send.
193202
ReadyCheckResult readyCheckResult = accumulator.ready(metadataUpdater.getCluster());
194203

@@ -216,7 +225,9 @@ private void sendWriteData() throws Exception {
216225
if (!batches.isEmpty()) {
217226
addToInflightBatches(batches);
218227

219-
// TODO add logic for batch expire.
228+
// check and expire batches if they have reached batch delivery timeout. This can avoid
229+
// the client to wait for a long time while the batch is forever retry.
230+
checkAndExpireBatches(now);
220231

221232
sendWriteRequests(batches);
222233

@@ -239,9 +250,8 @@ private void failBatch(WriteBatch batch, Exception exception, boolean adjustBatc
239250
if (idempotenceManager.idempotenceEnabled()) {
240251
try {
241252
// This call can throw an exception in the rare case that there's an invalid
242-
// state
243-
// transition attempted. Catch these so as not to interfere with the rest of the
244-
// logic.
253+
// state transition attempted. Catch these so as not to interfere with the rest
254+
// of the logic.
245255
idempotenceManager.handleFailedBatch(batch, exception, adjustBatchSequences);
246256
} catch (Exception e) {
247257
LOG.debug(
@@ -280,6 +290,64 @@ private void maybeRemoveAndDeallocateBatch(WriteBatch batch) {
280290
accumulator.deallocate(batch);
281291
}
282292

293+
private void checkAndExpireBatches(long now) {
294+
List<WriteBatch> expiredInflightBatches = getExpiredInflightBatches(now);
295+
List<WriteBatch> expiredBatches = accumulator.expiredBatches(now);
296+
expiredBatches.addAll(expiredInflightBatches);
297+
if (!expiredBatches.isEmpty()) {
298+
LOG.trace("Client found expired batches: {}", expiredBatches);
299+
}
300+
301+
for (WriteBatch batch : expiredBatches) {
302+
failBatch(
303+
batch,
304+
new TimeoutException(
305+
String.format(
306+
"Expiring %s records for %s : %s ms has passed since batch creation.",
307+
batch.recordCount,
308+
batch.tableBucket(),
309+
now - batch.getCreatedMs())),
310+
false);
311+
}
312+
}
313+
314+
/** Get the in-flight batches that has reached batch delivery timeout. */
315+
@VisibleForTesting
316+
List<WriteBatch> getExpiredInflightBatches(long now) {
317+
List<WriteBatch> expiredBatches = new ArrayList<>();
318+
for (Iterator<Map.Entry<TableBucket, List<WriteBatch>>> batchIt =
319+
inFlightBatches.entrySet().iterator();
320+
batchIt.hasNext(); ) {
321+
Map.Entry<TableBucket, List<WriteBatch>> entry = batchIt.next();
322+
List<WriteBatch> tbFlightBatches = entry.getValue();
323+
if (tbFlightBatches != null) {
324+
Iterator<WriteBatch> iter = tbFlightBatches.iterator();
325+
while (iter.hasNext()) {
326+
WriteBatch batch = iter.next();
327+
if (batch.hasReachedDeliveryTimeout(accumulator.getDeliveryTimeoutMs(), now)) {
328+
iter.remove();
329+
if (!batch.isDone()) {
330+
expiredBatches.add(batch);
331+
} else {
332+
throw new IllegalStateException(
333+
batch.tableBucket()
334+
+ " batch created at "
335+
+ batch.getCreatedMs()
336+
+ " gets unexpected final state "
337+
+ batch.getFinalState());
338+
}
339+
} else {
340+
break;
341+
}
342+
}
343+
if (tbFlightBatches.isEmpty()) {
344+
batchIt.remove();
345+
}
346+
}
347+
}
348+
return expiredBatches;
349+
}
350+
283351
private void maybeRemoveFromInflightBatches(WriteBatch batch) {
284352
synchronized (inFlightBatchesLock) {
285353
List<WriteBatch> batches = inFlightBatches.get(batch.tableBucket());
@@ -355,12 +423,11 @@ private void sendProduceLogRequestAndHandleResponse(
355423
ProduceLogRequest request,
356424
long tableId,
357425
Map<TableBucket, WriteBatch> recordsByBucket) {
358-
long startTime = System.currentTimeMillis();
426+
long startTime = clock.milliseconds();
359427
gateway.produceLog(request)
360428
.whenComplete(
361429
(produceLogResponse, e) -> {
362-
writerMetricGroup.setSendLatencyInMs(
363-
System.currentTimeMillis() - startTime);
430+
writerMetricGroup.setSendLatencyInMs(clock.milliseconds() - startTime);
364431
if (e != null) {
365432
handleWriteRequestException(e, recordsByBucket);
366433
} else {
@@ -375,12 +442,11 @@ private void sendPutKvRequestAndHandleResponse(
375442
PutKvRequest request,
376443
long tableId,
377444
Map<TableBucket, WriteBatch> recordsByBucket) {
378-
long startTime = System.currentTimeMillis();
445+
long startTime = clock.milliseconds();
379446
gateway.putKv(request)
380447
.whenComplete(
381448
(putKvResponse, e) -> {
382-
writerMetricGroup.setSendLatencyInMs(
383-
System.currentTimeMillis() - startTime);
449+
writerMetricGroup.setSendLatencyInMs(clock.milliseconds() - startTime);
384450
if (e != null) {
385451
handleWriteRequestException(e, recordsByBucket);
386452
} else {

fluss-client/src/main/java/com/alibaba/fluss/client/write/WriteBatch.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,18 @@ public RequestFuture getRequestFuture() {
140140
return requestFuture;
141141
}
142142

143+
long getCreatedMs() {
144+
return createdMs;
145+
}
146+
147+
boolean hasReachedDeliveryTimeout(long batchDeliveryTimeoutMs, long now) {
148+
return batchDeliveryTimeoutMs <= now - this.createdMs;
149+
}
150+
151+
FinalState getFinalState() {
152+
return finalState.get();
153+
}
154+
143155
public long waitedTimeMs(long nowMs) {
144156
return Math.max(0, nowMs - createdMs);
145157
}

fluss-client/src/main/java/com/alibaba/fluss/client/write/WriterClient.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import com.alibaba.fluss.rpc.gateway.TabletServerGateway;
3232
import com.alibaba.fluss.rpc.metrics.ClientMetricGroup;
3333
import com.alibaba.fluss.utils.CopyOnWriteMap;
34+
import com.alibaba.fluss.utils.clock.Clock;
3435
import com.alibaba.fluss.utils.clock.SystemClock;
3536
import com.alibaba.fluss.utils.concurrent.ExecutorThreadFactory;
3637

@@ -97,10 +98,16 @@ public WriterClient(
9798

9899
short acks = configureAcks(idempotenceManager.idempotenceEnabled());
99100
int retries = configureRetries(idempotenceManager.idempotenceEnabled());
101+
long batchDeliveryTimeoutMs = configureBatchDeliveryTimeout(conf);
102+
SystemClock clock = SystemClock.getInstance();
100103
this.accumulator =
101104
new RecordAccumulator(
102-
conf, idempotenceManager, writerMetricGroup, SystemClock.getInstance());
103-
this.sender = newSender(acks, retries);
105+
conf,
106+
idempotenceManager,
107+
writerMetricGroup,
108+
clock,
109+
batchDeliveryTimeoutMs);
110+
this.sender = newSender(acks, retries, clock);
104111
this.ioThreadPool = createThreadPool();
105112
ioThreadPool.submit(sender);
106113
} catch (Throwable t) {
@@ -249,7 +256,27 @@ private int configureRetries(boolean idempotenceEnabled) {
249256
return retries;
250257
}
251258

252-
private Sender newSender(short acks, int retries) {
259+
private long configureBatchDeliveryTimeout(Configuration conf) {
260+
long batchDeliveryTimeoutMs =
261+
conf.get(ConfigOptions.CLIENT_WRITER_BATCH_DELIVERY_TIMEOUT).toMillis();
262+
long batchTimeoutMs = conf.get(ConfigOptions.CLIENT_WRITER_BATCH_TIMEOUT).toMillis();
263+
long requestTimeoutMs = conf.get(ConfigOptions.CLIENT_REQUEST_TIMEOUT).toMillis();
264+
long batchAndRequestTimeoutMs = Math.min(batchTimeoutMs + requestTimeoutMs, Long.MAX_VALUE);
265+
266+
if (batchDeliveryTimeoutMs < batchAndRequestTimeoutMs) {
267+
throw new IllegalConfigurationException(
268+
"The value of "
269+
+ ConfigOptions.CLIENT_WRITER_BATCH_DELIVERY_TIMEOUT.key()
270+
+ " should be greater than or equal to "
271+
+ ConfigOptions.CLIENT_REQUEST_TIMEOUT.key()
272+
+ " + "
273+
+ ConfigOptions.CLIENT_WRITER_BATCH_TIMEOUT.key()
274+
+ ".");
275+
}
276+
return batchDeliveryTimeoutMs;
277+
}
278+
279+
private Sender newSender(short acks, int retries, Clock clock) {
253280
return new Sender(
254281
accumulator,
255282
(int) conf.get(ConfigOptions.CLIENT_REQUEST_TIMEOUT).toMillis(),
@@ -258,6 +285,7 @@ private Sender newSender(short acks, int retries) {
258285
retries,
259286
metadataUpdater,
260287
idempotenceManager,
288+
clock,
261289
writerMetricGroup);
262290
}
263291

0 commit comments

Comments
 (0)