Skip to content

Commit 161e0f7

Browse files
Improve HTTP2/PING timeout logic (#5018)
Improve PING timeout logic to avoid premature closing of the connection that can occur due to delays in scheduling of writing PING frame to the channel and/or flushing it to the socket.
1 parent 2e085bf commit 161e0f7

File tree

3 files changed

+145
-8
lines changed

3 files changed

+145
-8
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "bugfix",
3+
"category": "AWS SDK for Java v2",
4+
"contributor": "akidambisrinivasan",
5+
"description": "Improve HTTP2/PING timeout logic. Improvement avoids premature timeouts due to delays in scheduling the write of PING frame to the channel and/or flushing it to the socket."
6+
}

http-clients/netty-nio-client/src/main/java/software/amazon/awssdk/http/nio/netty/internal/http2/Http2PingHandler.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ public class Http2PingHandler extends SimpleChannelInboundHandler<Http2PingFrame
4141
private static final NettyClientLogger log = NettyClientLogger.getLogger(Http2PingHandler.class);
4242
private static final Http2PingFrame DEFAULT_PING_FRAME = new DefaultHttp2PingFrame(0);
4343

44+
/**
45+
* Time limit in ms for delays that results in a warning message being printed.
46+
*/
47+
private final long delayWarningTimeLimitMs;
48+
4449
private final long pingTimeoutMillis;
4550

4651
private ScheduledFuture<?> periodicPing;
@@ -49,6 +54,7 @@ public class Http2PingHandler extends SimpleChannelInboundHandler<Http2PingFrame
4954

5055
public Http2PingHandler(int pingTimeoutMillis) {
5156
this.pingTimeoutMillis = pingTimeoutMillis;
57+
delayWarningTimeLimitMs = Math.min(100, pingTimeoutMillis / 10);
5258
}
5359

5460
@Override
@@ -62,7 +68,7 @@ private void start(Protocol protocol, ChannelHandlerContext ctx) {
6268
if (protocol == Protocol.HTTP2 && periodicPing == null) {
6369
periodicPing = ctx.channel()
6470
.eventLoop()
65-
.scheduleAtFixedRate(() -> doPeriodicPing(ctx.channel()), 0, pingTimeoutMillis, MILLISECONDS);
71+
.schedule(() -> doPeriodicPing(ctx.channel()), 0, MILLISECONDS);
6672
}
6773
}
6874

@@ -79,8 +85,8 @@ public void channelInactive(ChannelHandlerContext ctx) {
7985

8086
@Override
8187
protected void channelRead0(ChannelHandlerContext ctx, Http2PingFrame frame) {
88+
log.debug(ctx.channel(), () -> "Received PING from channel, ack=" + frame.ack());
8289
if (frame.ack()) {
83-
log.debug(ctx.channel(), () -> "Received PING ACK from channel " + ctx.channel());
8490
lastPingAckTime = System.currentTimeMillis();
8591
} else {
8692
ctx.fireChannelRead(frame);
@@ -89,22 +95,48 @@ protected void channelRead0(ChannelHandlerContext ctx, Http2PingFrame frame) {
8995

9096
private void doPeriodicPing(Channel channel) {
9197
if (lastPingAckTime <= lastPingSendTime - pingTimeoutMillis) {
98+
log.warn(channel, () -> "PING timeout occurred");
9299
long timeSinceLastPingSend = System.currentTimeMillis() - lastPingSendTime;
93100
channelIsUnhealthy(channel, new PingFailedException("Server did not respond to PING after " +
94101
timeSinceLastPingSend + "ms (limit: " +
95102
pingTimeoutMillis + "ms)"));
96103
} else {
104+
log.debug(channel, () -> "Sending HTTP2/PING frame");
105+
long scheduleTime = lastPingSendTime == 0 ? 0 : System.currentTimeMillis() - lastPingSendTime;
106+
if (scheduleTime - pingTimeoutMillis > delayWarningTimeLimitMs) {
107+
log.warn(channel, () -> "PING timer scheduled after " + scheduleTime + "ms");
108+
}
97109
sendPing(channel);
98110
}
99111
}
100112

101113
private void sendPing(Channel channel) {
114+
long writeMs = System.currentTimeMillis();
102115
channel.writeAndFlush(DEFAULT_PING_FRAME).addListener(res -> {
103116
if (!res.isSuccess()) {
104117
log.debug(channel, () -> "Failed to write and flush PING frame to connection", res.cause());
105118
channelIsUnhealthy(channel, new PingFailedException("Failed to send PING to the service", res.cause()));
106119
} else {
120+
log.debug(channel, () -> "Successfully flushed PING frame to connection");
107121
lastPingSendTime = System.currentTimeMillis();
122+
long flushTime = lastPingSendTime - writeMs;
123+
if (flushTime > delayWarningTimeLimitMs) {
124+
log.warn(channel, () -> "Flushing PING frame took " + flushTime + "ms");
125+
}
126+
// ping frame was flushed to the socket, schedule to send the next ping now to avoid premature timeout.
127+
//
128+
// Scenario we want to avoid is shown below (NOTE: ptm - pingTimeoutMillis, Wx - Write Ping x,
129+
// Fx - Flush Ping x, Rx - Receive ack x, T -Timeout)
130+
// When timer is scheduled periodically, even though ack2 comes < ptm after being flushed to the socket, we
131+
// will still timeout at 2ptm.
132+
// 0 1ptm 2ptm 3ptm
133+
// |----------|----------|----------|-------> time
134+
// W1F1 R1 W2 F2 T R2
135+
// When timer is scheduled after flushing, we allow time for ack to come back and don't prematurely timeout.
136+
// 0 ptm1 ptm2
137+
// |-|----------|------|----------|-----------> time
138+
// W1F1 R1 W2 F2 R2 W3
139+
periodicPing = channel.eventLoop().schedule(() -> doPeriodicPing(channel), pingTimeoutMillis, MILLISECONDS);
108140
}
109141
});
110142
}

http-clients/netty-nio-client/src/test/java/software/amazon/awssdk/http/nio/netty/internal/http2/Http2PingHandlerTest.java

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,21 @@
3333
import java.util.Collections;
3434
import java.util.List;
3535
import java.util.concurrent.CompletableFuture;
36+
import java.util.concurrent.Executors;
37+
import java.util.concurrent.ScheduledExecutorService;
3638
import java.util.concurrent.TimeUnit;
39+
import java.util.logging.Level;
40+
import java.util.logging.Logger;
3741
import org.junit.jupiter.api.BeforeEach;
3842
import org.junit.jupiter.api.Test;
3943
import org.mockito.Mockito;
4044
import software.amazon.awssdk.http.Protocol;
4145
import software.amazon.awssdk.http.nio.netty.internal.ChannelAttributeKey;
46+
import software.amazon.awssdk.http.nio.netty.internal.utils.NettyClientLogger;
4247

4348
public class Http2PingHandlerTest {
49+
private static final NettyClientLogger log = NettyClientLogger.getLogger(Http2PingHandler.class);
50+
4451
private static final int FAST_CHECKER_DURATION_MILLIS = 100;
4552

4653
private Http2PingHandler fastChecker;
@@ -109,15 +116,21 @@ public void unregister_stopsRunning() throws InterruptedException {
109116
}
110117

111118
@Test
112-
public void ignoredPingsResultInOneChannelException() throws InterruptedException {
119+
public void schedulingDelayDoesNotCausePingTimeout() throws InterruptedException {
113120
PipelineExceptionCatcher catcher = new PipelineExceptionCatcher();
114-
EmbeddedChannel channel = createHttp2Channel(fastChecker, catcher);
115-
116-
Thread.sleep(FAST_CHECKER_DURATION_MILLIS);
121+
PingResponder pingResponder = new PingResponder();
122+
EmbeddedChannel channel = createHttp2Channel(fastChecker, catcher, pingResponder);
123+
124+
pingResponder.setCallback(() -> channel.writeInbound(new DefaultHttp2PingFrame(0, true)),
125+
(long)(FAST_CHECKER_DURATION_MILLIS / 10) /* send ack 10ms after getting ping */);
126+
117127
channel.runPendingTasks();
118128

119-
assertThat(catcher.caughtExceptions).hasSize(1);
120-
assertThat(catcher.caughtExceptions.get(0)).isInstanceOf(IOException.class);
129+
// cause a scheduling delay for the timer to run
130+
Thread.sleep(FAST_CHECKER_DURATION_MILLIS * 2);
131+
channel.runPendingTasks();
132+
133+
assertThat(catcher.caughtExceptions).hasSize(0);
121134
}
122135

123136
@Test
@@ -214,6 +227,51 @@ public void channelInactive_shouldCancelTaskAndForwardToOtherHandlers() {
214227
assertThat(channel.runScheduledPendingTasks()).isEqualTo(-1L);
215228
}
216229

230+
@Test
231+
public void delayedPingFlushDoesntTerminateConnectionPrematurely() {
232+
Logger.getLogger("").setLevel(Level.ALL);
233+
234+
PipelineExceptionCatcher catcher = new PipelineExceptionCatcher();
235+
PingResponder pingResponder = new PingResponder();
236+
DelayingWriter delayingWriter = new DelayingWriter((long)(FAST_CHECKER_DURATION_MILLIS * 1.5));
237+
238+
EmbeddedChannel channel = createHttp2Channel(fastChecker, catcher, pingResponder, delayingWriter);
239+
240+
pingResponder.setCallback(() -> channel.writeInbound(new DefaultHttp2PingFrame(0, true)),
241+
FAST_CHECKER_DURATION_MILLIS / 10 /* send ack in 10 ms after getting ping*/);
242+
243+
Instant runEnd = Instant.now().plus(1, SECONDS);
244+
while (Instant.now().isBefore(runEnd)) {
245+
channel.runPendingTasks();
246+
assertThat(catcher.caughtExceptions).isEmpty();
247+
}
248+
}
249+
250+
@Test
251+
public void delayedPingAckTerminatesConnection() {
252+
Logger.getLogger("").setLevel(Level.ALL);
253+
254+
PipelineExceptionCatcher catcher = new PipelineExceptionCatcher();
255+
PingResponder pingResponder = new PingResponder();
256+
DelayingWriter delayingWriter = new DelayingWriter((long)(FAST_CHECKER_DURATION_MILLIS * 1.5));
257+
258+
EmbeddedChannel channel = createHttp2Channel(fastChecker, catcher, pingResponder, delayingWriter);
259+
260+
pingResponder.setCallback(() -> channel.writeInbound(new DefaultHttp2PingFrame(0, true)),
261+
(long)(FAST_CHECKER_DURATION_MILLIS * 1.5) /* send a late ack to trigger timeout */);
262+
263+
Instant runEnd = Instant.now().plus(1, SECONDS);
264+
while (Instant.now().isBefore(runEnd)) {
265+
channel.runPendingTasks();
266+
if (!catcher.caughtExceptions.isEmpty()) {
267+
break;
268+
}
269+
}
270+
271+
assertThat(catcher.caughtExceptions).hasSize(1);
272+
assertThat(catcher.caughtExceptions.get(0)).isInstanceOf(PingFailedException.class);
273+
}
274+
217275
private static final class PingReadCatcher extends SimpleChannelInboundHandler<Http2PingFrame> {
218276
private final List<Http2PingFrame> caughtPings = Collections.synchronizedList(new ArrayList<>());
219277

@@ -239,4 +297,45 @@ public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise)
239297
promise.setFailure(new IOException("Failed!"));
240298
}
241299
}
300+
301+
private static final class PingResponder extends ChannelOutboundHandlerAdapter {
302+
private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
303+
private Runnable respondCallback;
304+
private long callbackDelayMillis;
305+
306+
void setCallback(Runnable respondCallback, long delay) {
307+
this.respondCallback = respondCallback;
308+
this.callbackDelayMillis = delay;
309+
}
310+
311+
@Override
312+
public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) {
313+
if (msg instanceof Http2PingFrame) {
314+
log.debug(ctx.channel(), () -> "OutgoingPingCatcher Received ping " + msg);
315+
scheduler.schedule(respondCallback, callbackDelayMillis, TimeUnit.MILLISECONDS);
316+
}
317+
ctx.write(msg, promise);
318+
}
319+
}
320+
321+
private static final class DelayingWriter extends ChannelOutboundHandlerAdapter {
322+
private final long sleepMillis;
323+
324+
DelayingWriter(long sleepMillis) {
325+
this.sleepMillis = sleepMillis;
326+
}
327+
328+
@Override
329+
public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) {
330+
log.debug(ctx.channel(), () -> " Writing " + msg + " delayed by " + sleepMillis);
331+
try {
332+
Thread.sleep(sleepMillis);
333+
} catch (InterruptedException e) {
334+
335+
}
336+
log.debug(ctx.channel(), () -> " Continuing write of " + msg);
337+
338+
ctx.write(msg, promise);
339+
}
340+
}
242341
}

0 commit comments

Comments
 (0)