Skip to content

Commit 7e86102

Browse files
authored
Revert "Improve Http2 ping timeout logic (#4027)" (#4993)
This reverts commit 1fa1d65.
1 parent 1fa1d65 commit 7e86102

File tree

6 files changed

+26
-221
lines changed

6 files changed

+26
-221
lines changed

.changes/next-release/bugfix-AWSSDKforJavav2-6454991.json

Lines changed: 0 additions & 6 deletions
This file was deleted.

core/sdk-core/src/test/java/software/amazon/awssdk/core/internal/http/timers/HttpClientApiCallTimeoutTest.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,6 @@ public class HttpClientApiCallTimeoutTest {
5656

5757
private AmazonSyncHttpClient httpClient;
5858

59-
private final static int EPSILON_MILLIS = 10;
60-
6159
@Before
6260
public void setup() {
6361
httpClient = testClientBuilder()
@@ -72,7 +70,7 @@ public void successfulResponse_SlowResponseHandler_ThrowsApiCallTimeoutException
7270
.willReturn(aResponse().withStatus(200).withBody("{}")));
7371

7472
assertThatThrownBy(() -> requestBuilder().execute(combinedSyncResponseHandler(
75-
superSlowResponseHandler(API_CALL_TIMEOUT.toMillis() + EPSILON_MILLIS), null)))
73+
superSlowResponseHandler(API_CALL_TIMEOUT.toMillis()), null)))
7674
.isInstanceOf(ApiCallTimeoutException.class);
7775
}
7876

http-clients/netty-nio-client/src/main/java/software/amazon/awssdk/http/nio/netty/internal/http2/Http2PingHandler.java

Lines changed: 9 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
import io.netty.channel.Channel;
2121
import io.netty.channel.ChannelHandlerContext;
22-
import io.netty.channel.ChannelInboundHandlerAdapter;
2322
import io.netty.channel.ChannelPipeline;
23+
import io.netty.channel.SimpleChannelInboundHandler;
2424
import io.netty.handler.codec.http2.DefaultHttp2PingFrame;
2525
import io.netty.handler.codec.http2.Http2PingFrame;
2626
import io.netty.util.concurrent.ScheduledFuture;
@@ -37,24 +37,18 @@
3737
* If a channel is found to be unhealthy, this will invoke {@link ChannelPipeline#fireExceptionCaught(Throwable)}.
3838
*/
3939
@SdkInternalApi
40-
public class Http2PingHandler extends ChannelInboundHandlerAdapter {
40+
public class Http2PingHandler extends SimpleChannelInboundHandler<Http2PingFrame> {
4141
private static final NettyClientLogger log = NettyClientLogger.getLogger(Http2PingHandler.class);
4242
private static final Http2PingFrame DEFAULT_PING_FRAME = new DefaultHttp2PingFrame(0);
4343

44-
/**
45-
* Time limit in ms for delays that results in a warning message being printed.
46-
*/
47-
private final long delayWarningTimeLimitMs;
48-
4944
private final long pingTimeoutMillis;
5045

5146
private ScheduledFuture<?> periodicPing;
5247
private long lastPingSendTime = 0;
53-
private long lastMsgRcvdTime = 0;
48+
private long lastPingAckTime = 0;
5449

5550
public Http2PingHandler(int pingTimeoutMillis) {
5651
this.pingTimeoutMillis = pingTimeoutMillis;
57-
delayWarningTimeLimitMs = Math.min(100, pingTimeoutMillis / 10);
5852
}
5953

6054
@Override
@@ -68,7 +62,7 @@ private void start(Protocol protocol, ChannelHandlerContext ctx) {
6862
if (protocol == Protocol.HTTP2 && periodicPing == null) {
6963
periodicPing = ctx.channel()
7064
.eventLoop()
71-
.schedule(() -> doPeriodicPing(ctx.channel()), 0, MILLISECONDS);
65+
.scheduleAtFixedRate(() -> doPeriodicPing(ctx.channel()), 0, pingTimeoutMillis, MILLISECONDS);
7266
}
7367
}
7468

@@ -84,68 +78,33 @@ public void channelInactive(ChannelHandlerContext ctx) {
8478
}
8579

8680
@Override
87-
public void channelRead(ChannelHandlerContext ctx, Object msg) {
88-
// if there is any message read on this connection, it also suggests the channel is
89-
// active, extend the time of connection inactiveness for ping timeout logic.
90-
lastMsgRcvdTime = System.currentTimeMillis();
91-
if (msg instanceof Http2PingFrame) {
92-
doChannelRead(ctx, (Http2PingFrame) msg);
81+
protected void channelRead0(ChannelHandlerContext ctx, Http2PingFrame frame) {
82+
if (frame.ack()) {
83+
log.debug(ctx.channel(), () -> "Received PING ACK from channel " + ctx.channel());
84+
lastPingAckTime = System.currentTimeMillis();
9385
} else {
94-
ctx.fireChannelRead(msg);
95-
}
96-
}
97-
98-
private void doChannelRead(ChannelHandlerContext ctx, Http2PingFrame frame) {
99-
log.debug(ctx.channel(), () -> "Received PING from channel, ack=" + frame.ack());
100-
if (!frame.ack()) {
10186
ctx.fireChannelRead(frame);
10287
}
10388
}
10489

10590
private void doPeriodicPing(Channel channel) {
106-
if (lastMsgRcvdTime <= lastPingSendTime - pingTimeoutMillis) {
107-
log.warn(channel, () -> "PING timeout occurred");
91+
if (lastPingAckTime <= lastPingSendTime - pingTimeoutMillis) {
10892
long timeSinceLastPingSend = System.currentTimeMillis() - lastPingSendTime;
10993
channelIsUnhealthy(channel, new PingFailedException("Server did not respond to PING after " +
11094
timeSinceLastPingSend + "ms (limit: " +
11195
pingTimeoutMillis + "ms)"));
11296
} else {
113-
log.debug(channel, () -> "Sending HTTP2/PING frame");
114-
long scheduleTime = lastPingSendTime == 0 ? 0 : System.currentTimeMillis() - lastPingSendTime;
115-
if (scheduleTime - pingTimeoutMillis > delayWarningTimeLimitMs) {
116-
log.warn(channel, () -> "PING timer scheduled after " + scheduleTime + "ms");
117-
}
11897
sendPing(channel);
11998
}
12099
}
121100

122101
private void sendPing(Channel channel) {
123-
long writeMs = System.currentTimeMillis();
124102
channel.writeAndFlush(DEFAULT_PING_FRAME).addListener(res -> {
125103
if (!res.isSuccess()) {
126104
log.debug(channel, () -> "Failed to write and flush PING frame to connection", res.cause());
127105
channelIsUnhealthy(channel, new PingFailedException("Failed to send PING to the service", res.cause()));
128106
} else {
129-
log.debug(channel, () -> "Successfully flushed PING frame to connection");
130107
lastPingSendTime = System.currentTimeMillis();
131-
long flushTime = lastPingSendTime - writeMs;
132-
if (flushTime > delayWarningTimeLimitMs) {
133-
log.warn(channel, () -> "Flushing PING frame took " + flushTime + "ms");
134-
}
135-
// ping frame was flushed to the socket, schedule to send the next ping now to avoid premature timeout.
136-
//
137-
// Scenario we want to avoid is shown below (NOTE: ptm - pingTimeoutMillis, Wx - Write Ping x,
138-
// Fx - Flush Ping x, Rx - Receive ack x, T -Timeout)
139-
// When timer is scheduled periodically, even though ack2 comes < ptm after being flushed to the socket, we
140-
// will still timeout at 2ptm.
141-
// 0 1ptm 2ptm 3ptm
142-
// |----------|----------|----------|-------> time
143-
// W1F1 R1 W2 F2 T R2
144-
// When timer is scheduled after flushing, we allow time for ack to come back and don't prematurely timeout.
145-
// 0 ptm1 ptm2
146-
// |-|----------|------|----------|-----------> time
147-
// W1F1 R1 W2 F2 R2 W3
148-
periodicPing = channel.eventLoop().schedule(() -> doPeriodicPing(channel), pingTimeoutMillis, MILLISECONDS);
149108
}
150109
});
151110
}

http-clients/netty-nio-client/src/test/java/software/amazon/awssdk/http/nio/netty/fault/PingTimeoutTest.java

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -99,37 +99,23 @@ public void methodTeardown() throws InterruptedException {
9999
netty = null;
100100
}
101101

102-
/**
103-
* Default ping timeout value is 5000ms, so when the channel has no activity including
104-
* no data packets received or ping acks received for the timeout value, the connection
105-
* will be closed. During connection setup for testing, there will be Setting frame and
106-
* the SettingsAck frame exchanged which counts as activity on the channel, so the channel
107-
* should timeout during the second interval which is 10 seconds.
108-
*/
109102
@Test
110-
public void pingHealthCheck_null_shouldThrowExceptionAfter10Sec() {
103+
public void pingHealthCheck_null_shouldThrowExceptionAfter5Sec() {
111104
Instant a = Instant.now();
112105
assertThatThrownBy(() -> makeRequest(null).join())
113106
.hasMessageContaining("An error occurred on the connection")
114107
.hasCauseInstanceOf(IOException.class)
115108
.hasRootCauseInstanceOf(PingFailedException.class);
116-
assertThat(Duration.between(a, Instant.now())).isBetween(Duration.ofSeconds(10), Duration.ofSeconds(12));
109+
assertThat(Duration.between(a, Instant.now())).isBetween(Duration.ofSeconds(5), Duration.ofSeconds(7));
117110
}
118111

119-
/**
120-
* Test when ping timeout value is 3000ms. When the channel has no activity including
121-
* no data packets received or ping acks received for the timeout value, the connection
122-
* will be closed. During connection setup for testing, there will be Settings frame and
123-
* the SettingsAck frame exchanged which counts as activity on the channel, so the channel
124-
* should timeout during the second interval which is 6 seconds.
125-
*/
126112
@Test
127-
public void pingHealthCheck_3sec_shouldThrowExceptionAfter6Secs() {
113+
public void pingHealthCheck_10sec_shouldThrowExceptionAfter10Secs() {
128114
Instant a = Instant.now();
129-
assertThatThrownBy(() -> makeRequest(Duration.ofSeconds(3)).join()).hasCauseInstanceOf(IOException.class)
115+
assertThatThrownBy(() -> makeRequest(Duration.ofSeconds(10)).join()).hasCauseInstanceOf(IOException.class)
130116
.hasMessageContaining("An error occurred on the connection")
131117
.hasRootCauseInstanceOf(PingFailedException.class);
132-
assertThat(Duration.between(a, Instant.now())).isBetween(Duration.ofSeconds(6), Duration.ofSeconds(8));
118+
assertThat(Duration.between(a, Instant.now())).isBetween(Duration.ofSeconds(10), Duration.ofSeconds(12));
133119
}
134120

135121
@Test

http-clients/netty-nio-client/src/test/java/software/amazon/awssdk/http/nio/netty/fault/ServerNotRespondingTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
import java.time.Duration;
5555
import java.util.concurrent.CompletableFuture;
5656
import java.util.concurrent.atomic.AtomicInteger;
57-
import java.util.logging.Level;
5857
import org.junit.jupiter.api.AfterEach;
5958
import org.junit.jupiter.api.BeforeEach;
6059
import org.junit.jupiter.api.Test;
@@ -115,9 +114,8 @@ public void connectionNotAckPing_newRequestShouldUseNewConnection() throws Inter
115114
// First request should succeed
116115
firstRequest.join();
117116

118-
// Wait for Ping to close the connection, it takes 2 * healthCheckPingPeriod (200ms)
119-
// to identify a silent connection to be closed.
120-
Thread.sleep(400);
117+
// Wait for Ping to close the connection
118+
Thread.sleep(200);
121119
server.notRespondOnFirstChannel = false;
122120
sendGetRequest().join();
123121
assertThat(server.h2ConnectionCount.get()).isEqualTo(2);

0 commit comments

Comments
 (0)