Skip to content

Commit 5f7c98d

Browse files
authored
fix: add support for idle connection monitoring (opt-in for now) (#1171)
1 parent 792a6f8 commit 5f7c98d

File tree

10 files changed

+250
-5
lines changed

10 files changed

+250
-5
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"id": "386353e6-e3cc-4561-bfd6-9763d3ac033b",
3+
"type": "bugfix",
4+
"description": "Add support for connection idle monitoring for OkHttp via the engine config parameter `connectionIdlePollingInterval`. Monitoring is disabled by default to match previous behavior. This monitoring will switch to enabled by default in an upcoming minor version release.",
5+
"issues": [
6+
"awslabs/aws-sdk-kotlin#1214"
7+
]
8+
}

gradle/libs.versions.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ ktor-http-cio = { module = "io.ktor:ktor-http-cio", version.ref = "ktor-version"
9292
ktor-utils = { module = "io.ktor:ktor-utils", version.ref = "ktor-version" }
9393
ktor-io = { module = "io.ktor:ktor-io", version.ref = "ktor-version" }
9494
ktor-server-netty = { module = "io.ktor:ktor-server-netty", version.ref = "ktor-version" }
95-
ktor-server-jetty = { module = "io.ktor:ktor-server-jetty", version.ref = "ktor-version" }
95+
ktor-server-jetty-jakarta = { module = "io.ktor:ktor-server-jetty-jakarta", version.ref = "ktor-version" }
9696
ktor-server-cio = { module = "io.ktor:ktor-server-cio", version.ref = "ktor-version" }
9797
ktor-network-tls-certificates = { module = "io.ktor:ktor-network-tls-certificates", version.ref = "ktor-version" }
9898

runtime/protocol/http-client-engines/http-client-engine-okhttp/api/http-client-engine-okhttp.api

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,17 @@ public final class aws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngine$Com
6666
public final class aws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngineConfig : aws/smithy/kotlin/runtime/http/engine/HttpClientEngineConfigImpl {
6767
public static final field Companion Laws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngineConfig$Companion;
6868
public synthetic fun <init> (Laws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngineConfig$Builder;Lkotlin/jvm/internal/DefaultConstructorMarker;)V
69+
public final fun getConnectionIdlePollingInterval-FghU774 ()Lkotlin/time/Duration;
6970
public final fun getMaxConcurrencyPerHost-pVg5ArA ()I
7071
public fun toBuilderApplicator ()Lkotlin/jvm/functions/Function1;
7172
}
7273

7374
public final class aws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngineConfig$Builder : aws/smithy/kotlin/runtime/http/engine/HttpClientEngineConfigImpl$BuilderImpl {
7475
public fun <init> ()V
76+
public final fun getConnectionIdlePollingInterval-FghU774 ()Lkotlin/time/Duration;
7577
public final fun getMaxConcurrencyPerHost-0hXNFcg ()Lkotlin/UInt;
7678
public fun getTelemetryProvider ()Laws/smithy/kotlin/runtime/telemetry/TelemetryProvider;
79+
public final fun setConnectionIdlePollingInterval-BwNAW2A (Lkotlin/time/Duration;)V
7780
public final fun setMaxConcurrencyPerHost-ExVfyTY (Lkotlin/UInt;)V
7881
public fun setTelemetryProvider (Laws/smithy/kotlin/runtime/telemetry/TelemetryProvider;)V
7982
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package aws.smithy.kotlin.runtime.http.engine.okhttp
6+
7+
import aws.smithy.kotlin.runtime.telemetry.logging.logger
8+
import kotlinx.coroutines.CoroutineName
9+
import kotlinx.coroutines.CoroutineScope
10+
import kotlinx.coroutines.Dispatchers
11+
import kotlinx.coroutines.Job
12+
import kotlinx.coroutines.cancelAndJoin
13+
import kotlinx.coroutines.isActive
14+
import kotlinx.coroutines.launch
15+
import kotlinx.coroutines.runBlocking
16+
import okhttp3.Call
17+
import okhttp3.Connection
18+
import okhttp3.ConnectionListener
19+
import okhttp3.ExperimentalOkHttpApi
20+
import okhttp3.internal.closeQuietly
21+
import okio.EOFException
22+
import okio.buffer
23+
import okio.source
24+
import java.net.SocketException
25+
import java.net.SocketTimeoutException
26+
import java.util.concurrent.ConcurrentHashMap
27+
import kotlin.coroutines.coroutineContext
28+
import kotlin.time.Duration
29+
import kotlin.time.measureTime
30+
31+
/**
 * An OkHttp [ConnectionListener] that polls released connections to detect sockets which have been closed remotely.
 *
 * When a connection is released, a coroutine is launched which repeatedly performs a blocking read on the
 * connection's socket with the socket timeout set to [pollInterval]. A read timeout is treated as "still alive"; an
 * EOF is treated as a remote close, in which case the socket is closed quietly. When the connection is acquired
 * again, the polling coroutine is cancelled and joined before the callback returns.
 *
 * @param pollInterval The amount of time each blocking read waits before timing out. The value is converted to whole
 * milliseconds and clamped to the range `1..Int.MAX_VALUE` before being applied as the socket timeout.
 */
@OptIn(ExperimentalOkHttpApi::class)
internal class ConnectionIdleMonitor(val pollInterval: Duration) : ConnectionListener() {
    // Polling jobs keyed by the connection they are watching
    private val monitors = ConcurrentHashMap<Connection, Job>()

    /**
     * [pollInterval] expressed as a socket timeout in milliseconds.
     *
     * The value is clamped rather than narrowed with a bare `toInt()` because `Socket.soTimeout` interprets `0` as an
     * infinite timeout and rejects negative values. A sub-millisecond interval would otherwise become `0`, making
     * the polling read block indefinitely and preventing `connectionAcquired` from ever joining the monitor. A very
     * large interval could wrap to `0` or to a negative number.
     */
    private val pollTimeoutMillis: Int =
        pollInterval.inWholeMilliseconds.coerceIn(1L, Int.MAX_VALUE.toLong()).toInt()

    /**
     * Resolves the coroutine context to use for work related to this call: the context carried by the request's
     * [SdkRequestTag] if one is present, otherwise [Dispatchers.IO].
     */
    private fun Call.callContext() =
        request()
            .tag(SdkRequestTag::class.java)
            ?.callContext
            ?: Dispatchers.IO

    /**
     * Stops monitoring [connection] (if it is being monitored) and waits for the polling coroutine to finish.
     */
    override fun connectionAcquired(connection: Connection, call: Call) {
        // Non-locking map access is okay here because this code will only execute synchronously as part of a
        // `connectionAcquired` event and will be complete before any future `connectionReleased` event could fire for
        // the same connection.
        monitors.remove(connection)?.let { monitor ->
            val context = call.callContext()
            val logger = context.logger<ConnectionIdleMonitor>()
            logger.trace { "Cancel monitoring for $connection" }

            // Use `runBlocking` because this _must_ finish before OkHttp goes to use the connection
            val cancelTime = measureTime {
                runBlocking(context) { monitor.cancelAndJoin() }
            }

            logger.trace { "Monitoring canceled for $connection in $cancelTime" }
        }
    }

    /**
     * Launches a polling coroutine for [connection] in the call's context and records it so that it can be cancelled
     * by a later `connectionAcquired` event.
     */
    override fun connectionReleased(connection: Connection, call: Call) {
        val connId = System.identityHashCode(connection)
        val context = call.callContext()
        val scope = CoroutineScope(context)
        val monitor = scope.launch(CoroutineName("okhttp-conn-monitor-for-$connId")) {
            doMonitor(connection)
        }
        context.logger<ConnectionIdleMonitor>().trace { "Launched coroutine $monitor to monitor $connection" }

        // Non-locking map access is okay here because this code will only execute synchronously as part of a
        // `connectionReleased` event and will be complete before any future `connectionAcquired` event could fire for
        // the same connection.
        monitors[connection] = monitor
    }

    /**
     * Polls the socket of [conn] until the coroutine is cancelled, the remote end closes the socket, or polling fails.
     *
     * The socket's original timeout is restored on exit unless the socket was closed because of a remote EOF.
     */
    private suspend fun doMonitor(conn: Connection) {
        val logger = coroutineContext.logger<ConnectionIdleMonitor>()

        val socket = conn.socket()
        val source = try {
            socket.source()
        } catch (_: SocketException) {
            logger.trace { "Socket for $conn closed before monitoring started. Skipping polling loop." }
            return
        }.buffer().peek()

        logger.trace { "Commence socket monitoring for $conn" }
        var resetTimeout = true
        val oldTimeout = socket.soTimeout

        try {
            socket.soTimeout = pollTimeoutMillis

            while (coroutineContext.isActive) {
                try {
                    logger.trace { "Polling socket for $conn" }
                    source.readByte() // Blocking read; will take up to `pollInterval` time to complete
                } catch (_: SocketTimeoutException) {
                    logger.trace { "Socket still alive for $conn" }
                } catch (_: EOFException) {
                    logger.trace { "Socket closed remotely for $conn" }
                    socket.closeQuietly()
                    resetTimeout = false // The socket is closed; there is no timeout left to restore
                    return
                }
            }

            logger.trace { "Monitoring coroutine has been cancelled. Ending polling loop." }
        } catch (e: Throwable) {
            logger.warn(e) { "Failed to poll $conn. Ending polling loop. Connection may be unstable now." }
        } finally {
            if (resetTimeout) {
                logger.trace { "Attempting to reset soTimeout..." }
                try {
                    // Restore on the same socket instance whose timeout was captured above
                    socket.soTimeout = oldTimeout
                } catch (e: Throwable) {
                    logger.warn(e) { "Failed to reset socket timeout on $conn. Connection may be unstable now." }
                }
            }
        }
    }
}

runtime/protocol/http-client-engines/http-client-engine-okhttp/jvm/src/aws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngine.kt

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,20 @@ public fun OkHttpEngineConfig.buildClient(metrics: HttpClientMetrics): OkHttpCli
9999
readTimeout(config.socketReadTimeout.toJavaDuration())
100100
writeTimeout(config.socketWriteTimeout.toJavaDuration())
101101

102-
// FIXME - register a [ConnectionListener](https://github.com/square/okhttp/blob/master/okhttp/src/jvmMain/kotlin/okhttp3/ConnectionListener.kt#L27)
103-
// when a new okhttp release is cut that contains this abstraction and wireup connection uptime metrics
102+
@OptIn(ExperimentalOkHttpApi::class)
103+
val connectionListener = if (config.connectionIdlePollingInterval == null) {
104+
ConnectionListener.NONE
105+
} else {
106+
ConnectionIdleMonitor(connectionIdlePollingInterval)
107+
}
104108

105109
// use our own pool configured with the timeout settings taken from config
110+
@OptIn(ExperimentalOkHttpApi::class)
106111
val pool = ConnectionPool(
107112
maxIdleConnections = 5, // The default from the no-arg ConnectionPool() constructor
108113
keepAliveDuration = config.connectionIdleTimeout.inWholeMilliseconds,
109114
TimeUnit.MILLISECONDS,
115+
connectionListener = connectionListener,
110116
)
111117
connectionPool(pool)
112118

runtime/protocol/http-client-engines/http-client-engine-okhttp/jvm/src/aws/smithy/kotlin/runtime/http/engine/okhttp/OkHttpEngineConfig.kt

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import aws.smithy.kotlin.runtime.http.engine.HttpClientEngineConfig
99
import aws.smithy.kotlin.runtime.http.engine.HttpClientEngineConfigImpl
1010
import aws.smithy.kotlin.runtime.telemetry.Global
1111
import aws.smithy.kotlin.runtime.telemetry.TelemetryProvider
12+
import kotlin.time.Duration
1213

1314
/**
1415
* The configuration parameters for an OkHttp HTTP client engine.
@@ -28,6 +29,22 @@ public class OkHttpEngineConfig private constructor(builder: Builder) : HttpClie
2829
public val Default: OkHttpEngineConfig = OkHttpEngineConfig(Builder())
2930
}
3031

32+
/**
33+
* The interval at which to poll idle connections for remote closure, or `null` to disable monitoring of idle
34+
* connections. The default value is `null`.
35+
*
36+
* When this value is non-`null`, polling is enabled on connections which are released from an engine call and
37+
* enter the connection pool. Polling consists of a loop that performs blocking reads with the socket timeout
38+
* set to [connectionIdlePollingInterval]. Polling is cancelled for a connection when the engine acquires it
39+
* from the pool or when the connection is evicted from the pool and closed. Because the polling loop uses
40+
* blocking reads, an engine call to acquire or close a connection may be delayed by as much as
41+
* [connectionIdlePollingInterval].
42+
*
43+
* When this value is `null`, polling is disabled. Idle connections in the pool which are closed remotely may
44+
* encounter errors when they are acquired for a subsequent call.
45+
*/
46+
public val connectionIdlePollingInterval: Duration? = builder.connectionIdlePollingInterval
47+
3148
/**
3249
* The maximum number of requests to execute concurrently for a single host.
3350
*/
@@ -37,6 +54,7 @@ public class OkHttpEngineConfig private constructor(builder: Builder) : HttpClie
3754
super.toBuilderApplicator()()
3855

3956
if (this is Builder) {
57+
connectionIdlePollingInterval = this@OkHttpEngineConfig.connectionIdlePollingInterval
4058
maxConcurrencyPerHost = this@OkHttpEngineConfig.maxConcurrencyPerHost
4159
}
4260
}
@@ -45,6 +63,22 @@ public class OkHttpEngineConfig private constructor(builder: Builder) : HttpClie
4563
* A builder for [OkHttpEngineConfig]
4664
*/
4765
public class Builder : BuilderImpl() {
66+
/**
67+
* The interval at which to poll idle connections for remote closure, or `null` to disable monitoring of idle
68+
* connections. The default value is `null`.
69+
*
70+
* When this value is non-`null`, polling is enabled on connections which are released from an engine call and
71+
* enter the connection pool. Polling consists of a loop that performs blocking reads with the socket timeout
72+
* set to [connectionIdlePollingInterval]. Polling is cancelled for a connection when the engine acquires it
73+
* from the pool or when the connection is evicted from the pool and closed. Because the polling loop uses
74+
* blocking reads, an engine call to acquire or close a connection may be delayed by as much as
75+
* [connectionIdlePollingInterval].
76+
*
77+
* When this value is `null`, polling is disabled. Idle connections in the pool which are closed remotely may
78+
* encounter errors when they are acquired for a subsequent call.
79+
*/
80+
public var connectionIdlePollingInterval: Duration? = null
81+
4882
/**
4983
* The maximum number of requests to execute concurrently for a single host. Defaults to [maxConcurrency].
5084
*/

runtime/protocol/http-client-engines/test-suite/build.gradle.kts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ kotlin {
2424

2525
jvmMain {
2626
dependencies {
27-
implementation(libs.ktor.server.jetty)
27+
implementation(libs.ktor.server.jetty.jakarta)
2828
implementation(libs.ktor.network.tls.certificates)
2929

3030
implementation(project(":runtime:protocol:http-client-engines:http-client-engine-default"))
@@ -52,6 +52,8 @@ kotlin {
5252
implementation("org.bouncycastle:bcpkix-jdk18on:1.78") // https://github.com/docker-java/docker-java/pull/2326
5353

5454
implementation(libs.docker.transport.zerodep)
55+
56+
implementation(project(":runtime:observability:telemetry-defaults"))
5557
}
5658
}
5759

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package aws.smithy.kotlin.runtime.http.test.suite
6+
7+
import io.ktor.server.application.Application
8+
import io.ktor.server.response.respondText
9+
import io.ktor.server.routing.post
10+
import io.ktor.server.routing.route
11+
import io.ktor.server.routing.routing
12+
13+
/**
 * Installs the routes used by the connection tests: a `POST` to `connectionDrop` that responds with the text `Bar`.
 */
internal fun Application.connectionTests() {
    routing {
        route("connectionDrop") {
            post { call.respondText("Bar") }
        }
    }
}

runtime/protocol/http-client-engines/test-suite/jvm/src/aws/smithy/kotlin/runtime/http/test/util/TestServers.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,13 @@ import aws.smithy.kotlin.runtime.http.test.suite.uploadTests
1313
import io.ktor.server.application.*
1414
import io.ktor.server.engine.*
1515
import io.ktor.server.jetty.*
16+
import io.ktor.server.jetty.jakarta.Jetty
17+
import io.ktor.server.jetty.jakarta.JettyApplicationEngineBase
1618
import redirectTests
1719
import java.io.Closeable
1820
import java.nio.file.Paths
1921
import java.util.concurrent.TimeUnit
22+
import kotlin.time.Duration.Companion.seconds
2023

2124
private data class TestServer(
2225
val port: Int,
@@ -94,7 +97,7 @@ private fun tlsServer(instance: TestServer, sslConfig: SslConfig): EmbeddedServe
9497
val rootConfig = serverConfig {
9598
module(instance.initializer)
9699
}
97-
val engineConfig: ApplicationEngine.Configuration.() -> Unit = {
100+
val engineConfig: JettyApplicationEngineBase.Configuration.() -> Unit = {
98101
when (instance.type) {
99102
ConnectorType.HTTP -> connector { port = instance.port }
100103

@@ -109,6 +112,8 @@ private fun tlsServer(instance: TestServer, sslConfig: SslConfig): EmbeddedServe
109112
enabledProtocols = instance.protocolName?.let(::listOf)
110113
}
111114
}
115+
116+
idleTimeout = 3.seconds // Required for ConnectionTest.testShortLivedConnections
112117
}
113118

114119
return try {
@@ -126,6 +131,7 @@ internal fun Application.testRoutes() {
126131
uploadTests()
127132
concurrentTests()
128133
headerTests()
134+
connectionTests()
129135
}
130136

131137
// configure SSL-only routes

runtime/protocol/http-client-engines/test-suite/jvm/test/aws/smithy/kotlin/runtime/http/test/ConnectionTest.kt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@ package aws.smithy.kotlin.runtime.http.test
66

77
import aws.smithy.kotlin.runtime.content.decodeToString
88
import aws.smithy.kotlin.runtime.http.*
9+
import aws.smithy.kotlin.runtime.http.engine.okhttp.OkHttpEngineConfig
910
import aws.smithy.kotlin.runtime.http.request.HttpRequest
1011
import aws.smithy.kotlin.runtime.http.request.url
1112
import aws.smithy.kotlin.runtime.http.test.util.*
1213
import aws.smithy.kotlin.runtime.http.test.util.testServers
1314
import aws.smithy.kotlin.runtime.net.TlsVersion
15+
import kotlinx.coroutines.delay
1416
import java.nio.file.Paths
1517
import kotlin.test.*
18+
import kotlin.time.Duration.Companion.milliseconds
19+
import kotlin.time.Duration.Companion.seconds
1620

1721
class ConnectionTest : AbstractEngineTest() {
1822
private fun testMinTlsVersion(version: TlsVersion, serverType: ServerType) {
@@ -79,4 +83,45 @@ class ConnectionTest : AbstractEngineTest() {
7983

8084
@Test
8185
fun testMinTls1_3() = testMinTlsVersion(TlsVersion.TLS_1_3, ServerType.TLS_1_3)
86+
87+
// See https://github.com/awslabs/aws-sdk-kotlin/issues/1214
@Test
fun testShortLivedConnections() = testEngines(
    // Only run this test on OkHttp
    skipEngines = setOf("CrtHttpEngine", "OkHttp4Engine"),
) {
    engineConfig {
        this as OkHttpEngineConfig.Builder
        connectionIdlePollingInterval = 200.milliseconds
        connectionIdleTimeout = 10.seconds // Longer than the server-side timeout
    }

    test { _, client ->
        // Builds a new POST request to /connectionDrop; called once per round trip
        fun connectionDropRequest() = HttpRequest {
            testSetup()
            method = HttpMethod.POST
            url {
                path.decoded = "/connectionDrop"
            }
            body = "Foo".toHttpBody()
        }

        // Sends a request and returns the decoded response body
        suspend fun roundTrip(): String? {
            val call = client.call(connectionDropRequest())
            return call.response.body.toByteStream()?.decodeToString()
        }

        assertEquals("Bar", roundTrip())

        delay(5.seconds) // Longer than the server-side timeout, shorter than the client-side timeout

        assertEquals("Bar", roundTrip())
    }
}
82127
}

0 commit comments

Comments
 (0)