Skip to content

Commit f07a53e

Browse files
authored
grid: Propagate transport failure reason of both connection pools (envoyproxy#39253)
Previously, if the TCP pool failed before the HTTP/3 pool failed, the `ConnectivityGrid` would propagate the transport failure reason for both the TCP and QUIC pools. However, if the QUIC pool failed first, the `ConnectivityGrid` would not propagate the transport failure reason for the QUIC pool (only for the TCP pool). This change ensures that the `ConnectivityGrid` propagates the transport failure reason for *both* TCP and QUIC pools if both pools fail in the `ConnectivityGrid`, regardless of the order in which the pools fail. --------- Signed-off-by: Ali Beyad <[email protected]>
1 parent d6076a6 commit f07a53e

File tree

3 files changed

+53
-15
lines changed

3 files changed

+53
-15
lines changed

source/common/http/conn_pool_grid.cc

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,9 @@ void ConnectivityGrid::WrapperCallbacks::onConnectionAttemptFailed(
140140

141141
// If there is another connection attempt in flight then let that proceed.
142142
if (!connection_attempts_.empty()) {
143-
if (!grid_.isPoolHttp3(attempt->pool())) {
144-
// TCP pool failed before HTTP/3 pool.
145-
prev_tcp_pool_failure_reason_ = reason;
146-
prev_tcp_pool_transport_failure_reason_ = transport_failure_reason;
147-
}
143+
prev_pool_failure_reason_ = reason;
144+
prev_pool_transport_failure_reason_ = fmt::format(
145+
"{}: {}", grid_.isPoolHttp3(attempt->pool()) ? "QUIC" : "TCP", transport_failure_reason);
148146
return;
149147
}
150148

@@ -167,12 +165,12 @@ void ConnectivityGrid::WrapperCallbacks::signalFailureAndDeleteSelf(
167165
if (callbacks != nullptr) {
168166
ENVOY_LOG(trace, "Passing pool failure up to caller.");
169167
std::string failure_str;
170-
if (prev_tcp_pool_failure_reason_.has_value()) {
171-
// TCP pool failed early on, log its error details as well.
172-
failure_str = fmt::format("{} (with earlier TCP attempt failure reason {}, {})",
173-
transport_failure_reason,
174-
static_cast<int>(prev_tcp_pool_failure_reason_.value()),
175-
prev_tcp_pool_transport_failure_reason_);
168+
if (prev_pool_failure_reason_.has_value()) {
169+
// The other pool (either TCP or QUIC depending on which failed first) also failed, log its
170+
// error details as well.
171+
failure_str = fmt::format(
172+
"{} (with earlier attempt failure reason {}, {})", transport_failure_reason,
173+
static_cast<int>(prev_pool_failure_reason_.value()), prev_pool_transport_failure_reason_);
176174
transport_failure_reason = failure_str;
177175
}
178176
callbacks->onPoolFailure(reason, transport_failure_reason, host);

source/common/http/conn_pool_grid.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ class ConnectivityGrid : public ConnectionPool::Instance,
172172
bool tcp_attempt_succeeded_{};
173173
// Latch the passed-in stream options.
174174
const Instance::StreamOptions stream_options_{};
175-
absl::optional<ConnectionPool::PoolFailureReason> prev_tcp_pool_failure_reason_;
176-
std::string prev_tcp_pool_transport_failure_reason_;
175+
absl::optional<ConnectionPool::PoolFailureReason> prev_pool_failure_reason_;
176+
std::string prev_pool_transport_failure_reason_;
177177
};
178178
using WrapperCallbacksPtr = std::unique_ptr<WrapperCallbacks>;
179179

test/common/http/conn_pool_grid_test.cc

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,7 @@ TEST_F(ConnectivityGridTest, ParallelConnectionsTcpFailsFirst) {
783783
EXPECT_FALSE(grid_->isHttp3Broken());
784784
// Only the previous TCP failure details will be appended.
785785
EXPECT_EQ(callbacks_.transport_failure_reason_,
786-
"handshake timeout2 (with earlier TCP attempt failure reason 1, TCP failure details)");
786+
"handshake timeout2 (with earlier attempt failure reason 1, TCP: TCP failure details)");
787787
}
788788

789789
// Test the first pool failing inline but http/3 happy eyeballs succeeding inline
@@ -1011,7 +1011,47 @@ TEST_F(ConnectivityGridTest, TcpFailsFollowedByH3Failure) {
10111011
EXPECT_FALSE(grid_->isHttp3Broken());
10121012
EXPECT_TRUE(alternate_protocols_->findAlternatives(origin_).has_value());
10131013
EXPECT_EQ(callbacks_.transport_failure_reason_,
1014-
"handshake time out (with earlier TCP attempt failure reason 1, network unreachable)");
1014+
"handshake time out (with earlier attempt failure reason 1, TCP: network unreachable)");
1015+
}
1016+
1017+
// Tests one HTTP/2 pool and one HTTP/3 pool connecting and both fail with HTTP/3 failing first.
1018+
TEST_F(ConnectivityGridTest, H3FailsFirstFollowedByTcpFailure) {
1019+
initialize();
1020+
addHttp3AlternateProtocol();
1021+
EXPECT_EQ(grid_->http3Pool(), nullptr);
1022+
1023+
// This timer will be returned and armed as the grid creates the wrapper's
1024+
// failover timer.
1025+
Event::MockTimer* failover_timer = new NiceMock<MockTimer>(&dispatcher_);
1026+
1027+
grid_->newStream(decoder_, callbacks_, {/*can_send_early_data_=*/false, /*can_use_http3_=*/true});
1028+
EXPECT_NE(grid_->http3Pool(), nullptr);
1029+
EXPECT_TRUE(failover_timer->enabled_);
1030+
1031+
// Kick off the second connection.
1032+
failover_timer->invokeCallback();
1033+
EXPECT_NE(grid_->http2Pool(), nullptr);
1034+
1035+
// HTTP/3 pool fails first. Failure shouldn't be propagated to the original caller, but instead
1036+
// wait for the HTTP/2 pool to finish.
1037+
EXPECT_NE(grid_->callbacks(0), nullptr);
1038+
EXPECT_CALL(callbacks_.pool_failure_, ready()).Times(0);
1039+
grid_->callbacks(0)->onPoolFailure(ConnectionPool::PoolFailureReason::LocalConnectionFailure,
1040+
"network unreachable", host_);
1041+
EXPECT_FALSE(grid_->isHttp3Broken());
1042+
1043+
// HTTP/2 pool fails.
1044+
EXPECT_NE(grid_->callbacks(1), nullptr);
1045+
EXPECT_CALL(callbacks_.pool_failure_, ready());
1046+
grid_->callbacks(1)->onPoolFailure(ConnectionPool::PoolFailureReason::LocalConnectionFailure,
1047+
"handshake time out", host_);
1048+
1049+
// HTTP/3 shouldn't be marked broken as TCP also failed.
1050+
EXPECT_FALSE(grid_->isHttp3Broken());
1051+
EXPECT_TRUE(alternate_protocols_->findAlternatives(origin_).has_value());
1052+
EXPECT_EQ(
1053+
callbacks_.transport_failure_reason_,
1054+
"handshake time out (with earlier attempt failure reason 1, QUIC: network unreachable)");
10151055
}
10161056

10171057
// Test both connections happening in parallel and the second connecting before

0 commit comments

Comments
 (0)