Skip to content

Commit d9f7bd6

Browse files
committed
fix: handle cancels from node proxies
Signed-off-by: Skyler Ross <[email protected]>
1 parent b214019 commit d9f7bd6

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ parking_lot = "0.12.0"
5353
serde_json = { version = "1.0.96", optional = true }
5454
serde = { version = "1.0.163", optional = true }
5555
serde_derive = { version = "1.0.163", optional = true }
56+
hyper = { version = "0.14.14", default-features = false }
5657

5758
[dependencies.futures-util]
5859
version = "0.3.21"

src/execute.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
* ‍
1919
*/
2020

21+
use std::error::Error as StdError;
2122
use std::ops::ControlFlow;
2223
use std::time::Duration;
2324

@@ -292,6 +293,16 @@ fn map_tonic_error(
292293
network: &client::NetworkData,
293294
node_index: usize,
294295
) -> retry::Error {
296+
/// Punches through the layers of `tonic::Status` sources to check whether the
/// underlying error is a `hyper::Error` that was canceled.
///
/// The chain inspected is: `status.source()` -> `tonic::transport::Error` ->
/// its `source()` -> `hyper::Error`. Returns `false` if any link in that chain
/// is absent or is of a different concrete type.
297+
fn is_hyper_canceled(status: &tonic::Status) -> bool {
298+
status
299+
// first layer: whatever error the status wraps, if any
.source()
300+
// only continue when that source is a tonic transport error
.and_then(|it| it.downcast_ref::<tonic::transport::Error>())
301+
// second layer: the error wrapped by the transport error
.and_then(StdError::source)
302+
// only continue when that inner error is a hyper error
.and_then(|it| it.downcast_ref::<hyper::Error>())
303+
// `None` at any step above yields `false`
.is_some_and(hyper::Error::is_canceled)
304+
}
305+
295306
const MIME_HTML: &[u8] = b"text/html";
296307

297308
match status.code() {
@@ -304,6 +315,13 @@ fn map_tonic_error(
304315
retry::Error::Transient(status.into())
305316
}
306317

318+
// if the proxy cancels the request (i.e. it's `Unavailable`/`ResourceExhausted`), treat it like a transient error.
319+
tonic::Code::Unknown if is_hyper_canceled(&status) => {
320+
network.mark_node_unhealthy(node_index);
321+
322+
retry::Error::Transient(status.into())
323+
}
324+
307325
// todo: find a way to make this less fragile
308326
// if this happens:
309327
// the node is completely borked (we're probably seeing the load balancer's response),

0 commit comments

Comments
 (0)