Skip to content

Commit d4d96ce

Browse files
committed
Increase server-side healthcheck timeout, improve probe failure logging
1 parent d83995d commit d4d96ce

File tree

2 files changed

+39
-13
lines changed

2 files changed

+39
-13
lines changed

router/src/health.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::sync::atomic::{AtomicBool, Ordering};
22
use std::sync::Arc;
33
use tokenizers::Tokenizer;
4+
use tokio::time::Instant;
45
use text_generation_client::{Batch, NextTokenChooserParameters, Request, ShardedClient};
56

67
const TEST_INPUT: &str = "liveness";
@@ -25,9 +26,15 @@ impl Health {
2526
}
2627

2728
pub(crate) async fn check(&mut self) -> bool {
28-
if self.generation_health.load(Ordering::SeqCst) {
29+
let generation_healthy = self.generation_health.load(Ordering::SeqCst);
30+
31+
let mut guard = Guard{ prefill: !generation_healthy, start_time: Some(Instant::now()) };
32+
33+
let ok = if generation_healthy {
2934
// Generation is healthy, we only check that the shards are answering gRPC calls
30-
self.client.health().await.is_ok()
35+
self.client.health().await
36+
.map_err(|err| tracing::error!("Basic shard healthcheck error: {err}"))
37+
.is_ok()
3138
} else {
3239
// Generation is unhealthy, or no generation request has been sent yet
3340

@@ -51,13 +58,32 @@ impl Health {
5158
requests: vec![liveness_request],
5259
total_tokens: 1,
5360
};
54-
// Skips the queue
61+
// Skips the queue, but will still be serialized behind in-flight prefill/next_token requests
5562
let value = self.client.prefill(batch, vec![]).await
56-
.map_err(|err| tracing::error!("Healthcheck error: {err}"))
63+
.map_err(|err| tracing::error!("Prefill healthcheck error: {err}"))
5764
.is_ok();
5865
// Update generation health
5966
self.generation_health.store(value, Ordering::SeqCst);
6067
value
68+
};
69+
guard.start_time = None;
70+
ok
71+
}
72+
}
73+
74+
struct Guard {
75+
prefill: bool,
76+
start_time: Option<Instant>, // None once completed
77+
}
78+
79+
impl Drop for Guard {
80+
fn drop(&mut self) {
81+
if let Some(start_time) = self.start_time {
82+
tracing::warn!(
83+
"Healthcheck request cancelled during {} check after {}ms",
84+
if self.prefill { "prefill" } else { "basic shard" },
85+
start_time.elapsed().as_millis(),
86+
)
6187
}
6288
}
6389
}

router/src/server.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,24 @@ pub(crate) struct ServerState {
3838
pub(crate) seq2seq: bool,
3939
}
4040

41+
// This is a safety-net timeout; it's expected the client (e.g. kubelet) will
42+
// be configured with a shorter one
43+
const PROBE_TIMEOUT_SECS: u64 = 60;
44+
4145
/// Health check method
4246
#[instrument(skip(health))]
4347
async fn health(mut health: Extension<Health>) -> Result<(), (StatusCode, Json<ErrorResponse>)> {
44-
match timeout(Duration::from_secs(5), health.check()).await {
48+
match timeout(Duration::from_secs(PROBE_TIMEOUT_SECS), health.check()).await {
4549
Ok(true) => Ok(()),
4650
Ok(false) => Err((
4751
StatusCode::SERVICE_UNAVAILABLE,
48-
Json(ErrorResponse {
49-
error: "unhealthy".to_string(),
50-
}),
52+
Json(ErrorResponse { error: "unhealthy".to_string() }),
5153
)),
5254
Err(_) => {
53-
tracing::error!("Healthcheck request timed-out");
55+
tracing::error!("Aborting health-check request after {PROBE_TIMEOUT_SECS}s time-out");
5456
Err((
55-
StatusCode::REQUEST_TIMEOUT,
56-
Json(ErrorResponse {
57-
error: "Healthcheck timed-out".to_string(),
58-
}),
57+
StatusCode::SERVICE_UNAVAILABLE,
58+
Json(ErrorResponse { error: "Healthcheck timed-out".to_string() }),
5959
))
6060
}
6161
}

0 commit comments

Comments
 (0)