
Commit b0c612f

Update various rust crates
The metrics crate update includes its new macro syntax.
1 parent 0400c25 commit b0c612f
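
For reference, metrics 0.22 replaced the value-taking macros (`metrics::increment_counter!`, and the value arguments of `gauge!`/`histogram!`) with macros that return `Counter`/`Gauge`/`Histogram` handles. A minimal before/after sketch of the migration applied throughout this commit (the metric names below are illustrative, not from this repo):

// metrics 0.21.x: the macros took the observed value (and labels) directly.
// metrics::increment_counter!("requests_total", "kind" => "stream");
// metrics::gauge!("batch_current_size", 8.0);
// metrics::histogram!("request_duration", 0.42, "method" => "prefill");

// metrics 0.22.x: the macros return a handle; the value goes to a method call.
fn record_example_metrics() {
    metrics::counter!("requests_total", "kind" => "stream").increment(1);
    metrics::gauge!("batch_current_size").set(8.0);
    metrics::histogram!("request_duration", "method" => "prefill").record(0.42);
}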

File tree

8 files changed: +371 −221 lines

Cargo.lock

Lines changed: 285 additions & 132 deletions
Generated file; diff not rendered by default.

launcher/Cargo.toml

Lines changed: 3 additions & 3 deletions
@@ -6,9 +6,9 @@ authors = ["Olivier Dehaene"]
 description = "Text Generation Launcher"
 
 [dependencies]
-clap = { version = "4.4.11", features = ["derive", "env"] }
-ctrlc = { version = "3.4.1", features = ["termination"] }
+clap = { version = "4.4.12", features = ["derive", "env"] }
+ctrlc = { version = "3.4.2", features = ["termination"] }
 subprocess = "0.2.9"
 tracing = "0.1.40"
-tracing-subscriber = { version = "0.3.17", features = ["json"] }
+tracing-subscriber = { version = "0.3.18", features = ["json"] }
 

router/Cargo.toml

Lines changed: 15 additions & 15 deletions
@@ -17,30 +17,30 @@ path = "src/main.rs"
 axum = { version = "0.6.20", features = ["json"] }
 text-generation-client = { path = "client" }
 clap = { version = "^4.4.11", features = ["derive", "env"] }
-futures = "^0.3.29"
+futures = "^0.3.30"
 flume = "^0.11.0"
-metrics = "0.21.1"
-metrics-exporter-prometheus = { version = "0.12.1", features = [] }
-moka = { version = "0.12.1", features = ["future"] }
+metrics = "0.22.0"
+metrics-exporter-prometheus = { version = "0.13.0", features = [] }
+moka = { version = "0.12.2", features = ["future"] }
 nohash-hasher = "^0.2.0"
 num = "^0.4.1"
 num_cpus = "^1.16.0"
-hyper = "^0.14.27" # Override to address CVE-2023-26964
-openssl = "^0.10.59" # Override to address WS-2023-0082, WS-2023-0083, WS-2023-0195
-openssl-sys = "^0.9.95" # Override to address WS-2023-0082, WS-2023-0083, WS-2023-0195
-rustls-webpki = "^0.101.7" # Override to address WS-2023-0305, CVE-2018-16875
+hyper = "^0.14.28" # Override to address CVE-2023-26964
+openssl = "^0.10.62" # Override to address WS-2023-0082, WS-2023-0083, WS-2023-0195
+openssl-sys = "^0.9.98" # Override to address WS-2023-0082, WS-2023-0083, WS-2023-0195
+rustls-webpki = "0.102.0" # Override to address WS-2023-0305, CVE-2018-16875
 rand = "^0.8.5"
-serde = "^1.0.193"
-serde_json = "^1.0.108"
+serde = "^1.0.194"
+serde_json = "^1.0.110"
 # Pin sct to avoid WS-2023-0094 in older 0.5.2 version of spin sub-dependency
 sct = "=0.7.1"
-thiserror = "^1.0.50"
+thiserror = "^1.0.56"
 tokenizers = "0.15.0"
-tokio = { version = "1.35.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "fs"] }
+tokio = { version = "1.35.1", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "fs"] }
 tokio-rustls = "^0.25.0"
-rustls = "0.21.8"
+rustls = "0.22.1"
 tracing = "^0.1.40"
-tracing-subscriber = { version = "0.3.17", features = ["json"] }
+tracing-subscriber = { version = "0.3.18", features = ["json"] }
 prost = "^0.12.3"
 tonic = { version = "^0.10.2", features = ["tls"] }
 tokio-stream ="^0.1.14"
@@ -49,4 +49,4 @@ unicode-truncate = "^0.2.0"
 
 [build-dependencies]
 tonic-build = "^0.10.2"
-tempfile = "^3.8.1" # Override 0.3.3 version from tonic-build/prost-build, due to RUSTSEC-2023-0018 and WS-2023-0366
+tempfile = "^3.9.0" # Override 0.3.3 version from tonic-build/prost-build, due to RUSTSEC-2023-0018 and WS-2023-0366

router/client/Cargo.toml

Lines changed: 4 additions & 4 deletions
@@ -5,10 +5,10 @@ edition = "2021"
 build="build.rs"
 
 [dependencies]
-futures = "^0.3.29"
-prost = "^0.12.1"
-thiserror = "^1.0.50"
-tokio = { version = "1.35.0", features = ["sync"] }
+futures = "^0.3.30"
+prost = "^0.12.3"
+thiserror = "^1.0.56"
+tokio = { version = "1.35.1", features = ["sync"] }
 tonic = "^0.10.2"
 tower = "^0.4.13"
 tracing = "^0.1.40"

router/src/batcher.rs

Lines changed: 25 additions & 26 deletions
@@ -380,9 +380,9 @@ async fn batching_task<B: BatchType>(
 batch_size,
 );
 
-metrics::gauge!("tgi_batch_current_size", batch_size as f64);
-metrics::gauge!("tgi_batch_input_tokens", batch_tokens as f64);
-metrics::gauge!("tgi_batch_max_remaining_tokens", batch_max_remaining_tokens.unwrap() as f64);
+metrics::gauge!("tgi_batch_current_size").set(batch_size as f64);
+metrics::gauge!("tgi_batch_input_tokens").set(batch_tokens as f64);
+metrics::gauge!("tgi_batch_max_remaining_tokens").set(batch_max_remaining_tokens.unwrap() as f64);
 
 // Don't interfere with current batch if it's about to complete
 if batch_max_remaining_tokens.unwrap() >= 2 {
@@ -436,7 +436,7 @@ async fn batching_task<B: BatchType>(
 if added_batch_size > 0 {
 info!("Extending batch #{} of {} with additional batch #{} of {}",
 batch_id, batch_size, new_batch_id, added_batch_size);
-metrics::increment_counter!("tgi_batch_concatenation_count");
+metrics::counter!("tgi_batch_concatenation_count").increment(1);
 }
 } else {
 combined_batch_id = new_batch_id;
@@ -463,9 +463,9 @@ async fn batching_task<B: BatchType>(
 }
 }
 
-metrics::gauge!("tgi_batch_current_size", 0.0);
-metrics::gauge!("tgi_batch_input_tokens", 0.0);
-metrics::gauge!("tgi_batch_max_remaining_tokens", 0.0);
+metrics::gauge!("tgi_batch_current_size").set(0.0);
+metrics::gauge!("tgi_batch_input_tokens").set(0.0);
+metrics::gauge!("tgi_batch_max_remaining_tokens").set(0.0);
 }
 
 info!("Batching loop exiting");
@@ -524,10 +524,8 @@ impl<'a> TokenProcessor<'a> {
 let batch_size = batch.requests.len();
 let batch_tokens = batch.total_tokens;
 let start_time = Instant::now();
-metrics::histogram!("tgi_batch_next_tokens", batch_tokens as f64);
-metrics::histogram!(
-"tgi_batch_inference_batch_size", batch_size as f64, "method" => "prefill"
-);
+metrics::histogram!("tgi_batch_next_tokens").record(batch_tokens as f64);
+metrics::histogram!("tgi_batch_inference_batch_size", "method" => "prefill").record(batch_size as f64);
 self._wrap_future(
 client.prefill(batch, to_prune).map(|r| {
 info!(
@@ -543,9 +541,8 @@ impl<'a> TokenProcessor<'a> {
 async fn next_token<B: BatchType>(
 &mut self, client: &mut ShardedClient, batches: Vec<CachedBatch>, queue: &mut Queue<B>,
 ) -> Option<CachedBatch> {
-metrics::histogram!(
-"tgi_batch_inference_batch_size", self.entries.len() as f64, "method" => "next_token"
-);
+metrics::histogram!("tgi_batch_inference_batch_size", "method" => "next_token")
+.record(self.entries.len() as f64);
 let start_time = Instant::now();
 self._wrap_future(
 client.next_token(batches), "next_token", start_time, None, queue
@@ -562,7 +559,7 @@ impl<'a> TokenProcessor<'a> {
 start_id: Option<u64>,
 queue: &mut Queue<B>,
 ) -> Option<CachedBatch> {
-metrics::increment_counter!("tgi_batch_inference_count", "method" => method);
+metrics::counter!("tgi_batch_inference_count", "method" => method).increment(1);
 
 // We process the shared queue while waiting for the response from the python shard(s)
 let queue_servicer = queue.service_queue().fuse();
@@ -576,7 +573,8 @@ impl<'a> TokenProcessor<'a> {
 
 match result {
 Ok(
-Some((generated_tokens, input_tokens, errors, next_batch_id, forward_duration))
+Some((generated_tokens, input_tokens,
+errors, next_batch_id, forward_duration))
 ) => {
 let pre_token_process_time = Instant::now();
 self.process_input_tokens(input_tokens);
@@ -585,27 +583,27 @@ impl<'a> TokenProcessor<'a> {
 );
 // Update health
 self.generation_health.store(true, Ordering::SeqCst);
-metrics::histogram!(
+let histogram = metrics::histogram!(
 "tgi_batch_inference_duration",
-start_time.elapsed().as_secs_f64(),
 "method" => method,
 "makeup" => "single_only", // later will possibly be beam_only or mixed
 );
-metrics::histogram!(
+histogram.record(start_time.elapsed().as_secs_f64());
+let histogram = metrics::histogram!(
 "tgi_batch_inference_forward_duration",
-forward_duration,
 "method" => method,
 "makeup" => "single_only", // later will possibly be beam_only or mixed
 );
-metrics::histogram!(
+histogram.record(forward_duration);
+let histogram = metrics::histogram!(
 "tgi_batch_inference_tokproc_duration",
-pre_token_process_time.elapsed().as_secs_f64(),
 "method" => method,
 "makeup" => "single_only", // later will possibly be beam_only or mixed
 );
+histogram.record(pre_token_process_time.elapsed().as_secs_f64());
 // Probably don't need this additional counter because the duration histogram
 // records a total count
-metrics::increment_counter!("tgi_batch_inference_success", "method" => method);
+metrics::counter!("tgi_batch_inference_success", "method" => method).increment(1);
 Some(CachedBatch{
 batch_id: next_batch_id,
 status: completed_request_ids.map(|c| RequestsStatus{completed_ids: c}),
@@ -622,7 +620,8 @@ impl<'a> TokenProcessor<'a> {
 ClientError::Connection(_) => "connection",
 _ => "error"
 };
-metrics::increment_counter!("tgi_batch_inference_failure", "method" => method, "reason" => reason);
+metrics::counter!("tgi_batch_inference_failure", "method" => method, "reason" => reason)
+.increment(1);
 self.send_errors(err, start_id);
 None
 },
@@ -819,7 +818,7 @@ impl<'a> TokenProcessor<'a> {
 // If receiver closed (request cancelled), cancel this entry
 let e = self.entries.remove(&request_id).unwrap();
 stop_reason = Cancelled;
-metrics::increment_counter!("tgi_request_failure", "err" => "cancelled");
+metrics::counter!("tgi_request_failure", "err" => "cancelled").increment(1);
 //TODO include request context in log message
 warn!("Aborted streaming request {request_id} cancelled by client \
 after generating {} token(s)", e.generated_tokens);
@@ -831,7 +830,7 @@ impl<'a> TokenProcessor<'a> {
 // If receiver closed (request cancelled), cancel this entry
 let e = self.entries.remove(&request_id).unwrap();
 stop_reason = Cancelled;
-metrics::increment_counter!("tgi_request_failure", "err" => "cancelled");
+metrics::counter!("tgi_request_failure", "err" => "cancelled").increment(1);
 //TODO include request context in log message
 warn!("Aborted request {request_id} cancelled by client \
 after generating {} token(s)", e.generated_tokens);
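
Because the 0.22 macros no longer take the observed value as an argument, the multi-line histogram calls above become a handle binding followed by a `record` call. A standalone sketch of that pattern, reusing the metric name and labels from this file (the surrounding function is illustrative):

use std::time::Instant;

fn time_inference_step(method: &'static str) {
    let start_time = Instant::now();
    // ... inference work happens here ...

    // The macro registers (or looks up) the labeled histogram and returns a handle.
    let histogram = metrics::histogram!(
        "tgi_batch_inference_duration",
        "method" => method,
        "makeup" => "single_only",
    );
    // The observed value now goes through the handle instead of the macro.
    histogram.record(start_time.elapsed().as_secs_f64());
}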

router/src/grpc_server.rs

Lines changed: 25 additions & 27 deletions
@@ -59,8 +59,8 @@ pub(crate) async fn start_grpc_server<F: Future<Output = ()> + Send +'static> (
 let grpc_service = GenerationServicer {
 state: shared_state,
 tokenizer,
-input_counter: metrics::register_counter!("tgi_request_input_count"),
-tokenize_input_counter: metrics::register_counter!("tgi_tokenize_request_input_count"),
+input_counter: metrics::counter!("tgi_request_input_count"),
+tokenize_input_counter: metrics::counter!("tgi_tokenize_request_input_count"),
 };
 let grpc_server = builder
 .add_service(GenerationServiceServer::new(grpc_service))
@@ -104,7 +104,7 @@ impl GenerationService for GenerationServicer {
 let br = request.into_inner();
 let batch_size = br.requests.len();
 let kind = if batch_size == 1 { "single" } else { "batch" };
-metrics::increment_counter!("tgi_request_count", "kind" => kind);
+metrics::counter!("tgi_request_count", "kind" => kind).increment(1);
 if batch_size == 0 {
 return Ok(Response::new(BatchedGenerationResponse{ responses: vec![] }));
 }
@@ -113,7 +113,7 @@ impl GenerationService for GenerationServicer {
 let _permit = self.state.limit_concurrent_requests
 .try_acquire_many(batch_size as u32)
 .map_err(|_| {
-metrics::increment_counter!("tgi_request_failure", "err" => "conc_limit");
+metrics::counter!("tgi_request_failure", "err" => "conc_limit").increment(1);
 tracing::error!("Model is overloaded");
 Status::resource_exhausted("Model is overloaded")
 })?;
@@ -155,11 +155,11 @@ impl GenerationService for GenerationServicer {
 }
 }.map_err(|err| match err {
 InferError::RequestQueueFull() => {
-metrics::increment_counter!("tgi_request_failure", "err" => "queue_full");
+metrics::counter!("tgi_request_failure", "err" => "queue_full").increment(1);
 Status::resource_exhausted(err.to_string())
 },
 _ => {
-metrics::increment_counter!("tgi_request_failure", "err" => "generate");
+metrics::counter!("tgi_request_failure", "err" => "generate").increment(1);
 tracing::error!("{err}");
 Status::from_error(Box::new(err))
 },
@@ -184,11 +184,11 @@ impl GenerationService for GenerationServicer {
 &self, request: Request<SingleGenerationRequest>
 ) -> Result<Response<Self::GenerateStreamStream>, Status> {
 let start_time = Instant::now();
-metrics::increment_counter!("tgi_request_count", "kind" => "stream");
+metrics::counter!("tgi_request_count", "kind" => "stream").increment(1);
 self.input_counter.increment(1);
 let permit = self.state.limit_concurrent_requests.clone()
 .try_acquire_owned().map_err(|_| {
-metrics::increment_counter!("tgi_request_failure", "err" => "conc_limit");
+metrics::counter!("tgi_request_failure", "err" => "conc_limit").increment(1);
 tracing::error!("Model is overloaded");
 Status::resource_exhausted("Model is overloaded")
 })?;
@@ -210,7 +210,7 @@ impl GenerationService for GenerationServicer {
 }, |ctx, count, reason, request_id, times, out, err| {
 let _enter = ctx.span.enter();
 if let Some(e) = err {
-metrics::increment_counter!("tgi_request_failure", "err" => "generate");
+metrics::counter!("tgi_request_failure", "err" => "generate").increment(1);
 tracing::error!("Streaming response failed after {count} tokens, \
 output so far: '{:?}': {e}", truncate(&out, 32));
 } else {
@@ -229,11 +229,11 @@ impl GenerationService for GenerationServicer {
 .await
 .map_err(|err| match err {
 InferError::RequestQueueFull() => {
-metrics::increment_counter!("tgi_request_failure", "err" => "queue_full");
+metrics::counter!("tgi_request_failure", "err" => "queue_full").increment(1);
 Status::resource_exhausted(err.to_string())
 },
 _ => {
-metrics::increment_counter!("tgi_request_failure", "err" => "unknown");
+metrics::counter!("tgi_request_failure", "err" => "unknown").increment(1);
 tracing::error!("{err}");
 Status::from_error(Box::new(err))
 },
@@ -247,7 +247,7 @@ impl GenerationService for GenerationServicer {
 &self, request: Request<BatchedTokenizeRequest>
 ) -> Result<Response<BatchedTokenizeResponse>, Status> {
 let br = request.into_inner();
-metrics::increment_counter!("tgi_tokenize_request_count");
+metrics::counter!("tgi_tokenize_request_count").increment(1);
 let start_time = Instant::now();
 self.tokenize_input_counter.increment(br.requests.len() as u64);
 
@@ -262,8 +262,8 @@ impl GenerationService for GenerationServicer {
 ))).map_err(Status::from_error).await?;
 
 let token_total: u32 = responses.iter().map(|tr| tr.token_count).sum();
-metrics::histogram!("tgi_tokenize_request_tokens", token_total as f64);
-metrics::histogram!("tgi_tokenize_request_duration", start_time.elapsed().as_secs_f64());
+metrics::histogram!("tgi_tokenize_request_tokens").record(token_total as f64);
+metrics::histogram!("tgi_tokenize_request_duration").record(start_time.elapsed().as_secs_f64());
 
 Ok(Response::new(BatchedTokenizeResponse { responses }))
 }
@@ -304,11 +304,11 @@ impl GenerationServicer {
 ).await,
 Err(err) => Err(err),
 }.map_err(|err| {
-metrics::increment_counter!("tgi_request_failure", "err" => "validation");
+metrics::counter!("tgi_request_failure", "err" => "validation").increment(1);
 tracing::error!("{err}");
 Status::invalid_argument(err.to_string())
 }).map(|requests| {
-metrics::histogram!("tgi_request_validation_duration", start_time.elapsed().as_secs_f64());
+metrics::histogram!("tgi_request_validation_duration").record(start_time.elapsed().as_secs_f64());
 requests
 })
 }
@@ -349,23 +349,21 @@ fn log_response(
 );
 _enter = span.enter();
 
-metrics::histogram!("tgi_request_inference_duration", inference_time.as_secs_f64());
-metrics::histogram!("tgi_request_mean_time_per_token_duration", time_per_token.as_secs_f64());
+metrics::histogram!("tgi_request_inference_duration").record(inference_time.as_secs_f64());
+metrics::histogram!("tgi_request_mean_time_per_token_duration").record(time_per_token.as_secs_f64());
 }
 
 // Metrics
 match reason {
-Error => metrics::increment_counter!("tgi_request_failure", "err" => "generate"),
+Error => metrics::counter!("tgi_request_failure", "err" => "generate").increment(1),
 Cancelled => (), // recorded where cancellation is detected
 _ => {
-metrics::increment_counter!(
-"tgi_request_success", "stop_reason" => reason.as_str_name(), "kind" => kind
-);
-metrics::histogram!("tgi_request_duration", total_time.as_secs_f64());
-metrics::histogram!("tgi_request_generated_tokens", generated_tokens as f64);
-metrics::histogram!(
-"tgi_request_total_tokens", (generated_tokens as usize + input_tokens) as f64
-);
+metrics::counter!("tgi_request_success", "stop_reason" => reason.as_str_name(), "kind" => kind)
+.increment(1);
+metrics::histogram!("tgi_request_duration").record(total_time.as_secs_f64());
+metrics::histogram!("tgi_request_generated_tokens").record(generated_tokens as f64);
+metrics::histogram!("tgi_request_total_tokens")
+.record((generated_tokens as usize + input_tokens) as f64);
 }
 }
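
One consequence visible in grpc_server.rs: metrics 0.22 drops `register_counter!`, because `metrics::counter!` itself returns a reusable `metrics::Counter` handle that can be stored in a struct field and incremented later, as `GenerationServicer` does with `input_counter`. A minimal sketch of that pattern (the struct here is illustrative, not the servicer's real definition):

use metrics::Counter;

struct RequestMetrics {
    input_counter: Counter,
}

impl RequestMetrics {
    fn new() -> Self {
        // counter! both registers the metric and hands back a Counter.
        Self {
            input_counter: metrics::counter!("tgi_request_input_count"),
        }
    }

    fn on_inputs(&self, n: u64) {
        // Reuses the stored handle instead of re-resolving the metric each call.
        self.input_counter.increment(n);
    }
}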
