Skip to content

Commit cbe805a

Browse files
committed
Add metric to cover per-iteration Rust token processing time
The metric is called `tgi_batch_inference_tokproc_duration`.
1 parent bc4fbac commit cbe805a

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

router/src/batcher.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -577,6 +577,7 @@ impl<'a> TokenProcessor<'a> {
577577
Ok(
578578
Some((generated_tokens, input_tokens, errors, next_batch_id, forward_duration))
579579
) => {
580+
let pre_token_process_time = Instant::now();
580581
self.process_input_tokens(input_tokens);
581582
let completed_request_ids = self.process_next_tokens(
582583
generated_tokens, errors,
@@ -595,6 +596,12 @@ impl<'a> TokenProcessor<'a> {
595596
"method" => method,
596597
"makeup" => "single_only", // later will possibly be beam_only or mixed
597598
);
599+
metrics::histogram!(
600+
"tgi_batch_inference_tokproc_duration",
601+
pre_token_process_time.elapsed().as_secs_f64(),
602+
"method" => method,
603+
"makeup" => "single_only", // later will possibly be beam_only or mixed
604+
);
598605
// Probably don't need this additional counter because the duration histogram
599606
// records a total count
600607
metrics::increment_counter!("tgi_batch_inference_success", "method" => method);

0 commit comments

Comments
 (0)