Add metric to cover per-iteration Rust token processing time

njhill · njhill · commit cbe805aca02e · 2023-10-13T15:15:26.000-07:00
The new histogram metric is named `tgi_batch_inference_tokproc_duration`.
diff --git a/router/src/batcher.rs b/router/src/batcher.rs
@@ -577,6 +577,7 @@ impl<'a> TokenProcessor<'a> {
             Ok(
                 Some((generated_tokens, input_tokens, errors, next_batch_id, forward_duration))
             ) => {
+                let pre_token_process_time = Instant::now();
                 self.process_input_tokens(input_tokens);
                 let completed_request_ids = self.process_next_tokens(
                     generated_tokens, errors,
@@ -595,6 +596,12 @@ impl<'a> TokenProcessor<'a> {
                     "method" => method,
                     "makeup" => "single_only", // later will possibly be beam_only or mixed
                 );
+                metrics::histogram!(
+                    "tgi_batch_inference_tokproc_duration",
+                    pre_token_process_time.elapsed().as_secs_f64(),
+                    "method" => method,
+                    "makeup" => "single_only", // later will possibly be beam_only or mixed
+                );
                 // Probably don't need this additional counter because the duration histogram
                 // records a total count
                 metrics::increment_counter!("tgi_batch_inference_success", "method" => method);