Commit 7c48f48

Add prometheus metrics for tokenize API

1 parent d31197b

File tree

3 files changed: +32, -19 lines changed

README.md

Lines changed: 4 additions & 0 deletions
@@ -154,3 +154,7 @@ They are all prefixed with `tgi_`. Descriptions will be added to the table below
 | `tgi_prefill_weight_limit_exceeded` | `counter` | | |
 | `tgi_prompt_load_failure` | `counter` | | |
 | `tgi_prompt_load_duration` | `histogram` | | |
+| `tgi_tokenize_request_count` | `counter` | | |
+| `tgi_tokenize_request_input_count` | `counter` | | |
+| `tgi_tokenize_request_tokens` | `histogram` | | |
+| `tgi_tokenize_request_duration` | `histogram` | | |
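
Read together with the grpc_server.rs changes below, the new rows instrument the tokenize path: `tgi_tokenize_request_count` counts tokenize RPCs, `tgi_tokenize_request_input_count` counts the input texts across those batched requests, `tgi_tokenize_request_tokens` observes the total token count per request, and `tgi_tokenize_request_duration` observes per-request wall-clock time in seconds.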

router/src/grpc_server.rs

Lines changed: 17 additions & 5 deletions
@@ -60,6 +60,7 @@ pub(crate) async fn start_grpc_server<F: Future<Output = ()> + Send + 'static>(
         state: shared_state,
         tokenizer,
         input_counter: metrics::register_counter!("tgi_request_input_count"),
+        tokenize_input_counter: metrics::register_counter!("tgi_tokenize_request_input_count"),
     };
     let grpc_server = builder
         .add_service(GenerationServiceServer::new(grpc_service))
@@ -82,6 +83,7 @@ pub struct GenerationServicer {
     state: ServerState,
     tokenizer: Tokenizer,
     input_counter: metrics::Counter,
+    tokenize_input_counter: metrics::Counter,
 }

 #[tonic::async_trait]
@@ -245,15 +247,25 @@ impl GenerationService for GenerationServicer {
         &self, request: Request<BatchedTokenizeRequest>
     ) -> Result<Response<BatchedTokenizeResponse>, Status> {
         let br = request.into_inner();
+        metrics::increment_counter!("tgi_tokenize_request_count");
+        let start_time = Instant::now();
+        self.tokenize_input_counter.increment(br.requests.len() as u64);

+        let mut token_total = 0;
         let responses = self.tokenizer.encode_batch(
             br.requests.into_iter().map(|tr| tr.text).collect(), true
         )
-            .map_err(Status::from_error)?
-            .into_iter().map(|e| TokenizeResponse {
-                token_count: e.len() as u32,
-                tokens: if br.return_tokens { e.get_tokens().to_vec() } else { vec![] },
-            }).collect();
+            .map_err(Status::from_error)?.into_iter()
+            .map(|e| {
+                let token_count = e.len() as u32;
+                token_total += token_count;
+                TokenizeResponse {
+                    token_count,
+                    tokens: if br.return_tokens { e.get_tokens().to_vec() } else { vec![] },
+                }
+            }).collect();
+        metrics::histogram!("tgi_tokenize_request_tokens", token_total as f64);
+        metrics::histogram!("tgi_tokenize_request_duration", start_time.elapsed().as_secs_f64());

         Ok(Response::new(BatchedTokenizeResponse { responses }))
     }
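
For readers unfamiliar with the `metrics` facade used above, here is a minimal sketch of the same recording pattern, assuming the 0.20-era macro API and an installed recorder (without one, the macros are no-ops); the helper name and its parameters are hypothetical, for illustration only:

use std::time::Instant;

// Hypothetical helper mirroring the handler above: one counter bump per
// RPC, one increment per input text, and two histogram observations.
fn record_tokenize_metrics(input_count: u64, token_total: u64, start_time: Instant) {
    metrics::increment_counter!("tgi_tokenize_request_count");
    metrics::counter!("tgi_tokenize_request_input_count", input_count);
    metrics::histogram!("tgi_tokenize_request_tokens", token_total as f64);
    metrics::histogram!("tgi_tokenize_request_duration", start_time.elapsed().as_secs_f64());
}

Note that the commit itself pre-registers the input counter once (`metrics::register_counter!` in start_grpc_server) and calls `.increment()` on the stored handle, which avoids re-resolving the metric key on every request; the `counter!` macro form above is the shorthand equivalent.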

router/src/server.rs

Lines changed: 11 additions & 14 deletions
@@ -343,6 +343,9 @@ async fn do_run<B: BatchType>(
         value *= 1.5;
         duration_buckets.push(value);
     }
+    // Tokenization token count buckets
+    let tokenized_tokens_matcher = Matcher::Full(String::from("tgi_tokenize_request_tokens"));
+    let tokenized_tokens_buckets: Vec<f64> = (6..20).map(|x| (1 << x) as f64).collect();
     // Input Length buckets
     let input_length_matcher = Matcher::Full(String::from("tgi_request_input_length"));
     let max_sequence_length_buckets: Vec<f64> = (0..64)
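
The new token-count buckets are powers of two: `(6..20)` yields exponents 6 through 19, so the boundaries run from 2^6 = 64 up to 2^19 = 524288 tokens. A standalone check of that expression:

fn main() {
    // Same expression as in do_run above: fourteen power-of-two boundaries.
    let buckets: Vec<f64> = (6..20).map(|x| (1 << x) as f64).collect();
    assert_eq!(buckets.len(), 14);
    assert_eq!(buckets.first(), Some(&64.0));
    assert_eq!(buckets.last(), Some(&524_288.0));
}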
@@ -364,20 +367,14 @@ async fn do_run<B: BatchType>(

     // Prometheus handler
     let builder = PrometheusBuilder::new()
-        .set_buckets_for_metric(duration_matcher, &duration_buckets)
-        .unwrap()
-        .set_buckets_for_metric(input_length_matcher, &max_sequence_length_buckets)
-        .unwrap()
-        .set_buckets_for_metric(generated_tokens_matcher, &max_new_tokens_buckets)
-        .unwrap()
-        .set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets)
-        .unwrap()
-        .set_buckets_for_metric(total_tokens_matcher, &max_sequence_length_buckets)
-        .unwrap()
-        .set_buckets_for_metric(batch_size_matcher, &batch_size_buckets)
-        .unwrap()
-        .set_buckets_for_metric(batch_inference_size_matcher, &batch_size_buckets)
-        .unwrap();
+        .set_buckets_for_metric(duration_matcher, &duration_buckets).unwrap()
+        .set_buckets_for_metric(tokenized_tokens_matcher, &tokenized_tokens_buckets).unwrap()
+        .set_buckets_for_metric(input_length_matcher, &max_sequence_length_buckets).unwrap()
+        .set_buckets_for_metric(generated_tokens_matcher, &max_new_tokens_buckets).unwrap()
+        .set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets).unwrap()
+        .set_buckets_for_metric(total_tokens_matcher, &max_sequence_length_buckets).unwrap()
+        .set_buckets_for_metric(batch_size_matcher, &batch_size_buckets).unwrap()
+        .set_buckets_for_metric(batch_inference_size_matcher, &batch_size_buckets).unwrap();
     let prom_handle = builder
         .install_recorder()
         .expect("failed to install metrics recorder");
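
A minimal sketch of how this builder wires custom buckets into a Prometheus recorder, assuming the `metrics-exporter-prometheus` crate paired with the same `metrics` version; only the tokenize histogram is configured here, and the sample observation is illustrative:

use metrics_exporter_prometheus::{Matcher, PrometheusBuilder};

fn main() {
    let buckets: Vec<f64> = (6..20).map(|x| (1 << x) as f64).collect();
    // Buckets must be attached before install_recorder(); the returned
    // handle renders the exposition text served at /metrics.
    let handle = PrometheusBuilder::new()
        .set_buckets_for_metric(Matcher::Full("tgi_tokenize_request_tokens".into()), &buckets)
        .unwrap()
        .install_recorder()
        .expect("failed to install metrics recorder");

    metrics::histogram!("tgi_tokenize_request_tokens", 300.0);
    println!("{}", handle.render());
}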
