@@ -60,6 +60,7 @@ pub(crate) async fn start_grpc_server<F: Future<Output = ()> + Send +'static> (
60
60
state : shared_state,
61
61
tokenizer,
62
62
input_counter : metrics:: register_counter!( "tgi_request_input_count" ) ,
63
+ tokenize_input_counter : metrics:: register_counter!( "tgi_tokenize_request_input_count" ) ,
63
64
} ;
64
65
let grpc_server = builder
65
66
. add_service ( GenerationServiceServer :: new ( grpc_service) )
@@ -82,6 +83,7 @@ pub struct GenerationServicer {
82
83
state : ServerState ,
83
84
tokenizer : Tokenizer ,
84
85
input_counter : metrics:: Counter ,
86
+ tokenize_input_counter : metrics:: Counter ,
85
87
}
86
88
87
89
#[ tonic:: async_trait]
@@ -245,15 +247,25 @@ impl GenerationService for GenerationServicer {
245
247
& self , request : Request < BatchedTokenizeRequest >
246
248
) -> Result < Response < BatchedTokenizeResponse > , Status > {
247
249
let br = request. into_inner ( ) ;
250
+ metrics:: increment_counter!( "tgi_tokenize_request_count" ) ;
251
+ let start_time = Instant :: now ( ) ;
252
+ self . tokenize_input_counter . increment ( br. requests . len ( ) as u64 ) ;
248
253
254
+ let mut token_total = 0 ;
249
255
let responses = self . tokenizer . encode_batch (
250
256
br. requests . into_iter ( ) . map ( |tr| tr. text ) . collect ( ) , true
251
257
)
252
- . map_err ( Status :: from_error) ?
253
- . into_iter ( ) . map ( |e| TokenizeResponse {
254
- token_count : e. len ( ) as u32 ,
255
- tokens : if br. return_tokens { e. get_tokens ( ) . to_vec ( ) } else { vec ! [ ] } ,
256
- } ) . collect ( ) ;
258
+ . map_err ( Status :: from_error) ?. into_iter ( )
259
+ . map ( |e| {
260
+ let token_count = e. len ( ) as u32 ;
261
+ token_total += token_count;
262
+ TokenizeResponse {
263
+ token_count,
264
+ tokens : if br. return_tokens { e. get_tokens ( ) . to_vec ( ) } else { vec ! [ ] } ,
265
+ }
266
+ } ) . collect ( ) ;
267
+ metrics:: histogram!( "tgi_tokenize_request_tokens" , token_total as f64 ) ;
268
+ metrics:: histogram!( "tgi_tokenize_request_duration" , start_time. elapsed( ) . as_secs_f64( ) ) ;
257
269
258
270
Ok ( Response :: new ( BatchedTokenizeResponse { responses } ) )
259
271
}
0 commit comments