@@ -59,8 +59,8 @@ pub(crate) async fn start_grpc_server<F: Future<Output = ()> + Send +'static> (
59
59
let grpc_service = GenerationServicer {
60
60
state : shared_state,
61
61
tokenizer,
62
- input_counter : metrics:: register_counter !( "tgi_request_input_count" ) ,
63
- tokenize_input_counter : metrics:: register_counter !( "tgi_tokenize_request_input_count" ) ,
62
+ input_counter : metrics:: counter !( "tgi_request_input_count" ) ,
63
+ tokenize_input_counter : metrics:: counter !( "tgi_tokenize_request_input_count" ) ,
64
64
} ;
65
65
let grpc_server = builder
66
66
. add_service ( GenerationServiceServer :: new ( grpc_service) )
@@ -104,7 +104,7 @@ impl GenerationService for GenerationServicer {
104
104
let br = request. into_inner ( ) ;
105
105
let batch_size = br. requests . len ( ) ;
106
106
let kind = if batch_size == 1 { "single" } else { "batch" } ;
107
- metrics:: increment_counter !( "tgi_request_count" , "kind" => kind) ;
107
+ metrics:: counter !( "tgi_request_count" , "kind" => kind) . increment ( 1 ) ;
108
108
if batch_size == 0 {
109
109
return Ok ( Response :: new ( BatchedGenerationResponse { responses : vec ! [ ] } ) ) ;
110
110
}
@@ -113,7 +113,7 @@ impl GenerationService for GenerationServicer {
113
113
let _permit = self . state . limit_concurrent_requests
114
114
. try_acquire_many ( batch_size as u32 )
115
115
. map_err ( |_| {
116
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "conc_limit" ) ;
116
+ metrics:: counter !( "tgi_request_failure" , "err" => "conc_limit" ) . increment ( 1 ) ;
117
117
tracing:: error!( "Model is overloaded" ) ;
118
118
Status :: resource_exhausted ( "Model is overloaded" )
119
119
} ) ?;
@@ -155,11 +155,11 @@ impl GenerationService for GenerationServicer {
155
155
}
156
156
} . map_err ( |err| match err {
157
157
InferError :: RequestQueueFull ( ) => {
158
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "queue_full" ) ;
158
+ metrics:: counter !( "tgi_request_failure" , "err" => "queue_full" ) . increment ( 1 ) ;
159
159
Status :: resource_exhausted ( err. to_string ( ) )
160
160
} ,
161
161
_ => {
162
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "generate" ) ;
162
+ metrics:: counter !( "tgi_request_failure" , "err" => "generate" ) . increment ( 1 ) ;
163
163
tracing:: error!( "{err}" ) ;
164
164
Status :: from_error ( Box :: new ( err) )
165
165
} ,
@@ -184,11 +184,11 @@ impl GenerationService for GenerationServicer {
184
184
& self , request : Request < SingleGenerationRequest >
185
185
) -> Result < Response < Self :: GenerateStreamStream > , Status > {
186
186
let start_time = Instant :: now ( ) ;
187
- metrics:: increment_counter !( "tgi_request_count" , "kind" => "stream" ) ;
187
+ metrics:: counter !( "tgi_request_count" , "kind" => "stream" ) . increment ( 1 ) ;
188
188
self . input_counter . increment ( 1 ) ;
189
189
let permit = self . state . limit_concurrent_requests . clone ( )
190
190
. try_acquire_owned ( ) . map_err ( |_| {
191
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "conc_limit" ) ;
191
+ metrics:: counter !( "tgi_request_failure" , "err" => "conc_limit" ) . increment ( 1 ) ;
192
192
tracing:: error!( "Model is overloaded" ) ;
193
193
Status :: resource_exhausted ( "Model is overloaded" )
194
194
} ) ?;
@@ -210,7 +210,7 @@ impl GenerationService for GenerationServicer {
210
210
} , |ctx, count, reason, request_id, times, out, err| {
211
211
let _enter = ctx. span . enter ( ) ;
212
212
if let Some ( e) = err {
213
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "generate" ) ;
213
+ metrics:: counter !( "tgi_request_failure" , "err" => "generate" ) . increment ( 1 ) ;
214
214
tracing:: error!( "Streaming response failed after {count} tokens, \
215
215
output so far: '{:?}': {e}", truncate( & out, 32 ) ) ;
216
216
} else {
@@ -229,11 +229,11 @@ impl GenerationService for GenerationServicer {
229
229
. await
230
230
. map_err ( |err| match err {
231
231
InferError :: RequestQueueFull ( ) => {
232
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "queue_full" ) ;
232
+ metrics:: counter !( "tgi_request_failure" , "err" => "queue_full" ) . increment ( 1 ) ;
233
233
Status :: resource_exhausted ( err. to_string ( ) )
234
234
} ,
235
235
_ => {
236
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "unknown" ) ;
236
+ metrics:: counter !( "tgi_request_failure" , "err" => "unknown" ) . increment ( 1 ) ;
237
237
tracing:: error!( "{err}" ) ;
238
238
Status :: from_error ( Box :: new ( err) )
239
239
} ,
@@ -247,7 +247,7 @@ impl GenerationService for GenerationServicer {
247
247
& self , request : Request < BatchedTokenizeRequest >
248
248
) -> Result < Response < BatchedTokenizeResponse > , Status > {
249
249
let br = request. into_inner ( ) ;
250
- metrics:: increment_counter !( "tgi_tokenize_request_count" ) ;
250
+ metrics:: counter !( "tgi_tokenize_request_count" ) . increment ( 1 ) ;
251
251
let start_time = Instant :: now ( ) ;
252
252
self . tokenize_input_counter . increment ( br. requests . len ( ) as u64 ) ;
253
253
@@ -262,8 +262,8 @@ impl GenerationService for GenerationServicer {
262
262
) ) ) . map_err ( Status :: from_error) . await ?;
263
263
264
264
let token_total: u32 = responses. iter ( ) . map ( |tr| tr. token_count ) . sum ( ) ;
265
- metrics:: histogram!( "tgi_tokenize_request_tokens" , token_total as f64 ) ;
266
- metrics:: histogram!( "tgi_tokenize_request_duration" , start_time. elapsed( ) . as_secs_f64( ) ) ;
265
+ metrics:: histogram!( "tgi_tokenize_request_tokens" ) . record ( token_total as f64 ) ;
266
+ metrics:: histogram!( "tgi_tokenize_request_duration" ) . record ( start_time. elapsed ( ) . as_secs_f64 ( ) ) ;
267
267
268
268
Ok ( Response :: new ( BatchedTokenizeResponse { responses } ) )
269
269
}
@@ -304,11 +304,11 @@ impl GenerationServicer {
304
304
) . await ,
305
305
Err ( err) => Err ( err) ,
306
306
} . map_err ( |err| {
307
- metrics:: increment_counter !( "tgi_request_failure" , "err" => "validation" ) ;
307
+ metrics:: counter !( "tgi_request_failure" , "err" => "validation" ) . increment ( 1 ) ;
308
308
tracing:: error!( "{err}" ) ;
309
309
Status :: invalid_argument ( err. to_string ( ) )
310
310
} ) . map ( |requests| {
311
- metrics:: histogram!( "tgi_request_validation_duration" , start_time. elapsed( ) . as_secs_f64( ) ) ;
311
+ metrics:: histogram!( "tgi_request_validation_duration" ) . record ( start_time. elapsed ( ) . as_secs_f64 ( ) ) ;
312
312
requests
313
313
} )
314
314
}
@@ -349,23 +349,21 @@ fn log_response(
349
349
) ;
350
350
_enter = span. enter ( ) ;
351
351
352
- metrics:: histogram!( "tgi_request_inference_duration" , inference_time. as_secs_f64( ) ) ;
353
- metrics:: histogram!( "tgi_request_mean_time_per_token_duration" , time_per_token. as_secs_f64( ) ) ;
352
+ metrics:: histogram!( "tgi_request_inference_duration" ) . record ( inference_time. as_secs_f64 ( ) ) ;
353
+ metrics:: histogram!( "tgi_request_mean_time_per_token_duration" ) . record ( time_per_token. as_secs_f64 ( ) ) ;
354
354
}
355
355
356
356
// Metrics
357
357
match reason {
358
- Error => metrics:: increment_counter !( "tgi_request_failure" , "err" => "generate" ) ,
358
+ Error => metrics:: counter !( "tgi_request_failure" , "err" => "generate" ) . increment ( 1 ) ,
359
359
Cancelled => ( ) , // recorded where cancellation is detected
360
360
_ => {
361
- metrics:: increment_counter!(
362
- "tgi_request_success" , "stop_reason" => reason. as_str_name( ) , "kind" => kind
363
- ) ;
364
- metrics:: histogram!( "tgi_request_duration" , total_time. as_secs_f64( ) ) ;
365
- metrics:: histogram!( "tgi_request_generated_tokens" , generated_tokens as f64 ) ;
366
- metrics:: histogram!(
367
- "tgi_request_total_tokens" , ( generated_tokens as usize + input_tokens) as f64
368
- ) ;
361
+ metrics:: counter!( "tgi_request_success" , "stop_reason" => reason. as_str_name( ) , "kind" => kind)
362
+ . increment ( 1 ) ;
363
+ metrics:: histogram!( "tgi_request_duration" ) . record ( total_time. as_secs_f64 ( ) ) ;
364
+ metrics:: histogram!( "tgi_request_generated_tokens" ) . record ( generated_tokens as f64 ) ;
365
+ metrics:: histogram!( "tgi_request_total_tokens" )
366
+ . record ( ( generated_tokens as usize + input_tokens) as f64 ) ;
369
367
}
370
368
}
371
369
0 commit comments