@@ -341,7 +341,7 @@ struct server_slot {
341341};
342342
343343struct server_metrics {
344- const int64_t t_start = ggml_time_us() ;
344+ int64_t t_start = 0 ;
345345
346346 uint64_t n_prompt_tokens_processed_total = 0 ;
347347 uint64_t t_prompt_processing_total = 0 ;
@@ -354,14 +354,18 @@ struct server_metrics {
354354 uint64_t n_tokens_predicted = 0 ;
355355 uint64_t t_tokens_generation = 0 ;
356356
357- void on_prompt_eval (const server_slot &slot) {
357+ void init () {
358+ t_start = ggml_time_us ();
359+ }
360+
361+ void on_prompt_eval (const server_slot & slot) {
358362 n_prompt_tokens_processed_total += slot.n_prompt_tokens_processed ;
359363 n_prompt_tokens_processed += slot.n_prompt_tokens_processed ;
360364 t_prompt_processing += slot.t_prompt_processing ;
361365 t_prompt_processing_total += slot.t_prompt_processing ;
362366 }
363367
364- void on_prediction (const server_slot &slot) {
368+ void on_prediction (const server_slot & slot) {
365369 n_tokens_predicted_total += slot.n_decoded ;
366370 n_tokens_predicted += slot.n_decoded ;
367371 t_tokens_generation += slot.t_token_generation ;
@@ -690,10 +694,11 @@ struct server_context {
690694 return res > 0 ;
691695 }
692696
693- void initialize () {
697+ void init () {
694698 const int32_t n_ctx_slot = n_ctx / params.n_parallel ;
695699
696700 LOG_INFO (" initializing slots" , {{" n_slots" , params.n_parallel }});
701+
697702 for (int i = 0 ; i < params.n_parallel ; i++) {
698703 server_slot slot;
699704
@@ -735,6 +740,8 @@ struct server_context {
735740 default_generation_settings_for_props[" seed" ] = -1 ;
736741
737742 batch = llama_batch_init (n_ctx, 0 , params.n_parallel );
743+
744+ metrics.init ();
738745 }
739746
740747 std::vector<llama_token> tokenize (const json & json_prompt, bool add_bos) const {
@@ -2783,7 +2790,7 @@ int main(int argc, char ** argv) {
27832790 state.store (SERVER_STATE_ERROR);
27842791 return 1 ;
27852792 } else {
2786- ctx_server.initialize ();
2793+ ctx_server.init ();
27872794 state.store (SERVER_STATE_READY);
27882795 }
27892796
0 commit comments