@@ -597,13 +597,13 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng
597597 int idx = dist (rng);
598598
599599 newpick.selected_token = FileFormatTokenizeID (candidates->data [idx].id , file_format, true );
600- newpick.selected_logprob = candidates->data [idx].logit ;
600+ newpick.selected_logprob = logf ( candidates->data [idx].p ) ;
601601 newpick.selected_probability = candidates->data [idx].p ;
602602 newpick.selected_tokenid = candidates->data [idx].id ;
603- for (size_t i = 0 ; (i < candidates->size && i<5 ); ++i)
603+ for (size_t i = 0 ; (i < candidates->size && i<logprobs_max ); ++i)
604604 {
605605 newpick.tokens .push_back (FileFormatTokenizeID (candidates->data [i].id , file_format, true ));
606- newpick.logprobs .push_back (candidates->data [i].logit );
606+ newpick.logprobs .push_back (logf ( candidates->data [i].p ) );
607607 newpick.p .push_back (candidates->data [i].p );
608608 newpick.tokenid .push_back (candidates->data [i].id );
609609 }
@@ -2467,6 +2467,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
24672467 printf (" \n Warning: KCPP text generation not initialized!\n " );
24682468 output.text = nullptr ;
24692469 output.status = 0 ;
2470+ output.prompt_tokens = output.completion_tokens = 0 ;
24702471 output.stopreason = stop_reason::INVALID;
24712472 generation_finished = true ;
24722473 return output;
@@ -3142,6 +3143,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
31423143 fprintf (stderr, " \n Failed to predict at %d! Check your context buffer sizes!\n " ,n_past);
31433144 output.text = nullptr ;
31443145 output.status = 0 ;
3146+ output.prompt_tokens = output.completion_tokens = 0 ;
31453147 output.stopreason = stop_reason::INVALID;
31463148 generation_finished = true ;
31473149 return output;
@@ -3471,6 +3473,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
34713473 fprintf (stderr, " \n Failed to eval llava image at %d!\n " ,n_past);
34723474 output.text = nullptr ;
34733475 output.status = 0 ;
3476+ output.prompt_tokens = output.completion_tokens = 0 ;
34743477 output.stopreason = stop_reason::INVALID;
34753478 generation_finished = true ;
34763479 return output;
@@ -3482,6 +3485,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
34823485 fprintf (stderr, " \n LLAVA image tokens mismatch at %d! (%d vs %d tokens)\n " ,n_past,llavatokenscounted,llavatokensevaled);
34833486 output.text = nullptr ;
34843487 output.status = 0 ;
3488+ output.prompt_tokens = output.completion_tokens = 0 ;
34853489 output.stopreason = stop_reason::INVALID;
34863490 generation_finished = true ;
34873491 return output;
@@ -3534,6 +3538,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
35343538 printf (" \n CtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)" ,(int )current_context_tokens.size (),(int )nctx, realnpredict, kcpp_data->n_predict , time0, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second);
35353539 fflush (stdout);
35363540 output.status = 1 ;
3541+ int finaltokcount = (int )current_context_tokens.size ()-realnpredict;
3542+ output.prompt_tokens = (finaltokcount<0 ?0 :finaltokcount);
3543+ output.completion_tokens = realnpredict;
35373544 output.stopreason = last_stop_reason;
35383545 last_eval_time = pt2;
35393546 last_process_time = pt1;
0 commit comments