2 files changed: +2 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -1308,7 +1308,7 @@ struct server_slot {
13081308 int64_t t_start_process_prompt;
13091309 int64_t t_start_generation;
13101310
1311- size_t n_prompt_processing = 0 ; // number of decoded prompt tokens (may be less than prompt_tokens.n_kv_tokens (), in case we are using cache)
1311+ size_t n_prompt_processing = 0 ; // number of decoded prompt tokens (may be less than prompt_tokens.n_tokens (), in case we are using cache)
13121312 double t_prompt_processing; // ms
13131313 double t_token_generation; // ms
13141314
@@ -2476,7 +2476,7 @@ struct server_context {
24762476 res->truncated = slot.truncated ;
24772477 res->n_decoded = slot.n_decoded ;
24782478 res->n_prompt_tokens = slot.n_prompt_tokens ();
2479- res->n_tokens_cached = slot.n_past ;
2479+ res->n_tokens_cached = slot.n_cache_tokens () ;
24802480 res->has_new_line = slot.has_new_line ;
24812481 res->stopping_word = slot.stopping_word ;
24822482 res->stop = slot.stop ;
Original file line number | Diff line number | Diff line change
@@ -1045,7 +1045,6 @@ struct server_tokens {
10451045 // it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
10461046 // a mtmd_input_chunk can occupy multiple tokens, one llama_token per **position**
10471047 // important: for models using mrope, an image can contain multiple tokens but will use only one **position**
1048- // in otherwords, tokens.size() == n_past
10491048 llama_tokens tokens;
10501049
10511050 // for ex. with input of 5 text tokens and 2 images:
You can’t perform that action at this time.
0 commit comments