Skip to content

Commit 987955f

Browse files
ngxson and ggerganov authored
Apply suggestions from code review
Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 3012326 commit 987955f

File tree

2 files changed

+2
-3
lines changed

2 files changed

+2
-3
lines changed

tools/server/server.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,7 +1308,7 @@ struct server_slot {
13081308
int64_t t_start_process_prompt;
13091309
int64_t t_start_generation;
13101310

1311-
size_t n_prompt_processing = 0; // number of decoded prompt tokens (may be less than prompt_tokens.n_kv_tokens(), in case we are using cache)
1311+
size_t n_prompt_processing = 0; // number of decoded prompt tokens (may be less than prompt_tokens.n_tokens(), in case we are using cache)
13121312
double t_prompt_processing; // ms
13131313
double t_token_generation; // ms
13141314

@@ -2476,7 +2476,7 @@ struct server_context {
24762476
res->truncated = slot.truncated;
24772477
res->n_decoded = slot.n_decoded;
24782478
res->n_prompt_tokens = slot.n_prompt_tokens();
2479-
res->n_tokens_cached = slot.n_past;
2479+
res->n_tokens_cached = slot.n_cache_tokens();
24802480
res->has_new_line = slot.has_new_line;
24812481
res->stopping_word = slot.stopping_word;
24822482
res->stop = slot.stop;

tools/server/utils.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1045,7 +1045,6 @@ struct server_tokens {
10451045
// it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
10461046
// a mtmd_input_chunk can occupy multiple tokens, one llama_token per **position**
10471047
// important: for models using mrope, an image can contain multiple tokens but will use only one **position**
1048-
// in otherwords, tokens.size() == n_past
10491048
llama_tokens tokens;
10501049

10511050
// for ex. with input of 5 text tokens and 2 images:

0 commit comments

Comments
 (0)