diff --git a/extension/llm/runner/text_llm_runner.cpp b/extension/llm/runner/text_llm_runner.cpp index 4e0bccdb781..738951ebb95 100644 --- a/extension/llm/runner/text_llm_runner.cpp +++ b/extension/llm/runner/text_llm_runner.cpp @@ -190,7 +190,7 @@ Error TextLLMRunner::generate( // Generate max_new_tokens - 1 because prefill already generated 1 token. auto generate_result = text_token_generator_->generate( prompt_tokens, - num_prompt_tokens, + pos_, max_new_tokens - 1, temperature_ == -1.0f ? config.temperature : temperature_, wrapped_callback); @@ -199,6 +199,8 @@ Error TextLLMRunner::generate( } int64_t num_generated_tokens = generate_result.get(); + pos_ += num_generated_tokens; + stats_->inference_end_ms = time_in_ms(); if (!config.warming) { printf("\n");