We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9152f0a commit 8946d80 Copy full SHA for 8946d80
extension/llm/runner/text_llm_runner.cpp
@@ -190,7 +190,7 @@ Error TextLLMRunner::generate(
190
// Generate max_new_tokens - 1 because prefill already generated 1 token.
191
auto generate_result = text_token_generator_->generate(
192
prompt_tokens,
193
- num_prompt_tokens,
+ pos_,
194
max_new_tokens - 1,
195
temperature_ == -1.0f ? config.temperature : temperature_,
196
wrapped_callback);
@@ -199,6 +199,8 @@ Error TextLLMRunner::generate(
199
}
200
int64_t num_generated_tokens = generate_result.get();
201
202
+ pos_ += num_generated_tokens;
203
+
204
stats_->inference_end_ms = time_in_ms();
205
if (!config.warming) {
206
printf("\n");
0 commit comments