diff --git a/src/llama.cpp b/src/llama.cpp
index 2c251c6b2..27647c9d2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17488,7 +17488,7 @@ static struct ggml_cgraph * llama_build_graph(
     const llama_vocab * vocab = llama_get_vocab(&lctx);
     llama_token bos = llama_token_bos_impl(*vocab);
     llama_token eos = llama_token_eos_impl(*vocab);
-    bool is_warming_up = (batch.n_tokens == 1 && (batch.token[0] == ((bos != -1) ? bos : eos)));
+    bool is_warming_up = lctx.n_eval == 0 && (batch.n_tokens == 1 && (batch.token[0] == ((bos != -1) ? bos : eos)));
     struct llm_build_context llm(lctx, batch, cb, worst_case, is_warming_up);
     llm.init();
 