diff --git a/src/llama.cpp b/src/llama.cpp
index 2c251c6b2..27647c9d2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17488,7 +17488,7 @@ static struct ggml_cgraph * llama_build_graph(
     const llama_vocab * vocab = llama_get_vocab(&lctx);
     llama_token bos = llama_token_bos_impl(*vocab);
     llama_token eos = llama_token_eos_impl(*vocab);
-    bool is_warming_up = (batch.n_tokens == 1 && (batch.token[0] == ((bos != -1) ? bos : eos)));
+    bool is_warming_up = lctx.n_eval == 0 && (batch.n_tokens == 1 && (batch.token[0] == ((bos != -1) ? bos : eos)));
     struct llm_build_context llm(lctx, batch, cb, worst_case, is_warming_up);
     llm.init();
 