rm double check for out-of-context

ngxson · ngxson · commit 3012326f0f8c · 2025-05-17T16:25:30.000+02:00
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
@@ -2262,12 +2262,14 @@ struct server_context {
             slot.has_next_token = true;
         }
 
-        // if context shifting is disabled, make sure that we don't run out of context
-        if (!params_base.ctx_shift && slot.n_past + 1 >= slot.n_ctx) {
+        // if context shift is disabled, we stop when it reaches the context limit
+        if (!params_base.ctx_shift && slot.n_cache_tokens() + 1 >= slot.n_ctx) {
+            slot.truncated      = true;
             slot.stop           = STOP_TYPE_LIMIT;
             slot.has_next_token = false;
 
-            SLT_DBG(slot, "stopped due to running out of context, n_past = %d, n_ctx = %d\n", slot.n_past, slot.n_ctx);
+            SLT_DBG(slot, "stopped due to running out of context capacity, n_cache_tokens = %d, n_prompt_tokens = %d, n_decoded = %d, n_ctx = %d\n",
+                    slot.n_cache_tokens(), slot.n_prompt_tokens(), slot.n_decoded, slot.n_ctx);
         }
 
         // check the limits
@@ -2327,16 +2329,6 @@ struct server_context {
             }
         }
 
-        // if context shift is disabled, we stop when it reaches the context limit
-        if (!params_base.ctx_shift && slot.n_cache_tokens() >= slot.n_ctx) {
-            slot.truncated      = true;
-            slot.stop           = STOP_TYPE_LIMIT;
-            slot.has_next_token = false;
-
-            SLT_DBG(slot, "stopped due to running out of context capacity, n_cache_tokens = %d, n_prompt_tokens = %d, n_decoded = %d, n_ctx = %d\n",
-                    slot.n_cache_tokens(), slot.n_prompt_tokens(), slot.n_decoded, slot.n_ctx);
-        }
-
         if (llama_vocab_is_eog(vocab, result.tok)) {
             slot.stop           = STOP_TYPE_EOS;
             slot.has_next_token = false;