@@ -2262,12 +2262,14 @@ struct server_context {
22622262            slot.has_next_token  = true ;
22632263        }
22642264
2265-         //  if context shifting is disabled, make sure that we don't run out of context
2266-         if  (!params_base.ctx_shift  && slot.n_past  + 1  >= slot.n_ctx ) {
2265+         //  if context shift is disabled, we stop when it reaches the context limit
2266+         if  (!params_base.ctx_shift  && slot.n_cache_tokens () + 1  >= slot.n_ctx ) {
2267+             slot.truncated       = true ;
22672268            slot.stop            = STOP_TYPE_LIMIT;
22682269            slot.has_next_token  = false ;
22692270
2270-             SLT_DBG (slot, " stopped due to running out of context, n_past = %d, n_ctx = %d\n "  , slot.n_past , slot.n_ctx );
2271+             SLT_DBG (slot, " stopped due to running out of context capacity, n_cache_tokens = %d, n_prompt_tokens = %d, n_decoded = %d, n_ctx = %d\n "  ,
2272+                     slot.n_cache_tokens (), slot.n_prompt_tokens (), slot.n_decoded , slot.n_ctx );
22712273        }
22722274
22732275        //  check the limits
@@ -2327,16 +2329,6 @@ struct server_context {
23272329            }
23282330        }
23292331
2330-         //  if context shift is disabled, we stop when it reaches the context limit
2331-         if  (!params_base.ctx_shift  && slot.n_cache_tokens () >= slot.n_ctx ) {
2332-             slot.truncated       = true ;
2333-             slot.stop            = STOP_TYPE_LIMIT;
2334-             slot.has_next_token  = false ;
2335- 
2336-             SLT_DBG (slot, " stopped due to running out of context capacity, n_cache_tokens = %d, n_prompt_tokens = %d, n_decoded = %d, n_ctx = %d\n "  ,
2337-                     slot.n_cache_tokens (), slot.n_prompt_tokens (), slot.n_decoded , slot.n_ctx );
2338-         }
2339- 
23402332        if  (llama_vocab_is_eog (vocab, result.tok )) {
23412333            slot.stop            = STOP_TYPE_EOS;
23422334            slot.has_next_token  = false ;
0 commit comments