Skip to content

Commit edd2e8c

Browse files
committed
server : simplify context limit logic
1 parent 7ebe7f7 commit edd2e8c

File tree

1 file changed

+3
-22
lines changed

1 file changed

+3
-22
lines changed

tools/server/server.cpp

Lines changed: 3 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2866,10 +2866,12 @@ struct server_context {
28662866

28672867
// if context shifting is disabled, make sure that we don't run out of context
28682868
if (!params_base.ctx_shift && slot.n_past + 1 >= slot.n_ctx) {
2869+
slot.truncated = true;
28692870
slot.stop = STOP_TYPE_LIMIT;
28702871
slot.has_next_token = false;
28712872

2872-
SLT_DBG(slot, "stopped due to running out of context, n_past = %d, n_ctx = %d\n", slot.n_past, slot.n_ctx);
2873+
SLT_DBG(slot, "stopped due to running out of context capacity, n_past = %d, n_prompt_tokens = %d, n_decoded = %d, n_ctx = %d\n",
2874+
slot.n_past, slot.n_prompt_tokens(), slot.n_decoded, slot.n_ctx);
28732875
}
28742876

28752877
// check the limits
@@ -2929,34 +2931,13 @@ struct server_context {
29292931
}
29302932
}
29312933

2932-
// if context shift is disabled, we stop when it reaches the context limit
2933-
if (slot.n_past >= slot.n_ctx) {
2934-
slot.truncated = true;
2935-
slot.stop = STOP_TYPE_LIMIT;
2936-
slot.has_next_token = false;
2937-
2938-
SLT_DBG(slot, "stopped due to running out of context capacity, n_past = %d, n_prompt_tokens = %d, n_decoded = %d, n_ctx = %d\n",
2939-
slot.n_decoded, slot.n_prompt_tokens(), slot.n_past, slot.n_ctx);
2940-
}
2941-
29422934
if (llama_vocab_is_eog(vocab, result.tok)) {
29432935
slot.stop = STOP_TYPE_EOS;
29442936
slot.has_next_token = false;
29452937

29462938
SLT_DBG(slot, "%s", "stopped by EOS\n");
29472939
}
29482940

2949-
if (slot.task->params.n_predict < 1 && slot.n_prompt_tokens() + slot.n_decoded >= slot.n_ctx) {
2950-
slot.truncated = true;
2951-
slot.stop = STOP_TYPE_LIMIT;
2952-
slot.has_next_token = false; // stop prediction
2953-
2954-
SLT_WRN(slot,
2955-
"n_predict (%d) is set for infinite generation. "
2956-
"Limiting generated tokens to slot.n_ctx (%d) to avoid EOS-less generation infinite loop\n",
2957-
slot.task->params.n_predict, slot.n_ctx);
2958-
}
2959-
29602941
SLT_DBG(slot, "n_decoded = %d, n_remaining = %d, next token: %5d '%s'\n", slot.n_decoded, slot.n_remaining, result.tok, token_str.c_str());
29612942

29622943
return slot.has_next_token; // continue

0 commit comments

Comments
 (0)