Skip to content

Commit c08d0d1

Browse files
committed
context : do not cap the size of the context
1 parent ff68436 commit c08d0d1

File tree

2 files changed

+11
-14
lines changed

2 files changed

+11
-14
lines changed

src/llama-context.cpp

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -115,19 +115,8 @@ llama_context::llama_context(
     if (cparams.kv_unified) {
         cparams.n_ctx_seq = cparams.n_ctx;
     } else {
-        cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
-    }
-
-    if (cparams.n_ctx_seq > hparams.n_ctx_train) {
-        LLAMA_LOG_WARN("%s: capping n_ctx_seq (%u) to n_ctx_train (%u)\n", __func__, cparams.n_ctx_seq, hparams.n_ctx_train);
-
-        cparams.n_ctx_seq = hparams.n_ctx_train;
-    }
-
-    if (cparams.kv_unified) {
-        cparams.n_ctx = cparams.n_ctx_seq;
-    } else {
-        cparams.n_ctx = cparams.n_ctx_seq * cparams.n_seq_max;
+        cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
+        cparams.n_ctx = cparams.n_ctx_seq * cparams.n_seq_max;
     }

     LLAMA_LOG_INFO("%s: n_seq_max = %u\n", __func__, cparams.n_seq_max);

tools/server/server.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2497,12 +2497,20 @@ struct server_context {
     void init() {
         SRV_INF("initializing slots, n_slots = %d\n", params_base.n_parallel);

+        const int n_ctx_train = llama_model_n_ctx_train(model);
+
+        int n_ctx_slot = llama_n_ctx_seq(ctx);
+        if (n_ctx_slot > n_ctx_train) {
+            SRV_WRN("the slot context (%d) exceeds the training context of the model (%d) - capping\n", n_ctx_slot, n_ctx_train);
+            n_ctx_slot = n_ctx_train;
+        }
+
         for (int i = 0; i < params_base.n_parallel; i++) {
             server_slot slot;

             slot.id = i;
             slot.ctx = ctx;
-            slot.n_ctx = llama_n_ctx_seq(ctx);
+            slot.n_ctx = n_ctx_slot;
             slot.mctx = mctx;
             slot.prompt.tokens.has_mtmd = mctx != nullptr;

0 commit comments

Comments
 (0)