Skip to content

Commit 56fceee

Browse files
committed
context : add warning
1 parent 356dc08 commit 56fceee

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

src/llama-context.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,16 @@ llama_context::llama_context(
115 115
if (cparams.kv_unified) {
116 116
cparams.n_ctx_seq = cparams.n_ctx;
117 117
} else {
118-
cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
119-
cparams.n_ctx = cparams.n_ctx_seq * cparams.n_seq_max;
118+
cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max;
119+
120+
if (cparams.n_ctx_seq == 0) {
121+
throw std::runtime_error("n_ctx_seq == 0");
122+
}
123+
124+
if (cparams.n_ctx != cparams.n_ctx_seq * cparams.n_seq_max) {
125+
cparams.n_ctx = cparams.n_ctx_seq * cparams.n_seq_max;
126+
LLAMA_LOG_WARN("%s: n_ctx is not divisible by n_seq_max - rounding down to %u\n", __func__, cparams.n_ctx);
127+
}
120 128
}
121 129

122 130
LLAMA_LOG_INFO("%s: n_seq_max = %u\n", __func__, cparams.n_seq_max);

tools/server/server.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4440,6 +4440,8 @@ int main(int argc, char ** argv) {
4440 4440
return 1;
4441 4441
}
4442 4442

4443+
// TODO: should we have a separate n_parallel parameter for the server?
4444+
// https://github.com/ggml-org/llama.cpp/pull/16736#discussion_r2483763177
4443 4445
if (params.n_parallel == 1 && params.kv_unified == false) {
4444 4446
LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true\n", __func__);
4445 4447

0 commit comments

Comments
 (0)