File tree (expand / collapse) — 2 files changed: +12 −2 lines changed
lines changed Original file line number Diff line number Diff line change @@ -115,8 +115,16 @@ llama_context::llama_context(
115115 if (cparams.kv_unified ) {
116116 cparams.n_ctx_seq = cparams.n_ctx ;
117117 } else {
118- cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max ;
119- cparams.n_ctx = cparams.n_ctx_seq * cparams.n_seq_max ;
118+ cparams.n_ctx_seq = cparams.n_ctx / cparams.n_seq_max ;
119+
120+ if (cparams.n_ctx_seq == 0 ) {
121+ throw std::runtime_error (" n_ctx_seq == 0" );
122+ }
123+
124+ if (cparams.n_ctx != cparams.n_ctx_seq * cparams.n_seq_max ) {
125+ cparams.n_ctx = cparams.n_ctx_seq * cparams.n_seq_max ;
126+ LLAMA_LOG_WARN (" %s: n_ctx is not divisible by n_seq_max - rounding down to %u\n " , __func__, cparams.n_ctx );
127+ }
120128 }
121129
122130 LLAMA_LOG_INFO (" %s: n_seq_max = %u\n " , __func__, cparams.n_seq_max );
Original file line number Diff line number Diff line change @@ -4440,6 +4440,8 @@ int main(int argc, char ** argv) {
44404440 return 1 ;
44414441 }
44424442
4443+ // TODO: should we have a separate n_parallel parameter for the server?
4444+ // https://github.com/ggml-org/llama.cpp/pull/16736#discussion_r2483763177
44434445 if (params.n_parallel == 1 && params.kv_unified == false ) {
44444446 LOG_WRN (" %s: setting n_parallel = 4 and kv_unified = true\n " , __func__);
44454447
You can’t perform that action at this time.
0 commit comments