@@ -2385,6 +2385,10 @@ struct server_context {
23852385 llama_batch_free (batch);
23862386 }
23872387
2388+ int32_t n_ctx_slot () const { // context size assigned to a single slot (used for slot.n_ctx and as the draft-model n_ctx fallback)
2389+ return params_base.kv_unified ? n_ctx : n_ctx / params_base.n_parallel ; // whole n_ctx when kv_unified is set, otherwise n_ctx split evenly across n_parallel; NOTE(review): presumably integer division (remainder dropped) and assumes n_parallel != 0 — confirm
2390+ }
2391+
23882392 bool load_model (const common_params & params) {
23892393 SRV_INF (" loading model '%s'\n " , params.model .path .c_str ());
23902394
@@ -2413,7 +2417,7 @@ struct server_context {
24132417
24142418 params_dft.devices = params_base.speculative .devices ;
24152419 params_dft.model = params_base.speculative .model ;
2416- params_dft.n_ctx = params_base.speculative .n_ctx == 0 ? slots. front (). n_ctx : params_base.speculative .n_ctx ;
2420+ params_dft.n_ctx = params_base.speculative .n_ctx == 0 ? n_ctx_slot () : params_base.speculative .n_ctx ;
24172421 params_dft.n_gpu_layers = params_base.speculative .n_gpu_layers ;
24182422 params_dft.n_parallel = 1 ;
24192423 params_dft.cache_type_k = params_base.speculative .cache_type_k ;
@@ -2501,16 +2505,14 @@ struct server_context {
25012505 }
25022506
25032507 void init () {
2504- const int32_t n_ctx_slot = params_base.kv_unified ? n_ctx : n_ctx / params_base.n_parallel ;
2505-
25062508 SRV_INF (" initializing slots, n_slots = %d\n " , params_base.n_parallel );
25072509
25082510 for (int i = 0 ; i < params_base.n_parallel ; i++) {
25092511 server_slot slot;
25102512
25112513 slot.id = i;
25122514 slot.ctx = ctx;
2513- slot.n_ctx = n_ctx_slot;
2515+ slot.n_ctx = n_ctx_slot () ;
25142516 slot.mctx = mctx;
25152517 slot.prompt .tokens .has_mtmd = mctx != nullptr ;
25162518
@@ -2533,7 +2535,7 @@ struct server_context {
25332535 }
25342536 }
25352537
2536- SLT_INF (slot, " new slot n_ctx_slot = %d\n " , slot.n_ctx );
2538+ SLT_INF (slot, " new slot, n_ctx = %d\n " , slot.n_ctx );
25372539
25382540 slot.callback_on_release = [this ](int ) {
25392541 queue_tasks.pop_deferred_task ();
@@ -3697,7 +3699,7 @@ struct server_context {
36973699 slot.n_past = 0 ;
36983700 slot.state = SLOT_STATE_PROCESSING_PROMPT;
36993701
3700- SLT_INF (slot, " new prompt, n_ctx_slot = %d, n_keep = %d, n_prompt_tokens = %d\n " ,
3702+ SLT_INF (slot, " new prompt, n_ctx = %d, n_keep = %d, n_prompt_tokens = %d\n " ,
37013703 slot.n_ctx , slot.task ->params .n_keep , slot.n_prompt_tokens ());
37023704
37033705 // print prompt tokens (for debugging)
0 commit comments