Commit a24cb40

server : check if context can do shifts
ggml-ci
1 parent 9473e16 commit a24cb40

File tree

2 files changed (+18 lines, -1 line)

src/llama-kv-cache.cpp

Lines changed: 1 addition & 1 deletion
@@ -1579,7 +1579,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(
     llama_kv_cache_unified::layer_filter_cb filter_swa = [&](int32_t il) { return model.hparams.is_swa(il); };
 
     const uint32_t kv_size_base = kv_size;
-    const uint32_t kv_size_swa  = std::min(kv_size, hparams.n_swa*n_seq_max + n_batch);
+    const uint32_t kv_size_swa  = std::min(kv_size, GGML_PAD(hparams.n_swa*n_seq_max + n_batch + 1, padding));
 
     kv_base = std::make_unique<llama_kv_cache_unified>(model, std::move(filter_base), type_k, type_v, v_trans, offload, kv_size_base, padding);
     kv_swa  = std::make_unique<llama_kv_cache_unified>(model, std::move(filter_swa),  type_k, type_v, v_trans, offload, kv_size_swa,  padding);
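
The functional change above is in the SWA cache sizing: the window size now gets one extra cell and is rounded up to the cache padding. A minimal sketch of that arithmetic, assuming GGML_PAD(x, n) has its usual "round x up to the next multiple of n" semantics and using made-up values for the sizes:

// Sketch only: the exact GGML_PAD definition and the sample sizes below are
// assumptions, not taken from this commit.
#include <algorithm>
#include <cstdint>
#include <cstdio>

#define GGML_PAD(x, n) (((x) + (n) - 1) / (n) * (n)) // round x up to a multiple of n

int main() {
    const uint32_t kv_size   = 8192; // hypothetical total KV cache size
    const uint32_t n_swa     = 512;  // hypothetical sliding-window width
    const uint32_t n_seq_max = 1;    // hypothetical max parallel sequences
    const uint32_t n_batch   = 2048; // hypothetical logical batch size
    const uint32_t padding   = 256;  // hypothetical cache padding

    // before: 512*1 + 2048                    = 2560
    // after:  GGML_PAD(512*1 + 2048 + 1, 256) = GGML_PAD(2561, 256) = 2816
    const uint32_t kv_size_swa = std::min(kv_size, GGML_PAD(n_swa*n_seq_max + n_batch + 1, padding));

    printf("kv_size_swa = %u\n", kv_size_swa); // prints 2816
    return 0;
}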

tools/server/server.cpp

Lines changed: 17 additions & 0 deletions
@@ -2004,6 +2004,23 @@ struct server_context {
             }
         }
 
+        if (!llama_kv_self_can_shift(ctx)) {
+            if (params_base.ctx_shift) {
+                params_base.ctx_shift = false;
+                SRV_WRN("%s\n", "ctx_shift is not supported by this context, it will be disabled");
+            }
+
+            if (params_base.n_cache_reuse) {
+                params_base.n_cache_reuse = 0;
+                SRV_WRN("%s\n", "cache_reuse is not supported by this context, it will be disabled");
+            }
+
+            if (!params_base.speculative.model.path.empty()) {
+                SRV_ERR("%s\n", "err: speculative decode is not supported by this context");
+                return false;
+            }
+        }
+
         return true;
     }
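
For completeness, a standalone sketch of performing the same capability check from client code. Only llama_kv_self_can_shift() is taken from the diff above; the loader and context function names (llama_model_load_from_file, llama_init_from_model, and friends) are assumed to match the llama.cpp C API of roughly this vintage, so treat this as illustrative rather than authoritative:

// Sketch, not part of the commit: query whether a context supports KV cache
// shifts before enabling features (context shift, cache reuse, speculative
// decoding) that depend on them.
#include "llama.h"
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_init_from_model(model, cparams);
    if (ctx == nullptr) {
        fprintf(stderr, "failed to create context\n");
        llama_model_free(model);
        return 1;
    }

    // Some cache layouts (e.g. certain SWA configurations) cannot be shifted;
    // in that case the server now disables ctx_shift and cache_reuse and
    // refuses to run speculative decoding.
    if (llama_kv_self_can_shift(ctx)) {
        printf("KV cache shifts: supported\n");
    } else {
        printf("KV cache shifts: not supported by this context\n");
    }

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}

This mirrors the server change: rather than failing later at runtime, incompatible features are downgraded (ctx_shift, cache_reuse) or rejected up front (speculative decoding).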
