Commit 6fb5f1a

ggerganov authored and arthw committed
llama : handle KV shift for recurrent models (ggml-org#10402)
ggml-ci
1 parent 3882f3f commit 6fb5f1a

File tree: 1 file changed (+5, -5 lines)

src/llama.cpp

Lines changed: 5 additions & 5 deletions

@@ -18220,13 +18220,13 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
 static void llama_kv_cache_update_internal(struct llama_context & lctx) {
     bool need_reserve = false;

-    // apply K-shift if needed
-    if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) {
+    if (lctx.kv_self.has_shift) {
         if (!llama_kv_cache_can_shift(&lctx)) {
-            GGML_ABORT("Deepseek2 does not support K-shift");
+            GGML_ABORT("The current context does not support K-shift");
         }

-        {
+        // apply K-shift if needed
+        if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
             ggml_backend_sched_reset(lctx.sched.get());

             ggml_cgraph * gf = llama_build_graph_k_shift(lctx);
@@ -20472,7 +20472,7 @@ void llama_kv_cache_update(struct llama_context * ctx) {
 }

 bool llama_kv_cache_can_shift(struct llama_context * ctx) {
-    return ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
+    return !ctx->kv_self.recurrent && ctx->model.arch != LLM_ARCH_DEEPSEEK2; // not supported due to MLA
 }

 // deprecated
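
For context on how this check is meant to be consumed, here is a minimal sketch, not part of this commit, of a caller that consults llama_kv_cache_can_shift() before discarding old tokens and shifting the rest of a sequence. The helper name shrink_context and its parameters are illustrative; the sketch assumes the llama.h KV-cache API of this period (llama_kv_cache_seq_rm, llama_kv_cache_seq_add, llama_kv_cache_update).

// Minimal sketch (not from this commit): discard the oldest tokens of a
// sequence and shift the remaining KV cells back, but only when the cache
// supports K-shift. Helper name and parameters are illustrative.
#include "llama.h"

static bool shrink_context(struct llama_context * ctx, llama_seq_id seq, llama_pos n_keep, llama_pos n_discard) {
    if (!llama_kv_cache_can_shift(ctx)) {
        // Recurrent caches and DeepSeek2 (MLA) cannot shift cached positions;
        // the caller has to take another path, e.g. clear the sequence and
        // re-evaluate the prompt.
        return false;
    }

    // Remove the cells in [n_keep, n_keep + n_discard) ...
    llama_kv_cache_seq_rm (ctx, seq, n_keep, n_keep + n_discard);

    // ... and move everything after them back by n_discard positions.
    // This only marks the cache with has_shift; the actual K-shift graph
    // runs in llama_kv_cache_update_internal() shown above.
    llama_kv_cache_seq_add(ctx, seq, n_keep + n_discard, -1, -n_discard);

    // Force the deferred update now (it would otherwise be applied on the
    // next llama_decode()).
    llama_kv_cache_update(ctx);

    return true;
}

After this change, the has_shift flag alone triggers the support check, llama_kv_cache_can_shift() also rejects recurrent caches, and the rope_type condition only gates building the K-shift graph; the abort message no longer assumes DeepSeek2 is the only unsupported case.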
