Skip to content

Commit 3ad524a

Browse files
committed
kv-cache : fix off-by-one error in SWA pruning logic
ggml-ci
1 parent 00149d9 commit 3ad524a

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/llama-kv-cache.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,7 +1574,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(
15741574
llama_kv_cache_unified::layer_filter_cb filter_swa = [&](int32_t il) { return model.hparams.is_swa(il); };
15751575

15761576
const uint32_t kv_size_base = kv_size;
1577-
const uint32_t kv_size_swa = std::min(kv_size, GGML_PAD(hparams.n_swa*n_seq_max + n_batch + 1, padding));
1577+
const uint32_t kv_size_swa = std::min(kv_size, GGML_PAD(hparams.n_swa*n_seq_max + n_batch, padding));
15781578

15791579
LLAMA_LOG_INFO("%s: creating non-SWA KV cache, size = %u cells\n", __func__, kv_size_base);
15801580

@@ -1639,7 +1639,7 @@ void llama_kv_cache_unified_iswa::commit() {
16391639
continue;
16401640
}
16411641

1642-
kv_swa->seq_rm(seq_id, -1, pos_max - hparams.n_swa);
1642+
kv_swa->seq_rm(seq_id, -1, pos_max - hparams.n_swa + 1);
16431643
}
16441644

16451645
pending.pos_max.clear();

0 commit comments

Comments
 (0)