Skip to content

Commit 785426a

Browse files
committed
cont : minor fixes
ggml-ci
1 parent 199d74c commit 785426a

File tree

3 files changed

+25
-6
lines changed

3 files changed

+25
-6
lines changed

src/llama-context.cpp

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -450,9 +450,22 @@ bool llama_context::kv_self_update(bool optimize) {
450450
memory_force_optimize = false;
451451

452452
const auto kv_state = kv_self->init_update(this, optimize);
453-
if (kv_state->get_status() == LLAMA_MEMORY_STATUS_NO_UPDATE) {
454-
// no updates need to be performed
455-
return false;
453+
switch (kv_state->get_status()) {
454+
case LLAMA_MEMORY_STATUS_SUCCESS:
455+
{
456+
// noop
457+
} break;
458+
case LLAMA_MEMORY_STATUS_NO_UPDATE:
459+
{
460+
// no updates need to be performed
461+
return false;
462+
}
463+
case LLAMA_MEMORY_STATUS_FAILED_PREPARE:
464+
case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
465+
{
466+
LLAMA_LOG_ERROR("%s: failed to prepare memory update\n", __func__);
467+
return false;
468+
}
456469
}
457470

458471
if (!kv_state->apply()) {

src/llama-kv-cache-unified.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -435,14 +435,16 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
435435
cells.reset_shift();
436436
}
437437

438-
if (!dinfo.ids.empty()) {
438+
if (!dinfo.empty()) {
439439
LLAMA_LOG_DEBUG("%s: defragmenting KV cache\n", __func__);
440440

441441
// apply moves:
442442
{
443443
const auto n_kv = dinfo.ids.size();
444444

445445
for (uint32_t i = 0; i < n_kv; ++i) {
446+
assert(dinfo.ids[i] <= n_kv);
447+
446448
if (dinfo.ids[i] == n_kv) {
447449
continue;
448450
}
@@ -1657,7 +1659,7 @@ llama_kv_cache_unified_state::llama_kv_cache_unified_state(
16571659
llama_context * lctx,
16581660
bool do_shift,
16591661
defrag_info dinfo) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), lctx(lctx), do_shift(do_shift), dinfo(std::move(dinfo)) {
1660-
if (!do_shift && dinfo.ids.empty()) {
1662+
if (!do_shift && dinfo.empty()) {
16611663
status = LLAMA_MEMORY_STATUS_NO_UPDATE;
16621664
}
16631665
}
@@ -1684,7 +1686,7 @@ bool llama_kv_cache_unified_state::next() {
16841686
bool llama_kv_cache_unified_state::apply() {
16851687
assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
16861688

1687-
// this is a KV cache update
1689+
// no ubatches -> this is a KV cache update
16881690
if (ubatches.empty()) {
16891691
kv->update(lctx, do_shift, dinfo);
16901692

src/llama-kv-cache-unified.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,10 @@ class llama_kv_cache_unified : public llama_kv_cache {
2727
using ubatch_heads = std::vector<uint32_t>;
2828

2929
struct defrag_info {
30+
// returns true when ids holds no entries (i.e. ids.empty())
bool empty() const {
31+
return ids.empty();
32+
}
33+
3034
// contains information about which cell moves where:
3135
// - cell i moves to ids[i]
3236
// - if ids[i] == i || ids[i] == ids.size(), then cell i is not moved

0 commit comments

Comments
 (0)