cont : minor fixes

ggerganov · ggerganov · commit 785426a5d527 · 2025-06-04T18:37:12.000+03:00
ggml-ci
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
@@ -450,9 +450,22 @@ bool llama_context::kv_self_update(bool optimize) {
         memory_force_optimize = false;
 
         const auto kv_state = kv_self->init_update(this, optimize);
-        if (kv_state->get_status() == LLAMA_MEMORY_STATUS_NO_UPDATE) {
-            // no updates need to be performed
-            return false;
+        switch (kv_state->get_status()) {
+            case LLAMA_MEMORY_STATUS_SUCCESS:
+                {
+                    // noop
+                } break;
+            case LLAMA_MEMORY_STATUS_NO_UPDATE:
+                {
+                    // no updates need to be performed
+                    return false;
+                }
+            case LLAMA_MEMORY_STATUS_FAILED_PREPARE:
+            case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
+                {
+                    LLAMA_LOG_ERROR("%s: failed to prepare memory update\n", __func__);
+                    return false;
+                }
         }
 
         if (!kv_state->apply()) {
diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
@@ -435,14 +435,16 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
         cells.reset_shift();
     }
 
-    if (!dinfo.ids.empty()) {
+    if (!dinfo.empty()) {
         LLAMA_LOG_DEBUG("%s: defragmenting KV cache\n", __func__);
 
         // apply moves:
         {
             const auto n_kv = dinfo.ids.size();
 
             for (uint32_t i = 0; i < n_kv; ++i) {
+                assert(dinfo.ids[i] <= n_kv);
+
                 if (dinfo.ids[i] == n_kv) {
                     continue;
                 }
@@ -1657,7 +1659,7 @@ llama_kv_cache_unified_state::llama_kv_cache_unified_state(
         llama_context * lctx,
         bool do_shift,
         defrag_info dinfo) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), lctx(lctx), do_shift(do_shift), dinfo(std::move(dinfo)) {
-    if (!do_shift && dinfo.ids.empty()) {
+    if (!do_shift && this->dinfo.empty()) { // must read the member: the same-named parameter was moved-from in the initializer list
         status = LLAMA_MEMORY_STATUS_NO_UPDATE;
     }
 }
@@ -1684,7 +1686,7 @@ bool llama_kv_cache_unified_state::next() {
 bool llama_kv_cache_unified_state::apply() {
     assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
 
-    // this is a KV cache update
+    // no ubatches -> this is a KV cache update
     if (ubatches.empty()) {
         kv->update(lctx, do_shift, dinfo);
 
diff --git a/src/llama-kv-cache-unified.h b/src/llama-kv-cache-unified.h
@@ -27,6 +27,10 @@ class llama_kv_cache_unified : public llama_kv_cache {
     using ubatch_heads = std::vector<uint32_t>;
 
     struct defrag_info {
+        // reports whether `ids` is empty, i.e. no cell moves are recorded
+        // (llama_kv_cache_unified::update() skips its defrag step when this is true)
+        bool empty() const { return ids.empty(); }
+
         // contains information about which cell moves where:
         //  - cell i moves to ids[i]
         //  - if ids[i] == i || ids[i] == ids.size(), then cell i is not moved

Original file line number	Diff line number	Diff line change
`@@ -435,14 +435,16 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d`
`435`	`435`	`cells.reset_shift();`
`436`	`436`	`}`
`437`	`437`
`438`		`- if (!dinfo.ids.empty()) {`
	`438`	`+ if (!dinfo.empty()) {`
`439`	`439`	`LLAMA_LOG_DEBUG("%s: defragmenting KV cache\n", __func__);`
`440`	`440`
`441`	`441`	`// apply moves:`
`442`	`442`	`{`
`443`	`443`	`const auto n_kv = dinfo.ids.size();`
`444`	`444`
`445`	`445`	`for (uint32_t i = 0; i < n_kv; ++i) {`
	`446`	`+ assert(dinfo.ids[i] <= n_kv);`
	`447`	`+`
`446`	`448`	`if (dinfo.ids[i] == n_kv) {`
`447`	`449`	`continue;`
`448`	`450`	`}`
`@@ -1657,7 +1659,7 @@ llama_kv_cache_unified_state::llama_kv_cache_unified_state(`
`1657`	`1659`	`llama_context * lctx,`
`1658`	`1660`	`bool do_shift,`
`1659`	`1661`	`defrag_info dinfo) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), lctx(lctx), do_shift(do_shift), dinfo(std::move(dinfo)) {`
`1660`		`- if (!do_shift && dinfo.ids.empty()) {`
	`1662`	`+ if (!do_shift && dinfo.empty()) {`
`1661`	`1663`	`status = LLAMA_MEMORY_STATUS_NO_UPDATE;`
`1662`	`1664`	`}`
`1663`	`1665`	`}`
`@@ -1684,7 +1686,7 @@ bool llama_kv_cache_unified_state::next() {`
`1684`	`1686`	`bool llama_kv_cache_unified_state::apply() {`
`1685`	`1687`	`assert(status == LLAMA_MEMORY_STATUS_SUCCESS);`
`1686`	`1688`
`1687`		`- // this is a KV cache update`
	`1689`	`+ // no ubatches -> this is a KV cache update`
`1688`	`1690`	`if (ubatches.empty()) {`
`1689`	`1691`	`kv->update(lctx, do_shift, dinfo);`
`1690`	`1692`