
Commit 9c5f68e

Implement the DRYRUN functionality inside the new kv-cache types
Parent: ec223e6

File tree

2 files changed: 8 insertions(+), 0 deletions(-)

src/llama-kv-cache-recurrent.cpp

Lines changed: 4 additions & 0 deletions
@@ -99,6 +99,10 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(

     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
     if (!buf) {
+        if (getenv("DRYRUN")) {
+            LLAMA_LOG_ERROR("%s: pretend allocating buffer for kv cache was successful due to dry-run being enabled\n", __func__);
+            return;
+        }
         throw std::runtime_error("failed to allocate buffer for kv cache");
     }
     ggml_backend_buffer_clear(buf, 0);

src/llama-kv-cache-unified.cpp

Lines changed: 4 additions & 0 deletions
@@ -109,6 +109,10 @@ llama_kv_cache_unified::llama_kv_cache_unified(

     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
     if (!buf) {
+        if (getenv("DRYRUN")) {
+            LLAMA_LOG_ERROR("%s: pretend allocating buffer for kv cache was successful due to dry-run being enabled\n", __func__);
+            return;
+        }
         throw std::runtime_error("failed to allocate buffer for kv cache");
     }

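For context, below is a minimal, self-contained sketch of the guard pattern both hunks add. It is not the actual llama.cpp code: alloc_kv_buffer, init_kv_cache, and the fprintf logging are hypothetical stand-ins for ggml_backend_alloc_ctx_tensors_from_buft and LLAMA_LOG_ERROR. The idea shown is the same: when the DRYRUN environment variable is set, a failed buffer allocation is logged and skipped instead of being fatal.

// Minimal sketch (not part of this commit) of the DRYRUN guard pattern.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdexcept>

// hypothetical stand-in for ggml_backend_alloc_ctx_tensors_from_buft()
static void * alloc_kv_buffer(size_t size_bytes) {
    return malloc(size_bytes);
}

static void init_kv_cache(size_t size_bytes) {
    void * buf = alloc_kv_buffer(size_bytes);
    if (!buf) {
        if (getenv("DRYRUN")) {
            // pretend the allocation succeeded so initialization can continue
            fprintf(stderr, "%s: pretend allocating buffer for kv cache was successful due to dry-run being enabled\n", __func__);
            return;
        }
        throw std::runtime_error("failed to allocate buffer for kv cache");
    }
    memset(buf, 0, size_bytes); // mirrors ggml_backend_buffer_clear(buf, 0)
    free(buf);
}

int main() {
    init_kv_cache(1 << 20); // set DRYRUN=1 in the environment to exercise the guard on failure
    return 0;
}

In the patched constructors the effect is the same: with DRYRUN set, a failed KV-cache buffer allocation produces a log message and an early return instead of a thrown std::runtime_error.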