
Commit 9c5f68e

Implement the DRYRUN functionality inside the new kv-cache types
Parent: ec223e6

File tree

2 files changed: 8 insertions(+), 0 deletions(-)

src/llama-kv-cache-recurrent.cpp

Lines changed: 4 additions & 0 deletions
@@ -99,6 +99,10 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent(

     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
     if (!buf) {
+        if (getenv("DRYRUN")) {
+            LLAMA_LOG_ERROR("%s: pretend allocating buffer for kv cache was successful due to dry-run being enabled\n", __func__);
+            return;
+        }
         throw std::runtime_error("failed to allocate buffer for kv cache");
     }
     ggml_backend_buffer_clear(buf, 0);

src/llama-kv-cache-unified.cpp

Lines changed: 4 additions & 0 deletions
@@ -109,6 +109,10 @@ llama_kv_cache_unified::llama_kv_cache_unified(

     ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
     if (!buf) {
+        if (getenv("DRYRUN")) {
+            LLAMA_LOG_ERROR("%s: pretend allocating buffer for kv cache was successful due to dry-run being enabled\n", __func__);
+            return;
+        }
         throw std::runtime_error("failed to allocate buffer for kv cache");
     }

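For context, below is a minimal, self-contained sketch of the guard pattern both hunks add. It is not the actual llama.cpp code: alloc_kv_buffer, init_kv_cache, and the fprintf logging are hypothetical stand-ins for ggml_backend_alloc_ctx_tensors_from_buft and LLAMA_LOG_ERROR. The idea shown is the same: when the DRYRUN environment variable is set, a failed buffer allocation is logged and skipped instead of being fatal.

// Minimal sketch (not part of this commit) of the DRYRUN guard pattern.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdexcept>

// hypothetical stand-in for ggml_backend_alloc_ctx_tensors_from_buft()
static void * alloc_kv_buffer(size_t size_bytes) {
    return malloc(size_bytes);
}

static void init_kv_cache(size_t size_bytes) {
    void * buf = alloc_kv_buffer(size_bytes);
    if (!buf) {
        if (getenv("DRYRUN")) {
            // pretend the allocation succeeded so initialization can continue
            fprintf(stderr, "%s: pretend allocating buffer for kv cache was successful due to dry-run being enabled\n", __func__);
            return;
        }
        throw std::runtime_error("failed to allocate buffer for kv cache");
    }
    memset(buf, 0, size_bytes); // mirrors ggml_backend_buffer_clear(buf, 0)
    free(buf);
}

int main() {
    init_kv_cache(1 << 20); // set DRYRUN=1 in the environment to exercise the guard on failure
    return 0;
}

In the patched constructors the effect is the same: with DRYRUN set, a failed KV-cache buffer allocation produces a log message and an early return instead of a thrown std::runtime_error.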