Commit 20e1211

llama : suggest reduce ctx size when kv init fails
1 parent bf60f27

File tree

2 files changed: +2 −1 lines changed

ggml/src/ggml-backend.cpp

Lines changed: 1 addition & 1 deletion

@@ -798,7 +798,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     void * data = ggml_aligned_malloc(size);
 
     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
         return NULL;
     }
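For context, a minimal standalone sketch of what the old and new format strings print for the same failed request; the 16 GiB value is hypothetical, chosen for illustration, and is not taken from this commit:

#include <cstdio>
#include <cstddef>

int main() {
    // hypothetical example value: a 16 GiB allocation request
    std::size_t size = 17179869184ull;

    // old format: raw byte count, hard to eyeball
    std::printf("failed to allocate buffer of size %zu\n", size);                       // 17179869184

    // new format: the same size converted to MiB with two decimals
    std::printf("failed to allocate buffer of size %.2f MiB\n", size / 1024.0 / 1024.0); // 16384.00 MiB
    return 0;
}

A failed request then reads as 16384.00 MiB rather than 17179869184, which is easier to compare against available memory at a glance.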

src/llama.cpp

Lines changed: 1 addition & 0 deletions

@@ -19520,6 +19520,7 @@ struct llama_context * llama_new_context_with_model(
 
     if (!llama_kv_cache_init(ctx->kv_self, ctx, type_k, type_v, kv_size, cparams.offload_kqv)) {
         LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
+        LLAMA_LOG_ERROR("%s: suggestion: try using a smaller context size (-c command line option or llama_context_params.n_ctx)\n", __func__);
         llama_free(ctx);
         return nullptr;
     }
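The suggestion is actionable because llama_new_context_with_model() returns nullptr on this failure path. Below is a hedged, illustrative sketch of caller-side handling; it is not part of this commit, the helper name, halving strategy, and 512-token floor are assumptions, and it relies only on the existing llama.h API (llama_context_default_params(), llama_new_context_with_model()):

#include "llama.h"

// Hypothetical helper, not from this commit: retry context creation with
// progressively smaller context sizes, mirroring the logged suggestion.
static llama_context * new_ctx_with_fallback(llama_model * model, uint32_t n_ctx) {
    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = n_ctx;

    llama_context * ctx = llama_new_context_with_model(model, cparams);
    while (ctx == nullptr && cparams.n_ctx > 512) {
        // KV cache allocation failed: try a smaller context size,
        // as the new error message suggests
        cparams.n_ctx /= 2;
        ctx = llama_new_context_with_model(model, cparams);
    }
    return ctx; // still nullptr if even the smallest size failed
}

A user of the command-line examples can do the same manually by passing a smaller value to the -c flag mentioned in the new log line.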

0 commit comments