
Commit a13ec57

llama : model-based max number of graph nodes calculation (#8970)
* llama : model-based max number of graph nodes calculation
* Update src/llama.cpp

Authors: Nico Bosshard & @slaren
1 parent b66d4df commit a13ec57

File tree

1 file changed (+2 lines, -6 lines)


src/llama.cpp

Lines changed: 2 additions & 6 deletions
@@ -4273,13 +4273,9 @@ namespace GGUFMeta {
 
 using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
-// TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
-    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
-    //    return 32768;
-    //}
+static size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max<size_t>(8192, model.tensors_by_name.size()*5);
 
-    return 65536;
 }
 
 struct llama_model_loader {
