
Commit a13ec57

llama : model-based max number of graph nodes calculation (#8970)
* llama : model-based max number of graph nodes calculation
* Update src/llama.cpp

Authors: Nico Bosshard & @slaren
1 parent b66d4df commit a13ec57

File tree

1 file changed (+2 lines, -6 lines)


src/llama.cpp

Lines changed: 2 additions & 6 deletions
@@ -4273,13 +4273,9 @@ namespace GGUFMeta {
 
 using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
-// TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
-    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
-    //    return 32768;
-    //}
+static size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max<size_t>(8192, model.tensors_by_name.size()*5);
 
-    return 65536;
 }
 
 struct llama_model_loader {
