@@ -465,7 +465,32 @@ namespace GGUFMeta {
465
465
466
466
// TODO: this is not very clever - figure out something better
467
467
template bool llama_model_loader::get_key_or_arr<std::array<int , 4 >>(enum llm_kv kid, std::array<int , 4 > & result, uint32_t n, bool required);
468
- template bool llama_model_loader::get_key_or_arr<std::array<uint32_t , 512 >>(enum llm_kv kid, std::array<uint32_t , 512 > & result, uint32_t n, bool required);
468
+ template bool llama_model_loader::get_key_or_arr<std::array<uint32_t , 512 >>(enum llm_kv kid,
469
+ std::array<uint32_t , 512 > & result,
470
+ uint32_t n, bool required);
471
+
472
+ // Save tensors data offset of the main file.
473
+ // For subsidiary files, `meta` tensor data offset must not be used,
474
+ // so we build a unified tensors index for weights.
475
+ void llama_model_loader::process_loaded_gguf (struct ggml_context * ctx, gguf_file_load & gguf_load, uint16_t idx) {
476
+ contexts.emplace_back (ctx);
477
+ files.emplace_back (std::move (gguf_load.file ));
478
+ llama_file * raw_file_ptr = files.back ().get ();
479
+
480
+ // Save tensors data offset info of the shard.
481
+ for (ggml_tensor * cur = ggml_get_first_tensor (ctx); cur; cur = ggml_get_next_tensor (ctx, cur)) {
482
+ std::string tensor_name = std::string (cur->name );
483
+ LLAMA_LOG_CMAKE_DEBUG (" %s: loaded tensor %s at split %d\n " , tensor_name.c_str (), __func__, idx);
484
+ // make sure there is no duplicated tensor names
485
+ if (weights_map.find (tensor_name) != weights_map.end ()) {
486
+ throw std::runtime_error (format (" invalid model: tensor '%s' is duplicated" , ggml_get_name (cur)));
487
+ }
488
+ n_elements += ggml_nelements (cur);
489
+ n_bytes += ggml_nbytes (cur);
490
+ weights_map.emplace (tensor_name,
491
+ llama_model_loader::llama_tensor_weight (raw_file_ptr, idx, gguf_load.meta .get (), cur));
492
+ }
493
+ }
469
494
470
495
llama_model_loader::llama_model_loader (
471
496
const std::string & fname,
@@ -489,27 +514,13 @@ llama_model_loader::llama_model_loader(
489
514
490
515
struct ggml_context * ctx = NULL ;
491
516
gguf_file_load main_gguf (&ctx, load_input_variant::fname_load_input{fname, splits});
517
+ process_loaded_gguf (ctx, main_gguf, 0 );
518
+
492
519
meta = std::move (main_gguf.meta );
493
520
494
521
get_key (llm_kv (LLM_KV_GENERAL_ARCHITECTURE), arch_name, false );
495
522
llm_kv = LLM_KV (llm_arch_from_string (arch_name));
496
523
497
- files.emplace_back (std::move (main_gguf.file ));
498
- contexts.emplace_back (ctx);
499
-
500
- // Save tensors data offset of the main file.
501
- // For subsidiary files, `meta` tensor data offset must not be used,
502
- // so we build a unified tensors index for weights.
503
- for (ggml_tensor * cur = ggml_get_first_tensor (ctx); cur; cur = ggml_get_next_tensor (ctx, cur)) {
504
- std::string tensor_name = std::string (cur->name );
505
- // make sure there is no duplicated tensor names
506
- if (weights_map.find (tensor_name) != weights_map.end ()) {
507
- throw std::runtime_error (format (" invalid model: tensor '%s' is duplicated" , ggml_get_name (cur)));
508
- }
509
- n_elements += ggml_nelements (cur);
510
- n_bytes += ggml_nbytes (cur);
511
- weights_map.emplace (tensor_name, llama_tensor_weight (files.back ().get (), 0 , meta.get (), cur));
512
- }
513
524
uint16_t n_split = 0 ;
514
525
get_key (llm_kv (LLM_KV_SPLIT_COUNT), n_split, false );
515
526
@@ -556,20 +567,7 @@ llama_model_loader::llama_model_loader(
556
567
}
557
568
}
558
569
559
- files.emplace_back (std::move (split_gguf.file ));
560
- contexts.emplace_back (ctx);
561
-
562
- // Save tensors data offset info of the shard.
563
- for (ggml_tensor * cur = ggml_get_first_tensor (ctx); cur; cur = ggml_get_next_tensor (ctx, cur)) {
564
- std::string tensor_name = std::string (cur->name );
565
- // make sure there is no duplicated tensor names
566
- if (weights_map.find (tensor_name) != weights_map.end ()) {
567
- throw std::runtime_error (format (" invalid model: tensor '%s' is duplicated" , ggml_get_name (cur)));
568
- }
569
- n_elements += ggml_nelements (cur);
570
- n_bytes += ggml_nbytes (cur);
571
- weights_map.emplace (tensor_name, llama_tensor_weight (files.back ().get (), idx, split_meta.get (), cur));
572
- }
570
+ process_loaded_gguf (ctx, split_gguf, idx);
573
571
}
574
572
575
573
get_key (llm_kv (LLM_KV_SPLIT_TENSORS_COUNT), n_tensors);
0 commit comments