Skip to content

Commit 81ec564

Browse files
committed
[refactor] Process file method
Move the loader code that processes a file after it has been loaded into memory and populates the loader's own attributes into a reusable method.
1 parent 296ba81 commit 81ec564

File tree

2 files changed

+32
-31
lines changed

2 files changed

+32
-31
lines changed

src/llama-model-loader.cpp

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,32 @@ namespace GGUFMeta {
465465

466466
// TODO: this is not very clever - figure out something better
467467
template bool llama_model_loader::get_key_or_arr<std::array<int, 4>>(enum llm_kv kid, std::array<int, 4> & result, uint32_t n, bool required);
468-
template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid, std::array<uint32_t, 512> & result, uint32_t n, bool required);
468+
template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid,
469+
std::array<uint32_t, 512> & result,
470+
uint32_t n, bool required);
471+
472+
// Save tensors data offset of the main file.
473+
// For subsidiary files, `meta` tensor data offset must not be used,
474+
// so we build a unified tensors index for weights.
475+
void llama_model_loader::process_loaded_gguf(struct ggml_context * ctx, gguf_file_load & gguf_load, uint16_t idx) {
476+
contexts.emplace_back(ctx);
477+
files.emplace_back(std::move(gguf_load.file));
478+
llama_file * raw_file_ptr = files.back().get();
479+
480+
// Save tensors data offset info of the shard.
481+
for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
482+
std::string tensor_name = std::string(cur->name);
483+
LLAMA_LOG_CMAKE_DEBUG("%s: loaded tensor %s at split %d\n", __func__, tensor_name.c_str(), idx);
484+
// make sure there is no duplicated tensor names
485+
if (weights_map.find(tensor_name) != weights_map.end()) {
486+
throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur)));
487+
}
488+
n_elements += ggml_nelements(cur);
489+
n_bytes += ggml_nbytes(cur);
490+
weights_map.emplace(tensor_name,
491+
llama_model_loader::llama_tensor_weight(raw_file_ptr, idx, gguf_load.meta.get(), cur));
492+
}
493+
}
469494

470495
llama_model_loader::llama_model_loader(
471496
const std::string & fname,
@@ -489,27 +514,13 @@ llama_model_loader::llama_model_loader(
489514

490515
struct ggml_context * ctx = NULL;
491516
gguf_file_load main_gguf(&ctx, load_input_variant::fname_load_input{fname, splits});
517+
process_loaded_gguf(ctx, main_gguf, 0);
518+
492519
meta = std::move(main_gguf.meta);
493520

494521
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
495522
llm_kv = LLM_KV(llm_arch_from_string(arch_name));
496523

497-
files.emplace_back(std::move(main_gguf.file));
498-
contexts.emplace_back(ctx);
499-
500-
// Save tensors data offset of the main file.
501-
// For subsidiary files, `meta` tensor data offset must not be used,
502-
// so we build a unified tensors index for weights.
503-
for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
504-
std::string tensor_name = std::string(cur->name);
505-
// make sure there is no duplicated tensor names
506-
if (weights_map.find(tensor_name) != weights_map.end()) {
507-
throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur)));
508-
}
509-
n_elements += ggml_nelements(cur);
510-
n_bytes += ggml_nbytes(cur);
511-
weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), 0, meta.get(), cur));
512-
}
513524
uint16_t n_split = 0;
514525
get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
515526

@@ -556,20 +567,7 @@ llama_model_loader::llama_model_loader(
556567
}
557568
}
558569

559-
files.emplace_back(std::move(split_gguf.file));
560-
contexts.emplace_back(ctx);
561-
562-
// Save tensors data offset info of the shard.
563-
for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
564-
std::string tensor_name = std::string(cur->name);
565-
// make sure there is no duplicated tensor names
566-
if (weights_map.find(tensor_name) != weights_map.end()) {
567-
throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur)));
568-
}
569-
n_elements += ggml_nelements(cur);
570-
n_bytes += ggml_nbytes(cur);
571-
weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), idx, split_meta.get(), cur));
572-
}
570+
process_loaded_gguf(ctx, split_gguf, idx);
573571
}
574572

575573
get_key(llm_kv(LLM_KV_SPLIT_TENSORS_COUNT), n_tensors);

src/llama-model-loader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "llama-impl.h"
66
#include "llama-arch.h"
77
#include "llama-mmap.h"
8+
#include "llama-model-load.h"
89

910
#include "ggml-cpp.h"
1011

@@ -91,6 +92,8 @@ struct llama_model_loader {
9192
size_t size_data = 0;
9293
std::vector<std::pair<size_t, size_t>> mmaps_used;
9394

95+
void process_loaded_gguf(struct ggml_context * ctx, gguf_file_load & gguf_load, uint16_t idx);
96+
9497
llama_model_loader(
9598
const std::string & fname,
9699
std::vector<std::string> & splits, // optional, only need if the split does not follow naming scheme

0 commit comments

Comments
 (0)