@@ -4271,17 +4271,17 @@ struct llama_model_loader {
 
         ggml_tensor * tensor;
 
-        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
-            const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+            const int tensor_idx = gguf_find_tensor(gguf_ctx, ggml_get_name(tensor));
             offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
 
             if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
-                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", ggml_get_name(tensor)));
             }
         }
     };
-    std::vector<llama_tensor_weight> weights;
 
+    std::unordered_map<std::string, struct llama_tensor_weight> weights_map;
     std::unordered_map<std::string, struct llama_model_kv_override> kv_overrides;
 
     struct gguf_context * meta = NULL;
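
The hunk above replaces the linear `weights` vector with a name-keyed `std::unordered_map`, so duplicate detection and by-name lookup move from O(n) scans to average O(1) hashing. The two hunks below apply the same check-then-insert pattern to the main file and to each split shard. A minimal standalone sketch of that pattern, with hypothetical names (`weight_info`, `add_weight`) standing in for the loader's actual types:

#include <stdexcept>
#include <string>
#include <unordered_map>

struct weight_info { int file_idx; }; // stand-in for llama_tensor_weight

static void add_weight(std::unordered_map<std::string, weight_info> & weights_map,
                       const std::string & name, int file_idx) {
    // reject duplicates before inserting, mirroring the loader's check
    if (weights_map.find(name) != weights_map.end()) {
        throw std::runtime_error("invalid model: tensor '" + name + "' is duplicated");
    }
    weights_map.emplace(name, weight_info{file_idx});
}

Since C++17 the find/emplace pair could also be collapsed into a single try_emplace call, whose returned bool reports whether the insertion actually happened.
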
@@ -4323,7 +4323,14 @@ struct llama_model_loader {
         // For subsidiary files, `meta` tensor data offset must not be used,
         // so we build a unified tensors index for weights.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
+            std::string tensor_name = std::string(cur->name);
+            // make sure there are no duplicated tensor names
+            if (weights_map.find(tensor_name) != weights_map.end()) {
+                throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur)));
+            }
+            n_elements += ggml_nelements(cur);
+            n_bytes    += ggml_nbytes(cur);
+            weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), 0, meta, cur));
         }
         uint16_t n_split = 0;
         get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
@@ -4363,7 +4370,14 @@ struct llama_model_loader {
 
             // Save tensors data offset info of the shard.
             for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
+                std::string tensor_name = std::string(cur->name);
+                // make sure there are no duplicated tensor names
+                if (weights_map.find(tensor_name) != weights_map.end()) {
+                    throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur)));
+                }
+                n_elements += ggml_nelements(cur);
+                n_bytes    += ggml_nbytes(cur);
+                weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), idx, ctx_gguf, cur));
             }
 
             gguf_free(ctx_gguf);
@@ -4373,7 +4387,7 @@ struct llama_model_loader {
 
         // sanity check
         {
-            const int n_tensors_loaded = (int) weights.size();
+            const int n_tensors_loaded = (int) weights_map.size();
             if (n_tensors != n_tensors_loaded) {
                 throw std::runtime_error(format("corrupted model: %d tensors expected but %d found", n_tensors, n_tensors_loaded));
             }
@@ -4383,23 +4397,10 @@ struct llama_model_loader {
         }
 
         n_kv      = gguf_get_n_kv(meta);
-        n_tensors = weights.size();
+        n_tensors = weights_map.size();
 
         fver = (enum llama_fver) gguf_get_version(meta);
 
-        std::set<std::string> tensor_names;
-        for (auto & w : weights) {
-            n_elements += ggml_nelements(w.tensor);
-            n_bytes    += ggml_nbytes(w.tensor);
-            // make sure there is no duplicated tensor names
-            const std::string name(w.tensor->name);
-            auto found = tensor_names.find(name);
-            if (found != tensor_names.end()) {
-                throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", w.tensor->name));
-            }
-            tensor_names.insert(name);
-        }
-
         LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
                 __func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(fver));
 
@@ -4411,8 +4412,10 @@ struct llama_model_loader {
         uint32_t n_type_max = 0;
         enum ggml_type type_max = GGML_TYPE_F32;
 
-        for (int i = 0; i < n_tensors; i++) {
-            const ggml_tensor * tensor = weights.at(i).tensor;
+        for (auto it = weights_map.begin(); it != weights_map.end(); it++) {
+            const llama_tensor_weight & w = it->second;
+            const ggml_tensor * tensor = w.tensor;
+
             enum ggml_type type = tensor->type;
 
             n_type[type]++;
@@ -4423,8 +4426,8 @@ struct llama_model_loader {
             }
 
             if (trace > 0) {
-                const uint16_t sid = weights.at(i).idx;
-                LLAMA_LOG_INFO("%s: - tensor %4d, split %2d: %32s %-8s [ %s ]\n", __func__, i, sid, ggml_get_name(tensor), ggml_type_name(type), llama_format_tensor_shape(tensor).c_str());
+                const uint16_t sid = w.idx;
+                LLAMA_LOG_INFO("%s: - tensor split %2d: %32s %-8s [ %s ]\n", __func__, sid, ggml_get_name(tensor), ggml_type_name(type), llama_format_tensor_shape(tensor).c_str());
             }
         }
 
@@ -4688,21 +4691,15 @@ struct llama_model_loader {
         return llm_kv.arch;
     }
 
-    const char * get_tensor_name(int i) const {
-        return weights.at(i).tensor->name;
-    }
-
     const llama_tensor_weight * get_weight(const char * name) const {
-        for (const auto & weight : weights) {
-            if (strcmp(name, weight.tensor->name) == 0) {
-                return &weight;
-            }
+        std::string tensor_name(name);
+
+        auto pos = weights_map.find(tensor_name);
+        if (pos != weights_map.end()) {
+            return &pos->second;
         }
-        return nullptr;
-    }
 
-    const llama_tensor_weight * get_weight(int i) const {
-        return get_weight(get_tensor_name(i));
+        return nullptr;
     }
 
     const llama_tensor_weight & require_weight(const char * name) const {
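
With the map in place, `get_weight` is a single hash lookup instead of a scan over every tensor. Returning `&pos->second` is also safe long-term: unlike `std::vector`, `std::unordered_map` guarantees that pointers and references to elements stay valid across rehashes and further insertions, so the returned pointer remains usable even after more weights are added. A self-contained sketch of the same pointer-returning lookup (the `weight_info` type is again a hypothetical stand-in, not the loader's real one):

#include <string>
#include <unordered_map>

struct weight_info { int file_idx; };

// nullptr signals "not found", mirroring get_weight() above.
static const weight_info * find_weight(const std::unordered_map<std::string, weight_info> & m,
                                       const std::string & name) {
    auto pos = m.find(name);
    return pos == m.end() ? nullptr : &pos->second;
}
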
@@ -4729,10 +4726,6 @@ struct llama_model_loader {
         return tensor;
     }
 
-    struct ggml_tensor * get_tensor_meta(int i) const {
-        return get_tensor_meta(get_tensor_name(i));
-    }
-
     const struct ggml_tensor * check_tensor_dims(const std::string & name, const std::vector<int64_t> & ne, bool required) const {
         const struct ggml_tensor * cur = get_tensor_meta(name.c_str());
 
@@ -4839,8 +4832,8 @@ struct llama_model_loader {
         }
 
         // compute the total size of all tensors for progress reporting
-        for (auto & w : weights) {
-            size_data += ggml_nbytes(w.tensor);
+        for (auto it = weights_map.begin(); it != weights_map.end(); it++) {
+            size_data += ggml_nbytes(it->second.tensor);
         }
     }
 
@@ -18595,10 +18588,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         }
     }
 
-    for (int i = 0; i < ml.n_tensors; ++i) {
-        const struct ggml_tensor * meta = ml.get_tensor_meta(i);
+    for (auto it = ml.weights_map.begin(); it != ml.weights_map.end(); ++it) {
+        const struct ggml_tensor * tensor = it->second.tensor;
 
-        const std::string name = ggml_get_name(meta);
+        const std::string name = ggml_get_name(tensor);
 
         // TODO: avoid hardcoded tensor names - use the TN_* constants
         if (name.find("attn_v.weight") != std::string::npos ||
@@ -18636,20 +18629,22 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     std::vector<no_init<float>> f32_conv_buf;
 
     uint16_t n_split = 1;
+    const auto & weights_map = ml.weights_map;
+
     // Assume split index is continuous
     if (params->keep_split) {
-        for (int i = 0; i < ml.n_tensors; ++i) {
-            n_split = std::max(uint16_t(ml.get_weight(i)->idx + 1), n_split);
+        for (auto it = weights_map.begin(); it != weights_map.end(); ++it) {
+            n_split = std::max(uint16_t(it->second.idx + 1), n_split);
         }
+
     }
     std::vector<gguf_context*> ctx_outs(n_split, NULL);
     ctx_outs[0] = ctx_out;
 
     // populate the original tensors so we get an initial meta data
-    for (int i = 0; i < ml.n_tensors; ++i) {
-        auto weight = ml.get_weight(i);
-        uint16_t i_split = params->keep_split ? weight->idx : 0;
-        struct ggml_tensor * tensor = weight->tensor;
+    for (auto it = weights_map.begin(); it != weights_map.end(); ++it) {
+        uint16_t i_split = params->keep_split ? it->second.idx : 0;
+        struct ggml_tensor * tensor = it->second.tensor;
         if (ctx_outs[i_split] == NULL) {
             ctx_outs[i_split] = gguf_init_empty();
         }
@@ -18696,12 +18691,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
     const auto tn = LLM_TN(model.arch);
     new_ofstream(0);
-    for (int i = 0; i < ml.n_tensors; ++i) {
-        auto weight = ml.get_weight(i);
-        struct ggml_tensor * tensor = weight->tensor;
-        if (weight->idx != cur_split && params->keep_split) {
+    for (auto it = weights_map.begin(); it != weights_map.end(); ++it) {
+        auto weight = it->second;
+        struct ggml_tensor * tensor = weight.tensor;
+        if (weight.idx != cur_split && params->keep_split) {
             close_ofstream();
-            new_ofstream(weight->idx);
+            new_ofstream(weight.idx);
         }
 
         const std::string name = ggml_get_name(tensor);
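
One behavioral consequence worth flagging: `std::unordered_map` does not preserve insertion order, so the quantization loops above now visit tensors in hash order rather than in the order they appear in the source GGUF. If the output file's tensor ordering ever needs to match the input (for byte-for-byte reproducibility, say), an order-preserving side index would be required. A minimal sketch of that idea, using only the standard library (this is not part of the patch):

#include <string>
#include <unordered_map>
#include <vector>

// Keeps O(1) lookup by name while remembering file order for iteration.
struct ordered_weights {
    std::unordered_map<std::string, size_t> index; // name -> slot in `order`
    std::vector<std::string>                 order; // names in insertion order

    bool add(const std::string & name) {
        if (!index.emplace(name, order.size()).second) {
            return false; // duplicate tensor name
        }
        order.push_back(name);
        return true;
    }
};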