Skip to content

Commit 056799f

Browse files
committed
Fix LLM_KV_BLOCK_COUNT retrieval
1 parent c128b28 commit 056799f

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/llama-quant.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,7 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
42 42
++next_id;
43 43
}
44 44

45-
std::string name = mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
46-
return name;
45+
return mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
47 46
}
48 47

49 48
return orig_name;
@@ -629,8 +628,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
629 628
gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
630 629

631 630
if (!prune_list.empty()) {
632-
const auto block_count = gguf_get_val_u32(ctx_out.get(), LLM_KV_BLOCK_COUNT) - prune_list.size();
633-
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count);
631+
uint32_t block_count = 0;
632+
ml.get_key(LLM_KV_BLOCK_COUNT, block_count);
633+
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count - prune_list.size());
634 634
}
635 635

636 636
// Remove split metadata

0 commit comments

Comments
 (0)