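Fix LLM_KV_BLOCK_COUNT retrieval: read the block count via ml.get_key() instead of gguf_get_val_u32(), and return the remapped name directly in remap_layer()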

EAddario · EAddario · commit 056799f377d4 · 2025-04-23T09:31:55.000+01:00
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
@@ -42,8 +42,7 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
             ++next_id;
         }
 
-        std::string name = mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
-        return name;
+        return mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
     }
 
     return orig_name;
@@ -629,8 +628,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
 
     if (!prune_list.empty()) {
-        const auto block_count = gguf_get_val_u32(ctx_out.get(), LLM_KV_BLOCK_COUNT) - prune_list.size();
-        gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count);
+        uint32_t block_count = 0;
+        ml.get_key(LLM_KV_BLOCK_COUNT, block_count);
+        gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count - prune_list.size());
     }
 
     // Remove split metadata

Original file line number	Diff line number	Diff line change
`@@ -42,8 +42,7 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<`
`42`	`42`	`++next_id;`
`43`	`43`	`}`
`44`	`44`
`45`		`- std::string name = mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);`
`46`		`- return name;`
	`45`	`+ return mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);`
`47`	`46`	`}`
`48`	`47`
`49`	`48`	`return orig_name;`
`@@ -629,8 +628,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::`
`629`	`628`	`gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV`
`630`	`629`
`631`	`630`	`if (!prune_list.empty()) {`
`632`		`- const auto block_count = gguf_get_val_u32(ctx_out.get(), LLM_KV_BLOCK_COUNT) - prune_list.size();`
`633`		`- gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count);`
	`631`	`+ uint32_t block_count = 0;`
	`632`	`+ ml.get_key(LLM_KV_BLOCK_COUNT, block_count);`
	`633`	`+ gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count - prune_list.size());`
`634`	`634`	`}`
`635`	`635`
`636`	`636`	`// Remove split metadata`