@@ -26,7 +26,7 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
-static std::string remap_layer(const std::string & orig_name, const std::vector<int>& prune, std::map<int, std::string>& mapped, int & next_id) {
+static std::string remap_layer(const std::string & orig_name, const std::vector<int> & prune, std::map<int, std::string> & mapped, int & next_id) {
     if (prune.empty()) {
         return orig_name;
     }
@@ -39,7 +39,7 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
         if (mapped.count(blk)) {
             // Already mapped, do nothing
         } else if (std::find(prune.begin(), prune.end(), blk) != prune.end()) {
-            mapped[blk] = "X";
+            mapped[blk] = "";
         } else if (blk < prune.front()) {
             mapped[blk] = std::to_string(blk);
             next_id = blk + 1;
@@ -48,13 +48,13 @@ static std::string remap_layer(const std::string & orig_name, const std::vector<
             ++next_id;
         }
 
-        return mapped[blk] == "X" ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
+        return mapped[blk].empty() ? mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]);
     }
 
     return orig_name;
 }
 
-static std::string remap_imatrix(const std::string & orig_name, const std::map<int, std::string>& mapped) {
+static std::string remap_imatrix(const std::string & orig_name, const std::map<int, std::string> & mapped) {
     if (mapped.empty()) {
         return orig_name;
     }
@@ -628,7 +628,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
 
     if (!prune_list.empty()) {
-        gguf_set_val_bool(ctx_out.get(), "general.pruned", true);
         uint32_t block_count = 0;
         ml.get_key(LLM_KV_BLOCK_COUNT, block_count);
         gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), block_count - prune_list.size());
@@ -667,10 +666,11 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     tensors.reserve(ml.weights_map.size());
     for (const auto & it : ml.weights_map) {
         const std::string remapped_name(remap_layer(it.first, prune_list, mapped, next_blk_id));
-        if (remapped_name == "X") {
-            if (it.first.find("attn_v.weight") != std::string::npos ||
-                it.first.find("attn_qkv.weight") != std::string::npos ||
-                it.first.find("attn_kv_b.weight")!= std::string::npos) {
+        if (remapped_name.empty()) {
+            if (false
+                || it.first.find("attn_v.weight") != std::string::npos
+                || it.first.find("attn_qkv.weight") != std::string::npos
+                || it.first.find("attn_kv_b.weight")!= std::string::npos) {
                 pruned_attention_w++;
             }
             LLAMA_LOG_DEBUG("%s: prunning tensor %s\n", __func__, it.first.c_str());
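
Note: the snippet below is a made-up, standalone sketch of the block renumbering that remap_layer performs now that pruned blocks map to the empty string; the prune list, the 5-block model size, and the main() driver are illustrative assumptions, not code from this commit.

// Standalone sketch (not part of this commit): reproduces the remap_layer
// renumbering rules for a hypothetical 5-block model with block 2 pruned.
#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
    const std::vector<int> prune = { 2 };   // hypothetical prune list
    std::map<int, std::string> mapped;      // old block id -> new id ("" = pruned)
    int next_id = 0;

    for (int blk = 0; blk < 5; ++blk) {     // hypothetical block count
        if (std::find(prune.begin(), prune.end(), blk) != prune.end()) {
            mapped[blk] = "";                       // pruned blocks map to the empty string
        } else if (blk < prune.front()) {
            mapped[blk] = std::to_string(blk);      // blocks before the first pruned one keep their id
            next_id = blk + 1;
        } else {
            mapped[blk] = std::to_string(next_id++); // later blocks are renumbered contiguously
        }
    }

    for (const auto & [blk, id] : mapped) {
        if (id.empty()) {
            printf("blk.%d -> pruned\n", blk);
        } else {
            printf("blk.%d -> blk.%s\n", blk, id.c_str());
        }
    }
    return 0;
}

With these assumptions the mapping comes out as blk.0 -> blk.0, blk.1 -> blk.1, blk.2 pruned, blk.3 -> blk.2, blk.4 -> blk.3, which is why an empty remapped_name is the signal to skip a tensor in the quantization loop above.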