
Commit 07c5c4f: Fix mamba layer in plamo2
Parent: 14df6ce

4 files changed: +299 -209 lines

convert_hf_to_gguf.py (0 additions, 2 deletions)

@@ -3577,8 +3577,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         new_name = self.map_tensor_name(name)
 
-        print(f"Plamo2Model: {name} -> {new_name}, shape={data_torch.shape}")
-
         return [(new_name, data_torch)]
 
 

gguf-py/gguf/tensor_mapping.py (1 addition, 0 deletions)

@@ -66,6 +66,7 @@ class TensorNameMap:
             "output",                    # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",            # phi2
+            "lm_head",                   # plamo2
             "output_layer",              # chatglm
             "head",                      # rwkv
             "head.out",                  # wavtokenizer
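For context: `TensorNameMap` in gguf-py resolves checkpoint tensor names to canonical GGUF names, and the converter's `map_tensor_name` (seen in the hunk above) goes through it. The new `"lm_head"` alias lets PLaMo 2 checkpoints, which expose the output head under that bare name, resolve to the GGUF `output` tensor. A minimal sketch of the lookup, assuming this gguf-py branch is installed; `MODEL_ARCH.LLAMA` is used purely for illustration, since any architecture that declares an `output` tensor shares this alias list:

```python
# Minimal sketch: resolve a checkpoint tensor name through gguf-py's
# TensorNameMap, the same lookup convert_hf_to_gguf.py performs in
# map_tensor_name(). MODEL_ARCH.LLAMA is illustrative only.
import gguf

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, n_blocks=32)

# With the "lm_head" alias added above, the bare output-head name now
# resolves; try_suffixes strips ".weight"/".bias" before the lookup and
# re-appends it to the result.
print(tmap.get_name("lm_head.weight", try_suffixes=(".weight", ".bias")))
# expected: "output.weight"
```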

src/llama-context.cpp (3 additions, 5 deletions)

@@ -701,6 +701,9 @@ llm_graph_result_ptr llama_context::process_ubatch(const llama_ubatch & ubatch,
 
     // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
 
+    // Dump computation graph for visualization
+    ggml_graph_dump_dot(gf, NULL, "llama.dot");
+
     if (!ggml_backend_sched_alloc_graph(sched.get(), gf)) {
         LLAMA_LOG_ERROR("%s: failed to allocate graph\n", __func__);
         ret = GGML_STATUS_ALLOC_FAILED;

@@ -1043,11 +1046,6 @@ int llama_context::decode(const llama_batch & batch_inp) {
         }
     }
 
-    // plot the computation graph in dot format (for debugging purposes)
-    //if (n_past%100 == 0) {
-    //    ggml_graph_dump_dot(gf, NULL, "llama.dot");
-    //}
-
     auto * t_logits = res->get_logits();
     auto * t_embd   = cparams.embeddings ? res->get_embd() : nullptr;
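The new `ggml_graph_dump_dot(gf, NULL, "llama.dot")` call writes the compute graph to `llama.dot` in the process's working directory before graph allocation, replacing the commented-out dump that used to live in `decode`. The DOT file can be rendered with Graphviz; a small helper sketch, assuming the `dot` binary is on `PATH`:

```python
# Minimal sketch: render the llama.dot file emitted by ggml_graph_dump_dot
# into an SVG. Requires Graphviz to be installed; large graphs can take a
# while to lay out.
import subprocess

subprocess.run(["dot", "-Tsvg", "llama.dot", "-o", "llama.svg"], check=True)
```

Since the dump now runs unconditionally for every ubatch, the file is overwritten each time and only the most recent graph is kept.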
