
Commit 40d0717

Add mamba part of plamo2

Merge commit (2 parents: e99335b + f7d51a5)

File tree: 7 files changed, +15 -4018 lines


convert_hf_to_gguf.py

Lines changed: 15 additions & 6 deletions
@@ -2371,11 +2371,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
                 hidden_size_per_head = self.hparams.get("hidden_size_per_head", 128)
                 d_inner = mamba_num_heads * hidden_size_per_head  # 64 * 128 = 8192
 
-                # Create tensor with correct shape {d_state, d_inner} = {64, 8192}
-                # Each row of the matrix should contain the same value from the original 1D tensor
-                new_tensor = data_torch.new_zeros((d_state, d_inner))
+                # Create tensor with correct shape {d_inner, d_state} = {8192, 64}
+                # Each column of the matrix should contain the same value from the original 1D tensor
+                new_tensor = data_torch.new_zeros((d_inner, d_state))
                 for i in range(d_state):
-                    new_tensor[i, :] = data_torch[i]  # Broadcast the single value across the inner dimension
+                    new_tensor[:, i] = data_torch[i]  # Broadcast the single value across the inner dimension
                 data_torch = new_tensor
                 logger.info(f"Expanded A tensor from {d_state} to shape: {data_torch.shape}")
             elif len(data_torch.shape) == 2:
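
The rewritten loop fills column i with the scalar data_torch[i], so every row of the new (d_inner, d_state) matrix is a copy of the original 1D tensor. A minimal standalone sketch of that equivalence, using a vectorized repeat in place of the loop (tensor names and random values are hypothetical, not from the commit):

import torch

d_state, d_inner = 64, 8192
a_1d = torch.randn(d_state)  # stand-in for the original 1D A tensor

# Loop version from the diff: column i holds a_1d[i], so every row equals a_1d
looped = a_1d.new_zeros((d_inner, d_state))
for i in range(d_state):
    looped[:, i] = a_1d[i]

# Vectorized equivalent: tile the 1D tensor across the d_inner rows
vectorized = a_1d.repeat(d_inner, 1)  # shape (8192, 64)

assert torch.equal(looped, vectorized)
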
@@ -2391,13 +2391,22 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
                 return [(new_name, data_torch)]
 
-        # Handle Mamba D tensor - ensure .weight suffix
+        # Handle Mamba D tensor - ensure .weight suffix and expand shape
         if name.endswith("mixer.D") or name.endswith("ssm.D"):
             new_name = self.map_tensor_name(name)
             # Add .weight suffix if not present
             if not new_name.endswith(".weight"):
                 new_name += ".weight"
-            logger.debug(f"D tensor ==> {new_name}")
+            logger.debug(f"D tensor ==> {new_name}, original shape: {data_torch.shape}")
+
+            # PLaMo2 D is shape {64} but llama.cpp expects {8192}
+            # Expand D to broadcast across d_inner dimension
+            if len(data_torch.shape) == 1 and data_torch.shape[0] == 64:
+                d_inner = 8192  # SSM inner size for PLaMo2
+                # Repeat D values across inner dimension
+                data_torch = data_torch.repeat(d_inner // data_torch.shape[0])
+                logger.debug(f"Expanded D tensor from 64 to shape: {data_torch.shape}")
+
             return [(new_name, data_torch)]
 
         # Handle Mamba conv1d tensor shape adjustment
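
For the D expansion, note that Tensor.repeat tiles the 64 values end to end ([d0..d63, d0..d63, ...]) rather than interleaving them. A small standalone sketch of the resulting layout (names and values hypothetical, not from the commit):

import torch

d = torch.arange(64, dtype=torch.float32)  # stand-in for the 64-element D tensor
d_inner = 8192

expanded = d.repeat(d_inner // d.shape[0])  # shape (8192,)

# Layout is tiled, not interleaved: the first two 64-blocks both equal d
assert expanded.shape == (d_inner,)
assert torch.equal(expanded[:64], d)
assert torch.equal(expanded[64:128], d)

If a consumer instead expected each value grouped contiguously ([d0, d0, ..., d63, d63]), repeat_interleave would produce that ordering; the commit uses plain tiling via repeat.
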

src/llama-arch.cpp

Lines changed: 0 additions & 1795 deletions
Large diffs are not rendered by default.
