
Commit f7d51a5

Update modify_tensors
1 parent 32eeac0 commit f7d51a5


convert_hf_to_gguf.py

Lines changed: 16 additions & 7 deletions
@@ -2352,29 +2352,38 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             # Transform A_log to A: A = -exp(A_log)
             data_torch = -torch.exp(data_torch)

-            # PLaMo2 A_log is shape {d_state} but llama.cpp expects {d_state, d_inner}
+            # PLaMo2 A_log is shape {d_state} but llama.cpp expects {d_inner, d_state}
             # Expand the tensor to the correct shape
             if len(data_torch.shape) == 1:
                 d_state = data_torch.shape[0]  # 64
                 d_inner = 8192  # SSM inner size for PLaMo2

-                # Create tensor with correct shape {d_state, d_inner} = {64, 8192}
-                # Each row of the matrix should contain the same value from the original 1D tensor
-                new_tensor = data_torch.new_zeros((d_state, d_inner))
+                # Create tensor with correct shape {d_inner, d_state} = {8192, 64}
+                # Each column of the matrix should contain the same value from the original 1D tensor
+                new_tensor = data_torch.new_zeros((d_inner, d_state))
                 for i in range(d_state):
-                    new_tensor[i, :] = data_torch[i]  # Broadcast the single value across the inner dimension
+                    new_tensor[:, i] = data_torch[i]  # Broadcast the single value across the inner dimension
                 data_torch = new_tensor
                 logger.debug(f"Expanded A tensor from {d_state} to shape: {data_torch.shape}")

             return [(new_name, data_torch)]

-        # Handle Mamba D tensor - ensure .weight suffix
+        # Handle Mamba D tensor - ensure .weight suffix and expand shape
         if name.endswith("mixer.D") or name.endswith("ssm.D"):
             new_name = self.map_tensor_name(name)
             # Add .weight suffix if not present
             if not new_name.endswith(".weight"):
                 new_name += ".weight"
-            logger.debug(f"D tensor ==> {new_name}")
+            logger.debug(f"D tensor ==> {new_name}, original shape: {data_torch.shape}")
+
+            # PLaMo2 D is shape {64} but llama.cpp expects {8192}
+            # Expand D to broadcast across d_inner dimension
+            if len(data_torch.shape) == 1 and data_torch.shape[0] == 64:
+                d_inner = 8192  # SSM inner size for PLaMo2
+                # Repeat D values across inner dimension
+                data_torch = data_torch.repeat(d_inner // data_torch.shape[0])
+                logger.debug(f"Expanded D tensor from 64 to shape: {data_torch.shape}")
+
             return [(new_name, data_torch)]

         # Handle Mamba conv1d tensor shape adjustment
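For context, here is a minimal sketch, not part of the commit, that checks the column-wise loop in the diff above against an equivalent vectorized broadcast. The stand-in sizes 4 and 16 replace the real d_state = 64 and d_inner = 8192 so the check runs instantly, and a_log is a random placeholder for PLaMo2's actual A_log tensor.

```python
import torch

d_state, d_inner = 4, 16      # stand-ins for 64 and 8192 from the diff
a_log = torch.randn(d_state)  # random placeholder for PLaMo2's A_log
a = -torch.exp(a_log)         # A = -exp(A_log), as in the diff

# Loop version from the commit: column i of every row holds a[i].
looped = a.new_zeros((d_inner, d_state))
for i in range(d_state):
    looped[:, i] = a[i]

# Vectorized equivalent: treat a as one row and expand it across d_inner rows.
# expand() returns a view; call .contiguous() if a real copy is needed.
vectorized = a.unsqueeze(0).expand(d_inner, d_state)

assert torch.equal(looped, vectorized)
print(vectorized.shape)  # torch.Size([16, 4])
```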

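Similarly, a small sketch, again not from the commit, illustrating what Tensor.repeat does to the D tensor: it tiles the whole 64-element vector d_inner // 64 times rather than repeating each element in place. The diff does not say which layout llama.cpp expects, so repeat_interleave is shown only for contrast, with hypothetical small values standing in for the real sizes.

```python
import torch

d = torch.tensor([1., 2., 3., 4.])  # stand-in for the 64-element D tensor
d_inner = 12                        # stand-in for 8192

# Tensor.repeat tiles the whole vector, as the commit does:
print(d.repeat(d_inner // d.shape[0]))
# tensor([1., 2., 3., 4., 1., 2., 3., 4., 1., 2., 3., 4.])

# repeat_interleave would instead repeat each element in place:
print(d.repeat_interleave(d_inner // d.shape[0]))
# tensor([1., 1., 1., 2., 2., 2., 3., 3., 3., 4., 4., 4.])
```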