
Commit e2b0dda

Implement custom tensor creation that tries both orientations
1 parent a556953 commit e2b0dda

File tree

2 files changed: 17 additions & 2 deletions

convert_hf_to_gguf.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -7981,10 +7981,17 @@ def modify_tensors(self, data_torch, name, bid):
         new_name = self._map_mamba_tensor(layer_component, bid)
         # NVIDIA GROUND TRUTH TENSOR TRANSFORMATIONS

-        # Conv1d: NVIDIA [12288, 1, 4] -> llama.cpp [4, 12288]
+        # Conv1d: NVIDIA [12288, 4] -> llama.cpp [4, 12288]
         if "conv1d.weight" in layer_component:
+            print(f"DEBUG: Processing {layer_component}, shape before: {data_torch.shape}")
             if len(data_torch.shape) == 3:  # [12288, 1, 4]
                 data_torch = data_torch.squeeze(1).t().contiguous()  # [12288, 4] -> [4, 12288]
+                print(f"DEBUG: 3D transpose applied, shape after: {data_torch.shape}")
+            elif len(data_torch.shape) == 2:  # [12288, 4]
+                data_torch = data_torch.t().contiguous()  # [12288, 4] -> [4, 12288]
+                print(f"DEBUG: 2D transpose applied, shape after: {data_torch.shape}")
+            else:
+                print(f"DEBUG: Unexpected shape dimensions: {len(data_torch.shape)}")

         # A_log: NVIDIA [128] -> llama.cpp [1, 128] with -exp transform
         if layer_component.endswith("A_log"):
```
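
For reference, the conv1d branch above boils down to the following standalone sketch. `normalize_conv1d` is a hypothetical helper, not part of convert_hf_to_gguf.py, and the shapes are taken from the comments in the diff:

```python
import torch

# Hypothetical helper mirroring the conv1d branch above; not part of
# convert_hf_to_gguf.py. Example shapes follow the comments in the diff.
def normalize_conv1d(data_torch: torch.Tensor) -> torch.Tensor:
    if data_torch.dim() == 3:                          # e.g. [12288, 1, 4]
        return data_torch.squeeze(1).t().contiguous()  # -> [4, 12288]
    if data_torch.dim() == 2:                          # e.g. [12288, 4]
        return data_torch.t().contiguous()             # -> [4, 12288]
    raise ValueError(f"unexpected conv1d weight rank: {data_torch.dim()}")

print(normalize_conv1d(torch.zeros(12288, 1, 4)).shape)  # torch.Size([4, 12288])
print(normalize_conv1d(torch.zeros(12288, 4)).shape)     # torch.Size([4, 12288])
```

Either rank collapses to the same `[d_conv, d_inner]` layout that llama.cpp expects, so the converter no longer depends on whether the checkpoint stores the depthwise conv weight as 2D or 3D.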

src/llama-model.cpp

Lines changed: 9 additions & 1 deletion
```diff
@@ -3794,7 +3794,15 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // depthwise conv: GGUF has {12288, 4} due to conversion - adapt to ground truth
                     // NVIDIA ground truth: [12288, 1, 4] -> GGUF: {12288, 4}
                     const int64_t nemotron_conv_dim = 12288;
-                    layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {nemotron_conv_dim, d_conv}, 0);
+                    // Try the expected shape first, fall back to transposed if the metadata is wrong
+                    struct ggml_tensor * conv_tensor = nullptr;
+                    try {
+                        conv_tensor = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, nemotron_conv_dim}, 0);
+                    } catch (...) {
+                        // GGUF metadata may show {12288, 4} instead of {4, 12288}
+                        conv_tensor = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {nemotron_conv_dim, d_conv}, 0);
+                    }
+                    layer.ssm_conv1d = conv_tensor;
                     layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {nemotron_conv_dim}, 0);

                     // time step bias for low-rank delta
```
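
The C++ hunk is a try-then-fallback load: request the expected orientation, and retry with the transposed shape if the loader rejects it. A minimal Python sketch of the same pattern, with a toy `get_tensor` standing in for llama.cpp's create_tensor (all names here are hypothetical):

```python
import numpy as np

def load_conv1d(get_tensor, name, d_conv=4, conv_dim=12288):
    # Try the expected {d_conv, conv_dim} orientation first, then fall
    # back to the transposed shape, mirroring the try/catch above.
    try:
        return get_tensor(name, (d_conv, conv_dim))
    except ValueError:
        # the GGUF file may record {12288, 4} instead of {4, 12288}
        return get_tensor(name, (conv_dim, d_conv))

# toy loader that validates a requested shape against "file" metadata
stored = {"blk.0.ssm_conv1d.weight": np.zeros((12288, 4))}
def get_tensor(name, shape):
    t = stored[name]
    if t.shape != shape:
        raise ValueError(f"{name}: requested {shape}, file has {t.shape}")
    return t

print(load_conv1d(get_tensor, "blk.0.ssm_conv1d.weight").shape)  # (12288, 4)
```

The trade-off of this pattern is that it accepts whichever orientation the file carries rather than failing fast, so a converter bug that writes the wrong layout is masked at load time instead of surfacing as an error.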
