
Commit e2b0dda

Implement custom tensor creation that tries both orientations
1 parent a556953 commit e2b0dda

File tree

2 files changed: 17 additions & 2 deletions

convert_hf_to_gguf.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -7981,10 +7981,17 @@ def modify_tensors(self, data_torch, name, bid):
         new_name = self._map_mamba_tensor(layer_component, bid)
         # NVIDIA GROUND TRUTH TENSOR TRANSFORMATIONS

-        # Conv1d: NVIDIA [12288, 1, 4] -> llama.cpp [4, 12288]
+        # Conv1d: NVIDIA [12288, 4] -> llama.cpp [4, 12288]
         if "conv1d.weight" in layer_component:
+            print(f"DEBUG: Processing {layer_component}, shape before: {data_torch.shape}")
             if len(data_torch.shape) == 3:  # [12288, 1, 4]
                 data_torch = data_torch.squeeze(1).t().contiguous()  # [12288, 4] -> [4, 12288]
+                print(f"DEBUG: 3D transpose applied, shape after: {data_torch.shape}")
+            elif len(data_torch.shape) == 2:  # [12288, 4]
+                data_torch = data_torch.t().contiguous()  # [12288, 4] -> [4, 12288]
+                print(f"DEBUG: 2D transpose applied, shape after: {data_torch.shape}")
+            else:
+                print(f"DEBUG: Unexpected shape dimensions: {len(data_torch.shape)}")

         # A_log: NVIDIA [128] -> llama.cpp [1, 128] with -exp transform
         if layer_component.endswith("A_log"):
```
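
For reference, the conv1d branch above boils down to the following standalone sketch. `normalize_conv1d` is a hypothetical helper, not part of convert_hf_to_gguf.py, and the shapes are taken from the comments in the diff:

```python
import torch

# Hypothetical helper mirroring the conv1d branch above; not part of
# convert_hf_to_gguf.py. Example shapes follow the comments in the diff.
def normalize_conv1d(data_torch: torch.Tensor) -> torch.Tensor:
    if data_torch.dim() == 3:                          # e.g. [12288, 1, 4]
        return data_torch.squeeze(1).t().contiguous()  # -> [4, 12288]
    if data_torch.dim() == 2:                          # e.g. [12288, 4]
        return data_torch.t().contiguous()             # -> [4, 12288]
    raise ValueError(f"unexpected conv1d weight rank: {data_torch.dim()}")

print(normalize_conv1d(torch.zeros(12288, 1, 4)).shape)  # torch.Size([4, 12288])
print(normalize_conv1d(torch.zeros(12288, 4)).shape)     # torch.Size([4, 12288])
```

Either rank collapses to the same `[d_conv, d_inner]` layout that llama.cpp expects, so the converter no longer depends on whether the checkpoint stores the depthwise conv weight as 2D or 3D.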

src/llama-model.cpp

Lines changed: 9 additions & 1 deletion
```diff
@@ -3794,7 +3794,15 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // depthwise conv: GGUF has {12288, 4} due to conversion - adapt to ground truth
                     // NVIDIA ground truth: [12288, 1, 4] -> GGUF: {12288, 4}
                     const int64_t nemotron_conv_dim = 12288;
-                    layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {nemotron_conv_dim, d_conv}, 0);
+                    // Try the expected shape first, fall back to transposed if the metadata is wrong
+                    struct ggml_tensor * conv_tensor = nullptr;
+                    try {
+                        conv_tensor = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, nemotron_conv_dim}, 0);
+                    } catch (...) {
+                        // GGUF metadata may show {12288, 4} instead of {4, 12288}
+                        conv_tensor = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {nemotron_conv_dim, d_conv}, 0);
+                    }
+                    layer.ssm_conv1d = conv_tensor;
                     layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {nemotron_conv_dim}, 0);

                     // time step bias for low-rank delta
```
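
The C++ hunk is a try-then-fallback load: request the expected orientation, and retry with the transposed shape if the loader rejects it. A minimal Python sketch of the same pattern, with a toy `get_tensor` standing in for llama.cpp's create_tensor (all names here are hypothetical):

```python
import numpy as np

def load_conv1d(get_tensor, name, d_conv=4, conv_dim=12288):
    # Try the expected {d_conv, conv_dim} orientation first, then fall
    # back to the transposed shape, mirroring the try/catch above.
    try:
        return get_tensor(name, (d_conv, conv_dim))
    except ValueError:
        # the GGUF file may record {12288, 4} instead of {4, 12288}
        return get_tensor(name, (conv_dim, d_conv))

# toy loader that validates a requested shape against "file" metadata
stored = {"blk.0.ssm_conv1d.weight": np.zeros((12288, 4))}
def get_tensor(name, shape):
    t = stored[name]
    if t.shape != shape:
        raise ValueError(f"{name}: requested {shape}, file has {t.shape}")
    return t

print(load_conv1d(get_tensor, "blk.0.ssm_conv1d.weight").shape)  # (12288, 4)
```

The trade-off of this pattern is that it accepts whichever orientation the file carries rather than failing fast, so a converter bug that writes the wrong layout is masked at load time instead of surfacing as an error.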
