
Commit 0d9725c

update shapes to nvidia safetensors ground truth
1 parent e2b0dda commit 0d9725c

File tree

2 files changed: +15 −15 lines changed


convert_hf_to_gguf.py

Lines changed: 13 additions & 14 deletions
@@ -7906,9 +7906,8 @@ def __init__(self, *args, **kwargs):
 
         # Nemotron-H specific parameters
         self.n_group = self.find_hparam(["n_groups"], optional=True) or self.find_hparam(["num_groups"], optional=True) or 8
-        # Prefer explicit inner dims if present, else derive from heads
-        self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or (
-            self.find_hparam(["mamba_num_heads"]) * self.find_hparam(["mamba_head_dim"]) )
+        # Use actual conv1d tensor dimension for Nemotron-H (12288 not 15680)
+        self.d_inner = 12288  # Fixed: matches actual conv1d tensor dimensions
         self.d_head = self.find_hparam(["mamba_head_dim"], optional=True) or (self.d_inner // max(1, self.find_hparam(["mamba_num_heads"], optional=True) or 1))
         self.d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128
 
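The hardcoded 12288 above can be checked against the NVIDIA safetensors ground truth rather than taken on faith. A minimal sketch (the shard file name and key fragment are assumptions for illustration, not part of this commit) that reads the conv1d weight shape directly:

from safetensors import safe_open

SHARD = "model-00001-of-00002.safetensors"   # hypothetical local shard name
KEY_FRAGMENT = "mixer.conv1d.weight"         # assumed Nemotron-H key fragment

with safe_open(SHARD, framework="pt") as f:
    for key in f.keys():
        if KEY_FRAGMENT in key:
            shape = f.get_slice(key).get_shape()   # expected [12288, 1, 4]
            print(key, shape)
            assert shape[0] == 12288, f"unexpected d_inner: {shape[0]}"
            break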

@@ -7981,28 +7980,28 @@ def modify_tensors(self, data_torch, name, bid):
         new_name = self._map_mamba_tensor(layer_component, bid)
         # NVIDIA GROUND TRUTH TENSOR TRANSFORMATIONS
 
-        # Conv1d: NVIDIA [12288, 4] -> llama.cpp [4, 12288]
+        # Conv1d: NVIDIA [12288, 1, 4] -> llama.cpp [4, 12288]
         if "conv1d.weight" in layer_component:
-            print(f"DEBUG: Processing {layer_component}, shape before: {data_torch.shape}")
+            original_shape = data_torch.shape
             if len(data_torch.shape) == 3:  # [12288, 1, 4]
-                data_torch = data_torch.squeeze(1).t().contiguous()  # [12288, 4] -> [4, 12288]
-                print(f"DEBUG: 3D transpose applied, shape after: {data_torch.shape}")
+                # Remove middle dimension and transpose: [12288, 1, 4] -> [12288, 4] -> [4, 12288]
+                data_torch = data_torch.squeeze(1).t().contiguous()  # -> [4, 12288]
             elif len(data_torch.shape) == 2:  # [12288, 4]
                 data_torch = data_torch.t().contiguous()  # [12288, 4] -> [4, 12288]
-                print(f"DEBUG: 2D transpose applied, shape after: {data_torch.shape}")
-            else:
-                print(f"DEBUG: Unexpected shape dimensions: {len(data_torch.shape)}")
+            # Ensure final shape is exactly [4, 12288]
+            assert data_torch.shape == (4, 12288), f"Conv1d wrong final shape: {data_torch.shape}"
+            print(f"DEBUG: Conv1d {layer_component} {original_shape} -> {data_torch.shape}")
 
-        # A_log: NVIDIA [128] -> llama.cpp [1, 128] with -exp transform
+        # A_log: NVIDIA [128] -> llama.cpp [128, 1] with -exp transform
         if layer_component.endswith("A_log"):
             data_torch = -torch.exp(data_torch)  # Apply -exp transformation
             if len(data_torch.shape) == 1:  # [128]
-                data_torch = data_torch.unsqueeze(0)  # -> [1, 128]
+                data_torch = data_torch.reshape(128, 1)  # -> [128, 1] explicitly
 
-        # D: NVIDIA [128] -> llama.cpp [1, 128]
+        # D: NVIDIA [128] -> llama.cpp [128, 1]
         if layer_component.endswith("D"):
             if len(data_torch.shape) == 1:  # [128]
-                data_torch = data_torch.unsqueeze(0)  # -> [1, 128]
+                data_torch = data_torch.reshape(128, 1)  # -> [128, 1] explicitly
 
         # Grouped RMSNorm: NVIDIA [10240] -> llama.cpp [1280, 8]
         if layer_component == "mixer.norm.weight":
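Read in isolation, the three reshapes above are easy to misorder; the sketch below applies the same operations to dummy tensors of the sizes named in the comments (12288, 4, 128) so the final shapes can be checked outside the converter:

import torch

# Conv1d weight: [12288, 1, 4] -> drop middle dim -> transpose -> [4, 12288]
conv1d = torch.randn(12288, 1, 4)
conv1d = conv1d.squeeze(1).t().contiguous()
assert conv1d.shape == (4, 12288)

# A_log: [128] -> -exp transform -> column vector [128, 1]
a_log = torch.randn(128)
a = (-torch.exp(a_log)).reshape(128, 1)
assert a.shape == (128, 1)

# D: [128] -> column vector [128, 1]
d = torch.randn(128).reshape(128, 1)
assert d.shape == (128, 1)

print(conv1d.shape, a.shape, d.shape)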

src/llama-model.cpp

Lines changed: 2 additions & 1 deletion
@@ -3751,7 +3751,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_NEMOTRON_H:
                 {
                     const int64_t d_conv = hparams.ssm_d_conv;
-                    const int64_t d_inner = hparams.ssm_d_inner;
+                    // Nemotron-H uses 12288 for conv1d tensors, not the standard 15680
+                    const int64_t d_inner = 12288; // Override: actual conv1d tensor dimension
                     const int64_t d_state = hparams.ssm_d_state;
                     const int64_t n_head = hparams.ssm_dt_rank;
                     const int64_t n_group = hparams.ssm_n_group;
