@@ -7982,15 +7982,21 @@ def modify_tensors(self, data_torch, name, bid):
         # Special handling for conv1d: reshape from 3D to 2D
         if "conv1d.weight" in layer_component and len(data_torch.shape) == 3:
             data_torch = data_torch.squeeze(1)  # Remove middle dimension: {4,1,12288} -> {4,12288}
-        # A_log -> A = -exp(A_log) and reshape from [128,1,1,1] to [1,128]
+        # A_log -> A = -exp(A_log) and ensure [1,128] shape for llama.cpp
         if layer_component.endswith("A_log"):
             data_torch = -torch.exp(data_torch)
-            if len(data_torch.shape) == 4 and data_torch.shape[1:] == (1, 1, 1):
-                data_torch = data_torch.reshape(1, data_torch.shape[0])  # [128,1,1,1] -> [1,128]
-        # D tensor also needs reshaping from [128,1,1,1] to [1,128]
+            # Ensure 2D shape [1, d_state] for llama.cpp compatibility
+            if len(data_torch.shape) == 1:
+                data_torch = data_torch.unsqueeze(-1)  # [128] -> [128,1] -> store as [1,128] in GGUF
+            elif len(data_torch.shape) == 4 and data_torch.shape[1:] == (1, 1, 1):
+                data_torch = data_torch.reshape(data_torch.shape[0], 1)  # [128,1,1,1] -> [128,1]
+        # D tensor also needs reshaping to [1,128] for llama.cpp
         if layer_component.endswith("D"):
-            if len(data_torch.shape) == 4 and data_torch.shape[1:] == (1, 1, 1):
-                data_torch = data_torch.reshape(1, data_torch.shape[0])  # [128,1,1,1] -> [1,128]
+            # Ensure 2D shape [1, d_state] for llama.cpp compatibility
+            if len(data_torch.shape) == 1:
+                data_torch = data_torch.unsqueeze(-1)  # [128] -> [128,1] -> store as [1,128] in GGUF
+            elif len(data_torch.shape) == 4 and data_torch.shape[1:] == (1, 1, 1):
+                data_torch = data_torch.reshape(data_torch.shape[0], 1)  # [128,1,1,1] -> [128,1]
         # Grouped RMSNorm reshape to [actual_size/n_group, n_group]
         if layer_component == "mixer.norm.weight":
             actual_size = data_torch.numel()
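
For reference, a minimal standalone sketch of the shape normalization this hunk applies to the A_log and D tensors; the to_state_column helper and the d_state of 128 are illustrative, not part of the patch. GGUF stores dimensions in reverse order relative to PyTorch, so a torch tensor of shape [128, 1] is written as [1, 128] in the file, as the comments above note:

import torch

def to_state_column(t: torch.Tensor) -> torch.Tensor:
    """Normalize an A/D-style tensor to [d_state, 1] before GGUF export (sketch)."""
    if t.dim() == 1:                                # e.g. [128]
        return t.unsqueeze(-1)                      # -> [128, 1]
    if t.dim() == 4 and t.shape[1:] == (1, 1, 1):   # e.g. [128, 1, 1, 1]
        return t.reshape(t.shape[0], 1)             # -> [128, 1]
    return t                                        # already in the expected shape

a = to_state_column(-torch.exp(torch.randn(128)))   # A = -exp(A_log), 1D checkpoint layout
d = to_state_column(torch.ones(128, 1, 1, 1))       # D in the 4D checkpoint layout
assert a.shape == (128, 1) and d.shape == (128, 1)  # written as [1, 128] by the GGUF writer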