@@ -2292,6 +2292,21 @@ def set_gguf_parameters(self):
         # Store which layers use full attention vs sliding window
         # This may need custom handling in llama.cpp
         pass
+
+        # Set layer block types for PLaMo2 hybrid architecture
+        # PLaMo2 alternates between mamba and attention layers
+        mamba_step = hparams.get("mamba_step", 2)
+        num_layers = hparams.get("num_hidden_layers", 32)
+
+        layer_types = []
+        for i in range(num_layers):
+            # Based on PLaMo2 architecture: even layers are mamba, odd layers are attention
+            if i % mamba_step == 0:
+                layer_types.append("mamba")
+            else:
+                layer_types.append("attention")
+
+        self.gguf_writer.add_array("plamo2.layers_block_type", layer_types)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # Handle Plamo2 specific tensor naming
@@ -2327,22 +2342,101 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         # Handle Mamba-specific A_log tensor transformation
         if name.endswith(".A_log"):
-            # Map the tensor name first
+            # Map the A_log tensor directly to ssm_a
             new_name = self.map_tensor_name(name)
-            logger.debug(f"A_log --> A ==> {new_name}")
+            # Add .weight suffix if not present
+            if not new_name.endswith(".weight"):
+                new_name += ".weight"
+            logger.debug(f"A_log --> A ==> {new_name}, original shape: {data_torch.shape}")
+
             # Transform A_log to A: A = -exp(A_log)
             data_torch = -torch.exp(data_torch)
+
+            # PLaMo2 A_log is shape {d_state} but llama.cpp expects {d_state, d_inner}
+            # Expand the tensor to the correct shape
+            if len(data_torch.shape) == 1:
+                d_state = data_torch.shape[0]  # 64
+                d_inner = 8192  # SSM inner size for PLaMo2
+
+                # Create tensor with correct shape {d_state, d_inner} = {64, 8192}
+                # Each row of the matrix should contain the same value from the original 1D tensor
+                new_tensor = data_torch.new_zeros((d_state, d_inner))
+                for i in range(d_state):
+                    new_tensor[i, :] = data_torch[i]  # Broadcast the single value across the inner dimension
+                data_torch = new_tensor
+                logger.debug(f"Expanded A tensor from {d_state} to shape: {data_torch.shape}")
+
+            return [(new_name, data_torch)]
+
+        # Handle Mamba D tensor - ensure .weight suffix
+        if name.endswith("mixer.D") or name.endswith("ssm.D"):
+            new_name = self.map_tensor_name(name)
+            # Add .weight suffix if not present
+            if not new_name.endswith(".weight"):
+                new_name += ".weight"
+            logger.debug(f"D tensor ==> {new_name}")
             return [(new_name, data_torch)]
 
         # Handle Mamba conv1d tensor shape adjustment
-        if "mixer.conv1d" in name:
+        if "mixer.conv1d" in name or ".ssm.conv1d" in name:
             new_name = self.map_tensor_name(name)
-            # Squeeze the conv1d tensor if needed
-            if len(data_torch.shape) == 4:
+            # For PLaMo2 conv1d tensors, reshape from (kernel_size, 1, d_inner) to (d_inner, kernel_size)
+            if len(data_torch.shape) == 3 and data_torch.shape[1] == 1:
+                # PLaMo2 conv1d is (kernel_size, 1, d_inner), needs to be (d_inner, kernel_size)
+                data_torch = data_torch.squeeze(1).transpose(0, 1)
+            elif len(data_torch.shape) == 4:
+                # For other formats, squeeze and transpose as needed
                 data_torch = data_torch.squeeze()
+                if len(data_torch.shape) == 2:
+                    # If it ends up as (kernel_size, d_inner), transpose to (d_inner, kernel_size)
+                    if data_torch.shape[0] < data_torch.shape[1]:
+                        data_torch = data_torch.transpose(0, 1)
             return [(new_name, data_torch)]
 
-        return super().modify_tensors(data_torch, name, bid)
+        # Handle Mamba ssm_dt tensor shape adjustment
+        if "mixer.dt_proj" in name:
+            new_name = self.map_tensor_name(name)
+            logger.debug(f"Processing dt_proj tensor: {name} -> {new_name}, original shape: {data_torch.shape}")
+
+            # For PLaMo2 dt_proj tensors, original shape is (64, 256) but llama.cpp expects (256, 8192)
+            # The GGUF writer seems to transpose tensors, so we need to account for that.
+            # We want the final result to be (256, 8192) after GGUF transposition
+
+            # First transpose from (64, 256) to (256, 64)
+            if len(data_torch.shape) == 2 and data_torch.shape[0] == 64 and data_torch.shape[1] == 256:
+                data_torch = data_torch.transpose(0, 1)  # Now (256, 64)
+                logger.debug(f"Transposed dt_proj to shape: {data_torch.shape}")
+
+            # Expand the second dimension from 64 to 8192 (ssm_inner_size)
+            if len(data_torch.shape) == 2 and data_torch.shape[1] == 64:
+                # ssm_inner_size should be 8192 for PLaMo2 (64 heads * 128 dim_per_head)
+                expected_inner_size = 8192
+                repeat_factor = expected_inner_size // data_torch.shape[1]
+                data_torch = data_torch.repeat(1, repeat_factor)
+                logger.debug(f"Expanded dt_proj to shape: {data_torch.shape}")
+
+            # Since GGUF writer might transpose, we need to ensure we get (256, 8192) in the end
+            # If we currently have (256, 8192) and GGUF transposes to (8192, 256),
+            # we need to pre-transpose to (8192, 256) so GGUF ends up with (256, 8192)
+            if len(data_torch.shape) == 2 and data_torch.shape == torch.Size([256, 8192]):
+                data_torch = data_torch.transpose(0, 1)  # Pre-transpose to (8192, 256)
+                logger.debug(f"Pre-transposed dt_proj for GGUF writer: {data_torch.shape}")
+
+            return [(new_name, data_torch)]
+
+        # Fix tensor name mappings for PLaMo2 to match llama.cpp expectations
+        result = super().modify_tensors(data_torch, name, bid)
+        fixed_result = []
+        for tensor_name, tensor_data in result:
+            # Map PLaMo2-specific norm tensor names to match llama.cpp expectations
+            if ".attn_norm_2.weight" in tensor_name:
+                tensor_name = tensor_name.replace(".attn_norm_2.weight", ".post_attn_norm.weight")
+            elif ".post_ffw_norm.weight" in tensor_name:
+                tensor_name = tensor_name.replace(".post_ffw_norm.weight", ".post_mlp_norm.weight")
+
+            fixed_result.append((tensor_name, tensor_data))
+
+        return fixed_result
 
 
 @ModelBase.register("DeciLMForCausalLM")