@@ -3417,6 +3417,167 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(new_name, data_torch)]


+@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
+class Plamo2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.PLAMO2
+
+    def set_vocab(self):
+        # PLaMo 2 uses a custom tokenizer with a .jsonl file
+        # We need to handle this specially
+        tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl"
+        tokenizer_config_path = self.dir_model / "tokenizer_config.json"
+
+        if not tokenizer_jsonl_path.is_file():
+            raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}")
+
+        # Load tokenizer config
+        with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
+            tokenizer_config = json.load(f)
+
+        # Load tokens from the JSONL file (each line is a JSON list, not a JSON object)
+        tokens = []
+        scores = []
+        toktypes = []
+
+        with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f):
+                if line.strip():
+                    token_data = json.loads(line)
+                    # Format: [token, score, type, ?, ?, ?, ?]
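+                    # e.g. a line might look like ["<pad>", 0.0, "CONTROL", ...] (illustrative; fields beyond the first three are ignored here)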
+                    token = token_data[0].encode("utf-8")
+                    score = float(token_data[1])
+                    token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL"
+
+                    tokens.append(token)
+                    scores.append(score)
+
+                    # Map token type strings to GGUF token types
+                    if token_type_str == "UNKNOWN":
+                        toktypes.append(gguf.TokenType.UNKNOWN)
+                    elif token_type_str == "CONTROL":
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    elif token_type_str == "BYTE":
+                        toktypes.append(gguf.TokenType.BYTE)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+        # Use the "llama" (SPM) tokenizer type, which doesn't require merges;
+        # PLaMo 2's tokenizer is more similar to SPM than GPT2
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # Add special tokens from config
+        if "bos_token_id" in tokenizer_config:
+            self.gguf_writer.add_bos_token_id(tokenizer_config["bos_token_id"])
+        if "eos_token_id" in tokenizer_config:
+            self.gguf_writer.add_eos_token_id(tokenizer_config["eos_token_id"])
+        if "pad_token_id" in tokenizer_config:
+            self.gguf_writer.add_pad_token_id(tokenizer_config["pad_token_id"])
+        if "unk_token_id" in tokenizer_config:
+            self.gguf_writer.add_unk_token_id(tokenizer_config["unk_token_id"])
+
+        self.gguf_writer.add_add_space_prefix(False)
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
+        self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
+        self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0))
+
+        # Mamba parameters
+        self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
+        self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
+        self.gguf_writer.add_ssm_num_heads(hparams.get("mamba_num_heads", 64))
+        self.gguf_writer.add_ssm_head_dim(hparams.get("hidden_size_per_head", 128))
+        self.gguf_writer.add_ssm_inner_size(hparams.get("hidden_size_per_head", 128) * hparams.get("mamba_num_heads", 64))
+        self.gguf_writer.add_ssm_time_step_rank(hparams.get("time_step_limit", 192))
+        self.gguf_writer.add_ssm_dt_min(hparams.get("time_step_min", 0.001))
+        self.gguf_writer.add_ssm_dt_max(hparams.get("time_step_max", 0.1))
+        self.gguf_writer.add_hybrid_mamba_step(hparams.get("mamba_step", 2))
+
+        # MLP feed-forward parameters (for attention layers)
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384))
+
+        # Which layers are Mamba layers
+        # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer)
+        # This logic matches modeling_plamo.py's is_mamba function
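+        # e.g. with the default mamba_step=2, even-indexed layers are Mamba and odd-indexed layers use attention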
+        mamba_step = hparams.get("mamba_step", 2)
+        mamba_enabled = hparams.get("mamba_enabled", True)
+        mamba_layers = []
+
+        if mamba_enabled:
+            for i in range(block_count):
+                if block_count <= (mamba_step // 2):
+                    # use attention in last layer
+                    is_mamba = (i != block_count - 1)
+                else:
+                    is_mamba = (i % mamba_step) != (mamba_step // 2)
+                if is_mamba:
+                    mamba_layers.append(i)
+
+        if mamba_layers:
+            self.gguf_writer.add_hybrid_mamba_layers(mamba_layers)
+
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+        elif name.endswith(".dt_norm_weight"):
+            name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
+        elif name.endswith(".B_norm_weight"):
+            name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight"
+        elif name.endswith(".C_norm_weight"):
+            name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight"
+        elif name.endswith(".k_weight"):
+            name = name.rpartition(".k_weight")[0] + ".k.weight"
+        elif name.endswith(".q_weight"):
+            name = name.rpartition(".q_weight")[0] + ".q.weight"
+        elif name.endswith(".conv1d.weight"):
+            data_torch = torch.squeeze(data_torch)  # squeeze out the size-1 axis of the conv1d weight
+            assert data_torch.ndim == 2
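+        # NOTE: the constant offsets added to the norm weights below (1.0, 1/5, 5**-1.5)
+        # are assumed to fold PLaMo 2's fixed normalization offsets/scales into the
+        # stored weights, mirroring the reference modeling code; this is an assumption.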
+        elif name.endswith(".pre_mixer_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mixer_norm.weight"):
+            data_torch += 1.0 / 5
+        elif name.endswith(".pre_mlp_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mlp_norm.weight"):
+            data_torch += 1.0 / (5 ** 1.5)
+        elif name.endswith(".gate_up_proj.weight"):
+            # Split the combined gate_up tensor
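+            # (assumes the fused projection stacks the gate rows first and the up rows second)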
+            split_size = data_torch.shape[0] // 2
+            gate_tensor = data_torch[:split_size, :]
+            up_tensor = data_torch[split_size:, :]
+
+            # Build separate ffn_gate / ffn_up names (dropping the .gate_up_proj.weight suffix) and return both tensors
+            name_base = name.replace(".gate_up_proj.weight", "")
+            gate_name = name_base + ".ffn_gate.weight"
+            up_name = name_base + ".ffn_up.weight"
+
+            gate_mapped = self.map_tensor_name(gate_name)
+            up_mapped = self.map_tensor_name(up_name)
+
+            return [(gate_mapped, gate_tensor), (up_mapped, up_tensor)]
+
+        new_name = self.map_tensor_name(name)
+
+        print(f"Plamo2Model: {name} -> {new_name}, shape={data_torch.shape}")
+
+        return [(new_name, data_torch)]
+
+
 @ModelBase.register("CodeShellForCausalLM")
 class CodeShellModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CODESHELL