@@ -3476,6 +3476,183 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(new_name, data_torch)]


+@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
+class Plamo2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.PLAMO2
+
+    def set_vocab(self):
+        # PLaMo 2 uses a custom tokenizer with a .jsonl file
+        # We need to handle this specially
+        tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl"
+        tokenizer_config_path = self.dir_model / "tokenizer_config.json"
+
+        if not tokenizer_jsonl_path.is_file():
+            raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}")
+
+        # Load tokenizer config
+        with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
+            tokenizer_config = json.load(f)
+
+        # Load tokens from the JSONL file (each line is a JSON list)
+        tokens = []
+        scores = []
+        toktypes = []
+
+        with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                if line.strip():
+                    token_data = json.loads(line)
+                    # Format: [token, score, type, ?, ?, ?, ?]
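+                    # A line might look like, e.g. (illustrative values only,
+                    # not taken from an actual PLaMo 2 release):
+                    #   ["<|plamo:bos|>", 0.0, "CONTROL"]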
+                    token = token_data[0].encode("utf-8")
+                    score = float(token_data[1])
+                    token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL"
+
+                    tokens.append(token)
+                    scores.append(score)
+
+                    # Map token type strings to GGUF token types
+                    if token_type_str == "UNKNOWN":
+                        toktypes.append(gguf.TokenType.UNKNOWN)
+                    elif token_type_str == "CONTROL":
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    elif token_type_str == "BYTE":
+                        toktypes.append(gguf.TokenType.BYTE)
+                    else:
+                        # Check for PLaMo-2 special tokens
+                        token_str = token_data[0]
+                        if token_str.startswith("<|plamo:") and token_str.endswith("|>"):
+                            toktypes.append(gguf.TokenType.CONTROL)
+                        else:
+                            toktypes.append(gguf.TokenType.NORMAL)
+
+        # Use the "plamo2" tokenizer type for PLaMo-2's custom Aho-Corasick tokenizer
+        self.gguf_writer.add_tokenizer_model("plamo2")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # Add special tokens from config
+        if "bos_token_id" in tokenizer_config:
+            self.gguf_writer.add_bos_token_id(tokenizer_config["bos_token_id"])
+        if "eos_token_id" in tokenizer_config:
+            self.gguf_writer.add_eos_token_id(tokenizer_config["eos_token_id"])
+        if "pad_token_id" in tokenizer_config:
+            self.gguf_writer.add_pad_token_id(tokenizer_config["pad_token_id"])
+        if "unk_token_id" in tokenizer_config:
+            self.gguf_writer.add_unk_token_id(tokenizer_config["unk_token_id"])
+
+        self.gguf_writer.add_add_space_prefix(False)
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+
+        # Decide which layers are Mamba layers.
+        # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer).
+        # This logic matches modeling_plamo.py's is_mamba function.
+        mamba_step = hparams.get("mamba_step", 2)
+        mamba_enabled = hparams.get("mamba_enabled", True)
+        mamba_layers = []
+
+        if mamba_enabled:
+            for i in range(block_count):
+                if block_count <= (mamba_step // 2):
+                    # use attention in the last layer
+                    is_mamba = (i != block_count - 1)
+                else:
+                    is_mamba = (i % mamba_step) != (mamba_step // 2)
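+                # With the default mamba_step=2 this yields Mamba on even layers
+                # (0, 2, 4, ...) and attention on odd layers (1, 3, 5, ...).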
+                if is_mamba:
+                    mamba_layers.append(0)
+                else:
+                    mamba_layers.append(hparams.get("num_key_value_heads", 4))
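+        # The per-layer n_head_kv list doubles as the hybrid-layout marker:
+        # 0 flags a Mamba layer, while a non-zero entry is the KV head count
+        # of an attention layer (assumed convention, as in other hybrid models).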
+
+        if mamba_layers:
+            self.gguf_writer.add_head_count_kv(mamba_layers)
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
+        self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_group_norm_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_layer_norm_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0))
+
+        # Mamba parameters
+        self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
+        self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
+        self.gguf_writer.add_ssm_time_step_rank(hparams.get("mamba_num_heads", 64))
+        intermediate_size = hparams.get("mamba_num_heads", 64) * hparams.get("hidden_size_per_head", 128)
+        self.gguf_writer.add_ssm_inner_size(intermediate_size)
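+        # e.g. with the fallback values above: 64 heads * 128 dims/head = 8192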
+        self.gguf_writer.add_ssm_group_count(0)
+
+        # MLP feed-forward parameters (for attention layers)
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384))
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.endswith(".embed_tokens.weight"):
+            # If there is no lm_head, we need to map the token embedding to the output layer
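+            # (i.e. the embeddings are tied, so the same matrix is emitted a
+            # second time under the output tensor's name)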
+            assert self.tensor_names is not None
+            if all('lm_head' not in n for n in self.tensor_names):
+                output_name = "lm_head"
+
+                embed_tokens_mapped = self.map_tensor_name(name)
+                output_mapped = self.map_tensor_name(output_name) + ".weight"
+
+                return [(embed_tokens_mapped, data_torch), (output_mapped, data_torch)]
+        elif name.endswith(".A_log"):
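+            # Mamba stores the state matrix as A_log; materializing
+            # A = -exp(A_log) here saves the transform at inference time.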
+            data_torch = -torch.exp(data_torch)
+        elif name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+        elif name.endswith(".dt_norm_weight"):
+            name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
+        elif name.endswith(".B_norm_weight"):
+            name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight"
+        elif name.endswith(".C_norm_weight"):
+            name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight"
+        elif name.endswith(".k_weight"):
+            name = name.rpartition(".k_weight")[0] + ".k.weight"
+        elif name.endswith(".q_weight"):
+            name = name.rpartition(".q_weight")[0] + ".q.weight"
+        elif name.endswith(".conv1d.weight"):
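+            # The depthwise conv weight is assumed to come in as
+            # (d_inner, 1, d_conv), the usual Mamba layout.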
+            data_torch = torch.squeeze(data_torch)  # (d_inner, 1, d_conv) -> (d_inner, d_conv)
+            assert data_torch.ndim == 2
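+        # The norm weights below appear to be stored as offsets from 1
+        # (Gemma-style); the 1/5 and 1/(5 ** 1.5) constants presumably match
+        # the residual scaling in modeling_plamo.py.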
+        elif name.endswith(".pre_mixer_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mixer_norm.weight"):
+            data_torch += 1.0 / 5
+        elif name.endswith(".pre_mlp_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mlp_norm.weight"):
+            data_torch += 1.0 / (5 ** 1.5)
+        elif name.endswith(".norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".gate_up_proj.weight"):
+            # Split the combined gate_up tensor
+            split_size = data_torch.shape[0] // 2
+            gate_tensor = data_torch[:split_size, :]
+            up_tensor = data_torch[split_size:, :]
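+            # e.g. a (2 * n_ff, n_embd) tensor becomes two (n_ff, n_embd) halves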
+
+            # Return both tensors, stripping the .weight suffix to build the new names
+            name_base = name.replace(".gate_up_proj.weight", "")
+            gate_name = name_base + ".ffn_gate.weight"
+            up_name = name_base + ".ffn_up.weight"
+
+            gate_mapped = self.map_tensor_name(gate_name)
+            up_mapped = self.map_tensor_name(up_name)
+
+            return [(gate_mapped, gate_tensor), (up_mapped, up_tensor)]
+
+        new_name = self.map_tensor_name(name)
+
+        return [(new_name, data_torch)]
+
+
 @ModelBase.register("CodeShellForCausalLM")
 class CodeShellModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CODESHELL