@@ -840,6 +840,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51":
             # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer
             res = "lfm2"
+        if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
+            # ref: https://huggingface.co/moonshotai/Kimi-K2-Base
+            res = "kimi-k2"

         if res is None:
             logger.warning("\n")
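
(Context: in this script, chkhsh is computed earlier in get_vocab_base_pre as a SHA-256 digest of the tokenizer's output on a fixed probe string; a rough sketch using names from the surrounding code, not shown in this hunk:

    chktok = tokenizer.encode(chktxt)                  # chktxt: the fixed probe string defined above
    chkhsh = sha256(str(chktok).encode()).hexdigest()  # compared against the per-tokenizer constants

so registering a new pre-tokenizer such as "kimi-k2" amounts to recording the digest produced by that model's tokenizer.)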
@@ -1082,7 +1085,14 @@ def _set_vocab_rwkv_world(self):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
-        special_vocab.chat_template = "rwkv-world"
+        if special_vocab.chat_template is None:
+            template_path = Path(__file__).parent / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
+            if template_path.is_file():
+                with open(template_path, "r", encoding="utf-8") as f:
+                    template = f.read()
+            else:
+                template = "rwkv-world"
+            special_vocab.chat_template = template
         # hack: Add '\n\n' as the EOT token to make it chat normally
         special_vocab._set_special_token("eot", 261)
         # hack: Override these as they have already been set (incorrectly)
@@ -3501,6 +3511,175 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(new_name, data_torch)]


+@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM")
+class Plamo2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.PLAMO2
+
+    def set_vocab(self):
+        # PLaMo 2 uses a custom tokenizer with a .jsonl file
+        # We need to handle this specially
+        tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl"
+        tokenizer_config_path = self.dir_model / "tokenizer_config.json"
+
+        if not tokenizer_jsonl_path.is_file():
+            raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}")
+
+        # Load tokenizer config
+        with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
+            tokenizer_config = json.load(f)
+
+        # Load tokens from JSONL file (actually a list format)
+        tokens = []
+        scores = []
+        toktypes = []
+
+        with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f:
+            for line_num, line in enumerate(f):
+                if line.strip():
+                    token_data = json.loads(line)
+                    # Format: [token, score, type, ?, ?, ?, ?]
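+                    # (illustrative only, not taken from the actual file) e.g. ["<|plamo:op|>", 0.0, "CONTROL", ...] or ["hello", -8.5, "NORMAL", ...]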
+                    token = token_data[0].encode("utf-8")
+                    score = float(token_data[1])
+                    token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL"
+
+                    tokens.append(token)
+                    scores.append(score)
+
+                    # Map token type strings to GGUF token types
+                    if token_type_str == "UNKNOWN":
+                        toktypes.append(gguf.TokenType.UNKNOWN)
+                    elif token_type_str == "CONTROL":
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    elif token_type_str == "BYTE":
+                        toktypes.append(gguf.TokenType.BYTE)
+                    else:
+                        # Check for PLaMo-2 special tokens
+                        token_str = token_data[0]
+                        if token_str.startswith("<|plamo:") and token_str.endswith("|>"):
+                            toktypes.append(gguf.TokenType.CONTROL)
+                        else:
+                            toktypes.append(gguf.TokenType.NORMAL)
+
+        vocab_size = self.hparams["vocab_size"]
+        if vocab_size > len(tokens):
+            pad_count = vocab_size - len(tokens)
+            logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
+            for i in range(1, pad_count + 1):
+                tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
+                scores.append(-1000.0)
+                toktypes.append(gguf.TokenType.UNUSED)
+
+        # Use "plamo2" tokenizer type for PLaMo-2's custom Aho-Corasick tokenizer
+        self.gguf_writer.add_tokenizer_model("plamo2")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        # Add special tokens from config
+        if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] is not None:
+            token_id = tokens.index(tokenizer_config["bos_token"].encode("utf-8"))
+            self.gguf_writer.add_bos_token_id(token_id)
+        if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] is not None:
+            token_id = tokens.index(tokenizer_config["eos_token"].encode("utf-8"))
+            self.gguf_writer.add_eos_token_id(token_id)
+        if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] is not None:
+            token_id = tokens.index(tokenizer_config["pad_token"].encode("utf-8"))
+            self.gguf_writer.add_pad_token_id(token_id)
+        if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] is not None:
+            token_id = tokens.index(tokenizer_config["sep_token"].encode("utf-8"))
+            self.gguf_writer.add_sep_token_id(token_id)
+        if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] is not None:
+            token_id = tokens.index(tokenizer_config["unk_token"].encode("utf-8"))
+            self.gguf_writer.add_unk_token_id(token_id)
+
+        # Add <|plamo:op|> as EOT to ensure appropriate end of generation
+        self.gguf_writer.add_eot_token_id(4)
+
+        self.gguf_writer.add_add_space_prefix(False)
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+        self.gguf_writer.add_vocab_size(self.hparams["vocab_size"])
+
+        # Which layers are Mamba layers
+        # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer)
+        # This logic matches modeling_plamo.py's is_mamba function
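+        # (illustration: with the default mamba_step=2, even-indexed layers become Mamba layers and odd-indexed layers keep attention)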
+        mamba_step = hparams.get("mamba_step", 2)
+        mamba_enabled = hparams.get("mamba_enabled", True)
+        mamba_layers = []
+
+        if mamba_enabled:
+            for i in range(block_count):
+                if block_count <= (mamba_step // 2):
+                    # use attention in last layer
+                    is_mamba = (i != block_count - 1)
+                else:
+                    is_mamba = (i % mamba_step) != (mamba_step // 2)
+                if is_mamba:
+                    mamba_layers.append(0)
+                else:
+                    mamba_layers.append(hparams.get("num_key_value_heads", 4))
+
+        if mamba_layers:
+            self.gguf_writer.add_head_count_kv(mamba_layers)
+
+        self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
+        self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
+        self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0))
+
+        # Mamba parameters
+        self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
+        self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4))
+        self.gguf_writer.add_ssm_time_step_rank(hparams.get("mamba_num_heads", 64))
+        intermediate_size = hparams.get("mamba_num_heads", 64) * hparams.get("hidden_size_per_head", 128)
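+        # e.g. with the defaults above: 64 heads * 128 dims per head = 8192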
+        self.gguf_writer.add_ssm_inner_size(intermediate_size)
+        self.gguf_writer.add_ssm_group_count(0)
+
+        # MLP feed forward parameters (for attention layers)
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384))
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.endswith(".A_log"):
+            data_torch = -torch.exp(data_torch)
+        elif name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+        elif name.endswith(".dt_norm_weight"):
+            name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
+        elif name.endswith(".B_norm_weight"):
+            name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight"
+        elif name.endswith(".C_norm_weight"):
+            name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight"
+        elif name.endswith(".k_weight"):
+            name = name.rpartition(".k_weight")[0] + ".k.weight"
+        elif name.endswith(".q_weight"):
+            name = name.rpartition(".q_weight")[0] + ".q.weight"
+        elif name.endswith(".conv1d.weight"):
+            data_torch = torch.squeeze(data_torch)  # remove (, 1, )
+            assert data_torch.ndim == 2
+        elif name.endswith(".pre_mixer_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mixer_norm.weight"):
+            data_torch += 1.0 / 5
+        elif name.endswith(".pre_mlp_norm.weight"):
+            data_torch += 1.0
+        elif name.endswith(".post_mlp_norm.weight"):
+            data_torch += 1.0 / (5 ** 1.5)
+        elif name.endswith(".norm.weight"):
+            data_torch += 1.0
+
+        new_name = self.map_tensor_name(name)
+
+        return [(new_name, data_torch)]
+
+
 @ModelBase.register("CodeShellForCausalLM")
 class CodeShellModel(TextModel):
     model_arch = gguf.MODEL_ARCH.CODESHELL
@@ -5563,7 +5742,58 @@ class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2

     def set_vocab(self):
-        self._set_vocab_gpt2()
+        try:
+            self._set_vocab_gpt2()
+            return
+        except Exception:
+            pass
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        if tokpre == "kimi-k2":
+            # Build merges list using the approach similar to HunYuanMoE
+            merges = []
+            vocab = {}
+            mergeable_ranks = tokenizer.model._mergeable_ranks
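+            # QwenModel.token_bytes_to_string maps raw token bytes to the printable GPT-2 byte-level alphabet;
+            # QwenModel.bpe re-splits a multi-byte token into the sub-pieces it was merged from, so (roughly)
+            # a length-2 result below recovers one merge rule per rank.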
+            for token, rank in mergeable_ranks.items():
+                vocab[QwenModel.token_bytes_to_string(token)] = rank
+                if len(token) == 1:
+                    continue
+                merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+                if len(merged) == 2:
+                    merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+
+            # Build token list
+            vocab_size = self.hparams["vocab_size"]
+            special_tokens = tokenizer.special_tokens
+            reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+            tokens: list[str] = []
+            toktypes: list[int] = []
+
+            for i in range(vocab_size):
+                if i not in reverse_vocab:
+                    tokens.append(f"[PAD{i}]")
+                    toktypes.append(gguf.TokenType.UNUSED)
+                else:
+                    token = reverse_vocab[i]
+                    tokens.append(token)
+                    if i in special_tokens.values():
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.NORMAL)
+
+            self.gguf_writer.add_tokenizer_model("gpt2")
+            self.gguf_writer.add_tokenizer_pre(tokpre)
+            self.gguf_writer.add_token_list(tokens)
+            self.gguf_writer.add_token_types(toktypes)
+            self.gguf_writer.add_token_merges(merges)
+
+            special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+            special_vocab.add_to_gguf(self.gguf_writer)
+        else:
+            raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")

     def set_gguf_parameters(self):
