@@ -1301,15 +1301,23 @@ def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
         except FileNotFoundError:
-            self._set_vocab_llama_hf()
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
-                                          special_token_types=['prefix', 'suffix', 'middle', 'eot'])
-        special_vocab._set_special_token("prefix", 32007)
-        special_vocab._set_special_token("suffix", 32008)
-        special_vocab._set_special_token("middle", 32009)
-        special_vocab._set_special_token("eot", 32010)
-        special_vocab.add_to_gguf(self.gguf_writer)
+            try:
+                self._set_vocab_llama_hf()
+            except (FileNotFoundError, TypeError):
+                # Llama 3
+                self._set_vocab_gpt2()
+
+        # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
+        if self.hparams.get("vocab_size", 32000) == 32016:
+            special_vocab = gguf.SpecialVocab(
+                self.dir_model, load_merges=False,
+                special_token_types=['prefix', 'suffix', 'middle', 'eot']
+            )
+            special_vocab._set_special_token("prefix", 32007)
+            special_vocab._set_special_token("suffix", 32008)
+            special_vocab._set_special_token("middle", 32009)
+            special_vocab._set_special_token("eot", 32010)
+            special_vocab.add_to_gguf(self.gguf_writer)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
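
For context, the prefix/suffix/middle/eot IDs exported in the hunk above follow the CodeLlama infill convention. Below is a minimal sketch of how a client might assemble a fill-in-the-middle prompt from them; the helper name and `tokenize` callable are illustrative and not part of this patch:

# Illustrative only: CodeLlama-style infill prompt built from the special
# token IDs exported above. `tokenize` is assumed to map text to token IDs
# without adding BOS/EOS.
PREFIX_ID, SUFFIX_ID, MIDDLE_ID, EOT_ID = 32007, 32008, 32009, 32010

def build_infill_prompt(tokenize, code_before: str, code_after: str) -> list[int]:
    # <PRE> {code_before} <SUF> {code_after} <MID> -> model generates the middle and stops at <EOT>
    return [PREFIX_ID] + tokenize(code_before) + [SUFFIX_ID] + tokenize(code_after) + [MIDDLE_ID]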
@@ -2194,6 +2202,8 @@ def set_vocab(self):
         old_eos = special_vocab.special_token_ids["eos"]
         if "chat" in os.path.basename(self.dir_model.absolute()):
             # For the chat model, we replace the eos with '<|im_end|>'.
+            # TODO: this is a hack, should be fixed
+            #       https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048
             special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
             print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
 in chat mode so that the conversation can end normally.")
@@ -2429,12 +2439,15 @@ class GemmaModel(Model):
 
     def set_vocab(self):
         self._set_vocab_sentencepiece()
+
+        # TODO: these special tokens should be exported only for the CodeGemma family
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False,
-                                          special_token_types=['prefix', 'suffix', 'middle', 'eot'])
+                                          special_token_types=['prefix', 'suffix', 'middle', 'fsep', 'eot'])
         special_vocab._set_special_token("prefix", 67)
         special_vocab._set_special_token("suffix", 69)
         special_vocab._set_special_token("middle", 68)
-        special_vocab._set_special_token("eot", 70)
+        special_vocab._set_special_token("fsep", 70)
+        special_vocab._set_special_token("eot", 107)
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def set_gguf_parameters(self):
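
For reference, a rough mapping of the Gemma token IDs used above to tokenizer pieces. The token strings are an assumption about the CodeGemma vocabulary, not something asserted by this patch, and should be checked against the model's tokenizer:

# Assumed CodeGemma tokenizer pieces for the IDs above (verify against tokenizer.model):
CODEGEMMA_FIM_TOKENS = {
    "prefix": (67,  "<|fim_prefix|>"),
    "suffix": (69,  "<|fim_suffix|>"),
    "middle": (68,  "<|fim_middle|>"),
    "fsep":   (70,  "<|file_separator|>"),  # separates files in multi-file infill prompts
    "eot":    (107, "<end_of_turn>"),       # reused here as the end-of-infill marker
}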
@@ -2523,28 +2536,34 @@ def set_vocab(self):
 
         field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
         self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]))
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
         self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
         self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
         self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
         self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
         self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0])
+
         field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
         self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0])
 
     def set_gguf_parameters(self):
-        d_model = self.find_hparam(["hidden_size", "d_model"])
-        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
+        d_model = self.find_hparam(["hidden_size",       "d_model"])
+        d_conv  = self.find_hparam(["conv_kernel",       "d_conv"],  optional=True) or 4
         d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
-        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
+        d_state = self.find_hparam(["state_size",        "d_state"], optional=True) or 16
         # ceiling division
         # ref: https://stackoverflow.com/a/17511341/22827863
         # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
-        dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
+        dt_rank      = self.find_hparam(["time_step_rank",     "dt_rank"],      optional=True) or -(d_model // -16)
         rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
 
         # Fail early for models which don't have a block expansion factor of 2
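
The `-(d_model // -16)` expression in the dt_rank default is ceiling division written with floor division, which avoids a `math.ceil` import and stays exact on integers. A quick sanity check of the identity (the example values are arbitrary):

# -(a // -b) == ceil(a / b) for positive integers a, b.
assert -(2560 // -16) == 160   # exact multiple: same as 2560 // 16
assert -(2568 // -16) == 161   # rounds up, whereas 2568 // 16 == 160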