@@ -3551,14 +3551,21 @@ def set_vocab(self):
35513551 self .gguf_writer .add_token_types (toktypes )
35523552
35533553 # Add special tokens from config
3554- if "bos_token_id" in tokenizer_config :
3555- self .gguf_writer .add_bos_token_id (tokenizer_config ["bos_token_id" ])
3556- if "eos_token_id" in tokenizer_config :
3557- self .gguf_writer .add_eos_token_id (tokenizer_config ["eos_token_id" ])
3558- if "pad_token_id" in tokenizer_config :
3559- self .gguf_writer .add_pad_token_id (tokenizer_config ["pad_token_id" ])
3560- if "unk_token_id" in tokenizer_config :
3561- self .gguf_writer .add_unk_token_id (tokenizer_config ["unk_token_id" ])
3554+ if "bos_token" in tokenizer_config and tokenizer_config ["bos_token" ] is not None :
3555+ token_id = tokens .index (tokenizer_config ["bos_token" ].encode ("utf-8" ))
3556+ self .gguf_writer .add_bos_token_id (token_id )
3557+ if "eos_token" in tokenizer_config and tokenizer_config ["eos_token" ] is not None :
3558+ token_id = tokens .index (tokenizer_config ["eos_token" ].encode ("utf-8" ))
3559+ self .gguf_writer .add_eos_token_id (token_id )
3560+ if "pad_token" in tokenizer_config and tokenizer_config ["pad_token" ] is not None :
3561+ token_id = tokens .index (tokenizer_config ["pad_token" ].encode ("utf-8" ))
3562+ self .gguf_writer .add_pad_token_id (token_id )
3563+ if "sep_token" in tokenizer_config and tokenizer_config ["sep_token" ] is not None :
3564+ token_id = tokens .index (tokenizer_config ["sep_token" ].encode ("utf-8" ))
3565+ self .gguf_writer .add_sep_token_id (token_id )
3566+ if "unk_token" in tokenizer_config and tokenizer_config ["unk_token" ] is not None :
3567+ token_id = tokens .index (tokenizer_config ["unk_token" ].encode ("utf-8" ))
3568+ self .gguf_writer .add_unk_token_id (token_id )
35623569
35633570 self .gguf_writer .add_add_space_prefix (False )
35643571
0 commit comments