@@ -487,6 +487,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
             res = "jina-v2-code"
+        if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-chat
+            res = "chatglm-bpe"
         if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
             # ref: https://huggingface.co/LumiOpen/Viking-7B
             res = "viking"
@@ -3175,6 +3178,190 @@ def write_tensors(self):
         self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
 
 
+@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
+class ChatGLMModel(Model):
+    model_arch = gguf.MODEL_ARCH.CHATGLM
+
+    def set_vocab_chatglm3(self):
+        dir_model = self.dir_model
+        hparams = self.hparams
+        tokens: list[bytearray] = []
+        toktypes: list[int] = []
+        scores: list[float] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
+        vocab_size = hparams.get("padded_vocab_size", len(tokenizer.get_vocab()))
+        assert max(tokenizer.get_vocab().values()) < vocab_size
+        role_special_tokens = ["<|system|>", "<|user|>", "<|assistant|>", "<|observation|>"]
+        special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"] + role_special_tokens
+        for token_id in range(vocab_size):
+            piece = tokenizer._convert_id_to_token(token_id)
+            if token_id == 0:
+                piece = "<unk>"
+            elif token_id == 1:
+                piece = "<bos>"
+            elif token_id == 2:
+                piece = "<eos>"
+
+            text = piece.encode("utf-8")
+            score = 0.0
+            # Referencing the tokenizer Python implementation (https://huggingface.co/THUDM/chatglm3-6b/blob/main/tokenization_chatglm.py),
+            # a score is only valid for token ids below tokenizer.tokenizer.sp_model.vocab_size()
+            if len(piece) != 0 and token_id < tokenizer.tokenizer.sp_model.vocab_size():
+                score = tokenizer.tokenizer.sp_model.get_score(token_id)
+
+            if len(piece) == 0:
+                text = f"[PAD{token_id}]".encode("utf-8")
+
+            if token_id >= tokenizer.tokenizer.sp_model.vocab_size():
+                if piece in special_tokens:
+                    # show special tokens in prompt
+                    toktype = SentencePieceTokenTypes.USER_DEFINED
+                else:
+                    toktype = SentencePieceTokenTypes.UNKNOWN
+                tokens.append(text)
+                scores.append(score)
+                toktypes.append(toktype)
+                continue
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.tokenizer.sp_model.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.tokenizer.sp_model.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.tokenizer.sp_model.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.tokenizer.sp_model.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens.append(text)
+            scores.append(score)
+            toktypes.append(toktype)
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        # glm3 needs prefix and suffix formatted as:
+        # prompt = "[gMASK]sop<|user|>\n" + prompt + "<|assistant|>"
+        self.gguf_writer.add_tokenizer_pre("chatglm-spm")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    @staticmethod
+    def token_bytes_to_string(b):
+        from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
+        byte_encoder = bytes_to_unicode()
+        return ''.join([byte_encoder[ord(char)] for char in b.decode('latin-1')])
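+    # Illustrative note (not part of the upstream conversion logic): the GPT-2 byte
+    # encoder maps every raw byte to a printable character, e.g. the space byte 0x20
+    # becomes "Ġ", so token_bytes_to_string(b" world") == "Ġworld".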
+
+    @staticmethod
+    def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
+        parts = [bytes([b]) for b in token]
+        while True:
+            min_idx = None
+            min_rank = None
+            for i, pair in enumerate(zip(parts[:-1], parts[1:])):
+                rank = mergeable_ranks.get(pair[0] + pair[1])
+                if rank is not None and (min_rank is None or rank < min_rank):
+                    min_idx = i
+                    min_rank = rank
+            if min_rank is None or (max_rank is not None and min_rank >= max_rank):
+                break
+            assert min_idx is not None
+            parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
+        return parts
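+    # Sketch of what bpe() recovers (toy ranks for illustration, not the real vocab):
+    # with mergeable_ranks = {b"ab": 0, b"abc": 1}, calling
+    # bpe(mergeable_ranks, b"abc", max_rank=1) applies every merge of rank < 1 and
+    # returns [b"ab", b"c"], i.e. the pair whose merge produced the token, which is
+    # exactly the merge rule emitted in set_vocab() below.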
+
+    def set_vocab(self):
+        if "THUDM/chatglm3-6b" in self.hparams.get("_name_or_path", ""):
+            self.set_vocab_chatglm3()
+            return
+
+        dir_model = self.dir_model
+        hparams = self.hparams
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
+        vocab_size = hparams["padded_vocab_size"]
+        assert max(tokenizer.get_vocab().values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        merges = []
+        vocab = {}
+        mergeable_ranks = tokenizer.mergeable_ranks
+        for token, rank in mergeable_ranks.items():
+            vocab[ChatGLMModel.token_bytes_to_string(token)] = rank
+            if len(token) == 1:
+                continue
+            merged = ChatGLMModel.bpe(mergeable_ranks, token, max_rank=rank)
+            assert 2 <= len(merged) <= 7
+            merges.append(' '.join(map(ChatGLMModel.token_bytes_to_string, merged)))
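+        # (GLM-4's tokenizer is tiktoken-style and exposes mergeable_ranks rather than
+        # a merges.txt, so the merge rules gguf expects are reconstructed above)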
+
+        # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
+        added_vocab = tokenizer.get_added_vocab()
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()}
+
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.USER_DEFINED)
+            elif reverse_vocab[i] in added_vocab:
+                tokens.append(reverse_vocab[i])
+                if tokenizer.added_tokens_decoder[i].special:
+                    toktypes.append(gguf.TokenType.CONTROL)
+                else:
+                    toktypes.append(gguf.TokenType.USER_DEFINED)
+            else:
+                tokens.append(reverse_vocab[i])
+                toktypes.append(gguf.TokenType.NORMAL)
+
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
+        special_vocab.merges = merges
+        # only add special tokens when they were not already loaded from config.json
+        special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
+        # this one is usually not in config.json anyway
+        special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"])
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_name(self.hparams.get("_name_or_path").split("/")[1])  # THUDM/glm-4-9b-chat or THUDM/chatglm3-6b
+        n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
+        n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
+        n_head_kv = self.hparams.get("multi_query_group_num", n_head)
+        self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
+        self.gguf_writer.add_embedding_length(n_embed)
+        self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", 4 * n_embed))
+        self.gguf_writer.add_block_count(self.hparams["num_layers"])
+        self.gguf_writer.add_head_count(n_head)
+        self.gguf_writer.add_head_count_kv(n_head_kv)
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layernorm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_rope_dimension_count(64)
+        self.gguf_writer.add_add_bos_token(False)
+        rope_freq = 10000
+        if "rope_ratio" in self.hparams:
+            rope_freq = rope_freq * self.hparams["rope_ratio"]
+        self.gguf_writer.add_rope_freq_base(rope_freq)
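+        # e.g. (config values as published on HF, worth double-checking): glm-4-9b-chat
+        # sets rope_ratio = 500, giving a rope freq base of 10000 * 500 = 5e6, while
+        # chatglm3-6b omits rope_ratio and keeps the 10000 default.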
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        if name.endswith(".rotary_pos_emb.inv_freq"):
+            return []
+
+        name = name.removeprefix("transformer.")
+        return [(self.map_tensor_name(name), data_torch)]
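+
+    # e.g. a checkpoint tensor such as
+    # "transformer.encoder.layers.0.self_attention.query_key_value.weight" (name as it
+    # appears in the HF ChatGLM checkpoints) loses its "transformer." prefix before the
+    # usual map_tensor_name() lookup, while rotary inv_freq buffers are dropped entirely.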
+
 ###### CONVERSION LOGIC ######
 
 