@@ -346,6 +346,8 @@ def prepare_tensors(self):
                     data_qtype = gguf.GGMLQuantizationType.BF16
                 elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
                     data_qtype = gguf.GGMLQuantizationType.Q8_0
+                elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0:
+                    data_qtype = gguf.GGMLQuantizationType.Q4_0
                 elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ1_0:
                     data_qtype = gguf.GGMLQuantizationType.TQ1_0
                 elif self.ftype == gguf.LlamaFileType.MOSTLY_TQ2_0:
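With the new branch in place, `--outtype q4_0` routes tensors through the same block-quantization path the other qtypes use. A minimal sketch of what that selection means, assuming the gguf-py package bundled with llama.cpp and a weight whose last dimension is a multiple of the Q4_0 block size (32):

```python
import numpy as np
import gguf

# Illustrative float32 weight; Q4_0 quantizes independent blocks of
# 32 values, so the last dimension must be divisible by 32.
data = np.random.rand(4, 64).astype(np.float32)

# The qtype the new elif branch selects. quantize() packs each block
# into one fp16 scale plus 32 packed 4-bit weights (18 bytes total).
packed = gguf.quants.quantize(data, gguf.GGMLQuantizationType.Q4_0)
print(packed.dtype, packed.shape)  # uint8, (4, 36): 2 blocks x 18 bytes per row
```

Together with the parser change further down, conversion becomes `python convert_hf_to_gguf.py <model_dir> --outtype q4_0`.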
@@ -6419,7 +6421,7 @@ def set_vocab(self):
         vocab = {}
         mergeable_ranks = tokenizer.mergeable_ranks
         for token, rank in mergeable_ranks.items():
-            # vocab[QwenModel.token_bytes_to_string(token)] = rank
+            vocab[QwenModel.token_bytes_to_string(token)] = rank
             if len(token) == 1:
                 continue
             merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
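Re-enabling this line rebuilds the encoder-side vocab, keyed by the printable form of each token's raw bytes. For readers unfamiliar with that transform, here is a self-contained sketch of the GPT-2-style byte-to-unicode mapping that helpers like `token_bytes_to_string` implement (the converter's actual helper may differ in detail):

```python
def bytes_to_unicode() -> dict[int, str]:
    # Printable bytes keep their own character; the remaining byte
    # values are remapped to unused code points starting at U+0100,
    # so every byte gets a unique, printable stand-in.
    bs = (list(range(ord("!"), ord("~") + 1))
          + list(range(ord("\xa1"), ord("\xac") + 1))
          + list(range(ord("\xae"), ord("\xff") + 1)))
    cs = bs[:]
    n = 0
    for b in range(256):
        if b not in bs:
            bs.append(b)
            cs.append(256 + n)
            n += 1
    return dict(zip(bs, map(chr, cs)))


def token_bytes_to_string(b: bytes) -> str:
    byte_encoder = bytes_to_unicode()
    return "".join(byte_encoder[x] for x in b)


print(token_bytes_to_string(b" hello"))  # "Ġhello" - the space becomes U+0120
```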
@@ -6428,9 +6430,8 @@ def set_vocab(self):
 
         # 3. Generate the tokens and toktypes lists
         vocab_size = self.hparams["vocab_size"]
-        special_token_ids = set(tokenizer.special_tokens.values())
-        reverse_vocab = tokenizer.decoder
-        #reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_token_ids}.items()}
+        special_tokens = tokenizer.special_tokens
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
         tokens: list[str] = []
         toktypes: list[int] = []
         for i in range(vocab_size):
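The new `reverse_vocab` is derived from the just-rebuilt BPE vocab merged with the tokenizer's special tokens, instead of trusting `tokenizer.decoder`. A toy illustration of the merge-then-invert pattern (token names invented):

```python
vocab = {"hello": 0, "world": 1}        # encoded token -> id
special_tokens = {"<|endoftext|>": 2}   # special token -> id

# {**vocab, **special_tokens} joins both mappings; the comprehension
# then flips token -> id into id -> token.
reverse_vocab = {id_: tok for tok, id_ in {**vocab, **special_tokens}.items()}
assert reverse_vocab == {0: "hello", 1: "world", 2: "<|endoftext|>"}
```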
@@ -6440,7 +6441,7 @@ def set_vocab(self):
             else:
                 token = reverse_vocab[i]
             tokens.append(token)
-            if i in special_token_ids:
+            if i in special_tokens.values():
                 toktypes.append(gguf.TokenType.CONTROL)
             else:
                 toktypes.append(gguf.TokenType.NORMAL)
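One caveat with the replacement test: `i in special_tokens.values()` scans the values view on every iteration, where the old set gave O(1) lookups. A small sketch of the equivalent classification with the set precomputed once (illustrative values, assuming the gguf-py TokenType enum):

```python
import gguf

special_tokens = {"<|endoftext|>": 2}   # illustrative special-token map
vocab_size = 3

# Build the id set once so each membership test is O(1) rather than a
# linear scan of dict values inside the loop.
special_ids = set(special_tokens.values())
toktypes = [
    gguf.TokenType.CONTROL if i in special_ids else gguf.TokenType.NORMAL
    for i in range(vocab_size)
]
print(toktypes)  # [NORMAL, NORMAL, CONTROL]
```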
@@ -6614,7 +6615,7 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "q4_0", "q8_0", "tq1_0", "tq2_0", "auto"], default="f16",
         help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, tq1_0 or tq2_0 for ternary, and auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(
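For completeness, a stripped-down standalone sketch of the same argparse pattern, showing that the widened `choices` list now accepts `q4_0` and still rejects anything else:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--outtype", type=str, default="f16",
    choices=["f32", "f16", "bf16", "q4_0", "q8_0", "tq1_0", "tq2_0", "auto"],
)

# argparse exits with a usage error for values outside `choices`;
# "q4_0" now parses cleanly.
args = parser.parse_args(["--outtype", "q4_0"])
assert args.outtype == "q4_0"
```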
@@ -6746,6 +6747,7 @@ def main() -> None:
67466747 "f32" : gguf .LlamaFileType .ALL_F32 ,
67476748 "f16" : gguf .LlamaFileType .MOSTLY_F16 ,
67486749 "bf16" : gguf .LlamaFileType .MOSTLY_BF16 ,
6750+ "q4_0" : gguf .LlamaFileType .MOSTLY_Q4_0 ,
67496751 "q8_0" : gguf .LlamaFileType .MOSTLY_Q8_0 ,
67506752 "tq1_0" : gguf .LlamaFileType .MOSTLY_TQ1_0 ,
67516753 "tq2_0" : gguf .LlamaFileType .MOSTLY_TQ2_0 ,