@@ -180,7 +180,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 extra = sorted(tensor_names_from_parts.difference(self.tensor_names))
                 missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map))
                 if len(extra) == 0 and len(missing_files) > 0:
-                    raise ValueError(f"Missing or incomplete model files: {missing_files}")
+                    raise ValueError(f"Missing or incomplete model files: {missing_files}\n"
+                                     f"Missing tensors: {missing}")
                 else:
                     raise ValueError("Mismatch between weight map and model parts for tensor names:\n"
                                      f"Missing tensors: {missing}\n"
@@ -528,6 +529,8 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
         added_vocab = tokenizer.get_added_vocab()
 
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
         for i in range(vocab_size):
             if i not in reverse_vocab:
                 tokens.append(f"[PAD{i}]")
@@ -537,13 +540,13 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
                 if token in added_vocab:
                     # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
                     # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not tokenizer.added_tokens_decoder[i].normalized:
+                    if not added_tokens_decoder[i].normalized:
                         previous_token = token
                         token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
                         if previous_token != token:
                             logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
 
-                    if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
                         toktypes.append(gguf.TokenType.CONTROL)
                     else:
                         # NOTE: this was added for Gemma.
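
Context for the hunk above: hoisting `tokenizer.added_tokens_decoder` out of the per-token loop matters because, in current transformers releases, that attribute is a property which rebuilds a sorted dict on every access, so reading it once per vocab entry becomes expensive for very large vocabularies. A rough, illustrative micro-benchmark of the pattern (the tokenizer path is a placeholder and the timings will vary):

# Illustrative micro-benchmark, not part of the conversion script.
# Assumes transformers is installed; the tokenizer path is a placeholder.
import time
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/any-hf-tokenizer")  # placeholder

# Property access inside the loop: one dict rebuild per token id.
start = time.perf_counter()
for i in range(1000):
    _ = tokenizer.added_tokens_decoder.get(i)
slow = time.perf_counter() - start

# Hoisted access: the dict is built once and reused.
start = time.perf_counter()
added_tokens_decoder = tokenizer.added_tokens_decoder
for i in range(1000):
    _ = added_tokens_decoder.get(i)
fast = time.perf_counter() - start

print(f"per-iteration property access: {slow:.3f}s, hoisted: {fast:.3f}s")
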
@@ -702,6 +705,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
             # ref: https://huggingface.co/Xenova/gpt-4o
             res = "gpt-4o"
+        if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
+            # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
+            res = "superbpe"
 
         if res is None:
             logger.warning("\n")
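
For reference, the checksum registered above follows the script's existing pre-tokenizer detection scheme: `get_vocab_base_pre` encodes a fixed probe string and hashes the resulting token ids, and new entries are normally generated with `convert_hf_to_gguf_update.py`. A minimal sketch of that computation, assuming network access to the model's tokenizer (the probe string is deliberately elided here rather than reproduced):

from hashlib import sha256
from transformers import AutoTokenizer  # assumes transformers is installed

tokenizer = AutoTokenizer.from_pretrained("UW/OLMo2-8B-SuperBPE-t180k")
chktxt = "..."  # the fixed probe text defined in get_vocab_base_pre, elided here
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)  # with the real probe text, this reproduces the hash registered above
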
@@ -1099,13 +1105,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((self.map_tensor_name(name), data_torch))
 
-        if name == "word_embeddings.weight":
-            assert self.tensor_names is not None
-
-            # TODO: tie them at runtime, don't duplicate in the model file
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 
@@ -1747,6 +1746,25 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@Model.register("Mistral3ForConditionalGeneration")
+class Mistral3Model(LlamaModel):
+    model_arch = gguf.MODEL_ARCH.LLAMA
+
+    # we need to merge the text_config into the root level of hparams
+    def __init__(self, *args, **kwargs):
+        hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
+        if "text_config" in hparams:
+            hparams = {**hparams, **hparams["text_config"]}
+        kwargs["hparams"] = hparams
+        super().__init__(*args, **kwargs)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        name = name.replace("language_model.", "")
+        if "multi_modal_projector" in name or "vision_tower" in name:
+            return []
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @Model.register("DeciLMForCausalLM")
 class DeciModel(Model):
     model_arch = gguf.MODEL_ARCH.DECI
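
A quick illustration of what the `text_config` merge in `Mistral3Model.__init__` (and in `Gemma3Model` further down) does. The config values here are made up; only the dict shape mimics a multimodal config.json:

# Toy illustration of the hparams merge; values are invented for the example.
hparams = {
    "architectures": ["Mistral3ForConditionalGeneration"],
    "text_config": {"hidden_size": 5120, "num_hidden_layers": 40},
    "vision_config": {"hidden_size": 1024},
}
if "text_config" in hparams:
    # keys from text_config are promoted to the top level (and win on conflicts)
    hparams = {**hparams, **hparams["text_config"]}
print(hparams["hidden_size"])  # -> 5120: the text model's params are now at the root
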
@@ -2404,10 +2422,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((new_name, data_torch))
 
-        # note: GPT2 output is tied to (same as) wte in original model
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 
@@ -2737,21 +2751,26 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
 
+    _has_tok_embd = False
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        new_name = self.map_tensor_name(name)
-
-        tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
 
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            assert self.tensor_names is not None
+        new_name = self.map_tensor_name(name)
 
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                # copy tok_embd.weight to output.weight
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+        # assuming token_embd.weight is seen before output.weight
+        if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            # even though the tensor file(s) does not contain the word embeddings they are still in the weight map
+            if self.tensor_names and "transformer.wte.weight" in self.tensor_names:
+                logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied")
+                self.tensor_names.remove("transformer.wte.weight")
+        elif new_name == tok_embd_name:
+            self._has_tok_embd = True
 
-        return tensors
+        return [(new_name, data_torch)]
 
 
 @Model.register("InternLM2ForCausalLM")
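
The reworked `modify_tensors` above stops duplicating the token embedding into the output tensor and instead drops `transformer.wte.weight` from the expected tensor list when the output weight shows up first, leaving the tying to be handled at load time. A simplified standalone sketch of that bookkeeping, with hypothetical tensor names and without the GGUF name mapping done by `map_tensor_name()`:

# Simplified, standalone model of the bookkeeping; names are hypothetical.
def surviving_expectations(stream: list[str], weight_map_names: set[str]) -> set[str]:
    expected = set(weight_map_names)
    has_tok_embd = False
    for name in stream:
        if not has_tok_embd and name == "lm_head.weight":
            # The output weight arrived before any token embedding: assume the
            # embedding is tied and stop expecting it to appear in the shards.
            expected.discard("transformer.wte.weight")
        elif name == "transformer.wte.weight":
            has_tok_embd = True
    return expected

# "transformer.wte.weight" is listed in the index but never materialized:
print(surviving_expectations(
    ["lm_head.weight", "transformer.h.0.attn.c_attn.weight"],
    {"lm_head.weight", "transformer.wte.weight", "transformer.h.0.attn.c_attn.weight"},
))
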
@@ -3366,7 +3385,7 @@ class Gemma3Model(Model):
 
     # we need to merge the text_config into the root level of hparams
     def __init__(self, *args, **kwargs):
-        hparams = Model.load_hparams(kwargs["dir_model"])
+        hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
         if "text_config" in hparams:
             hparams = {**hparams, **hparams["text_config"]}
         kwargs["hparams"] = hparams
@@ -5339,7 +5358,7 @@ def main() -> None:
             logger.error(f"Model {model_architecture} is not supported")
             sys.exit(1)
 
-        model_instance = model_class(dir_model=dir_model, ftype=output_type, fname_out=fname_out,
+        model_instance = model_class(dir_model, output_type, fname_out,
                                      is_big_endian=args.bigendian, use_temp_file=args.use_temp_file,
                                      eager=args.no_lazy,
                                      metadata_override=args.metadata, model_name=args.model_name,