@@ -16,6 +16,7 @@
 from hashlib import sha256
 from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
 from itertools import chain
+from transformers import AutoConfig
 
 import math
 import numpy as np
@@ -66,8 +67,6 @@ class ModelBase:
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
-    block_count: int
-    tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     gguf_writer: gguf.GGUFWriter
     model_name: str | None
@@ -78,6 +77,10 @@ class ModelBase:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
+    # subclasses should initialize this!
+    block_count: int
+    tensor_map: gguf.TensorNameMap
+
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
@@ -113,8 +116,6 @@ def get_remote_tensors() -> Iterator[tuple[str, Tensor]]:
         if not self.is_safetensors:
             self.part_names = ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
         self.hparams = ModelBase.load_hparams(self.dir_model) if hparams is None else hparams
-        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
-        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
         self.metadata_override = metadata_override
         self.model_name = model_name
@@ -417,15 +418,15 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
 
     @staticmethod
     def load_hparams(dir_model: Path):
-        with open(dir_model / "config.json", "r", encoding="utf-8") as f:
-            hparams = json.load(f)
-        architectures = hparams.get("architectures")
-        if "text_config" in hparams:
-            hparams = {**hparams, **hparams["text_config"]}
-        if architectures is not None:
-            # preserve "architectures" from root level config
-            hparams["architectures"] = architectures
-        return hparams
+        try:
+            # for security reasons, we don't allow loading remote code by default
+            # if a model needs remote code, we fall back to config.json
+            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
+        except Exception as e:
+            logger.warning(f"Failed to load model config from {dir_model}: {e}")
+            logger.warning("Trying to load config.json instead")
+            with open(dir_model / "config.json", "r", encoding="utf-8") as f:
+                return json.load(f)
 
     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
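The new load_hparams prefers transformers.AutoConfig with trust_remote_code=False and only falls back to reading config.json directly when AutoConfig fails (for example, models that require remote code). A minimal standalone sketch of the same load-with-fallback pattern, using a hypothetical model directory:

    import json
    import logging
    from pathlib import Path
    from transformers import AutoConfig

    logger = logging.getLogger(__name__)

    def read_hparams(dir_model: Path) -> dict:
        try:
            # AutoConfig.to_dict() returns the full config, including sub-configs, as a plain dict
            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
        except Exception as e:
            # fall back to the raw config.json shipped with the checkpoint
            logger.warning("AutoConfig failed (%s), reading config.json directly", e)
            with open(dir_model / "config.json", "r", encoding="utf-8") as f:
                return json.load(f)

    hparams = read_hparams(Path("./my-model"))  # hypothetical local checkpoint directory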
@@ -454,6 +455,23 @@ def from_model_architecture(cls, arch: str, model_type = ModelType.TEXT) -> type
 
 
 class TextModel(ModelBase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        if "text_config" in self.hparams:
+            # move the text_config to the root level
+            self.hparams = {**self.hparams, **self.hparams["text_config"]}
+
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
+        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+
+    @classmethod
+    def __init_subclass__(cls):
+        # can't use an abstract property, because overriding it without type errors
+        # would require using decorated functions instead of simply defining the property
+        if "model_arch" not in cls.__dict__:
+            raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
+
     def set_vocab(self):
         self._set_vocab_gpt2()
 
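The __init_subclass__ hook turns the model_arch requirement into a class-definition-time check rather than a runtime failure; this is also why subclasses that previously inherited model_arch (such as NomicBertModel further down) now declare it explicitly. A tiny sketch with hypothetical subclass names, assuming it lives inside the converter module where TextModel and gguf are in scope:

    class GoodModel(TextModel):
        model_arch = gguf.MODEL_ARCH.LLAMA  # declared in the class body, so the check passes

    class BadModel(TextModel):  # no model_arch in its own __dict__, so this raises immediately:
        pass                    # TypeError: Missing property 'model_arch' for 'BadModel'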
@@ -1070,9 +1088,9 @@ def __init__(self, *args, **kwargs):
         if self.model_arch != gguf.MODEL_ARCH.CLIP_VISION:
             raise TypeError("VisionModel must be subclassed with model_arch = gguf.MODEL_ARCH.CLIP_VISION")
 
-        # small hack to correct the number of layers
-        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, 128)
-        self.n_embd_text = self.find_hparam(["hidden_size", "n_embd"])
+        # get n_embd of the text model
+        text_config = {**self.hparams, **self.hparams["text_config"]}
+        self.n_embd_text = text_config.get("hidden_size", text_config.get("n_embd", 0))
         assert self.n_embd_text > 0, "n_embd not found in hparams"
 
         if "vision_config" not in self.hparams:
@@ -1081,6 +1099,9 @@ def __init__(self, *args, **kwargs):
         self.global_config = self.hparams
         self.hparams = self.hparams["vision_config"]
 
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"])
+        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, self.block_count)
+
         # load preprocessor config
         with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
             self.preprocessor_config = json.load(f)
@@ -1098,12 +1119,12 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_patch_size(self.find_hparam(["patch_size"]))
         self.gguf_writer.add_vision_embedding_length(self.find_hparam(["hidden_size"]))
         self.gguf_writer.add_vision_feed_forward_length(self.find_hparam(["intermediate_size"]))
-        self.gguf_writer.add_vision_block_count(self.find_hparam(["num_hidden_layers"]))
+        self.gguf_writer.add_vision_block_count(self.block_count)
         self.gguf_writer.add_vision_head_count(self.find_hparam(["num_attention_heads"]))
 
         # preprocessor config
         self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"])
-        self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_mean"])
+        self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"])
 
     def write_vocab(self):
         raise ValueError("VisionModel does not support vocab writing")
@@ -1719,23 +1740,12 @@ def prepare_tensors(self):
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
-    "Idefics3ForConditionalGeneration",
-    "SmolVLMForConditionalGeneration",
+    "VLlama3ForCausalLM",
     "LlavaForConditionalGeneration")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
     undo_permute = True
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # fix for SmolVLM2, missing `num_attention_heads` in config.json
-        if self.hparams["architectures"][0] == "SmolVLMForConditionalGeneration":
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
-        # fix for Pixtral, missing `num_attention_heads` in config.json
-        if self.hparams["architectures"][0] == "LlavaForConditionalGeneration" \
-                and self.hparams.get("model_type") == "mistral":
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
-
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
@@ -1891,31 +1901,50 @@ def prepare_tensors(self):
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
-@ModelBase.register("LlavaForConditionalGeneration")
+@ModelBase.register(
+    "LlavaForConditionalGeneration",  # pixtral
+    "Mistral3ForConditionalGeneration",  # mistral small 3.1
+)
 class LlavaVisionModel(VisionModel):
     img_break_tok_id = -1
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if self.hparams["model_type"] == "pixtral":
-            # fix missing config.json values
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 16)
-            self.hparams["num_hidden_layers"] = self.hparams.get("num_hidden_layers", 24)
-            self.hparams["intermediate_size"] = self.hparams.get("intermediate_size", 4096)
-            self.hparams["hidden_size"] = self.hparams.get("hidden_size", 1024)
+            # layer_norm_eps is not in config.json, it is hard-coded in modeling_pixtral.py
             self.hparams["layer_norm_eps"] = self.hparams.get("layer_norm_eps", 1e-5)
-            self.img_break_tok_id = 12  # see tokenizer_config.json
+            self.img_break_tok_id = self.get_token_id("[IMG_BREAK]")
+            logger.info(f"Image break token id: {self.img_break_tok_id}")
         else:
             raise ValueError(f"Unsupported model type: {self.hparams['model_type']}")
 
+    def get_token_id(self, token: str) -> int:
+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+            added_tokens_decoder = json.load(f)['added_tokens_decoder']
+            for id_, token_data in added_tokens_decoder.items():
+                if token_data["content"] == token:
+                    return int(id_)
+        raise ValueError(f"Token '{token}' not found in tokenizer config.")
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
         if hparams["model_type"] == "pixtral":
             self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.PIXTRAL)
-            # default values below are taken from HF tranformers code
             self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
-            self.gguf_writer.add_vision_use_silu(True)
+
+            # hidden_act
+            if hparams["hidden_act"] == "silu":
+                self.gguf_writer.add_vision_use_silu(True)
+            elif hparams["hidden_act"] == "gelu":
+                self.gguf_writer.add_vision_use_gelu(True)
+            else:
+                raise ValueError(f"Unsupported hidden_act: {hparams['hidden_act']}")
+
+            # spatial_merge_size
+            if "spatial_merge_size" in self.global_config:
+                self.gguf_writer.add_vision_spatial_merge_size(self.global_config["spatial_merge_size"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
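get_token_id replaces the previously hard-coded img_break_tok_id = 12 with a lookup in tokenizer_config.json. For Pixtral, that file's added_tokens_decoder maps token-id strings to token entries, roughly like this (abridged, shown as a Python literal; fields other than content are ignored by the lookup):

    added_tokens_decoder = {
        "12": {"content": "[IMG_BREAK]", "special": True},
        # ... other added tokens ...
    }
    # get_token_id("[IMG_BREAK]") scans this mapping and returns int("12") == 12,
    # the same value the old code hard-coded, now read from the tokenizer config instead.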
@@ -1944,13 +1973,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 class SmolVLMModel(VisionModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # fix for SmolVLM2, missing some keys in config.json
-        # default values are taken from transformers code
         if self.hparams["model_type"] == "smolvlm_vision":
+            # fix for SmolVLM2, missing some keys in config.json
+            # default values are taken from transformers code
             self.hparams["hidden_size"] = self.hparams.get("hidden_size", 1152)
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 16)
             self.hparams["intermediate_size"] = self.hparams.get("intermediate_size", 3072)
-            self.hparams["num_hidden_layers"] = self.hparams.get("num_hidden_layers", 12)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
@@ -3505,6 +3533,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
 @ModelBase.register("NomicBertModel")
 class NomicBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
         hparams = kwargs.pop("hparams", None)
         if hparams is None:
@@ -5849,6 +5879,19 @@ def split_str_to_n_bytes(split_str: str) -> int:
     return n
 
 
+def get_model_architecture(dir_model: Path, model_type: ModelType, hparams: Any = None) -> str:
+    hparams = ModelBase.load_hparams(dir_model) if hparams is None else hparams
+    text_config = hparams.get("text_config", {})
+    vision_config = hparams.get("vision_config", {})
+    arch = hparams["architectures"][0]
+    # if "architectures" is found in the sub-config, use that instead
+    if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
+        arch = text_config["architectures"][0]
+    elif model_type == ModelType.VISION and vision_config.get("architectures") is not None:
+        arch = vision_config["architectures"][0]
+    return arch
+
+
 def main() -> None:
     args = parse_args()
 
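For multimodal checkpoints whose config nests per-modality settings, get_model_architecture prefers the architectures entry of the matching sub-config and otherwise falls back to the root-level entry. A toy illustration (all config values made up):

    hparams = {
        "architectures": ["SomeForConditionalGeneration"],           # root-level entry (hypothetical)
        "text_config": {"architectures": ["SomeTextForCausalLM"]},   # used for ModelType.TEXT
        "vision_config": {},                                         # no override, so the root entry is used
    }
    # get_model_architecture(dir_model, ModelType.TEXT, hparams)   -> "SomeTextForCausalLM"
    # get_model_architecture(dir_model, ModelType.VISION, hparams) -> "SomeForConditionalGeneration"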
@@ -5901,16 +5944,15 @@ def main() -> None:
 
     logger.info(f"Loading model: {dir_model.name}")
 
-    hparams = ModelBase.load_hparams(dir_model)
-
     if args.mmproj:
         if "mmproj" not in fname_out.name:
             fname_out = ModelBase.add_prefix_to_filename(fname_out, "mmproj-")
 
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
-        model_architecture = hparams["architectures"][0]
         model_type = ModelType.VISION if args.mmproj else ModelType.TEXT
+        model_architecture = get_model_architecture(dir_model, model_type)
+        logger.info(f"Model architecture: {model_architecture}")
         try:
             model_class = ModelBase.from_model_architecture(model_architecture, model_type=model_type)
         except NotImplementedError: