@@ -1334,6 +1334,12 @@ def _find_param(self, obj: dict[str, Any], keys: Iterable[str], optional: bool =
             return None
         raise KeyError(f"could not find any of: {keys}")
 
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
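+        # shared default for all models: keep the patch embedding weight
+        # unquantized (F16 when converting to F16, otherwise F32) and return
+        # False for everything else so the regular ftype heuristics apply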
+        del bid, name, n_dims  # unused
+        if ".patch_embd.weight" in new_name:
+            return gguf.GGMLQuantizationType.F16 if self.ftype == gguf.LlamaFileType.MOSTLY_F16 else gguf.GGMLQuantizationType.F32
+        return False
+
 
 @ModelBase.register("GPTNeoXForCausalLM")
 class GPTNeoXModel(TextModel):
@@ -2305,10 +2311,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_use_gelu(True)
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         if ".embeddings." in name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -3296,12 +3301,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_attention_layernorm_eps(self.global_config.get("rms_norm_eps", 1e-6))
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, name, n_dims  # unused
-        if ".patch_embd." in new_name:
-            return gguf.GGMLQuantizationType.F16
         if ".position_embd." in new_name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -3374,10 +3376,9 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         yield ("audio_tower.embed_positions.weight", pos_embd)
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         if ".conv" in name and ".weight" in name:
             return gguf.GGMLQuantizationType.F16
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if name.startswith("thinker."):
@@ -3423,12 +3424,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, name, n_dims  # unused
-        if ".patch_embd." in new_name:
-            return gguf.GGMLQuantizationType.F16
         if ".position_embd." in new_name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def _mapping_interns1_name(self, name):
         names_map = {
@@ -5062,13 +5060,12 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_vision_projector_scale_factor(proj_scale_factor)
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         # related to https://github.com/ggml-org/llama.cpp/issues/13025
         if "input_projection" in name:
             return gguf.GGMLQuantizationType.F16
         if ".embeddings." in name:
             return gguf.GGMLQuantizationType.F32
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -7727,10 +7724,9 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-5))
 
     def tensor_force_quant(self, name, new_name, bid, n_dims):
-        del bid, new_name, n_dims  # unused
         if ".conv" in name and ".weight" in name:
             return gguf.GGMLQuantizationType.F16
-        return False
+        return super().tensor_force_quant(name, new_name, bid, n_dims)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -8251,8 +8247,7 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling.get("original_max_position_embeddings", 4096))
 
 
-@ModelBase.register("Lfm2ForCausalLM")
-@ModelBase.register("LFM2ForCausalLM")
+@ModelBase.register("Lfm2ForCausalLM", "LFM2ForCausalLM")
 class LFM2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.LFM2
 
@@ -8287,13 +8282,55 @@ def set_gguf_parameters(self):
         self._add_feed_forward_length()
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        is_vision_tensor = "vision_tower" in name or "multi_modal_projector" in name
+        if is_vision_tensor:
+            # skip vision tensors
+            return []
+
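+        # the VL checkpoint nests the text model under "language_model.";
+        # strip the prefix so the plain LFM2 tensor mapping below applies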
+        name = name.replace("language_model.", "")
+
         # conv op requires 2d tensor
         if 'conv.conv' in name:
             data_torch = data_torch.squeeze(1)
 
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@ModelBase.register("Lfm2VlForConditionalGeneration")
+class LFM2VLModel(MmprojModel):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert self.hparams_vision is not None
+        # TODO(tarek): image_size is not specified for dynamic resolution; set it here for compatibility
+        self.hparams_vision["image_size"] = 256
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.LFM2)
+        self.gguf_writer.add_vision_attention_layernorm_eps(self.find_vparam(["layer_norm_eps"]))
+        self.gguf_writer.add_vision_projector_scale_factor(self.global_config.get("downsample_factor", 2))
+        self.gguf_writer.add_vision_use_gelu(True)
+        # Python indexing: vision_feature_layer == -1 picks the last layer, so vision_feature_layers_to_drop == 0
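+        # e.g. vision_feature_layer == -2 gives vision_feature_layers_to_drop == 1 (the last block is dropped)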
+        vision_feature_layers_to_drop = -(self.global_config.get("vision_feature_layer", -1) + 1)
+        self.gguf_writer.add_vision_block_count(self.find_vparam(self.n_block_keys) - vision_feature_layers_to_drop)
8316+
8317+ def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
8318+ del bid # unused
8319+ is_vision_tensor = "vision_tower" in name or "multi_modal_projector" in name
8320+
8321+ if is_vision_tensor :
8322+ # remove "model." prefix
8323+ name = name .replace ("model.vision_tower." , "vision_tower." )
8324+ name = name .replace ("model.multi_modal_projector." , "multi_modal_projector." )
8325+
8326+ if "patch_embedding.weight" in name :
+                data_torch = data_torch.view(data_torch.shape[0], 16, 16, 3).permute(0, 3, 1, 2)
+
+            return [(self.map_tensor_name(name), data_torch)]
+
+        return []  # skip other tensors
+
+
 @ModelBase.register("SmallThinkerForCausalLM")
 class SmallThinkerModel(TextModel):
     model_arch = gguf.MODEL_ARCH.SMALLTHINKER