@@ -3234,7 +3234,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
32343234 yield from super ().modify_tensors (data_torch , name , bid )
32353235
32363236
3237- @ModelBase .register ("Ernie4_5_ForCausalLM" , "Ernie4_5ForCausalLM" )
3237+ @ModelBase .register ("Ernie4_5_ForCausalLM" , "Ernie4_5ForCausalLM" , "PaddleOCRVLForConditionalGeneration" )
32383238class Ernie4_5Model (TextModel ):
32393239 model_arch = gguf .MODEL_ARCH .ERNIE4_5
32403240
@@ -3250,6 +3250,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
32503250 if (head_dim := self .hparams .get ("head_dim" )) is None :
32513251 head_dim = self .hparams ["hidden_size" ] // num_heads
32523252
3253+ if "mlp_AR" in name or "vision_model" in name :
3254+ # skip vision model and projector tensors
3255+ return []
3256+
32533257 if "ernie." in name :
32543258 name = name .replace ("ernie." , "model." )
32553259 # split the qkv weights
@@ -3368,6 +3372,36 @@ def prepare_tensors(self):
33683372 raise ValueError (f"Unprocessed experts: { experts } " )
33693373
33703374
@ModelBase.register("SiglipVisionModel")
class PaddleOCRVisionModel(MmprojModel):
    """Multimodal-projector converter for the PaddleOCR vision tower.

    PaddleOCR uses Siglip under the hood, so the class is registered under
    the ``SiglipVisionModel`` architecture name.  It only emits tensors
    whose name contains ``vision_model`` or ``mlp_AR``; everything else is
    dropped.
    """

    # Pixel budget read from the preprocessor config in ``__init__``.
    min_pixels: int = 0
    max_pixels: int = 0

    def __init__(self, *args, **kwargs):
        """Load the pixel limits and derive ``image_size`` from them."""
        super().__init__(*args, **kwargs)
        assert self.hparams_vision is not None

        size_cfg = self.preprocessor_config["size"]
        self.min_pixels = size_cfg["min_pixels"]
        self.max_pixels = size_cfg["max_pixels"]

        # image_size is the (truncated) square root of max_pixels.
        side = int(math.sqrt(self.max_pixels))
        self.hparams_vision["image_size"] = side

    def set_gguf_parameters(self):
        """Write the PaddleOCR-specific vision metadata to the GGUF writer."""
        super().set_gguf_parameters()
        assert self.hparams_vision is not None
        vision_cfg = self.hparams_vision
        writer = self.gguf_writer

        writer.add_clip_projector_type(gguf.VisionProjectorType.PADDLEOCR)
        writer.add_vision_max_pixels(self.max_pixels)
        writer.add_vision_min_pixels(self.min_pixels)
        writer.add_vision_use_gelu(True)
        # Falls back to 1e-6 when the vision config has no "rms_norm_eps".
        layernorm_eps = vision_cfg.get("rms_norm_eps", 1e-6)
        writer.add_vision_attention_layernorm_eps(layernorm_eps)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Return the mapped vision/projector tensor, or nothing for others.

        Tensors whose name contains ``vision_model`` or ``mlp_AR`` are
        renamed through ``map_tensor_name`` and passed through unchanged;
        any other tensor yields an empty list.
        """
        del bid  # unused

        wanted = any(marker in name for marker in ("vision_model", "mlp_AR"))
        if not wanted:
            return []  # skip other tensors
        return [(self.map_tensor_name(name), data_torch)]
3403+
3404+
33713405@ModelBase .register (
33723406 "Qwen2VLModel" ,
33733407 "Qwen2VLForConditionalGeneration" ,
0 commit comments