@@ -1531,7 +1531,9 @@ class MmprojModel(ModelBase):
15311531 preprocessor_config : dict [str , Any ]
15321532 global_config : dict [str , Any ]
15331533
1534- n_block_keys = ["n_layers" , "num_hidden_layers" , "n_layer" , "num_layers" , "depth" ]
1534+ # Prefer the explicit "layers" key (e.g. JinaCLIP);
1535+ # keep the legacy keys for other models.
1536+ n_block_keys = ["layers" , "n_layers" , "num_hidden_layers" , "n_layer" , "num_layers" , "depth" ]
15351537
15361538 has_vision_encoder : bool = True # by default
15371539 has_audio_encoder : bool = False
@@ -6775,6 +6777,11 @@ def __init__(self, *args, **kwargs):
67756777 with open (config_path , encoding = "utf-8" ) as f :
67766778 self .vision_config = json .load (f )
67776779
6780+ def get_vision_config (self ) -> dict [str , Any ] | None :
6781+ # For JinaCLIPVisionModel, the top-level AutoConfig dict is already
6782+ # the vision-only configuration.
6783+ return self .global_config
6784+
67786785 def set_vocab (self ):
67796786 # Vision encoder doesn't need vocabulary
67806787 pass
@@ -6832,73 +6839,10 @@ def set_gguf_parameters(self):
68326839 def _strip_vm_prefix (self , name : str ) -> str :
68336840 return name [len ('vision_model.' ):] if name .startswith ('vision_model.' ) else name
68346841
6835- def _map_block_tensor (self , layer : int , rest : str , data_torch : Tensor , name : str ) -> list [tuple [str , Tensor ]] | None :
6836- parts = rest .split ('.' )
6837- # layer norms
6838- if rest .startswith ('norm1.' ):
6839- suffix = parts [- 1 ]
6840- return [(f'v.blk.{ layer } .ln1.{ suffix } ' , data_torch )]
6841- if rest .startswith ('norm2.' ):
6842- suffix = parts [- 1 ]
6843- return [(f'v.blk.{ layer } .ln2.{ suffix } ' , data_torch )]
6844- if rest .startswith ('attn.inner_attn_ln.' ):
6845- suffix = parts [- 1 ]
6846- return [(f'v.blk.{ layer } .attn_ln.{ suffix } ' , data_torch )]
6847-
6848- if rest == 'attn.q_bias' :
6849- return [(f'v.blk.{ layer } .attn_q.bias' , data_torch )]
6850- if rest == 'attn.v_bias' :
6851- return [(f'v.blk.{ layer } .attn_v.bias' , data_torch )]
6852-
6853- if rest .startswith ('attn.q_proj.' ):
6854- suffix = parts [- 1 ]
6855- return [(f'v.blk.{ layer } .attn_q.{ suffix } ' , data_torch )]
6856- if rest .startswith ('attn.k_proj.' ):
6857- suffix = parts [- 1 ]
6858- return [(f'v.blk.{ layer } .attn_k.{ suffix } ' , data_torch )]
6859- if rest .startswith ('attn.v_proj.' ):
6860- suffix = parts [- 1 ]
6861- return [(f'v.blk.{ layer } .attn_v.{ suffix } ' , data_torch )]
6862- if rest .startswith ('attn.proj.' ):
6863- suffix = parts [- 1 ]
6864- return [(f'v.blk.{ layer } .attn_out.{ suffix } ' , data_torch )]
6865-
6866- # MLP
6867- if rest .startswith ('mlp.w1.' ):
6868- suffix = parts [- 1 ]
6869- return [(f'v.blk.{ layer } .ffn_gate.{ suffix } ' , data_torch )]
6870- if rest .startswith ('mlp.w2.' ):
6871- suffix = parts [- 1 ]
6872- return [(f'v.blk.{ layer } .ffn_up.{ suffix } ' , data_torch )]
6873- if rest .startswith ('mlp.w3.' ):
6874- suffix = parts [- 1 ]
6875- return [(f'v.blk.{ layer } .ffn_down.{ suffix } ' , data_torch )]
6876- if rest .startswith ('mlp.ffn_ln.' ):
6877- suffix = parts [- 1 ]
6878- return [(f'v.blk.{ layer } .ffn_norm.{ suffix } ' , data_torch )]
6879- if rest .startswith ('mlp.fc1.' ):
6880- suffix = parts [- 1 ]
6881- return [(f'v.blk.{ layer } .ffn_up.{ suffix } ' , data_torch )]
6882- if rest .startswith ('mlp.fc2.' ):
6883- suffix = parts [- 1 ]
6884- return [(f'v.blk.{ layer } .ffn_down.{ suffix } ' , data_torch )]
6885- return None
6886-
68876842 def map_tensor_name (self , name : str , try_suffixes : Sequence [str ] = (".weight" , ".bias" )) -> str :
6888- """Prefer base table-driven mapping; keep Jina-specific targets if already mapped; fallback to legacy mapper."""
6889- # Already a GGUF target name (e.g., "v.*" or "mm.*"): return as-is
68906843 if name .startswith ('v.' ) or name .startswith ('mm.' ):
68916844 return name
6892- # Try the base mapping first
6893- try :
6894- return super ().map_tensor_name (name , try_suffixes = try_suffixes )
6895- except Exception :
6896- # Fallback to legacy Jina-specific mapper for any remaining edge keys
6897- if hasattr (self , "_map_jinaclip_tensor_name" ):
6898- mapped = self ._map_jinaclip_tensor_name (name ) # type: ignore[attr-defined]
6899- if mapped :
6900- return mapped
6901- return name
6845+ return super ().map_tensor_name (name , try_suffixes = try_suffixes )
69026846
69036847 def get_tensors (self ) -> Iterator [tuple [str , Tensor ]]:
69046848 yielded_any = False
@@ -6937,39 +6881,10 @@ def _should_be_f32(self, gguf_name: str) -> bool:
69376881 return any (p in gguf_name for p in patterns )
69386882
69396883 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
6940- del bid # unused
6941-
6942- src = name
6943- if src .startswith ('v.' ) or src .startswith ('mm.' ):
6944- return [(src , data_torch )]
6945-
6946- # Drop 'vision_model.' prefix if present
6947- src_no_vm = self ._strip_vm_prefix (src )
6948-
6949- # Top-level direct mappings — use gguf constants directly for canonical names
6950- if src_no_vm == 'cls_token' :
6951- base = gguf .TENSOR_NAMES [gguf .MODEL_TENSOR .V_ENC_EMBD_CLS ]
6952- return [(base , data_torch )]
6953- if src_no_vm .startswith ('patch_embed.proj.' ):
6954- suffix = src_no_vm .split ('.' )[- 1 ]
6955- base = gguf .TENSOR_NAMES [gguf .MODEL_TENSOR .V_ENC_EMBD_PATCH ]
6956- return [(f'{ base } .{ suffix } ' , data_torch )]
6957- if src_no_vm == 'pos_embed' :
6884+ # keep only pos_embed special case (no .weight suffix); all other tensors use table-driven mapping
6885+ if name == 'pos_embed' :
69586886 pos_name = gguf .TENSOR_NAMES [gguf .MODEL_TENSOR .V_ENC_EMBD_POS ] + '.weight'
69596887 return [(pos_name , data_torch )]
6960- if src_no_vm .startswith ('norm.' ):
6961- suffix = src_no_vm .split ('.' )[- 1 ]
6962- base = gguf .TENSOR_NAMES [gguf .MODEL_TENSOR .V_POST_NORM ]
6963- return [(f'{ base } .{ suffix } ' , data_torch )]
6964-
6965- if src_no_vm .startswith ('blocks.' ):
6966- parts = src_no_vm .split ('.' )
6967- if len (parts ) >= 3 and parts [1 ].isdigit ():
6968- layer = int (parts [1 ])
6969- rest = '.' .join (parts [2 :])
6970- mapped = self ._map_block_tensor (layer , rest , data_torch , name )
6971- if mapped is not None :
6972- return mapped
69736888
69746889 try :
69756890 return [(self .map_tensor_name (name ), data_torch )]
0 commit comments