@@ -9806,7 +9806,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
@ModelBase.register("JanusForConditionalGeneration")
class JanusProModel(LlamaModel):
    # The Janus-Pro language tower is a plain Llama model, so the converter
    # reuses the Llama architecture wholesale.
    model_arch = gguf.MODEL_ARCH.LLAMA  # reuse Llama arch

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Filter out non-language tensors and remap multimodal-wrapper names onto Llama names."""
        # Vision, aligner, and image-generation tensors are not part of the
        # text-only GGUF; drop them here.
        # NOTE(review): the prefix list below was reconstructed from an elided
        # diff hunk — verify against the upstream file.
        skip_prefixes = (
            'model.vision_model.',
            'model.aligner.',
            'model.vqmodel.',
            'model.generation_embeddings.',
            'model.generation_aligner.',
            'model.generation_head.',
        )
        if name.startswith(skip_prefixes):
            return []

        # Strip the multimodal wrapper prefix so tensor names line up with a
        # vanilla Llama checkpoint before delegating to the base converter.
        if name.startswith('model.language_model.'):
            name = 'model.' + name[len('model.language_model.'):]
        elif name.startswith('language_model.'):
            name = name[len('language_model.'):]

        return super().modify_tensors(data_torch, name, bid)
98299829
98309830
@@ -9838,25 +9838,25 @@ def __init__(self, *args, **kwargs):
98389838 hidden_size = self .hparams_vision .get ("hidden_size" )
98399839 if mlp_ratio is not None and hidden_size is not None :
98409840 self .hparams_vision ["intermediate_size" ] = int (round (hidden_size * mlp_ratio ))
9841-
9841+
98429842 def set_gguf_parameters (self ):
98439843 super ().set_gguf_parameters ()
98449844 assert self .hparams_vision is not None
9845-
9845+
98469846 self .gguf_writer .add_clip_projector_type (gguf .VisionProjectorType .JANUS_PRO )
9847-
9847+
98489848 self .gguf_writer .add_vision_attention_layernorm_eps (self .hparams_vision .get ("layer_norm_eps" , 1e-6 ))
9849-
9849+
98509850 hidden_act = str (self .hparams_vision .get ("hidden_act" , "" )).lower ()
98519851 if hidden_act == "gelu" :
98529852 self .gguf_writer .add_vision_use_gelu (True )
98539853 elif hidden_act == "silu" :
98549854 self .gguf_writer .add_vision_use_silu (True )
9855-
9855+
98569856 def _map_aligner_tensor (self , data_torch : Tensor , name : str ) -> Iterable [tuple [str , Tensor ]]:
98579857 """Map aligner tensors to projector format"""
98589858 suffix = ".bias" if name .endswith (".bias" ) else ".weight"
9859-
9859+
98609860 if name .startswith ("model.aligner." ):
98619861 local_name = name [len ("model.aligner." ):]
98629862 elif name .startswith ("aligner." ):
@@ -9873,17 +9873,17 @@ def _map_aligner_tensor(self, data_torch: Tensor, name: str) -> Iterable[tuple[s
98739873 mm_index = int (parts [1 ]) + 1
98749874 else :
98759875 raise ValueError (f"Unsupported Janus aligner tensor: { name } " )
9876-
9876+
98779877 tensor_name = self .format_tensor_name (gguf .MODEL_TENSOR .V_MMPROJ , mm_index , suffix = suffix )
98789878 return [(tensor_name , data_torch )]
9879-
9879+
98809880 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
98819881 del bid # unused
9882-
9882+
98839883 # Skip language model tensors as they will be handled by `JanusProModel`
98849884 if name .startswith (('model.language_model.' , 'language_model.' )):
98859885 return []
9886-
9886+
98879887 # Skip generation-related components
98889888 skip_generation_prefixes = (
98899889 'model.vqmodel.' ,
@@ -9897,15 +9897,15 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
98979897 )
98989898 if name .startswith (skip_generation_prefixes ):
98999899 return []
9900-
9900+
99019901 # Handle aligner tensors
99029902 if name .startswith (('model.aligner.' , 'aligner.' )):
99039903 return list (self ._map_aligner_tensor (data_torch , name ))
9904-
9904+
99059905 # Handle vision tensors
99069906 if name .startswith (('model.vision_model.' , 'vision_model.' )):
99079907 return [(self .map_tensor_name (name ), data_torch )]
9908-
9908+
99099909 return []
99109910
99119911
0 commit comments