@@ -2588,6 +2588,8 @@ def __init__(
25882588        )
25892589        self ._behavior  =  behavior 
25902590        self ._orig_config  =  config 
2591+         model_mapping  =  {2.6 : "llama" , 4.0 : "qwen2" , 4.5 : "qwen3" }
2592+         self .model_type  =  model_mapping [self ._orig_config .version ]
25912593        if  self ._behavior  ==  MiniCPMVConfigBehavior .VISION_EMBEDDINGS  and  hasattr (config , "vision_config" ):
25922594            self ._config  =  config .vision_config 
25932595            self .DUMMY_INPUT_GENERATOR_CLASSES  =  (DummyMiniCPMVImageInputGenerator ,)
@@ -2604,12 +2606,19 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
26042606                "position_ids" : {0 : "batch_size" , 1 : "patch_size" },
26052607            }
26062608        if  self ._behavior  ==  MiniCPMVConfigBehavior .RESAMPLER :
2607-             return  {
2608-                 "image_feature" : {0 : "batch_size" , 1 : "patch_height" , 2 : "patch_width" },
2609-                 "pos_embed" : {0 : "patch_size" , 1 : "batch_size" , 2 : "num_patches" },
2610-                 "key_padding_mask" : {0 : "batch_size" , 1 : "patch_size" },
2611-                 "temporal_embed" : {0 : "patch_size" , 1 : "batch_size" },
2612-             }
2609+             if  self ._orig_config .version  ==  4.5 :
2610+                 return  {
2611+                     "image_feature" : {0 : "batch_size" , 1 : "patch_height" , 2 : "patch_width" },
2612+                     "pos_embed" : {0 : "patch_size" , 1 : "batch_size" , 2 : "num_patches" },
2613+                     "key_padding_mask" : {0 : "batch_size" , 1 : "patch_size" },
2614+                     "temporal_embed" : {0 : "patch_size" , 1 : "batch_size" },
2615+                 }
2616+             else :
2617+                 return  {
2618+                     "image_feature" : {0 : "batch_size" , 1 : "patch_height" , 2 : "patch_width" },
2619+                     "pos_embed" : {0 : "patch_size" , 1 : "batch_size" , 2 : "num_patches" },
2620+                     "key_padding_mask" : {0 : "batch_size" , 1 : "patch_size" },
2621+                 }
26132622        return  {}
26142623
26152624    @property  
@@ -2633,18 +2642,18 @@ def with_behavior(
26332642        """ 
26342643        if  isinstance (behavior , str ) and  not  isinstance (behavior , MiniCPMVConfigBehavior ):
26352644            behavior  =  MiniCPMVConfigBehavior (behavior )
2636-          model_mapping   =  { 2.6 :  "llama" ,  4.0 :  "qwen2" ,  4.5 :  "qwen3" } 
2645+ 
26372646        if  behavior  ==  MiniCPMVConfigBehavior .TEXT_EMBEDDINGS :
26382647            return  get_vlm_text_embeddings_config (
2639-                 model_mapping [ self ._orig_config . version ] ,
2648+                 self .model_type ,
26402649                self ._orig_config ,
26412650                self .int_dtype ,
26422651                self .float_dtype ,
26432652            )
26442653
26452654        if  behavior  ==  MiniCPMVConfigBehavior .LANGUAGE :
26462655            return  get_vlm_text_generation_config (
2647-                 model_mapping [ self ._orig_config . version ] ,
2656+                 self .model_type ,
26482657                self ._orig_config ,
26492658                self .int_dtype ,
26502659                self .float_dtype ,
0 commit comments