@@ -5572,6 +5572,7 @@ def _new_forward(*args, **kwargs) -> Tensor:
     LoRATM.llama,
     TemplateType.minicpm_v,
     support_flash_attn=True,
+    requires=['timm', 'transformers<4.42'],
     tags=['multi-modal', 'vision'],
     hf_model_id='openbmb/MiniCPM-V')
 @register_model(
@@ -5580,44 +5581,53 @@ def _new_forward(*args, **kwargs) -> Tensor:
     LoRATM.llama,
     TemplateType.minicpm_v,
     support_flash_attn=True,
-    requires=['timm'],
+    requires=['timm', 'transformers<4.42'],
     tags=['multi-modal', 'vision'],
     hf_model_id='openbmb/MiniCPM-V-2')
-@register_model(
-    ModelType.minicpm_v_v2_5_chat,
-    'OpenBMB/MiniCPM-Llama3-V-2_5',
-    LoRATM.minicpm_llama,
-    TemplateType.minicpm_v_v2_5,
-    support_flash_attn=True,
-    support_lmdeploy=True,
-    requires=['timm'],
-    placeholder_tokens=['<unk>'],
-    function_kwargs={'patching_embedding': True},
-    tags=['multi-modal', 'vision'],
-    hf_model_id='openbmb/MiniCPM-Llama3-V-2_5')
 def get_model_tokenizer_minicpm_v(model_dir: str,
                                   torch_dtype: Dtype,
                                   model_kwargs: Dict[str, Any],
                                   load_model: bool = True,
                                   **kwargs):
-    patching_embedding = kwargs.pop('patching_embedding', False)
     model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, torch_dtype, model_kwargs, load_model, **kwargs)
     if load_model:
         model.resampler.to(torch_dtype)  # fix float32
         _patch_minicpm_v_device_map(model)
         func_list = ['generate', 'get_input_embeddings', 'forward']
         _use_submodel_func(model, 'llm', func_list)
-        if patching_embedding:
-            embedding = model.get_input_embeddings()
-            if not hasattr(embedding, '__old_forward'):  # Avoid double patching
-                old_forward = embedding.forward
+    return model, tokenizer
+
+
+@register_model(
+    ModelType.minicpm_v_v2_5_chat,
+    'OpenBMB/MiniCPM-Llama3-V-2_5',
+    LoRATM.minicpm_llama,
+    TemplateType.minicpm_v_v2_5,
+    support_flash_attn=True,
+    requires=['timm', 'transformers>=4.36'],
+    placeholder_tokens=['<unk>'],
+    tags=['multi-modal', 'vision'],
+    hf_model_id='openbmb/MiniCPM-Llama3-V-2_5')
+def get_model_tokenizer_minicpm_v_2_5(model_dir: str,
+                                      torch_dtype: Dtype,
+                                      model_kwargs: Dict[str, Any],
+                                      load_model: bool = True,
+                                      **kwargs):
+    from transformers import AutoProcessor
+    processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)
+    model, tokenizer = get_model_tokenizer_minicpm_v(model_dir, torch_dtype, model_kwargs, load_model, **kwargs)
+    tokenizer.processor = processor
+    if load_model:
+        embedding = model.get_input_embeddings()
+        if not hasattr(embedding, '__old_forward'):  # Avoid double patching
+            old_forward = embedding.forward

-                @wraps(old_forward)
-                def _new_forward(*args, **kwargs):
-                    return old_forward(*args, **kwargs).requires_grad_(True).clone()
+            @wraps(old_forward)
+            def _new_forward(*args, **kwargs):
+                return old_forward(*args, **kwargs).requires_grad_(True).clone()

-                embedding.__old_forward = old_forward
-                embedding.forward = _new_forward
+            embedding.__old_forward = old_forward
+            embedding.forward = _new_forward
     return model, tokenizer


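For reference, the embedding patch moved into get_model_tokenizer_minicpm_v_2_5 makes the embedding output carry requires_grad=True even when the embedding weights are frozen, which gradient checkpointing needs during LoRA-style training. Below is a minimal, self-contained sketch of the same trick; the helper name patch_embedding_requires_grad and the toy nn.Embedding are illustrative only and are not part of this commit.

from functools import wraps

import torch
from torch import nn


def patch_embedding_requires_grad(embedding: nn.Module) -> None:
    # Illustrative helper (not from this commit): wrap embedding.forward so its
    # output requires grad, mirroring the _new_forward patch in the diff above.
    if hasattr(embedding, '__old_forward'):  # avoid double patching
        return
    old_forward = embedding.forward

    @wraps(old_forward)
    def _new_forward(*args, **kwargs):
        # requires_grad_ is valid here because the output of a frozen embedding
        # is a leaf tensor; clone() then gives it a grad_fn for checkpointing.
        return old_forward(*args, **kwargs).requires_grad_(True).clone()

    embedding.__old_forward = old_forward
    embedding.forward = _new_forward


if __name__ == '__main__':
    emb = nn.Embedding(10, 4)
    emb.weight.requires_grad_(False)  # e.g. frozen embeddings under LoRA
    patch_embedding_requires_grad(emb)
    out = emb(torch.tensor([1, 2, 3]))
    print(out.requires_grad)  # True, despite the frozen weight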