@@ -44,11 +44,12 @@ def __init__(self,
                  hf_config: AutoConfig = None,
                  backend: str = ''):
         super().__init__(model_path, with_llm, max_memory, hf_config, backend)
-        self.arch = hf_config.architectures[0]
+        self.arch = self.hf_config.architectures[0]
 
     def build_preprocessor(self):
         self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
         tokenizer = self.processor.tokenizer
+        self.image_token = self.processor.image_token
         self.image_token_id = tokenizer.context_image_token_id
         self.image_tokens_per_patch = self.processor.image_seq_length
         self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -146,8 +147,8 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
     def proc_messages(
+        self,
         messages,
         chat_template,
         sequence_start,
@@ -156,31 +157,29 @@ def proc_messages(
     ):
         """Apply chat template to get the prompt."""
         prompt_messages = []
-        IMAGE_TOKEN = '<IMAGE_TOKEN>'
         for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
-                continue
-            elif message['role'] in ['preprocess', 'forward']:
+            if message['role'] in ['preprocess', 'forward']:
                 continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
-                prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
-                prompt = prompt.replace('</img><img>', '')
-                prompt = prompt.replace('<img><img>', '<img>')
-                prompt = prompt.replace('</img></img>', '</img>')
-            elif IMAGE_TOKEN not in prompt:
-                prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
+            role, content = message['role'], message['content']
+            if role == 'user' and isinstance(content, List):
+                _content = []
+                for item in content:
+                    if item['type'] == 'text':
+                        _content.append(item['text'])
+                    elif item['type'] in ['image', 'image_url']:
+                        _content.append(self.image_token)
+                    else:
+                        raise ValueError(f'Unsupported message type: {item["type"]}')
+                message = dict(role=role, content='\n'.join(_content))
+                prompt_messages.append(message)
             else:
-                pass
-            prompt_messages.append(dict(role='user', content=prompt))
+                prompt_messages.append(message)
+
         prompt = chat_template.messages2prompt(prompt_messages,
                                                sequence_start,
                                                tools=tools,
                                                enable_thinking=enable_thinking)
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     def to_pytorch(self,
                    messages,
@@ -190,12 +189,12 @@ def to_pytorch(self,
                    tools: Optional[List[object]] = None,
                    enable_thinking: Optional[bool] = None,
                    **kwargs):
-        prompt, IMAGE_TOKEN = self.proc_messages(messages,
+        prompt, image_token = self.proc_messages(messages,
                                                  chat_template,
                                                  sequence_start,
                                                  tools=tools,
                                                  enable_thinking=enable_thinking)
-        return self.to_pytorch_aux(messages, prompt, IMAGE_TOKEN, tokenizer, sequence_start)
+        return self.to_pytorch_aux(messages, prompt, image_token, tokenizer, sequence_start)
 
     def to_turbomind(self,
                      messages,
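For reference, a minimal standalone sketch of what the reworked `proc_messages` loop does to an OpenAI-style multimodal user message: text items are kept, image items are replaced by the processor's image token, and the pieces are joined with newlines. The token value and the `flatten_user_content` helper below are illustrative assumptions, not part of the patch; in the patched class the token comes from `self.processor.image_token`.

```python
from typing import Dict

# Placeholder for self.image_token, which the patch reads from the HF processor.
IMAGE_TOKEN = '<IMAGE_TOKEN>'


def flatten_user_content(message: Dict) -> Dict:
    """Mirror the new proc_messages handling of a single user message."""
    role, content = message['role'], message['content']
    if role != 'user' or not isinstance(content, list):
        # Plain-text and non-user messages pass through unchanged.
        return message
    parts = []
    for item in content:
        if item['type'] == 'text':
            parts.append(item['text'])
        elif item['type'] in ('image', 'image_url'):
            parts.append(IMAGE_TOKEN)
        else:
            raise ValueError(f'Unsupported message type: {item["type"]}')
    return dict(role=role, content='\n'.join(parts))


msg = dict(role='user',
           content=[dict(type='image', image='demo.jpg'),
                    dict(type='text', text='Describe the image.')])
print(flatten_user_content(msg))
# {'role': 'user', 'content': '<IMAGE_TOKEN>\nDescribe the image.'}
```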