Skip to content

Commit 899f428

Browse files
committed
support interleaving text and images in messages
1 parent 1a859f4 commit 899f428

File tree

1 file changed

+21
-22
lines changed

1 file changed

+21
-22
lines changed

lmdeploy/vl/model/internvl3_hf.py

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,12 @@ def __init__(self,
4444
hf_config: AutoConfig = None,
4545
backend: str = ''):
4646
super().__init__(model_path, with_llm, max_memory, hf_config, backend)
47-
self.arch = hf_config.architectures[0]
47+
self.arch = self.hf_config.architectures[0]
4848

4949
def build_preprocessor(self):
5050
self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
5151
tokenizer = self.processor.tokenizer
52+
self.image_token = self.processor.image_token
5253
self.image_token_id = tokenizer.context_image_token_id
5354
self.image_tokens_per_patch = self.processor.image_seq_length
5455
self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -146,8 +147,8 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
146147
messages.append(dict(role='forward', content=outputs))
147148
return messages
148149

149-
@staticmethod
150150
def proc_messages(
151+
self,
151152
messages,
152153
chat_template,
153154
sequence_start,
@@ -156,31 +157,29 @@ def proc_messages(
156157
):
157158
"""Apply chat template to get the prompt."""
158159
prompt_messages = []
159-
IMAGE_TOKEN = '<IMAGE_TOKEN>'
160160
for message in messages:
161-
if isinstance(message['content'], str):
162-
prompt_messages.append(message)
163-
continue
164-
elif message['role'] in ['preprocess', 'forward']:
161+
if message['role'] in ['preprocess', 'forward']:
165162
continue
166-
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
167-
content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
168-
prompt = content[0]
169-
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
170-
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
171-
prompt = prompt.replace('</img><img>', '')
172-
prompt = prompt.replace('<img><img>', '<img>')
173-
prompt = prompt.replace('</img></img>', '</img>')
174-
elif IMAGE_TOKEN not in prompt:
175-
prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
163+
role, content = message['role'], message['content']
164+
if role == 'user' and isinstance(content, List):
165+
_content = []
166+
for item in content:
167+
if item['type'] == 'text':
168+
_content.append(item['text'])
169+
elif item['type'] in ['image', 'image_url']:
170+
_content.append(self.image_token)
171+
else:
172+
raise ValueError(f'Unsupported message type: {item["type"]}')
173+
message = dict(role=role, content='\n'.join(_content))
174+
prompt_messages.append(message)
176175
else:
177-
pass
178-
prompt_messages.append(dict(role='user', content=prompt))
176+
prompt_messages.append(message)
177+
179178
prompt = chat_template.messages2prompt(prompt_messages,
180179
sequence_start,
181180
tools=tools,
182181
enable_thinking=enable_thinking)
183-
return prompt, IMAGE_TOKEN
182+
return prompt, self.image_token
184183

185184
def to_pytorch(self,
186185
messages,
@@ -190,12 +189,12 @@ def to_pytorch(self,
190189
tools: Optional[List[object]] = None,
191190
enable_thinking: Optional[bool] = None,
192191
**kwargs):
193-
prompt, IMAGE_TOKEN = self.proc_messages(messages,
192+
prompt, image_token = self.proc_messages(messages,
194193
chat_template,
195194
sequence_start,
196195
tools=tools,
197196
enable_thinking=enable_thinking)
198-
return self.to_pytorch_aux(messages, prompt, IMAGE_TOKEN, tokenizer, sequence_start)
197+
return self.to_pytorch_aux(messages, prompt, image_token, tokenizer, sequence_start)
199198

200199
def to_turbomind(self,
201200
messages,

0 commit comments

Comments
 (0)