25 changes: 3 additions & 22 deletions .github/workflows/unit-test.yml
@@ -35,43 +35,24 @@ jobs:
runs-on: [self-hosted, linux-a100-s2]
timeout-minutes: 4320 # 72hours
container:
image: nvidia/cuda:11.8.0-devel-ubuntu22.04
image: openmmlab/lmdeploy:dev-cu12.8
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 --pull never"
volumes:
- /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
- /nvme/share_data/github-actions/hf_home:/root/.cache/huggingface
- /nvme/share_data/github-actions/packages:/root/packages
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Setup systems
run: |
apt-get update -y && apt-get install -y software-properties-common wget git curl &&\
add-apt-repository ppa:deadsnakes/ppa -y && apt-get update -y && apt-get install -y --no-install-recommends \
ninja-build rapidjson-dev libgoogle-glog-dev gdb python3.10 python3.10-dev python3.10-venv \
&& apt-get clean -y && rm -rf /var/lib/apt/lists/* && cd /opt && python3 -m venv py3
echo "PATH=/opt/py3/bin:$PATH" >> "$GITHUB_ENV"
- name: Clone repository
uses: actions/checkout@v2
- name: Install pytorch
run: |
python3 -V
python3 -m pip cache dir
python3 -m pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu118
uses: actions/checkout@v5
- name: Install lmdeploy
run: |
python3 -m pip install packaging protobuf transformers_stream_generator matplotlib
# manually install flash attn
python3 -m pip install /root/packages/cu118/flash_attn-*.whl
python3 -m pip install -r requirements_cuda.txt -r requirements/test.txt
python3 -m pip install -r requirements/test.txt
python3 -m pip install -e .
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
- name: Test lmdeploy csrc
run: |
#./build/bin/build/bin/unittest
echo "TODO"
- name: Test lmdeploy python UT
run: |
coverage run --branch --source lmdeploy -m pytest -rsE tests
27 changes: 27 additions & 0 deletions lmdeploy/messages.py
@@ -473,6 +473,33 @@ class Response:
index: int = 0
routed_experts: Any = None

def __str__(self):
fields = []

fields.append('text=')
fields.append(self.text if self.text is not None else 'None')
fields.append(f'input_token_len={self.input_token_len}')
fields.append(f'generate_token_len={self.generate_token_len}')
fields.append(f'finish_reason="{self.finish_reason}"')
fields.append(f'token_ids={self.token_ids}')
fields.append(f'logprobs={self.logprobs}')

# Helper function to format tensor information
def _format_tensor(name: str, tensor: Optional[torch.Tensor]) -> List[str]:
if tensor is None:
return [f'{name}=None']
return [f'{name}.shape={tensor.shape}', f'{name}={tensor}']

# Format tensor fields
fields.extend(_format_tensor('logits', self.logits))
fields.extend(_format_tensor('last_hidden_state', self.last_hidden_state))

if self.routed_experts is None:
fields.append('routed_experts=None')
else:
fields.append(f'routed_experts.shape={self.routed_experts.shape}')
return '\n'.join(fields)

def __repr__(self):
logits = 'logits=None' if self.logits is None else f'logits.shape={self.logits.shape}\nlogits={self.logits}'
hidden_state = (
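For orientation (not part of the diff): a minimal sketch of how the new `__str__` could be exercised, assuming the remaining `Response` dataclass fields are optional and keep their defaults.

```python
from lmdeploy.messages import Response

# Illustrative values only; fields not listed here are assumed to keep their defaults.
resp = Response(text='Hello!', input_token_len=12, generate_token_len=3,
                finish_reason='stop', token_ids=[9906, 0], logprobs=None)
print(resp)        # __str__: one name=value entry per line; tensors reported by shape
print(repr(resp))  # __repr__: same idea for logits / last_hidden_state
```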
21 changes: 21 additions & 0 deletions lmdeploy/vl/model/base.py
@@ -181,6 +181,27 @@ def collect_images(messages):
}) for x in content if x['type'] == 'image'])
return images

@staticmethod
def IMAGE_TOKEN_included(messages):
"""Check whether the IMAGE_TOKEN is included in the messages.

Args:
messages (List[Dict]): a list of messages
Returns:
bool: whether the IMAGE_TOKEN is included in the messages
"""
for message in messages:
role, content = message['role'], message['content']
if role != 'user':
continue
if isinstance(content, str) and '<IMAGE_TOKEN>' in content:
return True
elif isinstance(content, List):
content = [x['text'] for x in content if x['type'] == 'text']
if any('<IMAGE_TOKEN>' in x for x in content):
return True
return False

def to_pytorch_with_input_ids(self, messages):
"""Pack the preprocessing results in a format compatible with what is
required by pytorch engine when input_ids are provided directly.
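A hedged usage sketch of the new helper (the message layout is assumed to follow the OpenAI-style content lists used elsewhere in this PR):

```python
# Returns True only when a user turn carries the literal '<IMAGE_TOKEN>' placeholder.
messages = [
    dict(role='system', content='You are a helpful assistant.'),
    dict(role='user', content=[
        dict(type='text', text='<IMAGE_TOKEN>\nDescribe the image.'),
        dict(type='image_url', image_url=dict(url='https://example.com/cat.jpg')),
    ]),
]
assert VisonModel.IMAGE_TOKEN_included(messages)
assert not VisonModel.IMAGE_TOKEN_included([dict(role='user', content='hi')])
```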
66 changes: 41 additions & 25 deletions lmdeploy/vl/model/internvl.py
@@ -76,9 +76,9 @@ def __init__(self,
hf_config: AutoConfig = None,
backend: str = ''):
super().__init__(model_path, with_llm, max_memory, hf_config, backend)
IMG_CONTEXT_TOKEN = '<IMG_CONTEXT>'
self.image_token = '<IMG_CONTEXT>'
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
self.image_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
self.image_token_id = tokenizer.convert_tokens_to_ids(self.image_token)

def build_preprocessor(self):
self.config = self.hf_config
@@ -224,8 +224,8 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
messages.append(dict(role='forward', content=outputs))
return messages

@staticmethod
def proc_messages(
self,
messages,
chat_template,
sequence_start,
@@ -235,32 +235,48 @@ def proc_messages(
"""Apply chat template to get the prompt."""
prompt_messages = []
IMAGE_TOKEN = '<IMAGE_TOKEN>'
for message in messages:
if isinstance(message['content'], str):
prompt_messages.append(message)
continue
elif message['role'] in ['preprocess', 'forward']:
continue
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
if len(content) == 0:
content.append('')
prompt = content[0]
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
prompt = prompt.replace('</img><img>', '')
prompt = prompt.replace('<img><img>', '<img>')
prompt = prompt.replace('</img></img>', '</img>')
elif IMAGE_TOKEN not in prompt:
prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
else:
pass
prompt_messages.append(dict(role='user', content=prompt))
messages = [x for x in messages if x['role'] not in ['preprocess', 'forward']]
if VisonModel.IMAGE_TOKEN_included(messages):
# backward compatibility
for message in messages:
role, content = message['role'], message['content']
if role != 'user' or isinstance(content, str):
prompt_messages.append(message)
continue
n_images = len([1 for x in content if x['type'] == 'image'])
content = [x['text'] for x in content if x['type'] == 'text']
prompt = '\n'.join(content)
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{self.image_token}</img>')
prompt = prompt.replace('</img><img>', '')
prompt = prompt.replace('<img><img>', '<img>')
prompt = prompt.replace('</img></img>', '</img>')
elif IMAGE_TOKEN not in prompt:
prompt = f'<img>{self.image_token * n_images}</img>\n' + prompt
else:
pass
prompt_messages.append(dict(role='user', content=prompt))
else:
for message in messages:
role, content = message['role'], message['content']
if role != 'user' or isinstance(content, str):
prompt_messages.append(message)
continue
_content = []
for item in content:
item_type = item['type']
if item_type == 'text':
_content.append(item['text'])
elif item_type in ['image', 'image_url']:
_content.append(f'<img>{self.image_token}</img>')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
prompt_messages.append(dict(role='user', content='\n'.join(_content)))
prompt = chat_template.messages2prompt(prompt_messages,
sequence_start,
tools=tools,
enable_thinking=enable_thinking)
return prompt, IMAGE_TOKEN
return prompt, self.image_token

def to_pytorch(self,
messages,
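To illustrate the two branches above (sketch only; the URL is a placeholder): when no legacy `<IMAGE_TOKEN>` placeholder is present, each image item is expanded into its own `<img>...</img>` block, while prompts that already contain the placeholder keep the old rewriting.

```python
# New-style content without '<IMAGE_TOKEN>': each image becomes <img><IMG_CONTEXT></img>.
content = [
    dict(type='image_url', image_url=dict(url='https://example.com/cat.jpg')),
    dict(type='text', text='What is in the picture?'),
]
# expected user prompt handed to the chat template:
#   '<img><IMG_CONTEXT></img>\nWhat is in the picture?'

# Legacy prompt that already carries the placeholder keeps the old rewriting:
#   'Compare <IMAGE_TOKEN> and <IMAGE_TOKEN>'
#   -> 'Compare <img><IMG_CONTEXT></img> and <img><IMG_CONTEXT></img>'
```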
68 changes: 48 additions & 20 deletions lmdeploy/vl/model/internvl3_hf.py
@@ -44,11 +44,12 @@ def __init__(self,
hf_config: AutoConfig = None,
backend: str = ''):
super().__init__(model_path, with_llm, max_memory, hf_config, backend)
self.arch = hf_config.architectures[0]
self.arch = self.hf_config.architectures[0]

def build_preprocessor(self):
self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
tokenizer = self.processor.tokenizer
self.image_token = self.processor.image_token
self.image_token_id = tokenizer.context_image_token_id
self.image_tokens_per_patch = self.processor.image_seq_length
self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -146,8 +147,38 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
messages.append(dict(role='forward', content=outputs))
return messages

@staticmethod
def proc_internvl_hf_messages(self, content: List[Dict]):
"""Process the content list of role 'user' for InternVL HF models."""
res = []
for item in content:
if item['type'] == 'text':
# backward compatibility
text = item['text']
text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
res.append(text)
elif item['type'] in ['image', 'image_url']:
res.append(f'{self.image_token}\n')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
return ''.join(res)

def proc_interns1_messages(self, content: List[Dict]):
"""Process the content list of role 'user' for InternS1 models."""
res = []
for item in content:
if item['type'] == 'text':
# backward compatibility
text = item['text']
text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
res.append(text)
elif item['type'] in ['image', 'image_url']:
res.append(f'{self.image_token}')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
return '\n'.join(res)

def proc_messages(
self,
messages,
chat_template,
sequence_start,
@@ -156,31 +187,28 @@
):
"""Apply chat template to get the prompt."""
prompt_messages = []
IMAGE_TOKEN = '<IMAGE_TOKEN>'

for message in messages:
if isinstance(message['content'], str):
prompt_messages.append(message)
if message['role'] in ['preprocess', 'forward']:
continue
elif message['role'] in ['preprocess', 'forward']:
continue
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
prompt = content[0]
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
prompt = prompt.replace('</img><img>', '')
prompt = prompt.replace('<img><img>', '<img>')
prompt = prompt.replace('</img></img>', '</img>')
elif IMAGE_TOKEN not in prompt:
prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
role, content = message['role'], message['content']
if role == 'user' and isinstance(content, List):
content = (self.proc_internvl_hf_messages(content)
if self.arch == 'InternVLForConditionalGeneration' else self.proc_interns1_messages(content))
message = dict(role=role, content=content)
prompt_messages.append(message)
else:
pass
prompt_messages.append(dict(role='user', content=prompt))
# backward compatibility
content = (content.replace('<IMAGE_TOKEN>', self.image_token)
if isinstance(content, str) and '<IMAGE_TOKEN>' in content else content)
message = dict(role=role, content=content)
prompt_messages.append(message)

prompt = chat_template.messages2prompt(prompt_messages,
sequence_start,
tools=tools,
enable_thinking=enable_thinking)
return prompt, IMAGE_TOKEN
return prompt, self.image_token

def to_pytorch(self,
messages,
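A small sketch contrasting the two helpers above (content values are illustrative; `<IMG>` stands for whatever `self.image_token` resolves to from the HF processor):

```python
content = [
    dict(type='text', text='Caption:'),
    dict(type='image', image_url=dict(url='https://example.com/cat.jpg')),
]
# proc_internvl_hf_messages: items joined with '', a newline appended after each image
#   -> 'Caption:<IMG>\n'
# proc_interns1_messages: items joined with '\n'
#   -> 'Caption:\n<IMG>'
```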
47 changes: 25 additions & 22 deletions lmdeploy/vl/model/qwen2.py
@@ -32,8 +32,8 @@ def build_preprocessor(self):
from transformers import AutoProcessor
self.processor = AutoProcessor.from_pretrained(self.model_path)
tokenizer = self.processor.tokenizer
image_token = self.processor.image_token
self.image_token_id = tokenizer.encode(image_token)[-1]
self.image_token = self.processor.image_token
self.image_token_id = tokenizer.encode(self.image_token)[-1]

def preprocess(self, messages: List[Dict]) -> List[Dict]:
"""Refer to `super().preprocess()` for spec."""
@@ -124,33 +124,36 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
messages.append(dict(role='forward', content=outputs))
return messages

@staticmethod
def proc_messages(messages, chat_template, sequence_start):
def proc_messages(self, messages, chat_template, sequence_start):
"""Apply chat template to get the prompt."""
prompt_messages = []
IMAGE_TOKEN = '<IMAGE_TOKEN>'
for message in messages:
if isinstance(message['content'], str):
prompt_messages.append(message)
if message['role'] in ['preprocess', 'forward']:
continue
elif message['role'] in ['images', 'preprocess', 'forward']:
continue
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
content = [item['text'] for item in message['content'] if item['type'] == 'text']
prompt = content[0]
if IMAGE_TOKEN in prompt and '<|vision_start|>' not in prompt:
prompt = prompt.replace(IMAGE_TOKEN, f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>')
role, content = message['role'], message['content']
if role == 'user' and isinstance(content, List):
_content = []
for item in content:
if item['type'] == 'text':
# backward compatibility
text = item['text']
if IMAGE_TOKEN in text:
text = text.replace(IMAGE_TOKEN, self.image_token)
_content.append(text)
elif item['type'] in ['image', 'image_url']:
_content.append(f'<|vision_start|>{self.image_token}<|vision_end|>')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
message = dict(role=role, content=''.join(_content))
prompt_messages.append(message)
else:
# Qwen2-VL-2B-Instruct will concat image and user prompt
# according to their order in the content list
# we insert image token before user prompt by default. The
# user can use custom image token position if they want the
# same decorated prompt as Qwen2-VL
prompt = f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>' * \
n_images + prompt
prompt_messages.append(dict(role=message['role'], content=prompt))
if IMAGE_TOKEN in content and '<|vision_start|>' not in content:
# backward compatibility
content = content.replace(IMAGE_TOKEN, f'<|vision_start|>{self.image_token}<|vision_end|>')
prompt_messages.append(dict(role=role, content=content))
prompt = chat_template.messages2prompt(prompt_messages, sequence_start)
return prompt, IMAGE_TOKEN
return prompt, self.image_token

@staticmethod
def get_mrope_info(seq_len: int,
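For reference, a hedged sketch of the decoration the rewritten `proc_messages` applies for Qwen2-VL; the image token literal is an assumption taken from the HF processor:

```python
# Each image item is wrapped in vision markers in the order it appears; legacy
# '<IMAGE_TOKEN>' placeholders in plain-string content are rewritten the same way.
content = [
    dict(type='image_url', image_url=dict(url='https://example.com/cat.jpg')),
    dict(type='text', text='What is in the picture?'),
]
# resulting user prompt, assuming self.image_token == '<|image_pad|>':
#   '<|vision_start|><|image_pad|><|vision_end|>What is in the picture?'
```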