diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
index 53b6a4ba7f..2b6d9256ab 100644
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -35,7 +35,7 @@ jobs:
     runs-on: [self-hosted, linux-a100-s2]
     timeout-minutes: 4320 # 72hours
     container:
-      image: nvidia/cuda:11.8.0-devel-ubuntu22.04
+      image: openmmlab/lmdeploy:dev-cu12.8
       options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 --pull never"
       volumes:
         - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
@@ -43,35 +43,16 @@ jobs:
         - /nvme/share_data/github-actions/packages:/root/packages
         - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
     steps:
-      - name: Setup systems
-        run: |
-          apt-get update -y && apt-get install -y software-properties-common wget git curl &&\
-          add-apt-repository ppa:deadsnakes/ppa -y && apt-get update -y && apt-get install -y --no-install-recommends \
-          ninja-build rapidjson-dev libgoogle-glog-dev gdb python3.10 python3.10-dev python3.10-venv \
-          && apt-get clean -y && rm -rf /var/lib/apt/lists/* && cd /opt && python3 -m venv py3
-          echo "PATH=/opt/py3/bin:$PATH" >> "$GITHUB_ENV"
       - name: Clone repository
-        uses: actions/checkout@v2
-      - name: Install pytorch
-        run: |
-          python3 -V
-          python3 -m pip cache dir
-          python3 -m pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu118
+        uses: actions/checkout@v5
       - name: Install lmdeploy
         run: |
-          python3 -m pip install packaging protobuf transformers_stream_generator matplotlib
-          # manually install flash attn
-          python3 -m pip install /root/packages/cu118/flash_attn-*.whl
-          python3 -m pip install -r requirements_cuda.txt -r requirements/test.txt
+          python3 -m pip install -r requirements/test.txt
           python3 -m pip install -e .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
-      - name: Test lmdeploy csrc
-        run: |
-          #./build/bin/build/bin/unittest
-          echo "TODO"
      - name: Test lmdeploy python UT
        run: |
          coverage run --branch --source lmdeploy -m pytest -rsE tests
diff --git a/lmdeploy/messages.py b/lmdeploy/messages.py
index 69ac157652..95c6de3fa3 100644
--- a/lmdeploy/messages.py
+++ b/lmdeploy/messages.py
@@ -473,6 +473,33 @@ class Response:
     index: int = 0
     routed_experts: Any = None
 
+    def __str__(self):
+        fields = []
+
+        fields.append('text=')
+        fields.append(self.text if self.text is not None else 'None')
+        fields.append(f'input_token_len={self.input_token_len}')
+        fields.append(f'generate_token_len={self.generate_token_len}')
+        fields.append(f'finish_reason="{self.finish_reason}"')
+        fields.append(f'token_ids={self.token_ids}')
+        fields.append(f'logprobs={self.logprobs}')
+
+        # Helper function to format tensor information
+        def _format_tensor(name: str, tensor: Optional[torch.Tensor]) -> List[str]:
+            if tensor is None:
+                return [f'{name}=None']
+            return [f'{name}.shape={tensor.shape}', f'{name}={tensor}']
+
+        # Format tensor fields
+        fields.extend(_format_tensor('logits', self.logits))
+        fields.extend(_format_tensor('last_hidden_state', self.last_hidden_state))
+
+        if self.routed_experts is None:
+            fields.append('routed_experts=None')
+        else:
+            fields.append(f'routed_experts.shape={self.routed_experts.shape}')
+        return '\n'.join(fields)
+
     def __repr__(self):
         logits = 'logits=None' if self.logits is None else f'logits.shape={self.logits.shape}\nlogits={self.logits}'
         hidden_state = (
diff --git a/lmdeploy/vl/model/base.py b/lmdeploy/vl/model/base.py
index f06a175195..9ee05c0763 100644
--- a/lmdeploy/vl/model/base.py
+++ b/lmdeploy/vl/model/base.py
@@ -181,6 +181,27 @@ def collect_images(messages):
             }) for x in content if x['type'] == 'image'])
         return images
 
+    @staticmethod
+    def IMAGE_TOKEN_included(messages):
+        """Check whether the IMAGE_TOKEN is included in the messages.
+
+        Args:
+            messages (List[Dict]): a list of messages
+        Returns:
+            bool: whether the IMAGE_TOKEN is included in the messages
+        """
+        for message in messages:
+            role, content = message['role'], message['content']
+            if role != 'user':
+                continue
+            if isinstance(content, str) and '<IMAGE_TOKEN>' in content:
+                return True
+            elif isinstance(content, List):
+                content = [x['text'] for x in content if x['type'] == 'text']
+                if any('<IMAGE_TOKEN>' in x for x in content):
+                    return True
+        return False
+
     def to_pytorch_with_input_ids(self, messages):
         """Pack the preprocessing results in a format compatible with what is
         required by pytorch engine when input_ids are provided directly.
diff --git a/lmdeploy/vl/model/internvl.py b/lmdeploy/vl/model/internvl.py
index a2b8d7f9b7..6a3972cb5c 100644
--- a/lmdeploy/vl/model/internvl.py
+++ b/lmdeploy/vl/model/internvl.py
@@ -76,9 +76,9 @@ def __init__(self,
                  hf_config: AutoConfig = None,
                  backend: str = ''):
         super().__init__(model_path, with_llm, max_memory, hf_config, backend)
-        IMG_CONTEXT_TOKEN = '<IMG_CONTEXT>'
+        self.image_token = '<IMG_CONTEXT>'
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
-        self.image_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
+        self.image_token_id = tokenizer.convert_tokens_to_ids(self.image_token)
 
     def build_preprocessor(self):
         self.config = self.hf_config
@@ -224,8 +224,8 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
     def proc_messages(
+        self,
         messages,
         chat_template,
         sequence_start,
@@ -235,32 +235,48 @@ def proc_messages(
         """Apply chat template to get the prompt."""
         prompt_messages = []
         IMAGE_TOKEN = '<IMAGE_TOKEN>'
-        for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
-                continue
-            elif message['role'] in ['preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
-            if len(content) == 0:
-                content.append('')
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
-                prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
-                prompt = prompt.replace('</img><img>', '')
-                prompt = prompt.replace('<img><img>', '<img>')
-                prompt = prompt.replace('</img></img>', '</img>')
-            elif IMAGE_TOKEN not in prompt:
-                prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
-            else:
-                pass
-            prompt_messages.append(dict(role='user', content=prompt))
+        messages = [x for x in messages if x['role'] not in ['preprocess', 'forward']]
+        if VisonModel.IMAGE_TOKEN_included(messages):
+            # backward compatibility
+            for message in messages:
+                role, content = message['role'], message['content']
+                if role != 'user' or isinstance(content, str):
+                    prompt_messages.append(message)
+                    continue
+                n_images = len([1 for x in content if x['type'] == 'image'])
+                content = [x['text'] for x in content if x['type'] == 'text']
+                prompt = '\n'.join(content)
+                if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
+                    prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{self.image_token}</img>')
+                    prompt = prompt.replace('</img><img>', '')
+                    prompt = prompt.replace('<img><img>', '<img>')
+                    prompt = prompt.replace('</img></img>', '</img>')
+                elif IMAGE_TOKEN not in prompt:
+                    prompt = f'<img>{self.image_token * n_images}</img>\n' + prompt
+                else:
+                    pass
+                prompt_messages.append(dict(role='user', content=prompt))
+        else:
+            for message in messages:
+                role, content = message['role'], message['content']
+                if role != 'user' or isinstance(content, str):
+                    prompt_messages.append(message)
+                    continue
+                _content = []
+                for item in content:
+                    item_type = item['type']
+                    if item_type == 'text':
+                        _content.append(item['text'])
+                    elif item_type in ['image', 'image_url']:
+                        _content.append(f'{self.image_token}')
+                    else:
+                        raise ValueError(f'Unsupported message type: {item["type"]}')
+                prompt_messages.append(dict(role='user', content='\n'.join(_content)))
         prompt = chat_template.messages2prompt(prompt_messages,
                                                sequence_start,
                                                tools=tools,
                                                enable_thinking=enable_thinking)
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     def to_pytorch(self,
                    messages,
diff --git a/lmdeploy/vl/model/internvl3_hf.py b/lmdeploy/vl/model/internvl3_hf.py
index 3c8738ff18..234a886fc5 100644
--- a/lmdeploy/vl/model/internvl3_hf.py
+++ b/lmdeploy/vl/model/internvl3_hf.py
@@ -44,11 +44,12 @@ def __init__(self,
                  hf_config: AutoConfig = None,
                  backend: str = ''):
         super().__init__(model_path, with_llm, max_memory, hf_config, backend)
-        self.arch = hf_config.architectures[0]
+        self.arch = self.hf_config.architectures[0]
 
     def build_preprocessor(self):
         self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
         tokenizer = self.processor.tokenizer
+        self.image_token = self.processor.image_token
         self.image_token_id = tokenizer.context_image_token_id
         self.image_tokens_per_patch = self.processor.image_seq_length
         self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -146,8 +147,38 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
+    def proc_internvl_hf_messages(self, content: List[Dict]):
+        """Process the content list of role 'user' for InternVL HF models."""
+        res = []
+        for item in content:
+            if item['type'] == 'text':
+                # backward compatibility
+                text = item['text']
+                text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
+                res.append(text)
+            elif item['type'] in ['image', 'image_url']:
+                res.append(f'{self.image_token}\n')
+            else:
+                raise ValueError(f'Unsupported message type: {item["type"]}')
+        return ''.join(res)
+
+    def proc_interns1_messages(self, content: List[Dict]):
+        """Process the content list of role 'user' for InternS1 models."""
+        res = []
+        for item in content:
+            if item['type'] == 'text':
+                # backward compatibility
+                text = item['text']
+                text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
+                res.append(text)
+            elif item['type'] in ['image', 'image_url']:
+                res.append(f'{self.image_token}')
+            else:
+                raise ValueError(f'Unsupported message type: {item["type"]}')
+        return '\n'.join(res)
+
     def proc_messages(
+        self,
         messages,
         chat_template,
         sequence_start,
@@ -156,31 +187,28 @@ def proc_messages(
         """Apply chat template to get the prompt."""
         prompt_messages = []
-        IMAGE_TOKEN = '<IMAGE_TOKEN>'
+
         for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
+            if message['role'] in ['preprocess', 'forward']:
                 continue
-            elif message['role'] in ['preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
-                prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
-                prompt = prompt.replace('</img><img>', '')
-                prompt = prompt.replace('<img><img>', '<img>')
-                prompt = prompt.replace('</img></img>', '</img>')
-            elif IMAGE_TOKEN not in prompt:
-                prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
+            role, content = message['role'], message['content']
+            if role == 'user' and isinstance(content, List):
+                content = (self.proc_internvl_hf_messages(content)
+                           if self.arch == 'InternVLForConditionalGeneration' else self.proc_interns1_messages(content))
+                message = dict(role=role, content=content)
+                prompt_messages.append(message)
             else:
-                pass
-            prompt_messages.append(dict(role='user', content=prompt))
+                # backward compatibility
+                content = (content.replace('<IMAGE_TOKEN>', self.image_token)
+                           if isinstance(content, str) and '<IMAGE_TOKEN>' in content else content)
+                message = dict(role=role, content=content)
+                prompt_messages.append(message)
+
         prompt = chat_template.messages2prompt(prompt_messages,
                                                sequence_start,
                                                tools=tools,
                                                enable_thinking=enable_thinking)
 
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     def to_pytorch(self,
                    messages,
diff --git a/lmdeploy/vl/model/qwen2.py b/lmdeploy/vl/model/qwen2.py
index 43096be28b..56dc383163 100644
--- a/lmdeploy/vl/model/qwen2.py
+++ b/lmdeploy/vl/model/qwen2.py
@@ -32,8 +32,8 @@ def build_preprocessor(self):
         from transformers import AutoProcessor
         self.processor = AutoProcessor.from_pretrained(self.model_path)
         tokenizer = self.processor.tokenizer
-        image_token = self.processor.image_token
-        self.image_token_id = tokenizer.encode(image_token)[-1]
+        self.image_token = self.processor.image_token
+        self.image_token_id = tokenizer.encode(self.image_token)[-1]
 
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """Refer to `super().preprocess()` for spec."""
@@ -124,33 +124,36 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
-    def proc_messages(messages, chat_template, sequence_start):
+    def proc_messages(self, messages, chat_template, sequence_start):
         """Apply chat template to get the prompt."""
         prompt_messages = []
         IMAGE_TOKEN = '<IMAGE_TOKEN>'
         for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
+            if message['role'] in ['preprocess', 'forward']:
                 continue
-            elif message['role'] in ['images', 'preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [item['text'] for item in message['content'] if item['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and '<|vision_start|>' not in prompt:
-                prompt = prompt.replace(IMAGE_TOKEN, f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>')
+            role, content = message['role'], message['content']
+            if role == 'user' and isinstance(content, List):
+                _content = []
+                for item in content:
+                    if item['type'] == 'text':
+                        # backward compatibility
+                        text = item['text']
+                        if IMAGE_TOKEN in text:
+                            text = text.replace(IMAGE_TOKEN, self.image_token)
+                        _content.append(text)
+                    elif item['type'] in ['image', 'image_url']:
+                        _content.append(f'<|vision_start|>{self.image_token}<|vision_end|>')
+                    else:
+                        raise ValueError(f'Unsupported message type: {item["type"]}')
+                message = dict(role=role, content=''.join(_content))
+                prompt_messages.append(message)
             else:
-                # Qwen2-VL-2B-Instruct will concat image and user prompt
-                # according to their order in the content list
-                # we insert image token before user prompt by default. The
-                # user can use custom image token position if they want the
-                # same decorated prompt as Qwen2-VL
-                prompt = f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>' * \
-                    n_images + prompt
-            prompt_messages.append(dict(role=message['role'], content=prompt))
+                if IMAGE_TOKEN in content and '<|vision_start|>' not in content:
+                    # backward compatibility
+                    content = content.replace(IMAGE_TOKEN, f'<|vision_start|>{self.image_token}<|vision_end|>')
+                prompt_messages.append(dict(role=role, content=content))
         prompt = chat_template.messages2prompt(prompt_messages, sequence_start)
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     @staticmethod
     def get_mrope_info(seq_len: int,
diff --git a/lmdeploy/vl/model/qwen3.py b/lmdeploy/vl/model/qwen3.py
index 40f2bf485c..f7b367ad0e 100644
--- a/lmdeploy/vl/model/qwen3.py
+++ b/lmdeploy/vl/model/qwen3.py
@@ -1,9 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Tuple
+from typing import Dict, List
 
 import torch
 
-from lmdeploy.vl.model.base import VISION_MODELS, VisonModel
+from lmdeploy.vl.model.base import VISION_MODELS
+from lmdeploy.vl.model.qwen2 import Qwen2VLModel
 
 
 def check_transformers():
@@ -15,7 +16,7 @@ def check_transformers():
 
 
 @VISION_MODELS.register_module()
-class Qwen3VLModel(VisonModel):
+class Qwen3VLModel(Qwen2VLModel):
     """Qwen3VL model."""
 
     _arch = ['Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration']
@@ -25,8 +26,8 @@ def build_preprocessor(self):
         from transformers import AutoProcessor
         self.processor = AutoProcessor.from_pretrained(self.model_path)
         tokenizer = self.processor.tokenizer
-        image_token = self.processor.image_token
-        self.image_token_id = tokenizer.encode(image_token)[-1]
+        self.image_token = self.processor.image_token
+        self.image_token_id = tokenizer.encode(self.image_token)[-1]
 
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """Refer to `super().preprocess()` for spec."""
@@ -65,64 +66,6 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         # TODO: implement for turbomind
         pass
 
-    @staticmethod
-    def proc_messages(messages, chat_template, sequence_start):
-        """Apply chat template to get the prompt."""
-        prompt_messages = []
-        IMAGE_TOKEN = '<IMAGE_TOKEN>'
-        for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
-                continue
-            elif message['role'] in ['images', 'preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [item['text'] for item in message['content'] if item['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and '<|vision_start|>' not in prompt:
-                prompt = prompt.replace(IMAGE_TOKEN, f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>')
-            else:
-                # Qwen2-VL-2B-Instruct will concat image and user prompt
-                # according to their order in the content list
-                # we insert image token before user prompt by default. The
-                # user can use custom image token position if they want the
-                # same decorated prompt as Qwen2-VL
-                prompt = f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>' * \
-                    n_images + prompt
-            prompt_messages.append(dict(role=message['role'], content=prompt))
-        prompt = chat_template.messages2prompt(prompt_messages, sequence_start)
-        return prompt, IMAGE_TOKEN
-
-    @staticmethod
-    def get_mrope_info(seq_len: int,
-                       grid_thws: List[Tuple[int, int, int]] = None,
-                       ranges: List[Tuple[int, int]] = None):
-        mrope_position_ids = [torch.arange(ranges[0][0]).expand(3, -1)]
-        st_idx = ranges[0][0]
-        for i, (grid_thw, embedding_range) in enumerate(zip(grid_thws, ranges)):
-            llm_grid_t, llm_grid_h, llm_grid_w = grid_thw
-            llm_grid_h //= 2
-            llm_grid_w //= 2
-            t_index = torch.arange(llm_grid_t).view(-1, 1).expand(-1, llm_grid_h * llm_grid_w).flatten()
-            h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(llm_grid_t, -1, llm_grid_w).flatten()
-            w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(llm_grid_t, llm_grid_h, -1).flatten()
-            mrope_position_ids.append(torch.stack([t_index, h_index, w_index]) + st_idx)
-            st_idx += max(llm_grid_h, llm_grid_w)
-            if i < len(ranges) - 1:
-                text_len = ranges[i + 1][0] - ranges[i][1]
-            else:
-                text_len = seq_len - embedding_range[1]
-            mrope_position_ids.append(torch.arange(text_len).expand(3, -1) + st_idx)
-            st_idx += text_len
-        mrope_position_ids = torch.cat(mrope_position_ids, dim=-1)
-        mrope_position_delta = torch.tensor([st_idx - seq_len], dtype=torch.long)
-        return mrope_position_ids, mrope_position_delta
-
-    def to_pytorch(self, messages, chat_template, tokenizer, sequence_start, **kwargs):
-        """Return to the information needed by pytorch engine."""
-        prompt, IMAGE_TOKEN = self.proc_messages(messages, chat_template, sequence_start)
-        return self.to_pytorch_aux(messages, prompt, IMAGE_TOKEN, tokenizer, sequence_start)
-
     def to_turbomind(self, messages, chat_template, tokenizer, sequence_start, **kwargs):
         # TODO: implement for turbomind
         pass
diff --git a/requirements/test.txt b/requirements/test.txt
index 12c643d92b..7620715d71 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -11,4 +11,5 @@ pytest-rerunfailures
 pytest-sugar
 pytest-xdist
 pyyaml
+qwen_vl_utils
 timm
diff --git a/tests/test_lmdeploy/test_vl/test_hf_chat_template.py b/tests/test_lmdeploy/test_vl/test_hf_chat_template.py
new file mode 100644
index 0000000000..5715e696a1
--- /dev/null
+++ b/tests/test_lmdeploy/test_vl/test_hf_chat_template.py
@@ -0,0 +1,54 @@
+import os
+
+import pytest
+
+from lmdeploy.model import MODELS, best_match_model
+from lmdeploy.vl.model.builder import load_vl_model
+
+
+def get_model_and_chat_template(model_path):
+    if os.getenv('LMDEPLOY_USE_MODELSCOPE', 'False').lower() == 'true':
+        from modelscope import snapshot_download
+    elif os.getenv('LMDEPLOY_USE_OPENMIND_HUB', 'False').lower() == 'true':
+        from openmind_hub import snapshot_download
+    else:
+        from huggingface_hub import snapshot_download
+    model_path = snapshot_download(model_path, allow_patterns=['*.json', '*.py', '*.txt', '*.model'])
+    model = load_vl_model(model_path=model_path, with_llm=False, backend='pytorch')
+    chat_template_name = best_match_model(model_path)
+    chat_template = MODELS.module_dict[chat_template_name](model_path=model_path)
+    return model, chat_template
+
+
+class TestVLHFChatTemplate:
+
+    @pytest.fixture(scope='module')
+    def models(self):
+        model_list = [
+            'OpenGVLab/InternVL3_5-8B-HF', 'internlm/Intern-S1-mini', 'Qwen/Qwen2-VL-7B-Instruct',
+            'Qwen/Qwen2.5-VL-7B-Instruct', 'Qwen/Qwen3-VL-8B-Instruct'
+        ]
+        models = [get_model_and_chat_template(model_path) for model_path in model_list]
+        return models
+
+    @pytest.fixture(scope='module')
+    def mock_messages(self):
+        return [
+            dict(role='user',
+                 content=[
+                     dict(type='text', text='Describe the following images in detail'),
+                     dict(type='image', url=dict(url='http://images.cocodataset.org/val2017/000000039769.jpg')),
+                     dict(type='image', url=dict(url='http://images.cocodataset.org/val2017/000000039769.jpg')),
+                     dict(type='text', text='How many cats are there in total?')
+                 ]),
+        ]
+
+    def test_proc_messages(self, models, mock_messages):
+        for model, chat_template in models:
+            model.build_preprocessor()
+            reference = model.processor.apply_chat_template(mock_messages,
+                                                            add_generation_prompt=True,
+                                                            tokenize=False,
+                                                            return_dict=True)
+            prompt, _ = model.proc_messages(mock_messages, chat_template, sequence_start=True)
+            assert prompt == reference