diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
index 53b6a4ba7f..2b6d9256ab 100644
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -35,7 +35,7 @@ jobs:
     runs-on: [self-hosted, linux-a100-s2]
     timeout-minutes: 4320 # 72hours
     container:
-      image: nvidia/cuda:11.8.0-devel-ubuntu22.04
+      image: openmmlab/lmdeploy:dev-cu12.8
       options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 --pull never"
       volumes:
         - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
@@ -43,35 +43,16 @@ jobs:
         - /nvme/share_data/github-actions/packages:/root/packages
         - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
     steps:
-      - name: Setup systems
-        run: |
-          apt-get update -y && apt-get install -y software-properties-common wget git curl &&\
-          add-apt-repository ppa:deadsnakes/ppa -y && apt-get update -y && apt-get install -y --no-install-recommends \
-          ninja-build rapidjson-dev libgoogle-glog-dev gdb python3.10 python3.10-dev python3.10-venv \
-          && apt-get clean -y && rm -rf /var/lib/apt/lists/* && cd /opt && python3 -m venv py3
-          echo "PATH=/opt/py3/bin:$PATH" >> "$GITHUB_ENV"
       - name: Clone repository
-        uses: actions/checkout@v2
-      - name: Install pytorch
-        run: |
-          python3 -V
-          python3 -m pip cache dir
-          python3 -m pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu118
+        uses: actions/checkout@v5
       - name: Install lmdeploy
         run: |
-          python3 -m pip install packaging protobuf transformers_stream_generator matplotlib
-          # manually install flash attn
-          python3 -m pip install /root/packages/cu118/flash_attn-*.whl
-          python3 -m pip install -r requirements_cuda.txt -r requirements/test.txt
+          python3 -m pip install -r requirements/test.txt
           python3 -m pip install -e .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
-      - name: Test lmdeploy csrc
-        run: |
-          #./build/bin/build/bin/unittest
-          echo "TODO"
      - name: Test lmdeploy python UT
        run: |
          coverage run --branch --source lmdeploy -m pytest -rsE tests
diff --git a/lmdeploy/messages.py b/lmdeploy/messages.py
index 69ac157652..95c6de3fa3 100644
--- a/lmdeploy/messages.py
+++ b/lmdeploy/messages.py
@@ -473,6 +473,33 @@ class Response:
     index: int = 0
     routed_experts: Any = None
 
+    def __str__(self):
+        fields = []
+
+        fields.append('text=')
+        fields.append(self.text if self.text is not None else 'None')
+        fields.append(f'input_token_len={self.input_token_len}')
+        fields.append(f'generate_token_len={self.generate_token_len}')
+        fields.append(f'finish_reason="{self.finish_reason}"')
+        fields.append(f'token_ids={self.token_ids}')
+        fields.append(f'logprobs={self.logprobs}')
+
+        # Helper function to format tensor information
+        def _format_tensor(name: str, tensor: Optional[torch.Tensor]) -> List[str]:
+            if tensor is None:
+                return [f'{name}=None']
+            return [f'{name}.shape={tensor.shape}', f'{name}={tensor}']
+
+        # Format tensor fields
+        fields.extend(_format_tensor('logits', self.logits))
+        fields.extend(_format_tensor('last_hidden_state', self.last_hidden_state))
+
+        if self.routed_experts is None:
+            fields.append('routed_experts=None')
+        else:
+            fields.append(f'routed_experts.shape={self.routed_experts.shape}')
+        return '\n'.join(fields)
+
     def __repr__(self):
         logits = 'logits=None' if self.logits is None else f'logits.shape={self.logits.shape}\nlogits={self.logits}'
         hidden_state = (
diff --git a/lmdeploy/vl/model/base.py b/lmdeploy/vl/model/base.py
index f06a175195..9ee05c0763 100644
--- a/lmdeploy/vl/model/base.py
+++ b/lmdeploy/vl/model/base.py
@@ -181,6 +181,27 @@ def collect_images(messages):
             }) for x in content if x['type'] == 'image'])
         return images
 
+    @staticmethod
+    def IMAGE_TOKEN_included(messages):
+        """Check whether the IMAGE_TOKEN is included in the messages.
+
+        Args:
+            messages (List[Dict]): a list of messages
+        Returns:
+            bool: whether the IMAGE_TOKEN is included in the messages
+        """
+        for message in messages:
+            role, content = message['role'], message['content']
+            if role != 'user':
+                continue
+            if isinstance(content, str) and '<IMAGE_TOKEN>' in content:
+                return True
+            elif isinstance(content, List):
+                content = [x['text'] for x in content if x['type'] == 'text']
+                if any('<IMAGE_TOKEN>' in x for x in content):
+                    return True
+        return False
+
     def to_pytorch_with_input_ids(self, messages):
         """Pack the preprocessing results in a format compatible with what is
         required by pytorch engine when input_ids are provided directly.
diff --git a/lmdeploy/vl/model/internvl.py b/lmdeploy/vl/model/internvl.py
index a2b8d7f9b7..6a3972cb5c 100644
--- a/lmdeploy/vl/model/internvl.py
+++ b/lmdeploy/vl/model/internvl.py
@@ -76,9 +76,9 @@ def __init__(self,
                  hf_config: AutoConfig = None,
                  backend: str = ''):
         super().__init__(model_path, with_llm, max_memory, hf_config, backend)
-        IMG_CONTEXT_TOKEN = '<IMG_CONTEXT>'
+        self.image_token = '<IMG_CONTEXT>'
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
-        self.image_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
+        self.image_token_id = tokenizer.convert_tokens_to_ids(self.image_token)
 
     def build_preprocessor(self):
         self.config = self.hf_config
@@ -224,8 +224,8 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
     def proc_messages(
+        self,
         messages,
         chat_template,
         sequence_start,
@@ -235,32 +235,48 @@ def proc_messages(
         """Apply chat template to get the prompt."""
         prompt_messages = []
         IMAGE_TOKEN = '<IMAGE_TOKEN>'
-        for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
-                continue
-            elif message['role'] in ['preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
-            if len(content) == 0:
-                content.append('')
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
-                prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
-                prompt = prompt.replace('</img><img>', '')
-                prompt = prompt.replace('<img><img>', '<img>')
-                prompt = prompt.replace('</img></img>', '</img>')
-            elif IMAGE_TOKEN not in prompt:
-                prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
-            else:
-                pass
-            prompt_messages.append(dict(role='user', content=prompt))
+        messages = [x for x in messages if x['role'] not in ['preprocess', 'forward']]
+        if VisonModel.IMAGE_TOKEN_included(messages):
+            # backward compatibility
+            for message in messages:
+                role, content = message['role'], message['content']
+                if role != 'user' or isinstance(content, str):
+                    prompt_messages.append(message)
+                    continue
+                n_images = len([1 for x in content if x['type'] == 'image'])
+                content = [x['text'] for x in content if x['type'] == 'text']
+                prompt = '\n'.join(content)
+                if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
+                    prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{self.image_token}</img>')
+                    prompt = prompt.replace('</img><img>', '')
+                    prompt = prompt.replace('<img><img>', '<img>')
+                    prompt = prompt.replace('</img></img>', '</img>')
+                elif IMAGE_TOKEN not in prompt:
+                    prompt = f'<img>{self.image_token * n_images}</img>\n' + prompt
+                else:
+                    pass
+                prompt_messages.append(dict(role='user', content=prompt))
+        else:
+            for message in messages:
+                role, content = message['role'], message['content']
+                if role != 'user' or isinstance(content, str):
+                    prompt_messages.append(message)
+                    continue
+                _content = []
+                for item in content:
+                    item_type = item['type']
+                    if item_type == 'text':
+                        _content.append(item['text'])
+                    elif item_type in ['image', 'image_url']:
+                        _content.append(f'{self.image_token}')
+                    else:
+                        raise ValueError(f'Unsupported message type: {item["type"]}')
+                prompt_messages.append(dict(role='user', content='\n'.join(_content)))
         prompt = chat_template.messages2prompt(prompt_messages,
                                                sequence_start,
                                                tools=tools,
                                                enable_thinking=enable_thinking)
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     def to_pytorch(self,
                    messages,
diff --git a/lmdeploy/vl/model/internvl3_hf.py b/lmdeploy/vl/model/internvl3_hf.py
index 3c8738ff18..234a886fc5 100644
--- a/lmdeploy/vl/model/internvl3_hf.py
+++ b/lmdeploy/vl/model/internvl3_hf.py
@@ -44,11 +44,12 @@ def __init__(self,
                  hf_config: AutoConfig = None,
                  backend: str = ''):
         super().__init__(model_path, with_llm, max_memory, hf_config, backend)
-        self.arch = hf_config.architectures[0]
+        self.arch = self.hf_config.architectures[0]
 
     def build_preprocessor(self):
         self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
         tokenizer = self.processor.tokenizer
+        self.image_token = self.processor.image_token
         self.image_token_id = tokenizer.context_image_token_id
         self.image_tokens_per_patch = self.processor.image_seq_length
         self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -146,8 +147,38 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
+    def proc_internvl_hf_messages(self, content: List[Dict]):
+        """Process the content list of role 'user' for InternVL HF models."""
+        res = []
+        for item in content:
+            if item['type'] == 'text':
+                # backward compatibility
+                text = item['text']
+                text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
+                res.append(text)
+            elif item['type'] in ['image', 'image_url']:
+                res.append(f'{self.image_token}\n')
+            else:
+                raise ValueError(f'Unsupported message type: {item["type"]}')
+        return ''.join(res)
+
+    def proc_interns1_messages(self, content: List[Dict]):
+        """Process the content list of role 'user' for InternS1 models."""
+        res = []
+        for item in content:
+            if item['type'] == 'text':
+                # backward compatibility
+                text = item['text']
+                text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
+                res.append(text)
+            elif item['type'] in ['image', 'image_url']:
+                res.append(f'{self.image_token}')
+            else:
+                raise ValueError(f'Unsupported message type: {item["type"]}')
+        return '\n'.join(res)
+
     def proc_messages(
+        self,
         messages,
         chat_template,
         sequence_start,
@@ -156,31 +187,28 @@ def proc_messages(
         """Apply chat template to get the prompt."""
         prompt_messages = []
-        IMAGE_TOKEN = '<IMAGE_TOKEN>'
+
         for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
+            if message['role'] in ['preprocess', 'forward']:
                 continue
-            elif message['role'] in ['preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
-                prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
-                prompt = prompt.replace('</img><img>', '')
-                prompt = prompt.replace('<img><img>', '<img>')
-                prompt = prompt.replace('</img></img>', '</img>')
-            elif IMAGE_TOKEN not in prompt:
-                prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
+            role, content = message['role'], message['content']
+            if role == 'user' and isinstance(content, List):
+                content = (self.proc_internvl_hf_messages(content)
+                           if self.arch == 'InternVLForConditionalGeneration' else self.proc_interns1_messages(content))
+                message = dict(role=role, content=content)
+                prompt_messages.append(message)
             else:
-                pass
-            prompt_messages.append(dict(role='user', content=prompt))
+                # backward compatibility
+                content = (content.replace('<IMAGE_TOKEN>', self.image_token)
+                           if isinstance(content, str) and '<IMAGE_TOKEN>' in content else content)
+                message = dict(role=role, content=content)
+                prompt_messages.append(message)
+
         prompt = chat_template.messages2prompt(prompt_messages,
                                                sequence_start,
                                                tools=tools,
                                                enable_thinking=enable_thinking)
 
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     def to_pytorch(self,
                    messages,
diff --git a/lmdeploy/vl/model/qwen2.py b/lmdeploy/vl/model/qwen2.py
index 43096be28b..56dc383163 100644
--- a/lmdeploy/vl/model/qwen2.py
+++ b/lmdeploy/vl/model/qwen2.py
@@ -32,8 +32,8 @@ def build_preprocessor(self):
         from transformers import AutoProcessor
         self.processor = AutoProcessor.from_pretrained(self.model_path)
         tokenizer = self.processor.tokenizer
-        image_token = self.processor.image_token
-        self.image_token_id = tokenizer.encode(image_token)[-1]
+        self.image_token = self.processor.image_token
+        self.image_token_id = tokenizer.encode(self.image_token)[-1]
 
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """Refer to `super().preprocess()` for spec."""
@@ -124,33 +124,36 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         messages.append(dict(role='forward', content=outputs))
         return messages
 
-    @staticmethod
-    def proc_messages(messages, chat_template, sequence_start):
+    def proc_messages(self, messages, chat_template, sequence_start):
         """Apply chat template to get the prompt."""
         prompt_messages = []
         IMAGE_TOKEN = '<IMAGE_TOKEN>'
         for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
+            if message['role'] in ['preprocess', 'forward']:
                 continue
-            elif message['role'] in ['images', 'preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [item['text'] for item in message['content'] if item['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and '<|vision_start|>' not in prompt:
-                prompt = prompt.replace(IMAGE_TOKEN, f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>')
+            role, content = message['role'], message['content']
+            if role == 'user' and isinstance(content, List):
+                _content = []
+                for item in content:
+                    if item['type'] == 'text':
+                        # backward compatibility
+                        text = item['text']
+                        if IMAGE_TOKEN in text:
+                            text = text.replace(IMAGE_TOKEN, self.image_token)
+                        _content.append(text)
+                    elif item['type'] in ['image', 'image_url']:
+                        _content.append(f'<|vision_start|>{self.image_token}<|vision_end|>')
+                    else:
+                        raise ValueError(f'Unsupported message type: {item["type"]}')
+                message = dict(role=role, content=''.join(_content))
+                prompt_messages.append(message)
             else:
-                # Qwen2-VL-2B-Instruct will concat image and user prompt
-                # according to their order in the content list
-                # we insert image token before user prompt by default. The
-                # user can use custom image token position if they want the
-                # same decorated prompt as Qwen2-VL
-                prompt = f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>' * \
-                    n_images + prompt
-            prompt_messages.append(dict(role=message['role'], content=prompt))
+                if IMAGE_TOKEN in content and '<|vision_start|>' not in content:
+                    # backward compatibility
+                    content = content.replace(IMAGE_TOKEN, f'<|vision_start|>{self.image_token}<|vision_end|>')
+                prompt_messages.append(dict(role=role, content=content))
         prompt = chat_template.messages2prompt(prompt_messages, sequence_start)
-        return prompt, IMAGE_TOKEN
+        return prompt, self.image_token
 
     @staticmethod
     def get_mrope_info(seq_len: int,
diff --git a/lmdeploy/vl/model/qwen3.py b/lmdeploy/vl/model/qwen3.py
index 40f2bf485c..f7b367ad0e 100644
--- a/lmdeploy/vl/model/qwen3.py
+++ b/lmdeploy/vl/model/qwen3.py
@@ -1,9 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Tuple
+from typing import Dict, List
 
 import torch
 
-from lmdeploy.vl.model.base import VISION_MODELS, VisonModel
+from lmdeploy.vl.model.base import VISION_MODELS
+from lmdeploy.vl.model.qwen2 import Qwen2VLModel
 
 
 def check_transformers():
@@ -15,7 +16,7 @@ def check_transformers():
 
 
 @VISION_MODELS.register_module()
-class Qwen3VLModel(VisonModel):
+class Qwen3VLModel(Qwen2VLModel):
     """Qwen3VL model."""
 
     _arch = ['Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration']
@@ -25,8 +26,8 @@ def build_preprocessor(self):
         from transformers import AutoProcessor
         self.processor = AutoProcessor.from_pretrained(self.model_path)
         tokenizer = self.processor.tokenizer
-        image_token = self.processor.image_token
-        self.image_token_id = tokenizer.encode(image_token)[-1]
+        self.image_token = self.processor.image_token
+        self.image_token_id = tokenizer.encode(self.image_token)[-1]
 
     def preprocess(self, messages: List[Dict]) -> List[Dict]:
         """Refer to `super().preprocess()` for spec."""
@@ -65,64 +66,6 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
         # TODO: implement for turbomind
         pass
 
-    @staticmethod
-    def proc_messages(messages, chat_template, sequence_start):
-        """Apply chat template to get the prompt."""
-        prompt_messages = []
-        IMAGE_TOKEN = '<IMAGE_TOKEN>'
-        for message in messages:
-            if isinstance(message['content'], str):
-                prompt_messages.append(message)
-                continue
-            elif message['role'] in ['images', 'preprocess', 'forward']:
-                continue
-            n_images = len([1 for x in message['content'] if x['type'] == 'image'])
-            content = [item['text'] for item in message['content'] if item['type'] == 'text']
-            prompt = content[0]
-            if IMAGE_TOKEN in prompt and '<|vision_start|>' not in prompt:
-                prompt = prompt.replace(IMAGE_TOKEN, f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>')
-            else:
-                # Qwen2-VL-2B-Instruct will concat image and user prompt
-                # according to their order in the content list
-                # we insert image token before user prompt by default. The
-                # user can use custom image token position if they want the
-                # same decorated prompt as Qwen2-VL
-                prompt = f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>' * \
-                    n_images + prompt
-            prompt_messages.append(dict(role=message['role'], content=prompt))
-        prompt = chat_template.messages2prompt(prompt_messages, sequence_start)
-        return prompt, IMAGE_TOKEN
-
-    @staticmethod
-    def get_mrope_info(seq_len: int,
-                       grid_thws: List[Tuple[int, int, int]] = None,
-                       ranges: List[Tuple[int, int]] = None):
-        mrope_position_ids = [torch.arange(ranges[0][0]).expand(3, -1)]
-        st_idx = ranges[0][0]
-        for i, (grid_thw, embedding_range) in enumerate(zip(grid_thws, ranges)):
-            llm_grid_t, llm_grid_h, llm_grid_w = grid_thw
-            llm_grid_h //= 2
-            llm_grid_w //= 2
-            t_index = torch.arange(llm_grid_t).view(-1, 1).expand(-1, llm_grid_h * llm_grid_w).flatten()
-            h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand(llm_grid_t, -1, llm_grid_w).flatten()
-            w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand(llm_grid_t, llm_grid_h, -1).flatten()
-            mrope_position_ids.append(torch.stack([t_index, h_index, w_index]) + st_idx)
-            st_idx += max(llm_grid_h, llm_grid_w)
-            if i < len(ranges) - 1:
-                text_len = ranges[i + 1][0] - ranges[i][1]
-            else:
-                text_len = seq_len - embedding_range[1]
-            mrope_position_ids.append(torch.arange(text_len).expand(3, -1) + st_idx)
-            st_idx += text_len
-        mrope_position_ids = torch.cat(mrope_position_ids, dim=-1)
-        mrope_position_delta = torch.tensor([st_idx - seq_len], dtype=torch.long)
-        return mrope_position_ids, mrope_position_delta
-
-    def to_pytorch(self, messages, chat_template, tokenizer, sequence_start, **kwargs):
-        """Return to the information needed by pytorch engine."""
-        prompt, IMAGE_TOKEN = self.proc_messages(messages, chat_template, sequence_start)
-        return self.to_pytorch_aux(messages, prompt, IMAGE_TOKEN, tokenizer, sequence_start)
-
     def to_turbomind(self, messages, chat_template, tokenizer, sequence_start, **kwargs):
         # TODO: implement for turbomind
         pass
diff --git a/requirements/test.txt b/requirements/test.txt
index 12c643d92b..7620715d71 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -11,4 +11,5 @@ pytest-rerunfailures
 pytest-sugar
 pytest-xdist
 pyyaml
+qwen_vl_utils
 timm
diff --git a/tests/test_lmdeploy/test_vl/test_hf_chat_template.py b/tests/test_lmdeploy/test_vl/test_hf_chat_template.py
new file mode 100644
index 0000000000..5715e696a1
--- /dev/null
+++ b/tests/test_lmdeploy/test_vl/test_hf_chat_template.py
@@ -0,0 +1,54 @@
+import os
+
+import pytest
+
+from lmdeploy.model import MODELS, best_match_model
+from lmdeploy.vl.model.builder import load_vl_model
+
+
+def get_model_and_chat_template(model_path):
+    if os.getenv('LMDEPLOY_USE_MODELSCOPE', 'False').lower() == 'true':
+        from modelscope import snapshot_download
+    elif os.getenv('LMDEPLOY_USE_OPENMIND_HUB', 'False').lower() == 'true':
+        from openmind_hub import snapshot_download
+    else:
+        from huggingface_hub import snapshot_download
+    model_path = snapshot_download(model_path, allow_patterns=['*.json', '*.py', '*.txt', '*.model'])
+    model = load_vl_model(model_path=model_path, with_llm=False, backend='pytorch')
+    chat_template_name = best_match_model(model_path)
+    chat_template = MODELS.module_dict[chat_template_name](model_path=model_path)
+    return model, chat_template
+
+
+class TestVLHFChatTemplate:
+
+    @pytest.fixture(scope='module')
+    def models(self):
+        model_list = [
+            'OpenGVLab/InternVL3_5-8B-HF', 'internlm/Intern-S1-mini', 'Qwen/Qwen2-VL-7B-Instruct',
+            'Qwen/Qwen2.5-VL-7B-Instruct', 'Qwen/Qwen3-VL-8B-Instruct'
+        ]
+        models = [get_model_and_chat_template(model_path) for model_path in model_list]
+        return models
+
+    @pytest.fixture(scope='module')
+    def mock_messages(self):
+        return [
+            dict(role='user',
+                 content=[
+                     dict(type='text', text='Describe the following images in detail'),
+                     dict(type='image', url=dict(url='http://images.cocodataset.org/val2017/000000039769.jpg')),
+                     dict(type='image', url=dict(url='http://images.cocodataset.org/val2017/000000039769.jpg')),
+                     dict(type='text', text='How many cats are there in total?')
+                 ]),
+        ]
+
+    def test_proc_messages(self, models, mock_messages):
+        for model, chat_template in models:
+            model.build_preprocessor()
+            reference = model.processor.apply_chat_template(mock_messages,
+                                                            add_generation_prompt=True,
+                                                            tokenize=False,
+                                                            return_dict=True)
+            prompt, _ = model.proc_messages(mock_messages, chat_template, sequence_start=True)
+            assert prompt == reference