25 changes: 3 additions & 22 deletions .github/workflows/unit-test.yml
@@ -35,43 +35,24 @@ jobs:
runs-on: [self-hosted, linux-a100-s2]
timeout-minutes: 4320 # 72hours
container:
image: nvidia/cuda:11.8.0-devel-ubuntu22.04
image: openmmlab/lmdeploy:dev-cu12.8
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 --pull never"
volumes:
- /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
- /nvme/share_data/github-actions/hf_home:/root/.cache/huggingface
- /nvme/share_data/github-actions/packages:/root/packages
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Setup systems
run: |
apt-get update -y && apt-get install -y software-properties-common wget git curl &&\
add-apt-repository ppa:deadsnakes/ppa -y && apt-get update -y && apt-get install -y --no-install-recommends \
ninja-build rapidjson-dev libgoogle-glog-dev gdb python3.10 python3.10-dev python3.10-venv \
&& apt-get clean -y && rm -rf /var/lib/apt/lists/* && cd /opt && python3 -m venv py3
echo "PATH=/opt/py3/bin:$PATH" >> "$GITHUB_ENV"
- name: Clone repository
uses: actions/checkout@v2
- name: Install pytorch
run: |
python3 -V
python3 -m pip cache dir
python3 -m pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu118
uses: actions/checkout@v5
- name: Install lmdeploy
run: |
python3 -m pip install packaging protobuf transformers_stream_generator matplotlib
# manually install flash attn
python3 -m pip install /root/packages/cu118/flash_attn-*.whl
python3 -m pip install -r requirements_cuda.txt -r requirements/test.txt
python3 -m pip install -r requirements/test.txt
python3 -m pip install -e .
- name: Check env
run: |
python3 -m pip list
lmdeploy check_env
- name: Test lmdeploy csrc
run: |
#./build/bin/build/bin/unittest
echo "TODO"
- name: Test lmdeploy python UT
run: |
coverage run --branch --source lmdeploy -m pytest -rsE tests
27 changes: 27 additions & 0 deletions lmdeploy/messages.py
@@ -473,6 +473,33 @@ class Response:
index: int = 0
routed_experts: Any = None

def __str__(self):
fields = []

fields.append('text=')
fields.append(self.text if self.text is not None else 'None')
fields.append(f'input_token_len={self.input_token_len}')
fields.append(f'generate_token_len={self.generate_token_len}')
fields.append(f'finish_reason="{self.finish_reason}"')
fields.append(f'token_ids={self.token_ids}')
fields.append(f'logprobs={self.logprobs}')

# Helper function to format tensor information
def _format_tensor(name: str, tensor: Optional[torch.Tensor]) -> List[str]:
if tensor is None:
return [f'{name}=None']
return [f'{name}.shape={tensor.shape}', f'{name}={tensor}']

# Format tensor fields
fields.extend(_format_tensor('logits', self.logits))
fields.extend(_format_tensor('last_hidden_state', self.last_hidden_state))

if self.routed_experts is None:
fields.append('routed_experts=None')
else:
fields.append(f'routed_experts.shape={self.routed_experts.shape}')
return '\n'.join(fields)

def __repr__(self):
logits = 'logits=None' if self.logits is None else f'logits.shape={self.logits.shape}\nlogits={self.logits}'
hidden_state = (
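For orientation (not part of the diff): a minimal sketch of how the new `__str__` could be exercised, assuming the remaining `Response` dataclass fields are optional and keep their defaults.

```python
from lmdeploy.messages import Response

# Illustrative values only; fields not listed here are assumed to keep their defaults.
resp = Response(text='Hello!', input_token_len=12, generate_token_len=3,
                finish_reason='stop', token_ids=[9906, 0], logprobs=None)
print(resp)        # __str__: one name=value entry per line; tensors reported by shape
print(repr(resp))  # __repr__: same idea for logits / last_hidden_state
```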
21 changes: 21 additions & 0 deletions lmdeploy/vl/model/base.py
@@ -181,6 +181,27 @@ def collect_images(messages):
}) for x in content if x['type'] == 'image'])
return images

@staticmethod
def IMAGE_TOKEN_included(messages):
"""Check whether the IMAGE_TOKEN is included in the messages.

Args:
messages (List[Dict]): a list of messages
Returns:
bool: whether the IMAGE_TOKEN is included in the messages
"""
for message in messages:
role, content = message['role'], message['content']
if role != 'user':
continue
if isinstance(content, str) and '<IMAGE_TOKEN>' in content:
return True
elif isinstance(content, List):
content = [x['text'] for x in content if x['type'] == 'text']
if any('<IMAGE_TOKEN>' in x for x in content):
return True
return False

def to_pytorch_with_input_ids(self, messages):
"""Pack the preprocessing results in a format compatible with what is
required by pytorch engine when input_ids are provided directly.
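A hedged usage sketch of the new helper (the message layout is assumed to follow the OpenAI-style content lists used elsewhere in this PR):

```python
# Returns True only when a user turn carries the literal '<IMAGE_TOKEN>' placeholder.
messages = [
    dict(role='system', content='You are a helpful assistant.'),
    dict(role='user', content=[
        dict(type='text', text='<IMAGE_TOKEN>\nDescribe the image.'),
        dict(type='image_url', image_url=dict(url='https://example.com/cat.jpg')),
    ]),
]
assert VisonModel.IMAGE_TOKEN_included(messages)
assert not VisonModel.IMAGE_TOKEN_included([dict(role='user', content='hi')])
```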
66 changes: 41 additions & 25 deletions lmdeploy/vl/model/internvl.py
@@ -76,9 +76,9 @@ def __init__(self,
hf_config: AutoConfig = None,
backend: str = ''):
super().__init__(model_path, with_llm, max_memory, hf_config, backend)
IMG_CONTEXT_TOKEN = '<IMG_CONTEXT>'
self.image_token = '<IMG_CONTEXT>'
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
self.image_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
self.image_token_id = tokenizer.convert_tokens_to_ids(self.image_token)

def build_preprocessor(self):
self.config = self.hf_config
@@ -224,8 +224,8 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
messages.append(dict(role='forward', content=outputs))
return messages

@staticmethod
def proc_messages(
self,
messages,
chat_template,
sequence_start,
@@ -235,32 +235,48 @@ def proc_messages(
"""Apply chat template to get the prompt."""
prompt_messages = []
IMAGE_TOKEN = '<IMAGE_TOKEN>'
for message in messages:
if isinstance(message['content'], str):
prompt_messages.append(message)
continue
elif message['role'] in ['preprocess', 'forward']:
continue
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
if len(content) == 0:
content.append('')
prompt = content[0]
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
prompt = prompt.replace('</img><img>', '')
prompt = prompt.replace('<img><img>', '<img>')
prompt = prompt.replace('</img></img>', '</img>')
elif IMAGE_TOKEN not in prompt:
prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
else:
pass
prompt_messages.append(dict(role='user', content=prompt))
messages = [x for x in messages if x['role'] not in ['preprocess', 'forward']]
if VisonModel.IMAGE_TOKEN_included(messages):
# backward compatibility
for message in messages:
role, content = message['role'], message['content']
if role != 'user' or isinstance(content, str):
prompt_messages.append(message)
continue
n_images = len([1 for x in content if x['type'] == 'image'])
content = [x['text'] for x in content if x['type'] == 'text']
prompt = '\n'.join(content)
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{self.image_token}</img>')
prompt = prompt.replace('</img><img>', '')
prompt = prompt.replace('<img><img>', '<img>')
prompt = prompt.replace('</img></img>', '</img>')
elif IMAGE_TOKEN not in prompt:
prompt = f'<img>{self.image_token * n_images}</img>\n' + prompt
else:
pass
prompt_messages.append(dict(role='user', content=prompt))
else:
for message in messages:
role, content = message['role'], message['content']
if role != 'user' or isinstance(content, str):
prompt_messages.append(message)
continue
_content = []
for item in content:
item_type = item['type']
if item_type == 'text':
_content.append(item['text'])
elif item_type in ['image', 'image_url']:
_content.append(f'<img>{self.image_token}</img>')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
prompt_messages.append(dict(role='user', content='\n'.join(_content)))
prompt = chat_template.messages2prompt(prompt_messages,
sequence_start,
tools=tools,
enable_thinking=enable_thinking)
return prompt, IMAGE_TOKEN
return prompt, self.image_token

def to_pytorch(self,
messages,
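To illustrate the two branches above (sketch only; the URL is a placeholder): when no legacy `<IMAGE_TOKEN>` placeholder is present, each image item is expanded into its own `<img>...</img>` block, while prompts that already contain the placeholder keep the old rewriting.

```python
# New-style content without '<IMAGE_TOKEN>': each image becomes <img><IMG_CONTEXT></img>.
content = [
    dict(type='image_url', image_url=dict(url='https://example.com/cat.jpg')),
    dict(type='text', text='What is in the picture?'),
]
# expected user prompt handed to the chat template:
#   '<img><IMG_CONTEXT></img>\nWhat is in the picture?'

# Legacy prompt that already carries the placeholder keeps the old rewriting:
#   'Compare <IMAGE_TOKEN> and <IMAGE_TOKEN>'
#   -> 'Compare <img><IMG_CONTEXT></img> and <img><IMG_CONTEXT></img>'
```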
68 changes: 48 additions & 20 deletions lmdeploy/vl/model/internvl3_hf.py
@@ -44,11 +44,12 @@ def __init__(self,
hf_config: AutoConfig = None,
backend: str = ''):
super().__init__(model_path, with_llm, max_memory, hf_config, backend)
self.arch = hf_config.architectures[0]
self.arch = self.hf_config.architectures[0]

def build_preprocessor(self):
self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
tokenizer = self.processor.tokenizer
self.image_token = self.processor.image_token
self.image_token_id = tokenizer.context_image_token_id
self.image_tokens_per_patch = self.processor.image_seq_length
self.tokenizer_init_kwargs = tokenizer.init_kwargs
@@ -146,8 +147,38 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
messages.append(dict(role='forward', content=outputs))
return messages

@staticmethod
def proc_internvl_hf_messages(self, content: List[Dict]):
"""Process the content list of role 'user' for InternVL HF models."""
res = []
for item in content:
if item['type'] == 'text':
# backward compatibility
text = item['text']
text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
res.append(text)
elif item['type'] in ['image', 'image_url']:
res.append(f'{self.image_token}\n')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
return ''.join(res)

def proc_interns1_messages(self, content: List[Dict]):
"""Process the content list of role 'user' for InternS1 models."""
res = []
for item in content:
if item['type'] == 'text':
# backward compatibility
text = item['text']
text = (text.replace('<IMAGE_TOKEN>', self.image_token) if '<IMAGE_TOKEN>' in text else text)
res.append(text)
elif item['type'] in ['image', 'image_url']:
res.append(f'{self.image_token}')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
return '\n'.join(res)

def proc_messages(
self,
messages,
chat_template,
sequence_start,
@@ -156,31 +187,28 @@
):
"""Apply chat template to get the prompt."""
prompt_messages = []
IMAGE_TOKEN = '<IMAGE_TOKEN>'

for message in messages:
if isinstance(message['content'], str):
prompt_messages.append(message)
if message['role'] in ['preprocess', 'forward']:
continue
elif message['role'] in ['preprocess', 'forward']:
continue
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
content = [x.get('text', '') for x in message['content'] if x['type'] == 'text']
prompt = content[0]
if IMAGE_TOKEN in prompt and f'<img>{IMAGE_TOKEN}' not in prompt:
prompt = prompt.replace(f'{IMAGE_TOKEN}', f'<img>{IMAGE_TOKEN}</img>')
prompt = prompt.replace('</img><img>', '')
prompt = prompt.replace('<img><img>', '<img>')
prompt = prompt.replace('</img></img>', '</img>')
elif IMAGE_TOKEN not in prompt:
prompt = f'<img>{IMAGE_TOKEN * n_images}</img>\n' + prompt
role, content = message['role'], message['content']
if role == 'user' and isinstance(content, List):
content = (self.proc_internvl_hf_messages(content)
if self.arch == 'InternVLForConditionalGeneration' else self.proc_interns1_messages(content))
message = dict(role=role, content=content)
prompt_messages.append(message)
else:
pass
prompt_messages.append(dict(role='user', content=prompt))
# backward compatibility
content = (content.replace('<IMAGE_TOKEN>', self.image_token)
if isinstance(content, str) and '<IMAGE_TOKEN>' in content else content)
message = dict(role=role, content=content)
prompt_messages.append(message)

prompt = chat_template.messages2prompt(prompt_messages,
sequence_start,
tools=tools,
enable_thinking=enable_thinking)
return prompt, IMAGE_TOKEN
return prompt, self.image_token

def to_pytorch(self,
messages,
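A small sketch contrasting the two helpers above (content values are illustrative; `<IMG>` stands for whatever `self.image_token` resolves to from the HF processor):

```python
content = [
    dict(type='text', text='Caption:'),
    dict(type='image', image_url=dict(url='https://example.com/cat.jpg')),
]
# proc_internvl_hf_messages: items joined with '', a newline appended after each image
#   -> 'Caption:<IMG>\n'
# proc_interns1_messages: items joined with '\n'
#   -> 'Caption:\n<IMG>'
```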
47 changes: 25 additions & 22 deletions lmdeploy/vl/model/qwen2.py
@@ -32,8 +32,8 @@ def build_preprocessor(self):
from transformers import AutoProcessor
self.processor = AutoProcessor.from_pretrained(self.model_path)
tokenizer = self.processor.tokenizer
image_token = self.processor.image_token
self.image_token_id = tokenizer.encode(image_token)[-1]
self.image_token = self.processor.image_token
self.image_token_id = tokenizer.encode(self.image_token)[-1]

def preprocess(self, messages: List[Dict]) -> List[Dict]:
"""Refer to `super().preprocess()` for spec."""
@@ -124,33 +124,36 @@ def forward(self, messages: List[Dict], max_batch_size: int = 1) -> List[Dict]:
messages.append(dict(role='forward', content=outputs))
return messages

@staticmethod
def proc_messages(messages, chat_template, sequence_start):
def proc_messages(self, messages, chat_template, sequence_start):
"""Apply chat template to get the prompt."""
prompt_messages = []
IMAGE_TOKEN = '<IMAGE_TOKEN>'
for message in messages:
if isinstance(message['content'], str):
prompt_messages.append(message)
if message['role'] in ['preprocess', 'forward']:
continue
elif message['role'] in ['images', 'preprocess', 'forward']:
continue
n_images = len([1 for x in message['content'] if x['type'] == 'image'])
content = [item['text'] for item in message['content'] if item['type'] == 'text']
prompt = content[0]
if IMAGE_TOKEN in prompt and '<|vision_start|>' not in prompt:
prompt = prompt.replace(IMAGE_TOKEN, f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>')
role, content = message['role'], message['content']
if role == 'user' and isinstance(content, List):
_content = []
for item in content:
if item['type'] == 'text':
# backward compatibility
text = item['text']
if IMAGE_TOKEN in text:
text = text.replace(IMAGE_TOKEN, self.image_token)
_content.append(text)
elif item['type'] in ['image', 'image_url']:
_content.append(f'<|vision_start|>{self.image_token}<|vision_end|>')
else:
raise ValueError(f'Unsupported message type: {item["type"]}')
message = dict(role=role, content=''.join(_content))
prompt_messages.append(message)
else:
# Qwen2-VL-2B-Instruct will concat image and user prompt
# according to their order in the content list
# we insert image token before user prompt by default. The
# user can use custom image token position if they want the
# same decorated prompt as Qwen2-VL
prompt = f'<|vision_start|>{IMAGE_TOKEN}<|vision_end|>' * \
n_images + prompt
prompt_messages.append(dict(role=message['role'], content=prompt))
if IMAGE_TOKEN in content and '<|vision_start|>' not in content:
# backward compatibility
content = content.replace(IMAGE_TOKEN, f'<|vision_start|>{self.image_token}<|vision_end|>')
prompt_messages.append(dict(role=role, content=content))
prompt = chat_template.messages2prompt(prompt_messages, sequence_start)
return prompt, IMAGE_TOKEN
return prompt, self.image_token

@staticmethod
def get_mrope_info(seq_len: int,
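For reference, a hedged sketch of the decoration the rewritten `proc_messages` applies for Qwen2-VL; the image token literal is an assumption taken from the HF processor:

```python
# Each image item is wrapped in vision markers in the order it appears; legacy
# '<IMAGE_TOKEN>' placeholders in plain-string content are rewritten the same way.
content = [
    dict(type='image_url', image_url=dict(url='https://example.com/cat.jpg')),
    dict(type='text', text='What is in the picture?'),
]
# resulting user prompt, assuming self.image_token == '<|image_pad|>':
#   '<|vision_start|><|image_pad|><|vision_end|>What is in the picture?'
```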