Skip to content

Commit 6833f03

Browse files
committed
feat: Add Qwen3VLChatHandler into llama_chat_format.py
1 parent ee6963e commit 6833f03

File tree

1 file changed

+85
-3
lines changed

1 file changed

+85
-3
lines changed

llama_cpp/llama_chat_format.py

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3638,7 +3638,6 @@ class Qwen25VLChatHandler(Llava15ChatHandler):
36383638

36393639
CHAT_FORMAT = (
36403640
"{% set image_count = namespace(value=0) %}"
3641-
#"{% set video_count = namespace(value=0) %}"
36423641
"{% for message in messages %}"
36433642
"{% if loop.first and message['role'] != 'system' %}"
36443643
"<|im_start|>system\n"
@@ -3685,13 +3684,96 @@ def __call__(self, **kwargs):
36853684

36863685
if self.verbose:
36873686
messages = kwargs.get('messages', [])
3688-
image_count = len(self.get_image_urls(messages))
3689-
print(f"Minimal - Cleared state, processing {image_count} images", file=sys.stderr)
3687+
try:
3688+
image_count = len(self.get_image_urls(messages))
3689+
print(f"Qwen25VLChatHandler - Cleared state, processing {image_count} images", file=sys.stderr)
3690+
except Exception:
3691+
print(f"Qwen25VLChatHandler - Cleared state", file=sys.stderr)
36903692

36913693
# Use parent implementation
36923694
return super().__call__(**kwargs)
36933695

36943696

3697+
class Qwen3VLChatHandler(Llava15ChatHandler):
    """Chat handler that formats multimodal prompts for Qwen3-VL models.

    The prompt uses ChatML-style ``<|im_start|>`` / ``<|im_end|>`` turns and
    wraps every image URL in ``<|vision_start|>`` / ``<|vision_end|>``,
    numbering images as ``Picture N``. The generation prompt appended at the
    end depends on ``use_think_prompt`` (see ``__init__``).
    """

    DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."

    # NOTE: inside a Jinja2 template ``self`` refers to the template's own
    # block reference, not to this Python object, so
    # ``{{ self.DEFAULT_SYSTEM_MESSAGE }}`` would resolve to an undefined
    # value. The default system message is therefore spliced into the
    # template text here, at class-definition time.
    CHAT_FORMAT_BASE = (
        "{% set image_count = namespace(value=0) %}"
        "{% for message in messages %}"
        # Fallback system turn when the conversation does not open with one.
        "{% if loop.first and message['role'] != 'system' %}"
        "<|im_start|>system\n"
        + DEFAULT_SYSTEM_MESSAGE
        + "<|im_end|>\n"
        "{% endif %}"
        "<|im_start|>{{ message['role'] }}\n"
        "{% if message['content'] is string %}"
        "{{ message['content'] }}<|im_end|>\n"
        "{% else %}"
        "{% for content in message['content'] %}"
        "{% if content['type'] == 'image_url' %}"
        # ``image_url`` may be a plain string or a mapping with a ``url`` key.
        "{% if content.image_url is string %}"
        "{% set image_count.value = image_count.value + 1 %}"
        "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url }} <|vision_end|>"
        "{% else %}"
        "{% set image_count.value = image_count.value + 1 %}"
        "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url.url }} <|vision_end|>"
        "{% endif %}"
        "{% elif content['type'] == 'text' %}"
        "{{ content['text'] }}"
        "{% endif %}"
        "{% endfor %}"
        "<|im_end|>\n"
        "{% endif %}"
        "{% endfor %}"
    )

    def __init__(
        self,
        use_think_prompt: bool = True,
        verbose: bool = True,
        **kwargs,
    ):
        """Create the handler and select the generation prompt.

        Parameters:
        - use_think_prompt (bool):
            - True (default): Use the '<think>' prompt (for Thinking version).
            - False: Do not use '<think>' (for Instruct version).
        - verbose (bool): Whether to print verbose logs.
        - **kwargs: Forwarded unchanged to the parent initializer.
          NOTE(review): presumably this carries the parent's own options
          (e.g. the projector/clip model path) - confirm against
          ``Llava15ChatHandler.__init__``.
        """
        # Initialize the parent so that the forwarded keyword arguments are
        # actually consumed instead of being silently dropped.
        # NOTE(review): assumes the parent accepts a ``verbose`` keyword.
        super().__init__(verbose=verbose, **kwargs)

        self.use_think_prompt = use_think_prompt
        self.verbose = verbose

        # Instance-level template: base conversation plus the assistant
        # generation prompt, optionally opening a '<think>' section.
        generation_prompt = "<|im_start|>assistant\n"
        if self.use_think_prompt:
            generation_prompt += "<think>\n"
        self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + generation_prompt

    def __call__(self, **kwargs):
        """Reset model and handler state, then defer to the parent handler.

        Requires a ``llama`` keyword argument (``KeyError`` otherwise); a
        ``messages`` keyword argument is read only for verbose logging.
        Returns whatever the parent ``__call__`` returns.
        """
        llama = kwargs['llama']

        # Clear state for multiple runs
        llama.reset()
        llama._ctx.memory_clear(True)
        llama.n_tokens = 0

        if hasattr(llama, 'input_ids'):
            llama.input_ids.fill(0)

        # Clear any handler state
        if hasattr(self, '_last_image_embed'):
            self._last_image_embed = None
            self._last_image_hash = None

        if self.verbose:
            messages = kwargs.get('messages', [])
            status = f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state"
            # Counting images is best-effort: a logging failure must never
            # prevent the actual completion from running.
            try:
                image_count = len(self.get_image_urls(messages))
            except Exception:
                print(status, file=sys.stderr)
            else:
                print(f"{status}, processing {image_count} images", file=sys.stderr)

        # Use parent implementation
        return super().__call__(**kwargs)
3776+
36953777

36963778
@register_chat_completion_handler("chatml-function-calling")
36973779
def chatml_function_calling(

0 commit comments

Comments
 (0)