@@ -3638,7 +3638,6 @@ class Qwen25VLChatHandler(Llava15ChatHandler):
36383638
36393639 CHAT_FORMAT = (
36403640 "{% set image_count = namespace(value=0) %}"
3641- #"{% set video_count = namespace(value=0) %}"
36423641 "{% for message in messages %}"
36433642 "{% if loop.first and message['role'] != 'system' %}"
36443643 "<|im_start|>system\n "
@@ -3685,13 +3684,98 @@ def __call__(self, **kwargs):
36853684
36863685 if self .verbose :
36873686 messages = kwargs .get ('messages' , [])
3688- image_count = len (self .get_image_urls (messages ))
3689- print (f"Minimal - Cleared state, processing { image_count } images" , file = sys .stderr )
3687+ try :
3688+ image_count = len (self .get_image_urls (messages ))
3689+ print (f"Qwen25VLChatHandler - Cleared state, processing { image_count } images" , file = sys .stderr )
3690+ except Exception :
3691+ print (f"Qwen25VLChatHandler - Cleared state" , file = sys .stderr )
36903692
36913693 # Use parent implementation
36923694 return super ().__call__ (** kwargs )
36933695
36943696
class Qwen3VLChatHandler(Llava15ChatHandler):
    """Chat handler for Qwen3-VL models (Thinking and Instruct variants).

    The prompt is built from ``CHAT_FORMAT_BASE`` plus an assistant
    generation prefix chosen at construction time: the Thinking variant opens
    a ``<think>`` block, the Instruct variant does not.  Every call clears
    the llama context and cached image state before delegating to the parent
    handler.
    """

    DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant."

    # The default system message is spliced in here with plain Python
    # concatenation.  Writing ``{{ self.DEFAULT_SYSTEM_MESSAGE }}`` inside the
    # template does not work: Jinja reserves ``self`` for the template's own
    # block reference, so the expression never resolves to this handler and
    # the fallback system turn would be rendered empty.
    # NOTE(review): the literal space after each "\n" and around the image URL
    # is kept exactly as found - confirm it matches the model's chat template.
    CHAT_FORMAT_BASE = (
        "{% set image_count = namespace(value=0) %}"
        "{% for message in messages %}"
        "{% if loop.first and message['role'] != 'system' %}"
        "<|im_start|>system\n "
        + DEFAULT_SYSTEM_MESSAGE
        + "<|im_end|>\n "
        "{% endif %}"
        "<|im_start|>{{ message['role'] }}\n "
        "{% if message['content'] is string %}"
        "{{ message['content'] }}<|im_end|>\n "
        "{% else %}"
        "{% for content in message['content'] %}"
        "{% if content['type'] == 'image_url' %}"
        "{% if content.image_url is string %}"
        "{% set image_count.value = image_count.value + 1 %}"
        "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url }} <|vision_end|>"
        "{% else %}"
        "{% set image_count.value = image_count.value + 1 %}"
        "Picture {{ image_count.value }}: <|vision_start|> {{ content.image_url.url }} <|vision_end|>"
        "{% endif %}"
        "{% elif content['type'] == 'text' %}"
        "{{ content['text'] }}"
        "{% endif %}"
        "{% endfor %}"
        "<|im_end|>\n "
        "{% endif %}"
        "{% endfor %}"
    )

    def __init__(
        self,
        use_think_prompt: bool = True,
        verbose: bool = True,
        **kwargs,
    ):
        """Configure the handler for the Thinking or Instruct model variant.

        Parameters:
        - use_think_prompt (bool):
            - True (default): Use the '<think>' prompt (for Thinking version).
            - False: Do not use '<think>' (for Instruct version).
        - verbose (bool): Whether to print verbose logs.
        - **kwargs: Forwarded unchanged to the parent handler's initialiser.
        """
        self.use_think_prompt = use_think_prompt

        # Instance-level template: shared base plus the generation prefix
        # matching the selected model variant.
        if self.use_think_prompt:
            self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + "<|im_start|>assistant\n <think>\n "
        else:
            self.CHAT_FORMAT = self.CHAT_FORMAT_BASE + "<|im_start|>assistant\n "

        super().__init__(**kwargs)

        # Assigned after the parent initialiser on purpose: `verbose` is not
        # forwarded through **kwargs, so if the parent sets its own default
        # the caller's choice would otherwise be silently overwritten.
        self.verbose = verbose

    def __call__(self, **kwargs):
        """Reset model and handler state, then run the parent chat handler.

        Parameters:
        - **kwargs: Chat-completion arguments.  ``llama`` is required
          (``KeyError`` if missing); ``messages`` is only inspected here for
          the verbose log line.

        Returns whatever the parent handler's ``__call__`` returns.
        """
        llama = kwargs['llama']

        # Clear state for multiple runs
        llama.reset()
        llama._ctx.memory_clear(True)
        llama.n_tokens = 0

        if hasattr(llama, 'input_ids'):
            llama.input_ids.fill(0)

        # Clear any handler state
        if hasattr(self, '_last_image_embed'):
            self._last_image_embed = None
            self._last_image_hash = None

        if self.verbose:
            messages = kwargs.get('messages', [])
            # Logging is best-effort: a failure while counting images must
            # not prevent the completion itself from running.
            try:
                image_count = len(self.get_image_urls(messages))
                print(f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state, processing {image_count} images", file=sys.stderr)
            except Exception:
                print(f"Qwen3VLHandler(think={self.use_think_prompt}) - Cleared state", file=sys.stderr)

        # Use parent implementation
        return super().__call__(**kwargs)
3778+
36953779
36963780@register_chat_completion_handler ("chatml-function-calling" )
36973781def chatml_function_calling (
0 commit comments