@@ -49,17 +49,16 @@ def _parse_generalized_path(s: str):
4949 return s , None
5050
5151
52- def _should_skip_prompt (
53- prompt , tokenizer , processor , metadata , max_length , apply_chat_template , apply_chat_template_kwargs
54- ):
52+ def _should_skip_prompt (formatted_prompt : str , tokenizer , processor , max_length , multimodal_inputs = None ):
5553 if max_length is None :
5654 return False
5755
58- from slime .utils .processing_utils import prepare_model_inputs
56+ if processor :
57+ processor_output = processor (text = formatted_prompt , ** multimodal_inputs )
58+ input_ids = processor_output ["input_ids" ][0 ]
59+ else :
60+ input_ids = tokenizer .encode (formatted_prompt , add_special_tokens = False )
5961
60- input_ids , _ = prepare_model_inputs (
61- prompt , tokenizer , processor , metadata , apply_chat_template , apply_chat_template_kwargs
62- )
6362 return len (input_ids ) > max_length
6463
6564
@@ -140,6 +139,7 @@ def __init__(
140139 prompt = _build_messages (data , prompt_key , as_conversation , multimodal_keys )
141140
142141 metadata = data .get (metadata_key ) or {}
142+ tools = None
143143 if tool_key is not None and tool_key in data :
144144 tools = data [tool_key ]
145145 if isinstance (tools , str ):
@@ -149,17 +149,37 @@ def __init__(
149149 assert isinstance (tools , list ), f"tools must be a list, got { type (tools )} instead"
150150 metadata ["tools" ] = tools
151151
152+ if apply_chat_template :
153+ formatted_prompt = tokenizer .apply_chat_template (
154+ prompt ,
155+ tools = tools ,
156+ tokenize = False ,
157+ add_generation_prompt = True ,
158+ ** (apply_chat_template_kwargs or {}),
159+ )
160+ else :
161+ formatted_prompt = prompt
162+
163+ if processor :
164+ # temporary solution, will write image utils for slime later
165+ from qwen_vl_utils import process_vision_info
166+
167+ assert isinstance (prompt , list )
168+ images , videos = process_vision_info (prompt )
169+ multimodal_inputs = {"images" : images , "videos" : videos }
170+ else :
171+ multimodal_inputs = None
172+
152173 # TODO: this is slow.
153- if _should_skip_prompt (
154- prompt , tokenizer , processor , metadata , max_length , apply_chat_template , apply_chat_template_kwargs
155- ):
174+ if _should_skip_prompt (formatted_prompt , tokenizer , processor , max_length , multimodal_inputs ):
156175 continue
157176
158177 self .origin_samples .append (
159178 Sample (
160- prompt = prompt ,
179+ prompt = formatted_prompt ,
161180 label = data [label_key ] if label_key is not None else None ,
162181 metadata = metadata ,
182+ multimodal_inputs = multimodal_inputs ,
163183 )
164184 )
165185