@@ -2637,6 +2637,28 @@ def compress_tools_array(tools_array):
26372637
26382638 return tools_array_filtered
26392639
def sweep_media_from_messages(messages_array):
    """Collect embedded image and audio payloads from a list of chat messages.

    Two places are inspected on every message:

    * a list-valued ``content`` field, whose parts of type ``image_url``
      (only ``data:image...`` URLs) and ``input_audio`` are harvested;
    * a top-level ``images`` list on the message itself (presumably the
      Ollama chat format -- confirm against callers).

    Malformed entries (non-dict messages or parts, null fields, data URLs
    without a comma separator) are skipped rather than raising, so one bad
    part cannot abort the whole sweep.

    Args:
        messages_array: Iterable of message dicts; ``None`` is treated as
            an empty list.

    Returns:
        A tuple ``(images, audio)`` of two lists. ``images`` holds the text
        following the first comma of each data URL plus every entry of the
        per-message ``images`` lists; ``audio`` holds each non-empty
        ``input_audio["data"]`` value. Order of appearance is preserved.
    """
    images = []
    audio = []
    for message in messages_array or []:
        if not isinstance(message, dict):
            continue

        curr_content = message.get("content")
        if isinstance(curr_content, list):
            for item in curr_content:
                # Content parts may be bare strings or null; only dict
                # parts carry a "type" discriminator.
                if not isinstance(item, dict):
                    continue
                item_type = item.get("type")
                if item_type == "image_url":
                    image_url = item.get("image_url")
                    url = image_url.get("url") if isinstance(image_url, dict) else None
                    if isinstance(url, str) and url.startswith("data:image"):
                        # partition() never raises: a data URL without a
                        # comma simply has no payload and is ignored.
                        _, sep, payload = url.partition(",")
                        if sep:
                            images.append(payload)
                elif item_type == "input_audio":
                    input_audio = item.get("input_audio")
                    data = input_audio.get("data") if isinstance(input_audio, dict) else None
                    if data:
                        audio.append(data)

        imgs_ollama = message.get("images")
        # Require a real sequence so a stray string is not split into
        # single-character "images".
        if imgs_ollama and isinstance(imgs_ollama, (list, tuple)):
            images.extend(imgs_ollama)
    return images, audio
2660+
2661+
26402662def transform_genparams (genparams , api_format , use_jinja ):
26412663 global chatcompl_adapter , maxctx
26422664
@@ -2784,6 +2806,8 @@ def transform_genparams(genparams, api_format, use_jinja):
27842806 messages_string = jinja_output
27852807 if jinjatools and len (jinjatools )> 0 :
27862808 genparams ["using_openai_tools" ] = True
2809+ # handle media
2810+ images_added , audio_added = sweep_media_from_messages (messages_array )
27872811 else :
27882812 if jinjatools :
27892813 # inject the tools list at the top of the context window, even if context has shifted
0 commit comments