1818from tavily import TavilyClient
1919from datetime import datetime
2020from crawl4ai import WebCrawler
21+ from PIL import Image
22+ import io
23+ import base64
2124
2225# Set up logging
2326logger = logging .getLogger (__name__ )
@@ -303,28 +306,65 @@ def tavily_web_search(query):
303306async def main (message : cl .Message ):
304307 model_name = load_setting ("model_name" ) or os .getenv ("MODEL_NAME" ) or "gpt-4o-mini"
305308 message_history = cl .user_session .get ("message_history" , [])
306- message_history .append ({"role" : "user" , "content" : message .content })
307309 gatherer = ContextGatherer ()
308310 context , token_count , context_tree = gatherer .run ()
309311 now = datetime .now ().strftime ("%Y-%m-%d %H:%M:%S" )
310- prompt_history = message_history
311- prompt_history .append ({"role" : "user" , "content" : """
312- Answer the question and use tools if needed:\n {question}.\n \n
313- Current Date and Time: {now}
314- Below is the Context:\n {context}\n \n """
315- .format (context = context , question = message .content , now = now )})
312+
313+ # Check if an image was uploaded with this message
314+ image = None
315+ if message .elements and isinstance (message .elements [0 ], cl .Image ):
316+ image_element = message .elements [0 ]
317+ try :
318+ # Open the image and keep it in memory
319+ image = Image .open (image_element .path )
320+ image .load () # This ensures the file is fully loaded into memory
321+ cl .user_session .set ("image" , image )
322+ except Exception as e :
323+ logger .error (f"Error processing image: { str (e )} " )
324+ await cl .Message (content = "There was an error processing the uploaded image. Please try again." ).send ()
325+ return
326+
327+ # Prepare user message
328+ user_message = f"""
329+ Answer the question and use tools if needed:\n { message .content } .\n \n
330+ Current Date and Time: { now }
331+
332+ Context:
333+ { context }
334+ """
335+
336+ if image :
337+ user_message = f"Image uploaded. { user_message } "
338+
339+ message_history .append ({"role" : "user" , "content" : user_message })
316340
317341 msg = cl .Message (content = "" )
318342 await msg .send ()
319343
320344 # Prepare the completion parameters
321345 completion_params = {
322346 "model" : model_name ,
323- "messages" : prompt_history ,
347+ "messages" : message_history ,
324348 "stream" : True ,
325349 }
326350
327- # Only add tools and tool_choice if Tavily API key is available
351+ # If an image is uploaded, include it in the message
352+ if image :
353+ buffered = io .BytesIO ()
354+ image .save (buffered , format = "PNG" )
355+ img_str = base64 .b64encode (buffered .getvalue ()).decode ()
356+
357+ completion_params ["messages" ][- 1 ] = {
358+ "role" : "user" ,
359+ "content" : [
360+ {"type" : "text" , "text" : user_message },
361+ {"type" : "image_url" , "image_url" : {"url" : f"data:image/png;base64,{ img_str } " }}
362+ ]
363+ }
364+ # Use a vision-capable model when an image is present
365+ completion_params ["model" ] = "gpt-4-vision-preview" # Adjust this to your actual vision-capable model
366+
367+ # Only add tools and tool_choice if Tavily API key is available (NOTE: the check below does not exclude image uploads)
328368 if tavily_api_key :
329369 completion_params ["tools" ] = tools
330370 completion_params ["tool_choice" ] = "auto"
@@ -380,7 +420,7 @@ async def main(message: cl.Message):
380420 available_functions = {
381421 "tavily_web_search" : tavily_web_search ,
382422 }
383- messages = prompt_history + [{"role" : "assistant" , "content" : None , "function_call" : {
423+ messages = message_history + [{"role" : "assistant" , "content" : None , "function_call" : {
384424 "name" : tool_calls [0 ]['function' ]['name' ],
385425 "arguments" : tool_calls [0 ]['function' ]['arguments' ]
386426 }}]
@@ -497,3 +537,10 @@ async def on_chat_resume(thread: ThreadDict):
497537 logger .warning (f"Message without recognized type: { message } " )
498538
499539 cl .user_session .set ("message_history" , message_history )
540+
541+ # Check if there's an image in the thread metadata
542+ image_data = metadata .get ("image" )
543+ if image_data :
544+ image = Image .open (io .BytesIO (base64 .b64decode (image_data )))
545+ cl .user_session .set ("image" , image )
546+ await cl .Message (content = "Previous image loaded. You can continue asking questions about it, upload a new image, or just chat." ).send ()
0 commit comments