Merge pull request #162 from MervinPraison/develop

MervinPraison · web-flow · commit 151facab62da · 2024-10-02T09:19:47.000+01:00
Add support for multimodal models
diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.11-slim
 WORKDIR /app
 COPY . .
-RUN pip install flask praisonai==0.0.72 gunicorn markdown
+RUN pip install flask praisonai==0.0.73 gunicorn markdown
 EXPOSE 8080
 CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]
diff --git a/docs/api/praisonai/deploy.html b/docs/api/praisonai/deploy.html
@@ -110,7 +110,7 @@ <h2 id="raises">Raises</h2>
             file.write(&#34;FROM python:3.11-slim\n&#34;)
             file.write(&#34;WORKDIR /app\n&#34;)
             file.write(&#34;COPY . .\n&#34;)
-            file.write(&#34;RUN pip install flask praisonai==0.0.72 gunicorn markdown\n&#34;)
+            file.write(&#34;RUN pip install flask praisonai==0.0.73 gunicorn markdown\n&#34;)
             file.write(&#34;EXPOSE 8080\n&#34;)
             file.write(&#39;CMD [&#34;gunicorn&#34;, &#34;-b&#34;, &#34;0.0.0.0:8080&#34;, &#34;api:app&#34;]\n&#39;)
             
diff --git a/praisonai.rb b/praisonai.rb
@@ -3,7 +3,7 @@ class Praisonai < Formula
   
     desc "AI tools for various AI applications"
     homepage "https://github.com/MervinPraison/PraisonAI"
-    url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/0.0.72.tar.gz"
+    url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/0.0.73.tar.gz"
     sha256 "1828fb9227d10f991522c3f24f061943a254b667196b40b1a3e4a54a8d30ce32"  # Replace with actual SHA256 checksum
     license "MIT"
   
diff --git a/praisonai/deploy.py b/praisonai/deploy.py
@@ -56,7 +56,7 @@ def create_dockerfile(self):
             file.write("FROM python:3.11-slim\n")
             file.write("WORKDIR /app\n")
             file.write("COPY . .\n")
-            file.write("RUN pip install flask praisonai==0.0.72 gunicorn markdown\n")
+            file.write("RUN pip install flask praisonai==0.0.73 gunicorn markdown\n")
             file.write("EXPOSE 8080\n")
             file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')
             
diff --git a/praisonai/ui/chat.py b/praisonai/ui/chat.py
@@ -17,6 +17,9 @@
 from tavily import TavilyClient
 from crawl4ai import WebCrawler
 import asyncio
+from PIL import Image
+import io
+import base64
 
 # Set up logging
 logger = logging.getLogger(__name__)
@@ -292,11 +295,32 @@ async def main(message: cl.Message):
     message_history = cl.user_session.get("message_history", [])
     now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     
-    # Add the current date and time to the user's message
+    # Check if an image was uploaded with this message
+    image = None
+    if message.elements and isinstance(message.elements[0], cl.Image):
+        image_element = message.elements[0]
+        try:
+            # Open the image and keep it in memory
+            image = Image.open(image_element.path)
+            image.load()  # This ensures the file is fully loaded into memory
+            cl.user_session.set("image", image)
+        except Exception as e:
+            logger.error(f"Error processing image: {str(e)}")
+            await cl.Message(content="There was an error processing the uploaded image. Please try again.").send()
+            return
+
+    # Prepare user message
     user_message = f"""
-Answer the question and use tools if needed:\n{message.content}.\n\n
+Answer the question and use tools if needed:\n
+
 Current Date and Time: {now}
+
+User Question: {message.content}
 """
+
+    if image:
+        user_message = f"Image uploaded. {user_message}"
+
     message_history.append({"role": "user", "content": user_message})
 
     msg = cl.Message(content="")
@@ -309,6 +333,19 @@ async def main(message: cl.Message):
         "stream": True,
     }
 
+    # If an image is uploaded, include it in the message
+    if image:
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        
+        completion_params["messages"][-1] = {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": user_message},
+                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}}
+            ]
+        }
     # Only add tools and tool_choice if Tavily API key is available
     if tavily_api_key:
         completion_params["tools"] = tools
@@ -359,6 +396,7 @@ async def main(message: cl.Message):
     cl.user_session.set("message_history", message_history)
     await msg.update()
 
+    # Handle tool calls if any
     if tavily_api_key and tool_calls:
         available_functions = {
             "tavily_web_search": tavily_web_search,
@@ -411,7 +449,7 @@ async def main(message: cl.Message):
         msg.content = full_response
         await msg.update()
     else:
-        # If no tool calls or Tavily API key is not set, the full_response is already set
+        # If no tool calls, the full_response is already set
         msg.content = full_response
         await msg.update()
 
@@ -433,7 +471,7 @@ async def send_count():
     ).send()
 
 @cl.on_chat_resume
-async def on_chat_resume(thread: ThreadDict):  # Change the type hint here
+async def on_chat_resume(thread: ThreadDict):
     logger.info(f"Resuming chat: {thread['id']}")
     model_name = load_setting("model_name") or os.getenv("MODEL_NAME") or "gpt-4o-mini"
     logger.debug(f"Model name: {model_name}")
@@ -481,3 +519,10 @@ async def on_chat_resume(thread: ThreadDict):  # Change the type hint here
             logger.warning(f"Message without recognized type: {message}")
 
     cl.user_session.set("message_history", message_history)
+
+    # Check if there's an image in the thread metadata
+    image_data = metadata.get("image")
+    if image_data:
+        image = Image.open(io.BytesIO(base64.b64decode(image_data)))
+        cl.user_session.set("image", image)
+        await cl.Message(content="Previous image loaded. You can continue asking questions about it or upload a new image.").send()
diff --git a/praisonai/ui/code.py b/praisonai/ui/code.py
@@ -18,6 +18,9 @@
 from tavily import TavilyClient
 from datetime import datetime
 from crawl4ai import WebCrawler
+from PIL import Image
+import io
+import base64
 
 # Set up logging
 logger = logging.getLogger(__name__)
@@ -303,28 +306,65 @@ def tavily_web_search(query):
 async def main(message: cl.Message):
     model_name = load_setting("model_name") or os.getenv("MODEL_NAME") or "gpt-4o-mini"
     message_history = cl.user_session.get("message_history", [])
-    message_history.append({"role": "user", "content": message.content})
     gatherer = ContextGatherer()
     context, token_count, context_tree = gatherer.run()
     now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    prompt_history = message_history
-    prompt_history.append({"role": "user", "content": """
-                           Answer the question and use tools if needed:\n{question}.\n\n
-                           Current Date and Time: {now}
-                           Below is the Context:\n{context}\n\n"""
-                           .format(context=context, question=message.content, now=now)})
+
+    # Check if an image was uploaded with this message
+    image = None
+    if message.elements and isinstance(message.elements[0], cl.Image):
+        image_element = message.elements[0]
+        try:
+            # Open the image and keep it in memory
+            image = Image.open(image_element.path)
+            image.load()  # This ensures the file is fully loaded into memory
+            cl.user_session.set("image", image)
+        except Exception as e:
+            logger.error(f"Error processing image: {str(e)}")
+            await cl.Message(content="There was an error processing the uploaded image. Please try again.").send()
+            return
+
+    # Prepare user message
+    user_message = f"""
+Answer the question and use tools if needed:\n{message.content}.\n\n
+Current Date and Time: {now}
+
+Context:
+{context}
+"""
+
+    if image:
+        user_message = f"Image uploaded. {user_message}"
+
+    message_history.append({"role": "user", "content": user_message})
 
     msg = cl.Message(content="")
     await msg.send()
 
     # Prepare the completion parameters
     completion_params = {
         "model": model_name,
-        "messages": prompt_history,
+        "messages": message_history,
         "stream": True,
     }
 
-    # Only add tools and tool_choice if Tavily API key is available
+    # If an image is uploaded, include it in the message
+    if image:
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        
+        completion_params["messages"][-1] = {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": user_message},
+                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}}
+            ]
+        }
+        # Use a vision-capable model when an image is present
+        completion_params["model"] = "gpt-4-vision-preview"  # Adjust this to your actual vision-capable model
+
+    # Only add tools and tool_choice if Tavily API key is available (NOTE: an uploaded image does not disable tools here)
     if tavily_api_key:
         completion_params["tools"] = tools
         completion_params["tool_choice"] = "auto"
@@ -380,7 +420,7 @@ async def main(message: cl.Message):
         available_functions = {
             "tavily_web_search": tavily_web_search,
         }
-        messages = prompt_history + [{"role": "assistant", "content": None, "function_call": {
+        messages = message_history + [{"role": "assistant", "content": None, "function_call": {
             "name": tool_calls[0]['function']['name'],
             "arguments": tool_calls[0]['function']['arguments']
         }}]
@@ -497,3 +537,10 @@ async def on_chat_resume(thread: ThreadDict):
             logger.warning(f"Message without recognized type: {message}")
 
     cl.user_session.set("message_history", message_history)
+
+    # Check if there's an image in the thread metadata
+    image_data = metadata.get("image")
+    if image_data:
+        image = Image.open(io.BytesIO(base64.b64decode(image_data)))
+        cl.user_session.set("image", image)
+        await cl.Message(content="Previous image loaded. You can continue asking questions about it, upload a new image, or just chat.").send()
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "PraisonAI"
-version = "0.0.72"
+version = "0.0.73"
 description = "PraisonAI application combines AutoGen and CrewAI or similar frameworks into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customization, and efficient human-agent collaboration."
 authors = ["Mervin Praison"]
 license = ""