Add gpt-4-with-som model option and rename gpt-4-vision-preview to gpt-4

joshbickett · joshbickett · commit 61584a6922e8 · 2024-01-05T08:00:25.000-08:00
diff --git a/operate/actions/actions.py b/operate/actions/actions.py
@@ -7,6 +7,7 @@
 import asyncio
 import aiohttp
 from PIL import Image
+from ultralytics import YOLO
 import google.generativeai as genai
 from operate.config.settings import Config
 from operate.exceptions.exceptions import ModelNotRecognizedException
@@ -38,22 +39,28 @@
 
 client = config.initialize_openai_client()
 
-# yolo_model = YOLO(
-#     "something/here"
-# )  # Load your tra
 
 yolo_model = None
 
 
-def get_next_action(model, messages, objective):
-    if model == "gpt-4-vision-preview":
-        content = call_gpt_4_v(messages, objective)
-        return content
+async def get_next_action(model, messages, objective):
+    """Return the next action proposed by the selected model.
+
+    Dispatches on ``model``. Only the "gpt-4-with-som" backend is awaited;
+    "gpt-4" and "gemini-pro-vision" are called synchronously, and "agent-1"
+    is a placeholder that returns the literal string "coming soon".
+
+    Raises:
+        ModelNotRecognizedException: if ``model`` matches none of the above.
+    """
+    if model == "gpt-4":
+        return call_gpt_4_v(messages, objective)
+    elif model == "gpt-4-with-som":
+        return await call_gpt_4_v_labeled(messages, objective)
     elif model == "agent-1":
         return "coming soon"
     elif model == "gemini-pro-vision":
-        content = call_gemini_pro_vision(messages, objective)
-        return content
+        return call_gemini_pro_vision(messages, objective)
 
     raise ModelNotRecognizedException(model)
 
diff --git a/operate/dialogs/dialog.py b/operate/dialogs/dialog.py
@@ -1,6 +1,7 @@
 import sys
 import os
 import platform
+import asyncio
 from prompt_toolkit.shortcuts import message_dialog
 from prompt_toolkit import prompt
 from operate.exceptions.exceptions import ModelNotRecognizedException
@@ -102,7 +103,7 @@ def main(model, terminal_prompt, voice_mode=False):
         if config.debug:
             print("[loop] messages before next action:\n\n\n", messages[1:])
         try:
-            response = get_next_action(model, messages, objective)
+            response = asyncio.run(get_next_action(model, messages, objective))
 
             action = parse_response(response)
             action_type = action.get("type")
diff --git a/operate/main.py b/operate/main.py
@@ -15,7 +15,7 @@ def main_entry():
         "--model",
         help="Specify the model to use",
         required=False,
-        default="gpt-4-vision-preview",
+        default="gpt-4",
     )
 
     # Add a voice flag

Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ def main_entry():`
`15`	`15`	`"--model",`
`16`	`16`	`help="Specify the model to use",`
`17`	`17`	`required=False,`
`18`		`- default="gpt-4-vision-preview",`
	`18`	`+ default="gpt-4",`
`19`	`19`	`)`
`20`	`20`
`21`	`21`	`# Add a voice flag`