Skip to content

Commit 5598b03

Browse files
committed
Local II
1 parent 0f8bf8a commit 5598b03

File tree

8 files changed

+99
-81
lines changed

8 files changed

+99
-81
lines changed

interpreter/core/computer/vision/vision.py

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,38 @@ def __init__(self, computer):
1919
self.tokenizer = None # Will load upon first use
2020

2121
def load(self):
22-
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!
23-
24-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
25-
26-
if self.computer.debug:
27-
print(
28-
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
29-
)
30-
print(
31-
"Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
22+
print("\n *Loading Moondream model...*\n")
23+
try:
24+
with contextlib.redirect_stdout(
25+
open(os.devnull, "w")
26+
), contextlib.redirect_stderr(open(os.devnull, "w")):
27+
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!
28+
29+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
30+
31+
if self.computer.debug:
32+
print(
33+
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
34+
)
35+
print(
36+
"Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
37+
)
38+
model_id = "vikhyatk/moondream2"
39+
revision = "2024-04-02"
40+
print("loading model")
41+
42+
self.model = transformers.AutoModelForCausalLM.from_pretrained(
43+
model_id, trust_remote_code=True, revision=revision
44+
)
45+
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
46+
model_id, revision=revision
47+
)
48+
return True
49+
except ImportError:
50+
self.computer.interpreter.display_message(
51+
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
3252
)
33-
model_id = "vikhyatk/moondream2"
34-
revision = "2024-04-02"
35-
print("loading model")
36-
37-
self.model = transformers.AutoModelForCausalLM.from_pretrained(
38-
model_id, trust_remote_code=True, revision=revision
39-
)
40-
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
41-
model_id, revision=revision
42-
)
53+
return False
4354

4455
def ocr(
4556
self,
@@ -107,15 +118,8 @@ def query(
107118
"""
108119

109120
if self.model == None and self.tokenizer == None:
110-
try:
111-
with contextlib.redirect_stdout(
112-
open(os.devnull, "w")
113-
), contextlib.redirect_stderr(open(os.devnull, "w")):
114-
self.load()
115-
except ImportError:
116-
self.computer.interpreter.display_message(
117-
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
118-
)
121+
success = self.load()
122+
if not success:
119123
return ""
120124

121125
if lmc:

interpreter/core/llm/llm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def run(self, messages):
121121
elif self.supports_vision == False and self.vision_renderer:
122122
for img_msg in image_messages:
123123
if img_msg["format"] != "description":
124-
self.interpreter.display_message("*Viewing image...*")
124+
self.interpreter.display_message("\n *Viewing image...*\n")
125125

126126
if img_msg["format"] == "path":
127127
precursor = f"The image I'm referring to ({img_msg['content']}) contains the following: "
@@ -134,9 +134,9 @@ def run(self, messages):
134134
postcursor = ""
135135

136136
img_msg["content"] = (
137-
precursor
138-
+ self.vision_renderer(lmc=img_msg)
139-
+ "\n---\nThe image contains the following text exactly: '''\n"
137+
# precursor
138+
# + self.vision_renderer(lmc=img_msg) +
139+
"\n---\nThe image contains the following text exactly: '''\n"
140140
+ self.interpreter.computer.vision.ocr(lmc=img_msg)
141141
+ "\n'''"
142142
+ postcursor

interpreter/terminal_interface/profiles/defaults/codestral-os.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,5 @@
359359

360360
interpreter.system_message = """You are an AI assistant that writes working markdown code snippets to answer the user's request. You speak concisely and quickly. You say nothing irrelevant to the user's request. YOU NEVER USE PLACEHOLDERS, and instead always send code that 'just works' by figuring out placeholders dynamically. When you send code that fails, you identify the issue, then send new code that doesn't fail."""
361361
interpreter.computer.import_computer_api = True
362+
363+
interpreter.auto_run = True

interpreter/terminal_interface/profiles/defaults/codestral.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,3 +348,14 @@
348348
]
349349

350350
interpreter.system_message = """You are an AI assistant that writes working markdown code snippets to answer the user's request. You speak concisely and quickly. You say nothing irrelevant to the user's request. YOU NEVER USE PLACEHOLDERS, and instead always send code that 'just works' by figuring out placeholders dynamically. When you send code that fails, you identify the issue, then send new code that doesn't fail."""
351+
352+
353+
interpreter.max_output = 600
354+
interpreter.llm.context_window = 8000
355+
interpreter.force_task_completion = False
356+
interpreter.user_message_template = "{content}. If my question must be solved by running code on my computer, send me code to run enclosed in ```python (preferred) or ```shell (less preferred). Otherwise, don't send code. Be concise, don't include anything unnecessary. Don't use placeholders, I can't edit code. Send code that will determine any placeholders (e.g. determine my username)."
357+
interpreter.user_message_template = "I'm trying to help someone use their computer. Here's the last thing they said: '{content}'. What is some code that might be able to answer that question / what should I say to them? DONT USE PLACEHOLDERS! It needs to just work. If it's like a simple greeting, just tell me what to say (without code)."
358+
# interpreter.user_message_template = "{content}"
359+
interpreter.always_apply_user_message_template = False
360+
interpreter.llm.execution_instructions = False
361+
interpreter.auto_run = False

interpreter/terminal_interface/profiles/defaults/llama3.py

Lines changed: 13 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -129,62 +129,27 @@
129129
interpreter.user_message_template = "I'm trying to help someone use their computer. Here's the last thing they said: '{content}'. What is some code that might be able to answer that question / what should I say to them? DONT USE PLACEHOLDERS! It needs to just work."
130130
# interpreter.user_message_template = "{content}"
131131
interpreter.llm.execution_instructions = False
132-
interpreter.auto_run = True
132+
interpreter.auto_run = False
133133

134134
# Set offline for all local models
135135
interpreter.offline = True
136136

137137

138138
##### FOR LLAMA3
139+
interpreter.messages = []
140+
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
139141
140-
interpreter.system_message = """You are an AI assistant specialized in coding and automation, providing concise code snippets and friendly responses to enhance the user's productivity."""
142+
User: Open the chrome app.
143+
Assistant: On it.
144+
```python
145+
import webbrowser
146+
webbrowser.open('https://chrome.google.com')
147+
```
148+
User: The code you ran produced no output. Was this expected, or are we finished?
149+
Assistant: No further action is required; the provided snippet opens Chrome.
150+
151+
Now, your turn:"""
141152

142-
interpreter.messages = [
143-
{
144-
"role": "user",
145-
"type": "message",
146-
"content": "Run a directory listing in the current folder.",
147-
},
148-
{
149-
"role": "assistant",
150-
"type": "message",
151-
"content": "Absolutely, fetching the directory listing now.",
152-
},
153-
{"role": "assistant", "type": "code", "format": "shell", "content": "ls -la"},
154-
{
155-
"role": "computer",
156-
"type": "console",
157-
"format": "output",
158-
"content": "total 48\ndrwxr-xr-x 12 user staff 384 Jan 12 12:34 .\ndrwxr-xr-x 6 user staff 192 Jan 12 12:34 ..",
159-
},
160-
{
161-
"role": "assistant",
162-
"type": "message",
163-
"content": "Here's the directory listing:\n\ntotal 48\ndrwxr-xr-x 12 user staff 384 Jan 12 12:34 .\ndrwxr-xr-x 6 user staff 192 Jan 12 12:34 ..\n\nWhat's next on your agenda?",
164-
},
165-
{
166-
"role": "user",
167-
"type": "message",
168-
"content": "Can you multiply 2380 by 3875 for me?",
169-
},
170-
{"role": "assistant", "type": "code", "format": "python", "content": "2380*3875"},
171-
{"role": "computer", "type": "console", "format": "output", "content": "9222500"},
172-
{
173-
"role": "assistant",
174-
"type": "message",
175-
"content": "The multiplication of 2380 by 3875 gives you 9222500. Do you need this data for anything else?",
176-
},
177-
{
178-
"role": "user",
179-
"type": "message",
180-
"content": "Great, I'll talk to you in an hour!",
181-
},
182-
{
183-
"role": "assistant",
184-
"type": "message",
185-
"content": "Alright, I'll be here. Talk to you soon!",
186-
},
187-
]
188153

189154
try:
190155
# List out all downloaded ollama models. Will fail if ollama isn't installed

interpreter/terminal_interface/profiles/defaults/local-os.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,5 @@
275275
"**Warning:** In this mode, Open Interpreter will not require approval before performing actions. Be ready to close your terminal."
276276
)
277277
print("") # < - Aesthetic choice
278+
279+
interpreter.auto_run = True

interpreter/terminal_interface/profiles/defaults/local.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,19 @@
185185
"content": "Alright, I'll be here. Talk to you soon!",
186186
},
187187
]
188+
189+
interpreter.messages = []
190+
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
191+
192+
User: Open the chrome app.
193+
Assistant: On it.
194+
```python
195+
import webbrowser
196+
webbrowser.open('https://chrome.google.com')
197+
```
198+
User: The code you ran produced no output. Was this expected, or are we finished?
199+
Assistant: No further action is required; the provided snippet opens Chrome.
200+
201+
Now, your turn:"""
202+
203+
interpreter.auto_run = False

interpreter/terminal_interface/start_terminal_interface.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,21 +362,39 @@ def start_terminal_interface(interpreter):
362362

363363
if args.local:
364364
args.profile = "local.py"
365+
if args.vision:
366+
# This is local vision, set up moondream!
367+
interpreter.computer.vision.load()
365368

366369
if args.codestral:
367370
args.profile = "codestral.py"
371+
if args.vision:
372+
# This is local vision, set up moondream!
373+
interpreter.computer.vision.load()
368374

369375
if args.llama3:
370376
args.profile = "llama3.py"
377+
if args.vision:
378+
# This is local vision, set up moondream!
379+
interpreter.computer.vision.load()
371380

372381
if args.os and args.local:
373382
args.profile = "local-os.py"
383+
if args.vision:
384+
# This is local vision, set up moondream!
385+
interpreter.computer.vision.load()
374386

375387
if args.codestral and args.os:
376388
args.profile = "codestral-os.py"
389+
if args.vision:
390+
# This is local vision, set up moondream!
391+
interpreter.computer.vision.load()
377392

378393
if args.llama3 and args.os:
379394
args.profile = "llama3-os.py"
395+
if args.vision:
396+
# This is local vision, set up moondream!
397+
interpreter.computer.vision.load()
380398

381399
### Set attributes on interpreter, so that a profile script can read the arguments passed in via the CLI
382400

0 commit comments

Comments
 (0)