 import litellm
 
 litellm.suppress_debug_info = True
+import subprocess
 import time
 import uuid
 
@@ -133,10 +134,14 @@ def run(self, messages):
                         precursor = "Imagine I have just shown you an image with this description: "
                         postcursor = ""
 
+                    image_description = self.vision_renderer(lmc=img_msg)
+
+                    # It would be nice to format this as a message to the user and display it like: "I see: image_description"
+
                     img_msg["content"] = (
-                        # precursor
-                        # + self.vision_renderer(lmc=img_msg) +
-                        "\n---\nThe image contains the following text exactly: '''\n"
+                        precursor
+                        + image_description
+                        + "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
                         + self.interpreter.computer.vision.ocr(lmc=img_msg)
                         + "\n'''"
                         + postcursor
@@ -195,9 +200,9 @@ def run(self, messages):
 """
 **We were unable to determine the context window of this model.** Defaulting to 3000.
 
-If your model can handle more, run `interpreter.llm.context_window = {token limit}`.
+If your model can handle more, run `self.context_window = {token limit}`.
 
-Also please set `interpreter.llm.max_tokens = {max tokens per response}`.
+Also please set `self.max_tokens = {max tokens per response}`.
 
 Continuing...
 """
@@ -259,6 +264,46 @@ def run(self, messages):
         else:
             yield from run_text_llm(self, params)
 
+    def load(self):
+        if self.model.startswith("ollama/"):
+            # WOAH we should also hit up ollama and set max_tokens and context_window based on the LLM. I think they let u do that
+
+            model_name = self.model.replace("ollama/", "")
+            try:
+                # List out all downloaded ollama models. Will fail if ollama isn't installed
+                result = subprocess.run(
+                    ["ollama", "list"], capture_output=True, text=True, check=True
+                )
+            except Exception as e:
+                print(str(e))
+                self.interpreter.display_message(
+                    f"> Ollama not found\n\nPlease download Ollama from [ollama.com](https://ollama.com/) to use `{model_name}`.\n"
+                )
+                exit()
+
+            lines = result.stdout.split("\n")
+            names = [
+                line.split()[0].replace(":latest", "")
+                for line in lines[1:]
+                if line.strip()
+            ]  # Extract names, trim out ":latest", skip header
+
+            if model_name not in names:
+                self.interpreter.display_message(f"\nDownloading {model_name}...\n")
+                subprocess.run(["ollama", "pull", model_name], check=True)
+
+            # Send a ping, which will actually load the model
+            print(f"\nLoading {model_name}...\n")
+
+            old_max_tokens = self.max_tokens
+            self.max_tokens = 1
+            self.interpreter.computer.ai.chat("ping")
+            self.max_tokens = old_max_tokens
+
+            # self.interpreter.display_message("\n*Model loaded.*\n")
+
+        # Validate LLM should be moved here!!
+
 
 def fixed_litellm_completions(**params):
     """
@@ -289,7 +334,7 @@ def fixed_litellm_completions(**params):
 
         if "api key" in str(first_error).lower() and "api_key" not in params:
             print(
-                "LiteLLM requires an API key. Please set a dummy API key to prevent this message. (e.g `interpreter --api_key x` or `interpreter.llm.api_key = 'x'`)"
+                "LiteLLM requires an API key. Please set a dummy API key to prevent this message. (e.g `interpreter --api_key x` or `self.api_key = 'x'`)"
             )
 
         # So, let's try one more time with a dummy API key:
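
A note on the TODO inside the new `load()` ("we should also hit up ollama and set max_tokens and context_window based on the LLM"): newer Ollama builds expose model metadata over the local REST API, so the context length can in principle be read programmatically. The sketch below is a hypothetical illustration, not part of this commit; it assumes an Ollama server on the default port, assumes the `/api/show` response includes a `model_info` map (older Ollama versions do not return one), and `ollama_context_length` is an invented helper name.

# Hypothetical helper (not in this commit): query a local Ollama server for a
# model's context length, which load() could then use to set context_window
# and max_tokens instead of relying on defaults.
import requests

def ollama_context_length(model_name, host="http://localhost:11434"):
    response = requests.post(f"{host}/api/show", json={"name": model_name}, timeout=10)
    response.raise_for_status()
    model_info = response.json().get("model_info", {})
    # Metadata keys are architecture-prefixed, e.g. "llama.context_length"
    for key, value in model_info.items():
        if key.endswith(".context_length"):
            return int(value)
    return None  # Older Ollama versions don't report model_info at all

If this returned a value, `load()` could set `self.context_window` from it and pick a `max_tokens` somewhere below it; that wiring is left out here since the commit only pings the model to load it.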