interpreter --model ollama/codestral

KillianLucas · KillianLucas · commit e27bfde1dff0 · 2024-06-01T15:56:45.000-07:00
diff --git a/interpreter/terminal_interface/validate_llm_settings.py b/interpreter/terminal_interface/validate_llm_settings.py
@@ -3,6 +3,7 @@
 """
 
 import os
+import subprocess
 import time
 
 import litellm
@@ -80,6 +81,31 @@ def validate_llm_settings(interpreter):
                     time.sleep(2)
                     break
 
+            elif interpreter.llm.model.startswith("ollama/"):
+                model_name = interpreter.llm.model.replace("ollama/", "")
+                try:
+                    # List downloaded ollama models; raises OSError if ollama isn't installed
+                    result = subprocess.run(
+                        ["ollama", "list"], capture_output=True, text=True, check=True
+                    )
+                    lines = result.stdout.split("\n")
+                    names = [
+                        line.split()[0].replace(":latest", "")
+                        for line in lines[1:]
+                        if line.strip()
+                    ]  # Extract names, trim out ":latest", skip header
+
+                    if model_name.replace(":latest", "") not in names:  # compare the requested model, normalized like `names`
+                        interpreter.display_message(f"\nDownloading {model_name}...\n")
+                        subprocess.run(["ollama", "pull", model_name], check=True)
+
+                    interpreter.display_message(f"> Model set to `{model_name}`")
+                except (OSError, subprocess.CalledProcessError):  # missing binary, or `list`/`pull` exited non-zero
+                    interpreter.display_message(
+                        f"> Ollama not found\n\nPlease download Ollama from [ollama.com](https://ollama.com/) to use `{model_name}`.\n"
+                    )
+                    exit()
+
             # This is a model we don't have checks for yet.
             break
 
diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py
@@ -22,6 +22,88 @@
 from websocket import create_connection
 
 
+def test_generator():
+    """
+    Sends four queries (two with display on, two off) and validates the streamed chunks.
+    """
+
+    for tests in [
+        {"query": "What's 38023*40334? Use Python", "display": True},
+        {"query": "What's 2334*34335555? Use Python", "display": True},
+        {"query": "What's 3545*22? Use Python", "display": False},
+        {"query": "What's 0.0021*3433335555? Use Python", "display": False},
+    ]:
+        assistant_message_found = False
+        console_output_found = False
+        active_line_found = False
+        flag_checker = []
+
+        for chunk in interpreter.chat(
+            tests["query"]
+            + "\nNo talk or plan, just immediately code, then tell me the answer.",
+            stream=True,
+            display=tests["display"],
+        ):
+            print(chunk)
+            # Check if chunk has the right schema
+            assert "role" in chunk, "Chunk missing 'role'"
+            assert "type" in chunk, "Chunk missing 'type'"
+            if "start" not in chunk and "end" not in chunk:
+                assert "content" in chunk, "Chunk missing 'content'"
+            if "format" in chunk:
+                assert isinstance(chunk["format"], str), "'format' should be a string"
+
+            flag_checker.append(chunk)
+
+            # Check if assistant message, console output, and active line are found
+            if chunk["role"] == "assistant" and chunk["type"] == "message":
+                assistant_message_found = True
+            if chunk["role"] == "computer" and chunk["type"] == "console":
+                console_output_found = True
+            if "format" in chunk:
+                if (
+                    chunk["role"] == "computer"
+                    and chunk["type"] == "console"
+                    and chunk["format"] == "active_line"
+                ):
+                    active_line_found = True
+
+        # Ensure each start/end flag chunk was emitted exactly once for this query
+        assert (
+            flag_checker.count(
+                {"role": "assistant", "type": "code", "format": "python", "start": True}
+            )
+            == 1
+        ), "Incorrect number of 'assistant code start' flags"
+        assert (
+            flag_checker.count(
+                {"role": "assistant", "type": "code", "format": "python", "end": True}
+            )
+            == 1
+        ), "Incorrect number of 'assistant code end' flags"
+        assert (
+            flag_checker.count({"role": "assistant", "type": "message", "start": True})
+            == 1
+        ), "Incorrect number of 'assistant message start' flags"
+        assert (
+            flag_checker.count({"role": "assistant", "type": "message", "end": True})
+            == 1
+        ), "Incorrect number of 'assistant message end' flags"
+        assert (
+            flag_checker.count({"role": "computer", "type": "console", "start": True})
+            == 1
+        ), "Incorrect number of 'computer console output start' flags"
+        assert (
+            flag_checker.count({"role": "computer", "type": "console", "end": True})
+            == 1
+        ), "Incorrect number of 'computer console output end' flags"
+
+        # Assert that assistant message, console output, and active line were found
+        assert assistant_message_found, "No assistant message was found"
+        assert console_output_found, "No console output was found"
+        assert active_line_found, "No active line was found"
+
+
 @pytest.mark.skip(reason="Requires uvicorn, which we don't require by default")
 def test_server():
     # Start the server in a new thread
@@ -483,87 +565,6 @@ def setup_function():
     interpreter.verbose = False
 
 
-def test_generator():
-    """
-    Sends two messages, makes sure everything is correct with display both on and off.
-    """
-
-    for tests in [
-        {"query": "What's 38023*40334? Use Python", "display": True},
-        {"query": "What's 2334*34335555? Use Python", "display": True},
-        {"query": "What's 3545*22? Use Python", "display": False},
-        {"query": "What's 0.0021*3433335555? Use Python", "display": False},
-    ]:
-        assistant_message_found = False
-        console_output_found = False
-        active_line_found = False
-        flag_checker = []
-        for chunk in interpreter.chat(
-            tests["query"]
-            + "\nNo talk or plan, just immediately code, then tell me the answer.",
-            stream=True,
-            display=tests["display"],
-        ):
-            print(chunk)
-            # Check if chunk has the right schema
-            assert "role" in chunk, "Chunk missing 'role'"
-            assert "type" in chunk, "Chunk missing 'type'"
-            if "start" not in chunk and "end" not in chunk:
-                assert "content" in chunk, "Chunk missing 'content'"
-            if "format" in chunk:
-                assert isinstance(chunk["format"], str), "'format' should be a string"
-
-            flag_checker.append(chunk)
-
-            # Check if assistant message, console output, and active line are found
-            if chunk["role"] == "assistant" and chunk["type"] == "message":
-                assistant_message_found = True
-            if chunk["role"] == "computer" and chunk["type"] == "console":
-                console_output_found = True
-            if "format" in chunk:
-                if (
-                    chunk["role"] == "computer"
-                    and chunk["type"] == "console"
-                    and chunk["format"] == "active_line"
-                ):
-                    active_line_found = True
-
-        # Ensure all flags are proper
-        assert (
-            flag_checker.count(
-                {"role": "assistant", "type": "code", "format": "python", "start": True}
-            )
-            == 1
-        ), "Incorrect number of 'assistant code start' flags"
-        assert (
-            flag_checker.count(
-                {"role": "assistant", "type": "code", "format": "python", "end": True}
-            )
-            == 1
-        ), "Incorrect number of 'assistant code end' flags"
-        assert (
-            flag_checker.count({"role": "assistant", "type": "message", "start": True})
-            == 1
-        ), "Incorrect number of 'assistant message start' flags"
-        assert (
-            flag_checker.count({"role": "assistant", "type": "message", "end": True})
-            == 1
-        ), "Incorrect number of 'assistant message end' flags"
-        assert (
-            flag_checker.count({"role": "computer", "type": "console", "start": True})
-            == 1
-        ), "Incorrect number of 'computer console output start' flags"
-        assert (
-            flag_checker.count({"role": "computer", "type": "console", "end": True})
-            == 1
-        ), "Incorrect number of 'computer console output end' flags"
-
-        # Assert that assistant message, console output, and active line were found
-        assert assistant_message_found, "No assistant message was found"
-        assert console_output_found, "No console output was found"
-        assert active_line_found, "No active line was found"
-
-
 @pytest.mark.skip(
     reason="Not working consistently, I think GPT related changes? It worked recently"
 )