Skip to content

Commit e27bfde

Browse files
committed
interpreter --model ollama/codestral
1 parent 7239d03 commit e27bfde

File tree

2 files changed

+108
-81
lines changed

2 files changed

+108
-81
lines changed

interpreter/terminal_interface/validate_llm_settings.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import os
6+
import subprocess
67
import time
78

89
import litellm
@@ -80,6 +81,31 @@ def validate_llm_settings(interpreter):
8081
time.sleep(2)
8182
break
8283

84+
elif interpreter.llm.model.startswith("ollama/"):
85+
model_name = interpreter.llm.model.replace("ollama/", "")
86+
try:
87+
# List out all downloaded ollama models. Will fail if ollama isn't installed
88+
result = subprocess.run(
89+
["ollama", "list"], capture_output=True, text=True, check=True
90+
)
91+
lines = result.stdout.split("\n")
92+
names = [
93+
line.split()[0].replace(":latest", "")
94+
for line in lines[1:]
95+
if line.strip()
96+
] # Extract names, trim out ":latest", skip header
97+
98+
if "codestral" not in names:
99+
interpreter.display_message(f"\nDownloading {model_name}...\n")
100+
subprocess.run(["ollama", "pull", model_name], check=True)
101+
102+
interpreter.display_message(f"> Model set to `{model_name}`")
103+
except:
104+
interpreter.display_message(
105+
f"> Ollama not found\n\nPlease download Ollama from [ollama.com](https://ollama.com/) to use `codestral`.\n"
106+
)
107+
exit()
108+
83109
# This is a model we don't have checks for yet.
84110
break
85111

tests/test_interpreter.py

Lines changed: 82 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,88 @@
2222
from websocket import create_connection
2323

2424

25+
def test_generator():
26+
"""
27+
Sends two messages, makes sure everything is correct with display both on and off.
28+
"""
29+
30+
for tests in [
31+
{"query": "What's 38023*40334? Use Python", "display": True},
32+
{"query": "What's 2334*34335555? Use Python", "display": True},
33+
{"query": "What's 3545*22? Use Python", "display": False},
34+
{"query": "What's 0.0021*3433335555? Use Python", "display": False},
35+
]:
36+
assistant_message_found = False
37+
console_output_found = False
38+
active_line_found = False
39+
flag_checker = []
40+
41+
for chunk in interpreter.chat(
42+
tests["query"]
43+
+ "\nNo talk or plan, just immediately code, then tell me the answer.",
44+
stream=True,
45+
display=True,
46+
):
47+
print(chunk)
48+
# Check if chunk has the right schema
49+
assert "role" in chunk, "Chunk missing 'role'"
50+
assert "type" in chunk, "Chunk missing 'type'"
51+
if "start" not in chunk and "end" not in chunk:
52+
assert "content" in chunk, "Chunk missing 'content'"
53+
if "format" in chunk:
54+
assert isinstance(chunk["format"], str), "'format' should be a string"
55+
56+
flag_checker.append(chunk)
57+
58+
# Check if assistant message, console output, and active line are found
59+
if chunk["role"] == "assistant" and chunk["type"] == "message":
60+
assistant_message_found = True
61+
if chunk["role"] == "computer" and chunk["type"] == "console":
62+
console_output_found = True
63+
if "format" in chunk:
64+
if (
65+
chunk["role"] == "computer"
66+
and chunk["type"] == "console"
67+
and chunk["format"] == "active_line"
68+
):
69+
active_line_found = True
70+
71+
# Ensure all flags are proper
72+
assert (
73+
flag_checker.count(
74+
{"role": "assistant", "type": "code", "format": "python", "start": True}
75+
)
76+
== 1
77+
), "Incorrect number of 'assistant code start' flags"
78+
assert (
79+
flag_checker.count(
80+
{"role": "assistant", "type": "code", "format": "python", "end": True}
81+
)
82+
== 1
83+
), "Incorrect number of 'assistant code end' flags"
84+
assert (
85+
flag_checker.count({"role": "assistant", "type": "message", "start": True})
86+
== 1
87+
), "Incorrect number of 'assistant message start' flags"
88+
assert (
89+
flag_checker.count({"role": "assistant", "type": "message", "end": True})
90+
== 1
91+
), "Incorrect number of 'assistant message end' flags"
92+
assert (
93+
flag_checker.count({"role": "computer", "type": "console", "start": True})
94+
== 1
95+
), "Incorrect number of 'computer console output start' flags"
96+
assert (
97+
flag_checker.count({"role": "computer", "type": "console", "end": True})
98+
== 1
99+
), "Incorrect number of 'computer console output end' flags"
100+
101+
# Assert that assistant message, console output, and active line were found
102+
assert assistant_message_found, "No assistant message was found"
103+
assert console_output_found, "No console output was found"
104+
assert active_line_found, "No active line was found"
105+
106+
25107
@pytest.mark.skip(reason="Requires uvicorn, which we don't require by default")
26108
def test_server():
27109
# Start the server in a new thread
@@ -483,87 +565,6 @@ def setup_function():
483565
interpreter.verbose = False
484566

485567

486-
def test_generator():
487-
"""
488-
Sends two messages, makes sure everything is correct with display both on and off.
489-
"""
490-
491-
for tests in [
492-
{"query": "What's 38023*40334? Use Python", "display": True},
493-
{"query": "What's 2334*34335555? Use Python", "display": True},
494-
{"query": "What's 3545*22? Use Python", "display": False},
495-
{"query": "What's 0.0021*3433335555? Use Python", "display": False},
496-
]:
497-
assistant_message_found = False
498-
console_output_found = False
499-
active_line_found = False
500-
flag_checker = []
501-
for chunk in interpreter.chat(
502-
tests["query"]
503-
+ "\nNo talk or plan, just immediately code, then tell me the answer.",
504-
stream=True,
505-
display=tests["display"],
506-
):
507-
print(chunk)
508-
# Check if chunk has the right schema
509-
assert "role" in chunk, "Chunk missing 'role'"
510-
assert "type" in chunk, "Chunk missing 'type'"
511-
if "start" not in chunk and "end" not in chunk:
512-
assert "content" in chunk, "Chunk missing 'content'"
513-
if "format" in chunk:
514-
assert isinstance(chunk["format"], str), "'format' should be a string"
515-
516-
flag_checker.append(chunk)
517-
518-
# Check if assistant message, console output, and active line are found
519-
if chunk["role"] == "assistant" and chunk["type"] == "message":
520-
assistant_message_found = True
521-
if chunk["role"] == "computer" and chunk["type"] == "console":
522-
console_output_found = True
523-
if "format" in chunk:
524-
if (
525-
chunk["role"] == "computer"
526-
and chunk["type"] == "console"
527-
and chunk["format"] == "active_line"
528-
):
529-
active_line_found = True
530-
531-
# Ensure all flags are proper
532-
assert (
533-
flag_checker.count(
534-
{"role": "assistant", "type": "code", "format": "python", "start": True}
535-
)
536-
== 1
537-
), "Incorrect number of 'assistant code start' flags"
538-
assert (
539-
flag_checker.count(
540-
{"role": "assistant", "type": "code", "format": "python", "end": True}
541-
)
542-
== 1
543-
), "Incorrect number of 'assistant code end' flags"
544-
assert (
545-
flag_checker.count({"role": "assistant", "type": "message", "start": True})
546-
== 1
547-
), "Incorrect number of 'assistant message start' flags"
548-
assert (
549-
flag_checker.count({"role": "assistant", "type": "message", "end": True})
550-
== 1
551-
), "Incorrect number of 'assistant message end' flags"
552-
assert (
553-
flag_checker.count({"role": "computer", "type": "console", "start": True})
554-
== 1
555-
), "Incorrect number of 'computer console output start' flags"
556-
assert (
557-
flag_checker.count({"role": "computer", "type": "console", "end": True})
558-
== 1
559-
), "Incorrect number of 'computer console output end' flags"
560-
561-
# Assert that assistant message, console output, and active line were found
562-
assert assistant_message_found, "No assistant message was found"
563-
assert console_output_found, "No console output was found"
564-
assert active_line_found, "No active line was found"
565-
566-
567568
@pytest.mark.skip(
568569
reason="Not working consistently, I think GPT related changes? It worked recently"
569570
)

0 commit comments

Comments
 (0)