Skip to content

Commit 0f8bf8a

Browse files
committed
Local II
1 parent b031e0c commit 0f8bf8a

File tree

15 files changed

+568
-93
lines changed

15 files changed

+568
-93
lines changed

interpreter/core/computer/ai/ai.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,19 +118,46 @@ def __init__(self, computer):
118118
self.computer = computer
119119

120120
def chat(self, text):
    """Send ``text`` to the language model as a one-off chat and return the reply.

    A fresh two-message conversation is built (a fixed system prompt plus the
    user's ``text``) and passed straight to ``self.computer.interpreter.llm.run``.
    The interpreter's own message history and system message are never read or
    modified here.

    Args:
        text: The user message to send.

    Returns:
        The concatenation of the ``"content"`` value of every chunk yielded by
        the LLM run, in order. Chunks without content are ignored. An empty
        string is returned when no chunk carries content.
    """
    messages = [
        {
            "role": "system",
            "type": "message",
            "content": "You are a helpful AI assistant.",
        },
        {"role": "user", "type": "message", "content": text},
    ]

    # Collect the pieces and join once instead of growing a string with `+=`.
    pieces = []
    for chunk in self.computer.interpreter.llm.run(messages):
        content = chunk.get("content")
        # A chunk may lack "content" or carry an explicit None; concatenating
        # None would raise TypeError, so both cases are skipped.
        if content is not None:
            pieces.append(content)

    # NOTE: the previous implementation, which temporarily swapped the
    # interpreter's messages / system_message and called interpreter.chat(),
    # used to follow an unconditional `return` here and could never execute.
    # That unreachable code has been removed.
    return "".join(pieces)
135162

136163
def query(self, text, query, custom_reduce_query=None):

interpreter/core/computer/utils/html_to_png_base64.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55

66
from html2image import Html2Image
77

8+
from ....core.utils.lazy_import import lazy_import
9+
10+
html2image = lazy_import("html2image")
11+
812
from ....terminal_interface.utils.local_storage_path import get_storage_path
913

1014

1115
def html_to_png_base64(code):
1216
# Convert the HTML into an image using html2image
13-
hti = Html2Image()
17+
hti = html2image.Html2Image()
1418

1519
# Generate a random filename for the temporary image
1620
temp_filename = "".join(random.choices(string.digits, k=10)) + ".png"

interpreter/core/llm/llm.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import os
2+
3+
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
14
import litellm
25

36
litellm.suppress_debug_info = True
@@ -72,6 +75,7 @@ def run(self, messages):
7275
model = "openai/i"
7376
if not hasattr(self.interpreter, "conversation_id"): # Only do this once
7477
self.context_window = 7000
78+
self.api_key = "x"
7579
self.max_tokens = 1000
7680
self.api_base = "https://api.openinterpreter.com/v0"
7781
self.interpreter.conversation_id = str(uuid.uuid4())
@@ -117,12 +121,25 @@ def run(self, messages):
117121
elif self.supports_vision == False and self.vision_renderer:
118122
for img_msg in image_messages:
119123
if img_msg["format"] != "description":
124+
self.interpreter.display_message("*Viewing image...*")
125+
126+
if img_msg["format"] == "path":
127+
precursor = f"The image I'm referring to ({img_msg['content']}) contains the following: "
128+
if self.interpreter.computer.import_computer_api:
129+
postcursor = f"\nIf you want to ask questions about the image, run `computer.vision.query(path='{img_msg['content']}', query='(ask any question here)')` and a vision AI will answer it."
130+
else:
131+
postcursor = ""
132+
else:
133+
precursor = "Imagine I have just shown you an image with this description: "
134+
postcursor = ""
135+
120136
img_msg["content"] = (
121-
"Imagine I have just shown you an image with this description: "
137+
precursor
122138
+ self.vision_renderer(lmc=img_msg)
123-
+ "\n---\nThe image contains the following text exactly, extracted via OCR: '''\n"
139+
+ "\n---\nThe image contains the following text exactly: '''\n"
124140
+ self.interpreter.computer.vision.ocr(lmc=img_msg)
125141
+ "\n'''"
142+
+ postcursor
126143
)
127144
img_msg["format"] = "description"
128145

interpreter/core/respond.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import json
2+
import os
23
import re
34
import traceback
45

6+
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
57
import litellm
68

79
from ..terminal_interface.utils.display_markdown_message import display_markdown_message

interpreter/terminal_interface/local_setup.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,19 @@ def download_model(models_dir, models, interpreter):
234234
if line.strip()
235235
] # Extract names, trim out ":latest", skip header
236236

237-
for model in ["llama3", "phi3", "wizardlm2"]:
237+
if "llama3" in names:
238+
names.remove("llama3")
239+
names = ["llama3"] + names
240+
241+
if "codestral" in names:
242+
names.remove("codestral")
243+
names = ["codestral"] + names
244+
245+
for model in ["llama3", "phi3", "wizardlm2", "codestral"]:
238246
if model not in names:
239-
names.append("→ Download " + model)
247+
names.append("↓ Download " + model)
248+
249+
names.append("Browse Models ↗")
240250

241251
# Create a new inquirer selection from the names
242252
name_question = [
@@ -253,15 +263,37 @@ def download_model(models_dir, models, interpreter):
253263

254264
selected_name = name_answer["name"]
255265

256-
if "download" in selected_name.lower():
266+
if "↓ Download " in selected_name:
257267
model = selected_name.split(" ")[-1]
258268
interpreter.display_message(f"\nDownloading {model}...\n")
259269
subprocess.run(["ollama", "pull", model], check=True)
270+
elif "Browse Models ↗" in selected_name:
271+
interpreter.display_message(
272+
"Opening [ollama.com/library](ollama.com/library)."
273+
)
274+
import webbrowser
275+
276+
webbrowser.open("https://ollama.com/library")
277+
exit()
260278
else:
261279
model = selected_name.strip()
262280

263281
# Set the model to the selected model
264282
interpreter.llm.model = f"ollama/{model}"
283+
284+
# Send a ping, which will actually load the model
285+
interpreter.display_message("Loading model...")
286+
287+
old_max_tokens = interpreter.llm.max_tokens
288+
old_context_window = interpreter.llm.context_window
289+
interpreter.llm.max_tokens = 1
290+
interpreter.llm.context_window = 100
291+
292+
interpreter.computer.ai.chat("ping")
293+
294+
interpreter.llm.max_tokens = old_max_tokens
295+
interpreter.llm.context_window = old_context_window
296+
265297
interpreter.display_message(f"> Model set to `{model}`")
266298

267299
# If Ollama is not installed or not recognized as a command, prompt the user to download Ollama and try again

interpreter/terminal_interface/profiles/defaults/01.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
interpreter.llm.supports_vision = True
66
interpreter.shrink_images = True # Faster but less accurate
77

8-
interpreter.llm.model = "gpt-4-vision-preview"
8+
interpreter.llm.model = "gpt-4o"
99

1010
interpreter.llm.supports_functions = False
1111
interpreter.llm.context_window = 110000
1212
interpreter.llm.max_tokens = 4096
1313
interpreter.auto_run = True
14-
14+
interpreter.computer.import_computer_api = True
1515
interpreter.force_task_completion = True
1616
interpreter.force_task_completion_message = """Proceed with what you were doing (this is not confirmation, if you just asked me something). You CAN run code on my machine. If you want to run code, start your message with "```"! If the entire task is done, say exactly 'The task is done.' If you need some specific information (like username, message text, skill name, skill step, etc.) say EXACTLY 'Please provide more information.' If it's impossible, say 'The task is impossible.' (If I haven't provided a task, say exactly 'Let me know what you'd like to do next.') Otherwise keep going. CRITICAL: REMEMBER TO FOLLOW ALL PREVIOUS INSTRUCTIONS. If I'm teaching you something, remember to run the related `computer.skills.new_skill` function."""
1717
interpreter.force_task_completion_breakers = [

0 commit comments

Comments
 (0)