Skip to content

Commit 5598b03

Browse files
committed
Local II
1 parent 0f8bf8a commit 5598b03

File tree

8 files changed

+99
-81
lines changed

8 files changed

+99
-81
lines changed

interpreter/core/computer/vision/vision.py

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,38 @@ def __init__(self, computer):
1919
self.tokenizer = None # Will load upon first use
2020

2121
def load(self):
22-
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!
23-
24-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
25-
26-
if self.computer.debug:
27-
print(
28-
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
29-
)
30-
print(
31-
"Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
22+
print("\n *Loading Moondream model...*\n")
23+
try:
24+
with contextlib.redirect_stdout(
25+
open(os.devnull, "w")
26+
), contextlib.redirect_stderr(open(os.devnull, "w")):
27+
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!
28+
29+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
30+
31+
if self.computer.debug:
32+
print(
33+
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
34+
)
35+
print(
36+
"Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
37+
)
38+
model_id = "vikhyatk/moondream2"
39+
revision = "2024-04-02"
40+
print("loading model")
41+
42+
self.model = transformers.AutoModelForCausalLM.from_pretrained(
43+
model_id, trust_remote_code=True, revision=revision
44+
)
45+
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
46+
model_id, revision=revision
47+
)
48+
return True
49+
except ImportError:
50+
self.computer.interpreter.display_message(
51+
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
3252
)
33-
model_id = "vikhyatk/moondream2"
34-
revision = "2024-04-02"
35-
print("loading model")
36-
37-
self.model = transformers.AutoModelForCausalLM.from_pretrained(
38-
model_id, trust_remote_code=True, revision=revision
39-
)
40-
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
41-
model_id, revision=revision
42-
)
53+
return False
4354

4455
def ocr(
4556
self,
@@ -107,15 +118,8 @@ def query(
107118
"""
108119

109120
if self.model == None and self.tokenizer == None:
110-
try:
111-
with contextlib.redirect_stdout(
112-
open(os.devnull, "w")
113-
), contextlib.redirect_stderr(open(os.devnull, "w")):
114-
self.load()
115-
except ImportError:
116-
self.computer.interpreter.display_message(
117-
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
118-
)
121+
success = self.load()
122+
if not success:
119123
return ""
120124

121125
if lmc:

interpreter/core/llm/llm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def run(self, messages):
121121
elif self.supports_vision == False and self.vision_renderer:
122122
for img_msg in image_messages:
123123
if img_msg["format"] != "description":
124-
self.interpreter.display_message("*Viewing image...*")
124+
self.interpreter.display_message("\n *Viewing image...*\n")
125125

126126
if img_msg["format"] == "path":
127127
precursor = f"The image I'm referring to ({img_msg['content']}) contains the following: "
@@ -134,9 +134,9 @@ def run(self, messages):
134134
postcursor = ""
135135

136136
img_msg["content"] = (
137-
precursor
138-
+ self.vision_renderer(lmc=img_msg)
139-
+ "\n---\nThe image contains the following text exactly: '''\n"
137+
# precursor
138+
# + self.vision_renderer(lmc=img_msg) +
139+
"\n---\nThe image contains the following text exactly: '''\n"
140140
+ self.interpreter.computer.vision.ocr(lmc=img_msg)
141141
+ "\n'''"
142142
+ postcursor

interpreter/terminal_interface/profiles/defaults/codestral-os.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,5 @@
359359

360360
interpreter.system_message = """You are an AI assistant that writes working markdown code snippets to answer the user's request. You speak concisely and quickly. You say nothing irrelevant to the user's request. YOU NEVER USE PLACEHOLDERS, and instead always send code that 'just works' by figuring out placeholders dynamically. When you send code that fails, you identify the issue, then send new code that doesn't fail."""
361361
interpreter.computer.import_computer_api = True
362+
363+
interpreter.auto_run = True

interpreter/terminal_interface/profiles/defaults/codestral.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,3 +348,14 @@
348348
]
349349

350350
interpreter.system_message = """You are an AI assistant that writes working markdown code snippets to answer the user's request. You speak concisely and quickly. You say nothing irrelevant to the user's request. YOU NEVER USE PLACEHOLDERS, and instead always send code that 'just works' by figuring out placeholders dynamically. When you send code that fails, you identify the issue, then send new code that doesn't fail."""
351+
352+
353+
interpreter.max_output = 600
354+
interpreter.llm.context_window = 8000
355+
interpreter.force_task_completion = False
356+
interpreter.user_message_template = "{content}. If my question must be solved by running code on my computer, send me code to run enclosed in ```python (preferred) or ```shell (less preferred). Otherwise, don't send code. Be concise, don't include anything unnecessary. Don't use placeholders, I can't edit code. Send code that will determine any placeholders (e.g. determine my username)."
357+
interpreter.user_message_template = "I'm trying to help someone use their computer. Here's the last thing they said: '{content}'. What is some code that might be able to answer that question / what should I say to them? DONT USE PLACEHOLDERS! It needs to just work. If it's like a simple greeting, just tell me what to say (without code)."
358+
# interpreter.user_message_template = "{content}"
359+
interpreter.always_apply_user_message_template = False
360+
interpreter.llm.execution_instructions = False
361+
interpreter.auto_run = False

interpreter/terminal_interface/profiles/defaults/llama3.py

Lines changed: 13 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -129,62 +129,27 @@
129129
interpreter.user_message_template = "I'm trying to help someone use their computer. Here's the last thing they said: '{content}'. What is some code that might be able to answer that question / what should I say to them? DONT USE PLACEHOLDERS! It needs to just work."
130130
# interpreter.user_message_template = "{content}"
131131
interpreter.llm.execution_instructions = False
132-
interpreter.auto_run = True
132+
interpreter.auto_run = False
133133

134134
# Set offline for all local models
135135
interpreter.offline = True
136136

137137

138138
##### FOR LLAMA3
139+
interpreter.messages = []
140+
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
139141
140-
interpreter.system_message = """You are an AI assistant specialized in coding and automation, providing concise code snippets and friendly responses to enhance the user's productivity."""
142+
User: Open the chrome app.
143+
Assistant: On it.
144+
```python
145+
import webbrowser
146+
webbrowser.open('https://chrome.google.com')
147+
```
148+
User: The code you ran produced no output. Was this expected, or are we finished?
149+
Assistant: No further action is required; the provided snippet opens Chrome.
150+
151+
Now, your turn:"""
141152

142-
interpreter.messages = [
143-
{
144-
"role": "user",
145-
"type": "message",
146-
"content": "Run a directory listing in the current folder.",
147-
},
148-
{
149-
"role": "assistant",
150-
"type": "message",
151-
"content": "Absolutely, fetching the directory listing now.",
152-
},
153-
{"role": "assistant", "type": "code", "format": "shell", "content": "ls -la"},
154-
{
155-
"role": "computer",
156-
"type": "console",
157-
"format": "output",
158-
"content": "total 48\ndrwxr-xr-x 12 user staff 384 Jan 12 12:34 .\ndrwxr-xr-x 6 user staff 192 Jan 12 12:34 ..",
159-
},
160-
{
161-
"role": "assistant",
162-
"type": "message",
163-
"content": "Here's the directory listing:\n\ntotal 48\ndrwxr-xr-x 12 user staff 384 Jan 12 12:34 .\ndrwxr-xr-x 6 user staff 192 Jan 12 12:34 ..\n\nWhat's next on your agenda?",
164-
},
165-
{
166-
"role": "user",
167-
"type": "message",
168-
"content": "Can you multiply 2380 by 3875 for me?",
169-
},
170-
{"role": "assistant", "type": "code", "format": "python", "content": "2380*3875"},
171-
{"role": "computer", "type": "console", "format": "output", "content": "9222500"},
172-
{
173-
"role": "assistant",
174-
"type": "message",
175-
"content": "The multiplication of 2380 by 3875 gives you 9222500. Do you need this data for anything else?",
176-
},
177-
{
178-
"role": "user",
179-
"type": "message",
180-
"content": "Great, I'll talk to you in an hour!",
181-
},
182-
{
183-
"role": "assistant",
184-
"type": "message",
185-
"content": "Alright, I'll be here. Talk to you soon!",
186-
},
187-
]
188153

189154
try:
190155
# List out all downloaded ollama models. Will fail if ollama isn't installed

interpreter/terminal_interface/profiles/defaults/local-os.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,5 @@
275275
"**Warning:** In this mode, Open Interpreter will not require approval before performing actions. Be ready to close your terminal."
276276
)
277277
print("") # < - Aesthetic choice
278+
279+
interpreter.auto_run = True

interpreter/terminal_interface/profiles/defaults/local.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,19 @@
185185
"content": "Alright, I'll be here. Talk to you soon!",
186186
},
187187
]
188+
189+
interpreter.messages = []
190+
interpreter.system_message = """You are an AI assistant that writes markdown code snippets to answer the user's request. You speak very concisely and quickly, you say nothing irrelevant to the user's request. For example:
191+
192+
User: Open the chrome app.
193+
Assistant: On it.
194+
```python
195+
import webbrowser
196+
webbrowser.open('https://chrome.google.com')
197+
```
198+
User: The code you ran produced no output. Was this expected, or are we finished?
199+
Assistant: No further action is required; the provided snippet opens Chrome.
200+
201+
Now, your turn:"""
202+
203+
interpreter.auto_run = False

interpreter/terminal_interface/start_terminal_interface.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,21 +362,39 @@ def start_terminal_interface(interpreter):
362362

363363
if args.local:
364364
args.profile = "local.py"
365+
if args.vision:
366+
# This is local vision, set up moondream!
367+
interpreter.computer.vision.load()
365368

366369
if args.codestral:
367370
args.profile = "codestral.py"
371+
if args.vision:
372+
# This is local vision, set up moondream!
373+
interpreter.computer.vision.load()
368374

369375
if args.llama3:
370376
args.profile = "llama3.py"
377+
if args.vision:
378+
# This is local vision, set up moondream!
379+
interpreter.computer.vision.load()
371380

372381
if args.os and args.local:
373382
args.profile = "local-os.py"
383+
if args.vision:
384+
# This is local vision, set up moondream!
385+
interpreter.computer.vision.load()
374386

375387
if args.codestral and args.os:
376388
args.profile = "codestral-os.py"
389+
if args.vision:
390+
# This is local vision, set up moondream!
391+
interpreter.computer.vision.load()
377392

378393
if args.llama3 and args.os:
379394
args.profile = "llama3-os.py"
395+
if args.vision:
396+
# This is local vision, set up moondream!
397+
interpreter.computer.vision.load()
380398

381399
### Set attributes on interpreter, so that a profile script can read the arguments passed in via the CLI
382400

0 commit comments

Comments
 (0)