Skip to content

Commit 144d8d3

Browse files
committed
Local III
1 parent a46a27a commit 144d8d3

File tree

6 files changed

+58
-67
lines changed

6 files changed

+58
-67
lines changed

interpreter/core/computer/utils/computer_vision.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,9 @@
1414

1515

1616
def pytesseract_get_text(img):
17-
try:
18-
return pytesseract.image_to_string(img)
19-
except ImportError:
20-
print("\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n")
21-
return ""
17+
import pytesseract
18+
19+
return pytesseract.image_to_string(img)
2220

2321

2422
def pytesseract_get_text_bounding_boxes(img):

interpreter/core/computer/vision/vision.py

Lines changed: 30 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -20,37 +20,32 @@ def __init__(self, computer):
2020

2121
def load(self):
2222
print("\nLoading Moondream (vision)...\n")
23-
try:
24-
with contextlib.redirect_stdout(
25-
open(os.devnull, "w")
26-
), contextlib.redirect_stderr(open(os.devnull, "w")):
27-
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!
28-
29-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
30-
31-
if self.computer.debug:
32-
print(
33-
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
34-
)
35-
print(
36-
"Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
37-
)
38-
model_id = "vikhyatk/moondream2"
39-
revision = "2024-04-02"
40-
print("loading model")
41-
42-
self.model = transformers.AutoModelForCausalLM.from_pretrained(
43-
model_id, trust_remote_code=True, revision=revision
23+
24+
with contextlib.redirect_stdout(
25+
open(os.devnull, "w")
26+
), contextlib.redirect_stderr(open(os.devnull, "w")):
27+
import transformers # Wait until we use it. Transformers can't be lazy loaded for some reason!
28+
29+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
30+
31+
if self.computer.debug:
32+
print(
33+
"Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
4434
)
45-
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
46-
model_id, revision=revision
35+
print(
36+
"Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
4737
)
48-
return True
49-
except ImportError:
50-
print(
51-
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
38+
model_id = "vikhyatk/moondream2"
39+
revision = "2024-04-02"
40+
print("loading model")
41+
42+
self.model = transformers.AutoModelForCausalLM.from_pretrained(
43+
model_id, trust_remote_code=True, revision=revision
44+
)
45+
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
46+
model_id, revision=revision
5247
)
53-
return False
48+
return True
5449

5550
def ocr(
5651
self,
@@ -103,7 +98,13 @@ def ocr(
10398
# Set path to the path of the temporary file
10499
path = temp_file_path
105100

106-
return pytesseract_get_text(path)
101+
try:
102+
return pytesseract_get_text(path)
103+
except ImportError:
104+
print(
105+
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
106+
)
107+
return ""
107108

108109
def query(
109110
self,

interpreter/core/llm/llm.py

Lines changed: 22 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -134,19 +134,28 @@ def run(self, messages):
134134
precursor = "Imagine I have just shown you an image with this description: "
135135
postcursor = ""
136136

137-
image_description = self.vision_renderer(lmc=img_msg)
138-
139-
# It would be nice to format this as a message to the user and display it like: "I see: image_description"
140-
141-
img_msg["content"] = (
142-
precursor
143-
+ image_description
144-
+ "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
145-
+ self.interpreter.computer.vision.ocr(lmc=img_msg)
146-
+ "\n'''"
147-
+ postcursor
148-
)
149-
img_msg["format"] = "description"
137+
try:
138+
image_description = self.vision_renderer(lmc=img_msg)
139+
ocr = self.interpreter.computer.vision.ocr(lmc=img_msg)
140+
141+
# It would be nice to format this as a message to the user and display it like: "I see: image_description"
142+
143+
img_msg["content"] = (
144+
precursor
145+
+ image_description
146+
+ "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
147+
+ ocr
148+
+ "\n'''"
149+
+ postcursor
150+
)
151+
img_msg["format"] = "description"
152+
153+
except ImportError:
154+
print(
155+
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
156+
)
157+
img_msg["format"] = "description"
158+
img_msg["content"] = ""
150159

151160
# Convert to OpenAI messages format
152161
messages = convert_to_openai_messages(

interpreter/core/llm/vision_for_text_llms.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

interpreter/terminal_interface/profiles/defaults/codestral-few-shot.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
EXPERIMENTAL
33
"""
44

5+
print("Remember to `pip install open-interpreter[local]`.")
6+
57
import subprocess
68

79
from interpreter import interpreter

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ name = "open-interpreter"
33
packages = [
44
{include = "interpreter"},
55
]
6-
version = "0.2.6" # Use "-rc1", "-rc2", etc. for pre-release versions
6+
version = "0.3.0" # Use "-rc1", "-rc2", etc. for pre-release versions
77
description = "Let language models run code"
88
authors = ["Killian Lucas <[email protected]>"]
99
readme = "README.md"

0 commit comments

Comments
 (0)