Skip to content

Commit 0db16d4

Browse files
committed
interpreter --profile local-os for Local OS Mode
1 parent ae76502 commit 0db16d4

File tree

6 files changed

+220
-114
lines changed

6 files changed

+220
-114
lines changed

interpreter/core/computer/computer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def __init__(self, interpreter):
5353

5454
self.import_skills = False
5555
self._has_imported_skills = False
56+
self.max_output = (
57+
self.interpreter.max_output
58+
) # Should mirror interpreter.max_output
5659

5760
# Shortcut for computer.terminal.languages
5861
@property

interpreter/core/computer/display/display.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
np = lazy_import("numpy")
2525
plt = lazy_import("matplotlib.pyplot")
2626
screeninfo = lazy_import("screeninfo")
27+
pywinctl = lazy_import("pywinctl")
2728

2829

2930
from ..utils.computer_vision import find_text_in_image, pytesseract_get_text
@@ -64,7 +65,7 @@ def center(self):
6465

6566
def info(self):
6667
"""
67-
Returns a list of all connected montitor/displays and thir information
68+
Returns a list of all connected monitor/displays and their information
6869
"""
6970
return get_displays()
7071

@@ -84,7 +85,7 @@ def screenshot(
8485
screen=0,
8586
show=True,
8687
quadrant=None,
87-
active_app_only=False,
88+
active_app_only=True,
8889
force_image=False,
8990
combine_screens=True,
9091
):
@@ -99,31 +100,50 @@ def screenshot(
99100
description = self.computer.vision.query(pil_image=screenshot)
100101
print("A DESCRIPTION OF WHAT'S ON THE SCREEN: " + description)
101102

102-
print("ALL OF THE TEXT ON THE SCREEN: ")
103-
text = self.get_text_as_list_of_lists(screenshot=screenshot)
104-
pp = pprint.PrettyPrinter(indent=4)
105-
pretty_text = pp.pformat(text) # language models like it pretty!
106-
pretty_text = format_to_recipient(pretty_text, "assistant")
107-
print(pretty_text)
108-
print(
109-
format_to_recipient(
110-
"To recieve the text above as a Python object, run computer.display.get_text_as_list_of_lists()",
111-
"assistant",
103+
if self.computer.max_output > 600:
104+
print("ALL OF THE TEXT ON THE SCREEN: ")
105+
text = self.get_text_as_list_of_lists(screenshot=screenshot)
106+
pp = pprint.PrettyPrinter(indent=4)
107+
pretty_text = pp.pformat(text) # language models like it pretty!
108+
pretty_text = format_to_recipient(pretty_text, "assistant")
109+
print(pretty_text)
110+
print(
111+
format_to_recipient(
112+
"To recieve the text above as a Python object, run computer.display.get_text_as_list_of_lists()",
113+
"assistant",
114+
)
112115
)
113-
)
114116
return
115117

116118
if quadrant == None:
117-
# Implement active_app_only!
118119
if active_app_only:
119-
region = self.get_active_window()["region"]
120-
screenshot = pyautogui.screenshot(region=region)
120+
active_window = pywinctl.getActiveWindow()
121+
if active_window:
122+
screenshot = pyautogui.screenshot(
123+
region=(
124+
active_window.left,
125+
active_window.top,
126+
active_window.width,
127+
active_window.height,
128+
)
129+
)
130+
message = format_to_recipient(
131+
"Taking a screenshot of the active app (recommended). To take a screenshot of the entire screen (uncommon), use computer.display.view(active_app_only=False).",
132+
"assistant",
133+
)
134+
print(message)
135+
else:
136+
screenshot = pyautogui.screenshot()
137+
121138
else:
122139
screenshot = take_screenshot_to_pil(
123140
screen=screen, combine_screens=combine_screens
124141
) # this function uses pyautogui.screenshot which works fine for all OS (mac, linux and windows)
125-
# message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
126-
# print(message)
142+
message = format_to_recipient(
143+
"Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.",
144+
"assistant",
145+
)
146+
print(message)
127147

128148
else:
129149
screen_width, screen_height = pyautogui.size()

interpreter/core/respond.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,7 @@ def respond(interpreter):
112112
)
113113
elif interpreter.offline and not interpreter.os:
114114
print(traceback.format_exc())
115-
raise Exception(
116-
"Error occurred. "
117-
+ str(e)
118-
)
115+
raise Exception("Error occurred. " + str(e))
119116
else:
120117
raise
121118

@@ -191,18 +188,22 @@ def respond(interpreter):
191188
)
192189
code = re.sub(r"import computer\.\w+\n", "pass\n", code)
193190
# If it does this it sees the screenshot twice (which is expected jupyter behavior)
194-
if any(code.split("\n")[-1].startswith(text) for text in [
195-
"computer.display.view",
196-
"computer.display.screenshot",
197-
"computer.view",
198-
"computer.screenshot",
199-
]):
191+
if any(
192+
code.split("\n")[-1].startswith(text)
193+
for text in [
194+
"computer.display.view",
195+
"computer.display.screenshot",
196+
"computer.view",
197+
"computer.screenshot",
198+
]
199+
):
200200
code = code + "\npass"
201201

202202
# sync up some things (is this how we want to do this?)
203203
interpreter.computer.verbose = interpreter.verbose
204204
interpreter.computer.debug = interpreter.debug
205205
interpreter.computer.emit_images = interpreter.llm.supports_vision
206+
interpreter.computer.max_output = interpreter.max_output
206207

207208
# sync up the interpreter's computer with your computer
208209
try:

0 commit comments

Comments
 (0)