Skip to content

Commit 459d77d

Browse files
committed
updated prompts to use display.info() and specify display when using display.view/screenshot
1 parent cb987c1 commit 459d77d

File tree

7 files changed

+215
-195
lines changed

7 files changed

+215
-195
lines changed

interpreter/core/computer/display/display.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,25 @@ def center(self):
6161
"""
6262
return self.width // 2, self.height // 2
6363

64-
def view(self, show=True, quadrant=None, all_screens=True, combine_screens=True
64+
def info(self):
65+
"""
66+
Returns a list of all connected monitors/displays and their information
67+
"""
68+
return get_displays()
69+
70+
71+
def view(self, show=True, quadrant=None, screen=0, combine_screens=True
6572
):
6673
"""
6774
Redirects to self.screenshot
6875
"""
69-
return self.screenshot(all_screens=all_screens, show=show, quadrant=quadrant, combine_screens=combine_screens)
76+
return self.screenshot(screen=screen, show=show, quadrant=quadrant, combine_screens=combine_screens)
7077

7178
# def get_active_window(self):
7279
# return get_active_window()
7380

7481
def screenshot(
75-
self, all_screens=True, show=True, quadrant=None, active_app_only=False, force_image=False,combine_screens=True
82+
self, screen=0, show=True, quadrant=None, active_app_only=False, force_image=False,combine_screens=True
7683
):
7784
"""
7885
Shows you what's on the screen by taking a screenshot of the entire screen or a specified quadrant. Returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!**
@@ -99,7 +106,7 @@ def screenshot(
99106
region = self.get_active_window()["region"]
100107
screenshot = pyautogui.screenshot(region=region)
101108
else:
102-
screenshot = take_screenshot_to_pil(all_screens=all_screens, combine_screens=combine_screens) # this function uses pyautogui.screenshot which works fine for all OS (mac, linux and windows)
109+
screenshot = take_screenshot_to_pil(screen=screen, combine_screens=combine_screens) # this function uses pyautogui.screenshot which works fine for all OS (mac, linux and windows)
103110
# message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will receive it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
104111
# print(message)
105112

@@ -273,10 +280,11 @@ def get_text_as_list_of_lists(self, screenshot=None):
273280
)
274281

275282

276-
def take_screenshot_to_pil(all_screens=False, combine_screens=True):
277-
if all_screens:
278-
# Get information about all screens
279-
monitors = get_monitors()
283+
def take_screenshot_to_pil(screen=0, combine_screens=True):
284+
# Get information about all screens
285+
monitors = get_monitors()
286+
if screen == -1: # All screens
287+
280288
# Take a screenshot of each screen and save them in a list
281289
screenshots = [pyautogui.screenshot(region=(monitor.x, monitor.y, monitor.width, monitor.height)) for monitor in monitors]
282290

@@ -334,6 +342,16 @@ def take_screenshot_to_pil(all_screens=False, combine_screens=True):
334342
return new_img
335343
else:
336344
return screenshots
345+
elif screen > 0:
346+
# Take a screenshot of the selected screen
347+
return pyautogui.screenshot(region=(monitors[screen].x, monitors[screen].y, monitors[screen].width, monitors[screen].height))
348+
337349
else:
338350
# Take a screenshot of the primary screen
339-
return pyautogui.screenshot()
351+
return pyautogui.screenshot(region=(monitors[screen].x, monitors[screen].y, monitors[screen].width, monitors[screen].height))
352+
353+
354+
def get_displays():
355+
monitors = get_monitors()
356+
return monitors
357+

interpreter/core/respond.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,12 @@ def respond(interpreter):
186186
)
187187
code = re.sub(r"import computer\.\w+\n", "pass\n", code)
188188
# If it does this it sees the screenshot twice (which is expected jupyter behavior)
189-
if code.split("\n")[-1] in [
190-
"computer.display.view()",
191-
"computer.display.screenshot()",
192-
"computer.view()",
193-
"computer.screenshot()",
194-
]:
189+
if any(code.split("\n")[-1].startswith(text) for text in [
190+
"computer.display.view",
191+
"computer.display.screenshot",
192+
"computer.view",
193+
"computer.screenshot",
194+
]):
195195
code = code + "\npass"
196196

197197
# sync up some things (is this how we want to do this?)

interpreter/terminal_interface/profiles/defaults/01.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@
9090
You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it:
9191
9292
```python
93-
computer.display.view() # Shows you what's on the screen, returns a `pil_image` `in case you need it (rarely). **You almost always want to do this first!**
93+
computer.display.info() # Returns a list of connected monitors/Displays and their info (x and y coordinates, width, height, width_mm, height_mm, name). Use this to verify the monitors connected before using computer.display.view() when necessary
94+
computer.display.view() # Shows you what's on the screen (primary display by default), returns a `pil_image` in case you need it (rarely). To get a specific display, use the parameter screen=DISPLAY_NUMBER (0 for the primary monitor, 1 and above for secondary monitors). **You almost always want to do this first!**
9495
computer.keyboard.hotkey(" ", "command") # Opens spotlight
9596
computer.keyboard.write("hello")
9697
computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.

interpreter/terminal_interface/profiles/defaults/os.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535
```python
3636
computer.browser.search(query) # Silently searches Google for the query, returns result. The user's browser is unaffected. (does not open a browser!)
3737
38-
computer.display.view() # Shows you what's on the screen, returns a `pil_image` `in case you need it (rarely). **You almost always want to do this first!**
38+
computer.display.info() # Returns a list of connected monitors/Displays and their info (x and y coordinates, width, height, width_mm, height_mm, name). Use this to verify the monitors connected before using computer.display.view() when necessary
39+
computer.display.view() # Shows you what's on the screen (primary display by default), returns a `pil_image` in case you need it (rarely). To get a specific display, use the parameter screen=DISPLAY_NUMBER (0 for the primary monitor, 1 and above for secondary monitors). **You almost always want to do this first!**
3940
4041
computer.keyboard.hotkey(" ", "command") # Opens spotlight (very useful)
4142
computer.keyboard.write("hello")

interpreter/terminal_interface/terminal_interface.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,12 +368,12 @@ def terminal_interface(interpreter, message):
368368
# (unless we figure out how to do this AFTER taking the screenshot)
369369
# otherwise it will try to click this notification!
370370

371-
if action in [
372-
"computer.screenshot()",
373-
"computer.display.screenshot()",
374-
"computer.display.view()",
375-
"computer.view()",
376-
]:
371+
if any(action.startswith(text) for text in [
372+
"computer.screenshot",
373+
"computer.display.screenshot",
374+
"computer.display.view",
375+
"computer.view"
376+
]):
377377
description = "Viewing screen..."
378378
elif action == "computer.mouse.click()":
379379
description = "Clicking..."

0 commit comments

Comments
 (0)