Skip to content

Commit 9dc20c2

Browse files
authored
Merge pull request #1161 from Amazingct/collage-screen
Multiple display support
2 parents df3e29d + 8a0be85 commit 9dc20c2

File tree

7 files changed

+129
-7536
lines changed

7 files changed

+129
-7536
lines changed

interpreter/core/computer/display/display.py

Lines changed: 107 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,15 @@
66
import warnings
77
from contextlib import redirect_stdout
88
from io import BytesIO
9-
9+
import io
10+
import subprocess
11+
from PIL import Image
1012
import requests
11-
1213
from ...utils.lazy_import import lazy_import
1314
from ..utils.recipient_utils import format_to_recipient
15+
import cv2
16+
from screeninfo import get_monitors # for getting info about connected monitors
17+
1418

1519
# Still experimenting with this
1620
# from utils.get_active_window import get_active_window
@@ -20,6 +24,7 @@
2024
np = lazy_import("numpy")
2125
plt = lazy_import("matplotlib.pyplot")
2226

27+
2328
from ..utils.computer_vision import find_text_in_image, pytesseract_get_text
2429

2530

@@ -56,20 +61,30 @@ def center(self):
5661
"""
5762
return self.width // 2, self.height // 2
5863

59-
def view(self, show=True, quadrant=None):
64+
def info(self):
    """
    Returns the list of all connected monitors/displays and their information,
    exactly as produced by get_displays().
    """
    displays = get_displays()
    return displays
69+
70+
71+
def view(
    self,
    show=True,
    quadrant=None,
    screen=0,
    combine_screens=True,
    active_app_only=False,
    force_image=False,
):
    """
    Redirects to self.screenshot, forwarding every option by keyword.

    :param show: passed through to self.screenshot unchanged.
    :param quadrant: passed through to self.screenshot unchanged.
    :param screen: which display to capture; passed through to self.screenshot.
    :param combine_screens: passed through to self.screenshot unchanged.
    :param active_app_only: passed through to self.screenshot unchanged.
    :param force_image: passed through to self.screenshot unchanged.
    :return: whatever self.screenshot returns.
    """
    # The two trailing parameters were added after the existing ones (with the
    # same defaults self.screenshot uses) so positional callers keep working
    # while view() can now reach every option that screenshot() accepts.
    return self.screenshot(
        screen=screen,
        show=show,
        quadrant=quadrant,
        active_app_only=active_app_only,
        force_image=force_image,
        combine_screens=combine_screens,
    )
6477

6578
# def get_active_window(self):
6679
# return get_active_window()
6780

6881
def screenshot(
69-
self, show=True, quadrant=None, active_app_only=False, force_image=False
82+
self, screen=0, show=True, quadrant=None, active_app_only=False, force_image=False,combine_screens=True
7083
):
7184
"""
7285
Shows you what's on the screen by taking a screenshot of the entire screen or a specified quadrant. Returns a `pil_image` `in case you need it (rarely). **You almost always want to do this first!**
86+
:param screen: specify which display; 0 for primary, 1 and above for secondary, and -1 for all displays.
87+
:param combine_screens: Only applies when screen is -1. If True, a collage of all display screens will be returned. Otherwise, a list of display screens will be returned.
7388
"""
7489
if not self.computer.emit_images and force_image == False:
7590
text = self.get_text_as_list_of_lists()
@@ -91,10 +106,7 @@ def screenshot(
91106
region = self.get_active_window()["region"]
92107
screenshot = pyautogui.screenshot(region=region)
93108
else:
94-
if platform.system() == "Darwin":
95-
screenshot = take_screenshot_to_pil()
96-
else:
97-
screenshot = pyautogui.screenshot()
109+
screenshot = take_screenshot_to_pil(screen=screen, combine_screens=combine_screens) # this function uses pyautogui.screenshot which works fine for all OS (mac, linux and windows)
98110
# message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
99111
# print(message)
100112

@@ -121,18 +133,26 @@ def screenshot(
121133

122134
# Open the image file with PIL
123135
# IPython interactive mode auto-displays plots, causing RGBA handling issues, possibly MacOS-specific.
124-
screenshot = screenshot.convert("RGB")
136+
if isinstance(screenshot, list):
137+
screenshot = [img.convert("RGB") for img in screenshot] # if screenshot is a list (i.e combine_screens=False).
138+
else:
139+
screenshot = screenshot.convert("RGB")
125140

126141
if show:
127142
# Show the image using matplotlib
128-
plt.imshow(np.array(screenshot))
143+
if isinstance(screenshot, list):
144+
for img in screenshot:
145+
plt.imshow(np.array(img))
146+
plt.show()
147+
else:
148+
plt.imshow(np.array(screenshot))
129149

130150
with warnings.catch_warnings():
131151
# It displays an annoying message about Agg not being able to display something or WHATEVER
132152
warnings.simplefilter("ignore")
133153
plt.show()
134154

135-
return screenshot
155+
return screenshot # this will be a list if screen == -1 and combine_screens == False
136156

137157
def find(self, description, screenshot=None):
138158
if description.startswith('"') and description.endswith('"'):
@@ -260,22 +280,78 @@ def get_text_as_list_of_lists(self, screenshot=None):
260280
)
261281

262282

263-
import io
264-
import subprocess
265-
266-
from PIL import Image
267-
268-
269-
def take_screenshot_to_pil(filename="temp_screenshot.png"):
270-
# Capture the screenshot and save it to a temporary file
271-
subprocess.run(["screencapture", "-x", filename], check=True)
272-
273-
# Open the image file with PIL
274-
with open(filename, "rb") as f:
275-
image_data = f.read()
276-
image = Image.open(io.BytesIO(image_data))
277-
278-
# Optionally, delete the temporary file if you don't need it after loading
279-
os.remove(filename)
280-
281-
return image
283+
def _capture_monitor(monitor):
    """Take a pyautogui screenshot of the region described by one monitor entry."""
    return pyautogui.screenshot(
        region=(monitor.x, monitor.y, monitor.width, monitor.height)
    )


def _monitor_label(index):
    """Return the caption drawn over the display at `index` in the collage."""
    return "Primary Monitor" if index == 0 else f"Monitor {index}"


def take_screenshot_to_pil(screen=0, combine_screens=True):
    """
    Capture one display, or every display, as PIL image(s).

    :param screen: index into the list returned by get_monitors(); index 0 is
        treated as the primary display. -1 captures every display.
    :param combine_screens: only consulted when screen == -1. If True, the
        captures are pasted left to right (top-aligned, on a black background)
        into one labelled RGB image. If False, the list of per-display captures
        is returned instead.
    :return: a single PIL image, or a list of PIL images when screen == -1 and
        combine_screens is False.
    :raises IndexError: if screen does not refer to a detected display.
    :raises ValueError: if a collage was requested but no display was detected.
    """
    monitors = get_monitors()

    if screen != -1:
        # Every index other than -1 selects a single display, so the separate
        # "screen > 0" and "screen == 0" paths collapse into one lookup.
        try:
            monitor = monitors[screen]
        except IndexError:
            raise IndexError(
                f"Display {screen} does not exist; "
                f"{len(monitors)} display(s) detected."
            ) from None
        return _capture_monitor(monitor)

    # screen == -1: capture every display.
    screenshots = [_capture_monitor(monitor) for monitor in monitors]

    if not combine_screens:
        return screenshots

    if not screenshots:
        raise ValueError("No displays detected; cannot build a combined screenshot.")

    total_width = sum(img.width for img in screenshots)
    max_height = max(img.height for img in screenshots)

    # One black RGB canvas, allocated once and drawn on in place. The label
    # colour is white, which is the same in RGB and BGR order, so OpenCV can
    # draw on the RGB array directly without any channel swapping.
    canvas = np.zeros((max_height, total_width, 3), dtype=np.uint8)

    font = cv2.FONT_HERSHEY_SIMPLEX
    base_font_scale = 4
    font_color = (255, 255, 255)
    # Passed in the `thickness` position of cv2.getTextSize / cv2.putText.
    thickness = 2

    x_offset = 0
    for i, img in enumerate(screenshots):
        # convert("RGB") guarantees three channels even if the capture has alpha.
        canvas[0:img.height, x_offset:x_offset + img.width] = np.array(
            img.convert("RGB")
        )
        x_offset += img.width

        text = _monitor_label(i)

        # Measure the caption at the base scale, then derive the scale that is
        # actually drawn from the display-size / text-size ratio.
        text_size = cv2.getTextSize(text, font, base_font_scale, thickness)[0]
        font_scale = min(img.width / text_size[0], img.height / text_size[1])
        text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]

        # x_offset has already advanced past this display, so stepping back
        # half its width lands on its horizontal centre.
        # NOTE(review): the vertical position is derived from the collage
        # height rather than this display's own height — confirm intended.
        text_x = x_offset - img.width // 2 - text_size[0] // 2
        text_y = max_height // 2 - text_size[1] // 2

        cv2.putText(
            canvas, text, (text_x, text_y), font, font_scale, font_color, thickness
        )

    return Image.fromarray(canvas)
352+
353+
354+
def get_displays():
    """Return the monitor list reported by get_monitors(), unmodified."""
    return get_monitors()
357+

interpreter/core/respond.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,12 @@ def respond(interpreter):
186186
)
187187
code = re.sub(r"import computer\.\w+\n", "pass\n", code)
188188
# If it does this it sees the screenshot twice (which is expected jupyter behavior)
189-
if code.split("\n")[-1] in [
190-
"computer.display.view()",
191-
"computer.display.screenshot()",
192-
"computer.view()",
193-
"computer.screenshot()",
194-
]:
189+
if any(code.split("\n")[-1].startswith(text) for text in [
190+
"computer.display.view",
191+
"computer.display.screenshot",
192+
"computer.view",
193+
"computer.screenshot",
194+
]):
195195
code = code + "\npass"
196196

197197
# sync up some things (is this how we want to do this?)

interpreter/terminal_interface/profiles/defaults/01.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@
9090
You may use the `computer` module to control the user's keyboard and mouse, if the task **requires** it:
9191
9292
```python
93-
computer.display.view() # Shows you what's on the screen, returns a `pil_image` `in case you need it (rarely). **You almost always want to do this first!**
93+
computer.display.info() # Returns a list of connected monitors/Displays and their info (x and y cordinates, width, height, width_mm, height_mm, name). Use this to verify the monitors connected before using computer.display.view() when neccessary
94+
computer.display.view() # Shows you what's on the screen (primary display by default), returns a `pil_image` `in case you need it (rarely). To get a specific display, use the parameter screen=DISPLAY_NUMBER (0 for primary monitor 1 and above for secondary monitors). **You almost always want to do this first!**
9495
computer.keyboard.hotkey(" ", "command") # Opens spotlight
9596
computer.keyboard.write("hello")
9697
computer.mouse.click("text onscreen") # This clicks on the UI element with that text. Use this **frequently** and get creative! To click a video, you could pass the *timestamp* (which is usually written on the thumbnail) into this.

interpreter/terminal_interface/profiles/defaults/os.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
```python
3737
computer.browser.search(query) # Silently searches Google for the query, returns result. The user's browser is unaffected. (does not open a browser!)
3838
39-
computer.display.view() # Shows you what's on the screen, returns a `pil_image` `in case you need it (rarely). **You almost always want to do this first!**
39+
computer.display.info() # Returns a list of connected monitors/Displays and their info (x and y cordinates, width, height, width_mm, height_mm, name). Use this to verify the monitors connected before using computer.display.view() when neccessary
40+
computer.display.view() # Shows you what's on the screen (primary display by default), returns a `pil_image` `in case you need it (rarely). To get a specific display, use the parameter screen=DISPLAY_NUMBER (0 for primary monitor 1 and above for secondary monitors). **You almost always want to do this first!**
4041
4142
computer.keyboard.hotkey(" ", "command") # Opens spotlight (very useful)
4243
computer.keyboard.write("hello")

interpreter/terminal_interface/terminal_interface.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,12 @@ def terminal_interface(interpreter, message):
361361
# (unless we figure out how to do this AFTER taking the screenshot)
362362
# otherwise it will try to click this notification!
363363

364-
if action in [
365-
"computer.screenshot()",
366-
"computer.display.screenshot()",
367-
"computer.display.view()",
368-
"computer.view()",
369-
]:
364+
if any(action.startswith(text) for text in [
365+
"computer.screenshot",
366+
"computer.display.screenshot",
367+
"computer.display.view",
368+
"computer.view"
369+
]):
370370
description = "Viewing screen..."
371371
elif action == "computer.mouse.click()":
372372
description = "Clicking..."

0 commit comments

Comments
 (0)