Skip to content

Commit c52c964

Browse files
committed
updated display.screenshot; still working on it
1 parent 77e5030 commit c52c964

File tree

1 file changed

+75
-73
lines changed

1 file changed

+75
-73
lines changed

interpreter/core/computer/display/display.py

Lines changed: 75 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
import warnings
77
from contextlib import redirect_stdout
88
from io import BytesIO
9-
9+
import io
10+
import subprocess
11+
from PIL import Image
1012
import requests
11-
1213
from ...utils.lazy_import import lazy_import
1314
from ..utils.recipient_utils import format_to_recipient
14-
1515
import cv2
16-
from screeninfo import get_monitors #for getting info about connected monitors
16+
from screeninfo import get_monitors # for getting info about connected monitors
1717

1818

1919
# Still experimenting with this
@@ -61,20 +61,23 @@ def center(self):
6161
"""
6262
return self.width // 2, self.height // 2
6363

64-
def view(self, show=True, quadrant=None, all_screens=True):
64+
def view(self, show=True, quadrant=None, all_screens=False, combine_screens=True
65+
):
6566
"""
6667
Redirects to self.screenshot
6768
"""
68-
return self.screenshot(all_screens, show, quadrant )
69+
return self.screenshot(all_screens=all_screens, show=show, quadrant=quadrant, combine_screens=combine_screens)
6970

7071
# def get_active_window(self):
7172
# return get_active_window()
7273

7374
def screenshot(
74-
self,all_screens, show=True, quadrant=None, active_app_only=False, force_image=False,
75+
self, all_screens=False, show=True, quadrant=None, active_app_only=False, force_image=False,combine_screens=True
7576
):
7677
"""
7778
Shows you what's on the screen by taking a screenshot of the entire screen or a specified quadrant. Returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!**
79+
:param all_screens: If True, take a screenshot of primary and secondary displays.
80+
:param combine_screens: If True, a collage of all display screens will be returned. Otherwise, a list of display screens will be returned.
7881
"""
7982
if not self.computer.emit_images and force_image == False:
8083
text = self.get_text_as_list_of_lists()
@@ -96,10 +99,7 @@ def screenshot(
9699
region = self.get_active_window()["region"]
97100
screenshot = pyautogui.screenshot(region=region)
98101
else:
99-
if platform.system() == "Darwin":
100-
screenshot = take_screenshot_to_pil(all_screens=all_screens)
101-
else:
102-
screenshot = take_screenshot_to_pil(all_screens=all_screens) #function should work fine for windows too
102+
screenshot = take_screenshot_to_pil(all_screens=all_screens, combine_screens=combine_screens) # this function uses pyautogui.screenshot which works fine for all OS (mac, linux and windows)
103103
# message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will receive it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
104104
# print(message)
105105

@@ -126,18 +126,26 @@ def screenshot(
126126

127127
# Open the image file with PIL
128128
# IPython interactive mode auto-displays plots, causing RGBA handling issues, possibly MacOS-specific.
129-
screenshot = screenshot.convert("RGB")
129+
if isinstance(screenshot, list):
130+
screenshot = [img.convert("RGB") for img in screenshot] # screenshot is a list when combine_screens=False
131+
else:
132+
screenshot = screenshot.convert("RGB")
130133

131134
if show:
132135
# Show the image using matplotlib
133-
plt.imshow(np.array(screenshot))
136+
if isinstance(screenshot, list):
137+
for img in screenshot:
138+
plt.imshow(np.array(img))
139+
plt.show()
140+
else:
141+
plt.imshow(np.array(screenshot))
134142

135143
with warnings.catch_warnings():
136144
# It displays an annoying message about Agg not being able to display something or WHATEVER
137145
warnings.simplefilter("ignore")
138146
plt.show()
139147

140-
return screenshot
148+
return screenshot # this will be a list if combine_screens is False
141149

142150
def find(self, description, screenshot=None):
143151
if description.startswith('"') and description.endswith('"'):
@@ -265,73 +273,67 @@ def get_text_as_list_of_lists(self, screenshot=None):
265273
)
266274

267275

268-
import io
269-
import subprocess
270-
271-
from PIL import Image
272-
273-
274-
def take_screenshot_to_pil(all_screens=True):
276+
def take_screenshot_to_pil(all_screens=False, combine_screens=True):
275277
if all_screens:
276278
# Get information about all screens
277279
monitors = get_monitors()
278-
279280
# Take a screenshot of each screen and save them in a list
280281
screenshots = [pyautogui.screenshot(region=(monitor.x, monitor.y, monitor.width, monitor.height)) for monitor in monitors]
281282

282-
# Combine all screenshots horizontally
283-
total_width = sum([img.width for img in screenshots])
284-
max_height = max([img.height for img in screenshots])
285-
286-
# Create a new image with a size that can contain all screenshots
287-
new_img = Image.new('RGB', (total_width, max_height))
288-
289-
# Paste each screenshot into the new image
290-
291-
x_offset = 0
292-
for i, img in enumerate(screenshots):
293-
# Convert PIL Image to OpenCV Image (numpy array)
294-
img_cv = np.array(img)
295-
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
296-
297-
# Convert new_img PIL Image to OpenCV Image (numpy array)
298-
new_img_cv = np.array(new_img)
299-
new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_RGB2BGR)
300-
301-
# Paste each screenshot into the new image using OpenCV
302-
new_img_cv[0:img_cv.shape[0], x_offset:x_offset+img_cv.shape[1]] = img_cv
303-
x_offset += img.width
304-
305-
# Add monitor labels using OpenCV
306-
font = cv2.FONT_HERSHEY_SIMPLEX
307-
font_scale = 4
308-
font_color = (255, 255, 255)
309-
line_type = 2
310-
311-
if i == 0:
312-
text = "Primary Monitor"
313-
else:
314-
text = f"Monitor {i}"
283+
if combine_screens:
284+
# Combine all screenshots horizontally
285+
total_width = sum([img.width for img in screenshots])
286+
max_height = max([img.height for img in screenshots])
287+
288+
# Create a new image with a size that can contain all screenshots
289+
new_img = Image.new('RGB', (total_width, max_height))
290+
291+
# Paste each screenshot into the new image
292+
x_offset = 0
293+
for i, img in enumerate(screenshots):
294+
# Convert PIL Image to OpenCV Image (numpy array)
295+
img_cv = np.array(img)
296+
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
297+
298+
# Convert new_img PIL Image to OpenCV Image (numpy array)
299+
new_img_cv = np.array(new_img)
300+
new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_RGB2BGR)
301+
302+
# Paste each screenshot into the new image using OpenCV
303+
new_img_cv[0:img_cv.shape[0], x_offset:x_offset+img_cv.shape[1]] = img_cv
304+
x_offset += img.width
305+
306+
# Add monitor labels using OpenCV
307+
font = cv2.FONT_HERSHEY_SIMPLEX
308+
font_scale = 4
309+
font_color = (255, 255, 255)
310+
line_type = 2
311+
312+
if i == 0:
313+
text = "Primary Monitor"
314+
else:
315+
text = f"Monitor {i}"
316+
317+
# Calculate the font scale that will fit the text perfectly in the center of the monitor
318+
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
319+
font_scale = min(img.width / text_size[0], img.height / text_size[1])
320+
321+
# Recalculate the text size with the new font scale
322+
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
315323

316-
# Calculate the font scale that will fit the text perfectly in the center of the monitor
317-
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
318-
font_scale = min(img.width / text_size[0], img.height / text_size[1])
319-
320-
# Recalculate the text size with the new font scale
321-
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
322-
323-
# Calculate the position to center the text
324-
text_x = x_offset - img.width // 2 - text_size[0] // 2
325-
text_y = max_height // 2 - text_size[1] // 2
326-
327-
cv2.putText(new_img_cv, text, (text_x, text_y), font, font_scale, font_color, line_type)
328-
329-
# Convert new_img from OpenCV Image back to PIL Image
330-
new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_BGR2RGB)
331-
new_img = Image.fromarray(new_img_cv)
332-
333-
return new_img
324+
# Calculate the position to center the text
325+
text_x = x_offset - img.width // 2 - text_size[0] // 2
326+
text_y = max_height // 2 - text_size[1] // 2
327+
328+
cv2.putText(new_img_cv, text, (text_x, text_y), font, font_scale, font_color, line_type)
329+
330+
# Convert new_img from OpenCV Image back to PIL Image
331+
new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_BGR2RGB)
332+
new_img = Image.fromarray(new_img_cv)
334333

334+
return new_img
335+
else:
336+
return screenshots
335337
else:
336338
# Take a screenshot of the primary screen
337339
return pyautogui.screenshot()

0 commit comments

Comments
 (0)