Skip to content

Commit 3a2a82c

Browse files
committed
computer.display.view() method now returns all monitor screens as a collage
1 parent cc6291f commit 3a2a82c

File tree

1 file changed

+67
-11
lines changed

1 file changed

+67
-11
lines changed

interpreter/core/computer/display/display.py

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
from ...utils.lazy_import import lazy_import
1313
from ..utils.recipient_utils import format_to_recipient
1414

15+
import cv2
16+
from screeninfo import get_monitors #for getting info about connected monitors
17+
18+
1519
# Still experimenting with this
1620
# from utils.get_active_window import get_active_window
1721

@@ -20,6 +24,7 @@
2024
np = lazy_import("numpy")
2125
plt = lazy_import("matplotlib.pyplot")
2226

27+
2328
from ..utils.computer_vision import find_text_in_image, pytesseract_get_text
2429

2530

@@ -94,7 +99,7 @@ def screenshot(
9499
if platform.system() == "Darwin":
95100
screenshot = take_screenshot_to_pil()
96101
else:
97-
screenshot = pyautogui.screenshot()
102+
screenshot = take_screenshot_to_pil() #function should work fine for windows too
98103
# message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
99104
# print(message)
100105

@@ -266,16 +271,67 @@ def get_text_as_list_of_lists(self, screenshot=None):
266271
from PIL import Image
267272

268273

269-
def take_screenshot_to_pil(filename="temp_screenshot.png"):
270-
# Capture the screenshot and save it to a temporary file
271-
subprocess.run(["screencapture", "-x", filename], check=True)
274+
def take_screenshot_to_pil(all_screens=False):
275+
if all_screens:
276+
# Get information about all screens
277+
monitors = get_monitors()
278+
279+
# Take a screenshot of each screen and save them in a list
280+
screenshots = [pyautogui.screenshot(region=(monitor.x, monitor.y, monitor.width, monitor.height)) for monitor in monitors]
281+
282+
# Combine all screenshots horizontally
283+
total_width = sum([img.width for img in screenshots])
284+
max_height = max([img.height for img in screenshots])
272285

273-
# Open the image file with PIL
274-
with open(filename, "rb") as f:
275-
image_data = f.read()
276-
image = Image.open(io.BytesIO(image_data))
286+
# Create a new image with a size that can contain all screenshots
287+
new_img = Image.new('RGB', (total_width, max_height))
277288

278-
# Optionally, delete the temporary file if you don't need it after loading
279-
os.remove(filename)
289+
# Paste each screenshot into the new image
290+
291+
x_offset = 0
292+
for i, img in enumerate(screenshots):
293+
# Convert PIL Image to OpenCV Image (numpy array)
294+
img_cv = np.array(img)
295+
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
280296

281-
return image
297+
# Convert new_img PIL Image to OpenCV Image (numpy array)
298+
new_img_cv = np.array(new_img)
299+
new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_RGB2BGR)
300+
301+
# Paste each screenshot into the new image using OpenCV
302+
new_img_cv[0:img_cv.shape[0], x_offset:x_offset+img_cv.shape[1]] = img_cv
303+
x_offset += img.width
304+
305+
# Add monitor labels using OpenCV
306+
font = cv2.FONT_HERSHEY_SIMPLEX
307+
font_scale = 4
308+
font_color = (255, 255, 255)
309+
line_type = 2
310+
311+
if i == 0:
312+
text = "Primary Monitor"
313+
else:
314+
text = f"Monitor {i}"
315+
316+
# Calculate the font scale that will fit the text perfectly in the center of the monitor
317+
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
318+
font_scale = min(img.width / text_size[0], img.height / text_size[1])
319+
320+
# Recalculate the text size with the new font scale
321+
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
322+
323+
# Calculate the position to center the text
324+
text_x = x_offset - img.width // 2 - text_size[0] // 2
325+
text_y = max_height // 2 - text_size[1] // 2
326+
327+
cv2.putText(new_img_cv, text, (text_x, text_y), font, font_scale, font_color, line_type)
328+
329+
# Convert new_img from OpenCV Image back to PIL Image
330+
new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_BGR2RGB)
331+
new_img = Image.fromarray(new_img_cv)
332+
333+
return new_img
334+
335+
else:
336+
# Take a screenshot of the primary screen
337+
return pyautogui.screenshot()

0 commit comments

Comments
 (0)