import warnings
from contextlib import redirect_stdout
from io import BytesIO
-
+import io
+import subprocess
+from PIL import Image
import requests
-
from ...utils.lazy_import import lazy_import
from ..utils.recipient_utils import format_to_recipient
-
import cv2
-from screeninfo import get_monitors #for getting info about connected monitors
+from screeninfo import get_monitors  # for getting info about connected monitors


# Still experimenting with this
@@ -61,20 +61,23 @@ def center(self):
        """
        return self.width // 2, self.height // 2

-    def view(self, show=True, quadrant=None, all_screens=True):
+    def view(self, show=True, quadrant=None, all_screens=False, combine_screens=True
+    ):
        """
        Redirects to self.screenshot
        """
-        return self.screenshot(all_screens, show, quadrant)
+        return self.screenshot(all_screens=all_screens, show=show, quadrant=quadrant, combine_screens=combine_screens)

    # def get_active_window(self):
    #     return get_active_window()

    def screenshot(
-        self, all_screens, show=True, quadrant=None, active_app_only=False, force_image=False,
+        self, all_screens=False, show=True, quadrant=None, active_app_only=False, force_image=False, combine_screens=True
    ):
        """
        Shows you what's on the screen by taking a screenshot of the entire screen or a specified quadrant. Returns a `pil_image` in case you need it (rarely). **You almost always want to do this first!**
+        :param all_screens: If True, take a screenshot of the primary and all secondary displays.
+        :param combine_screens: If True, return a single collage of all displays. Otherwise, return a list with one image per display.
        """
        if not self.computer.emit_images and force_image == False:
            text = self.get_text_as_list_of_lists()
@@ -96,10 +99,7 @@ def screenshot(
            region = self.get_active_window()["region"]
            screenshot = pyautogui.screenshot(region=region)
        else:
-            if platform.system() == "Darwin":
-                screenshot = take_screenshot_to_pil(all_screens=all_screens)
-            else:
-                screenshot = take_screenshot_to_pil(all_screens=all_screens)  # function should work fine for windows too
+            screenshot = take_screenshot_to_pil(all_screens=all_screens, combine_screens=combine_screens)  # uses pyautogui.screenshot, which works the same on macOS, Linux, and Windows
            # message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will receive it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
            # print(message)

@@ -126,18 +126,26 @@ def screenshot(

        # Open the image file with PIL
        # IPython interactive mode auto-displays plots, causing RGBA handling issues, possibly MacOS-specific.
-        screenshot = screenshot.convert("RGB")
+        if isinstance(screenshot, list):
+            screenshot = [img.convert("RGB") for img in screenshot]  # screenshot is a list when combine_screens=False
+        else:
+            screenshot = screenshot.convert("RGB")

        if show:
            # Show the image using matplotlib
-            plt.imshow(np.array(screenshot))
+            if isinstance(screenshot, list):
+                for img in screenshot:
+                    plt.imshow(np.array(img))
+                    plt.show()
+            else:
+                plt.imshow(np.array(screenshot))

            with warnings.catch_warnings():
                # It displays an annoying message about Agg not being able to display something or WHATEVER
                warnings.simplefilter("ignore")
                plt.show()

-        return screenshot
+        return screenshot  # this will be a list if combine_screens == False

    def find(self, description, screenshot=None):
        if description.startswith('"') and description.endswith('"'):
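To make the new flags concrete, here is a hedged usage sketch (not part of this diff). It assumes a `computer.display` object exposing the `view()` method shown above, which the commented-out message in this file also references:

```python
# Illustrative only: `computer` stands in for whatever object exposes the
# Display class being modified in this diff.

# Default: screenshot of the primary display only.
primary = computer.display.view()

# One combined collage of every connected display (combine_screens defaults to True).
collage = computer.display.view(all_screens=True)

# A list of PIL images, one per display, instead of a collage.
per_monitor = computer.display.view(all_screens=True, combine_screens=False)
for img in per_monitor:
    print(img.size)  # each element is a PIL.Image.Image
```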
@@ -265,73 +273,67 @@ def get_text_as_list_of_lists(self, screenshot=None):
        )


-import io
-import subprocess
-
-from PIL import Image
-
-
-def take_screenshot_to_pil(all_screens=True):
+def take_screenshot_to_pil(all_screens=False, combine_screens=True):
    if all_screens:
        # Get information about all screens
        monitors = get_monitors()
-
        # Take a screenshot of each screen and save them in a list
        screenshots = [pyautogui.screenshot(region=(monitor.x, monitor.y, monitor.width, monitor.height)) for monitor in monitors]

-        # Combine all screenshots horizontally
-        total_width = sum([img.width for img in screenshots])
-        max_height = max([img.height for img in screenshots])
-
-        # Create a new image with a size that can contain all screenshots
-        new_img = Image.new('RGB', (total_width, max_height))
-
-        # Paste each screenshot into the new image
-
-        x_offset = 0
-        for i, img in enumerate(screenshots):
-            # Convert PIL Image to OpenCV Image (numpy array)
-            img_cv = np.array(img)
-            img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
-
-            # Convert new_img PIL Image to OpenCV Image (numpy array)
-            new_img_cv = np.array(new_img)
-            new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_RGB2BGR)
-
-            # Paste each screenshot into the new image using OpenCV
-            new_img_cv[0:img_cv.shape[0], x_offset:x_offset + img_cv.shape[1]] = img_cv
-            x_offset += img.width
-
-            # Add monitor labels using OpenCV
-            font = cv2.FONT_HERSHEY_SIMPLEX
-            font_scale = 4
-            font_color = (255, 255, 255)
-            line_type = 2
-
-            if i == 0:
-                text = "Primary Monitor"
-            else:
-                text = f"Monitor {i}"
+        if combine_screens:
+            # Combine all screenshots horizontally
+            total_width = sum([img.width for img in screenshots])
+            max_height = max([img.height for img in screenshots])
+
+            # Create a new image with a size that can contain all screenshots
+            new_img = Image.new('RGB', (total_width, max_height))
+
+            # Paste each screenshot into the new image
+            x_offset = 0
+            for i, img in enumerate(screenshots):
+                # Convert PIL Image to OpenCV Image (numpy array)
+                img_cv = np.array(img)
+                img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
+
+                # Convert new_img PIL Image to OpenCV Image (numpy array)
+                new_img_cv = np.array(new_img)
+                new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_RGB2BGR)
+
+                # Paste each screenshot into the new image using OpenCV
+                new_img_cv[0:img_cv.shape[0], x_offset:x_offset + img_cv.shape[1]] = img_cv
+                x_offset += img.width
+
+                # Add monitor labels using OpenCV
+                font = cv2.FONT_HERSHEY_SIMPLEX
+                font_scale = 4
+                font_color = (255, 255, 255)
+                line_type = 2
+
+                if i == 0:
+                    text = "Primary Monitor"
+                else:
+                    text = f"Monitor {i}"
+
+                # Calculate the font scale that will fit the text perfectly in the center of the monitor
+                text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
+                font_scale = min(img.width / text_size[0], img.height / text_size[1])
+
+                # Recalculate the text size with the new font scale
+                text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]

-            # Calculate the font scale that will fit the text perfectly in the center of the monitor
-            text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-            font_scale = min(img.width / text_size[0], img.height / text_size[1])
-
-            # Recalculate the text size with the new font scale
-            text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-
-            # Calculate the position to center the text
-            text_x = x_offset - img.width // 2 - text_size[0] // 2
-            text_y = max_height // 2 - text_size[1] // 2
-
-            cv2.putText(new_img_cv, text, (text_x, text_y), font, font_scale, font_color, line_type)
-
-        # Convert new_img from OpenCV Image back to PIL Image
-        new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_BGR2RGB)
-        new_img = Image.fromarray(new_img_cv)
-
-        return new_img
+                # Calculate the position to center the text
+                text_x = x_offset - img.width // 2 - text_size[0] // 2
+                text_y = max_height // 2 - text_size[1] // 2
+
+                cv2.putText(new_img_cv, text, (text_x, text_y), font, font_scale, font_color, line_type)
+
+            # Convert new_img from OpenCV Image back to PIL Image
+            new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_BGR2RGB)
+            new_img = Image.fromarray(new_img_cv)

+            return new_img
+        else:
+            return screenshots
    else:
        # Take a screenshot of the primary screen
        return pyautogui.screenshot()
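For reference, the horizontal-collage step above can also be expressed with PIL alone. The sketch below is a simplified illustration of the same idea (paste each capture side by side, top-aligned); it is not the code added in this PR and omits the OpenCV-based monitor labels:

```python
from PIL import Image

def combine_horizontally(screenshots):
    # Minimal sketch: stitch a list of PIL images into one side-by-side collage.
    total_width = sum(img.width for img in screenshots)
    max_height = max(img.height for img in screenshots)

    collage = Image.new("RGB", (total_width, max_height))
    x_offset = 0
    for img in screenshots:
        collage.paste(img, (x_offset, 0))  # top-aligned, like the cv2 array copy above
        x_offset += img.width
    return collage
```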