 12 |  12 | from ...utils.lazy_import import lazy_import
 13 |  13 | from ..utils.recipient_utils import format_to_recipient
 14 |  14 |
    |  15 | +import cv2
    |  16 | +from screeninfo import get_monitors  # for getting info about connected monitors
    |  17 | +
    |  18 | +
 15 |  19 | # Still experimenting with this
 16 |  20 | # from utils.get_active_window import get_active_window
 17 |  21 |
 20 |  24 | np = lazy_import("numpy")
 21 |  25 | plt = lazy_import("matplotlib.pyplot")
 22 |  26 |
    |  27 | +
 23 |  28 | from ..utils.computer_vision import find_text_in_image, pytesseract_get_text
 24 |  29 |
 25 |  30 |
@@ -94,7 +99,7 @@ def screenshot(
 94 |  99 |         if platform.system() == "Darwin":
 95 | 100 |             screenshot = take_screenshot_to_pil()
 96 | 101 |         else:
 97 |     | -            screenshot = pyautogui.screenshot()
    | 102 | +            screenshot = take_screenshot_to_pil()  # this function should work fine on Windows too
 98 | 103 |             # message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
 99 | 104 |             # print(message)
100 | 105 |
@@ -266,16 +271,67 @@ def get_text_as_list_of_lists(self, screenshot=None):
266 | 271 | from PIL import Image
267 | 272 |
268 | 273 |
269 |     | -def take_screenshot_to_pil(filename="temp_screenshot.png"):
270 |     | -    # Capture the screenshot and save it to a temporary file
271 |     | -    subprocess.run(["screencapture", "-x", filename], check=True)
    | 274 | +def take_screenshot_to_pil(all_screens=False):
    | 275 | +    if all_screens:
    | 276 | +        # Get information about all screens
    | 277 | +        monitors = get_monitors()
    | 278 | +
    | 279 | +        # Take a screenshot of each screen and save them in a list
    | 280 | +        screenshots = [pyautogui.screenshot(region=(monitor.x, monitor.y, monitor.width, monitor.height)) for monitor in monitors]
    | 281 | +
    | 282 | +        # Combine all screenshots horizontally
    | 283 | +        total_width = sum([img.width for img in screenshots])
    | 284 | +        max_height = max([img.height for img in screenshots])
272 | 285 |
273 |     | -    # Open the image file with PIL
274 |     | -    with open(filename, "rb") as f:
275 |     | -        image_data = f.read()
276 |     | -        image = Image.open(io.BytesIO(image_data))
    | 286 | +        # Create a new image with a size that can contain all screenshots
    | 287 | +        new_img = Image.new('RGB', (total_width, max_height))
277 | 288 |
278 |     | -    # Optionally, delete the temporary file if you don't need it after loading
279 |     | -    os.remove(filename)
    | 289 | +        # Paste each screenshot into the new image
    | 290 | +
    | 291 | +        x_offset = 0
    | 292 | +        for i, img in enumerate(screenshots):
    | 293 | +            # Convert PIL Image to OpenCV Image (numpy array)
    | 294 | +            img_cv = np.array(img)
    | 295 | +            img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
280 | 296 |
281 |     | -    return image
    | 297 | +            # Convert new_img PIL Image to OpenCV Image (numpy array)
    | 298 | +            new_img_cv = np.array(new_img)
    | 299 | +            new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_RGB2BGR)
    | 300 | +
    | 301 | +            # Paste each screenshot into the new image using OpenCV
    | 302 | +            new_img_cv[0:img_cv.shape[0], x_offset:x_offset+img_cv.shape[1]] = img_cv
    | 303 | +            x_offset += img.width
    | 304 | +
    | 305 | +            # Add monitor labels using OpenCV
    | 306 | +            font = cv2.FONT_HERSHEY_SIMPLEX
    | 307 | +            font_scale = 4
    | 308 | +            font_color = (255, 255, 255)
    | 309 | +            line_type = 2
    | 310 | +
    | 311 | +            if i == 0:
    | 312 | +                text = "Primary Monitor"
    | 313 | +            else:
    | 314 | +                text = f"Monitor {i}"
    | 315 | +
    | 316 | +            # Scale the font so the label fits within this monitor's region
    | 317 | +            text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
    | 318 | +            font_scale = min(img.width / text_size[0], img.height / text_size[1])
    | 319 | +
    | 320 | +            # Recalculate the text size with the new font scale
    | 321 | +            text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
    | 322 | +
    | 323 | +            # Calculate the position to center the text
    | 324 | +            text_x = x_offset - img.width // 2 - text_size[0] // 2
    | 325 | +            text_y = max_height // 2 - text_size[1] // 2
    | 326 | +
    | 327 | +            cv2.putText(new_img_cv, text, (text_x, text_y), font, font_scale, font_color, line_type)
    | 328 | +
    | 329 | +            # Convert new_img from OpenCV Image back to PIL Image
    | 330 | +            new_img_cv = cv2.cvtColor(new_img_cv, cv2.COLOR_BGR2RGB)
    | 331 | +            new_img = Image.fromarray(new_img_cv)
    | 332 | +
    | 333 | +        return new_img
    | 334 | +
    | 335 | +    else:
    | 336 | +        # Take a screenshot of the primary screen
    | 337 | +        return pyautogui.screenshot()
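
For context, here is a minimal standalone sketch (not part of this diff) of the stitching approach the new take_screenshot_to_pil(all_screens=True) path uses: one pyautogui screenshot per monitor reported by screeninfo, pasted side by side onto a single PIL canvas. It skips the OpenCV label drawing, and the helper name stitch_all_screens and the output filename are made up for illustration. Like the diff, it assumes pyautogui.screenshot(region=...) can capture at each monitor's virtual-desktop coordinates, which may not hold on every platform.

# Minimal sketch of the multi-monitor stitching idea (illustrative only, not PR code).
import pyautogui
from PIL import Image
from screeninfo import get_monitors


def stitch_all_screens():
    # One screenshot per monitor, captured at its virtual-desktop offset
    monitors = get_monitors()
    shots = [
        pyautogui.screenshot(region=(m.x, m.y, m.width, m.height))
        for m in monitors
    ]

    # Canvas wide enough for all screens and as tall as the tallest one
    canvas = Image.new("RGB", (sum(s.width for s in shots), max(s.height for s in shots)))

    # Paste left to right, tracking a running x offset
    x_offset = 0
    for shot in shots:
        canvas.paste(shot, (x_offset, 0))
        x_offset += shot.width

    return canvas


if __name__ == "__main__":
    stitch_all_screens().save("all_screens.png")  # save the combined image for inspection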