diff --git a/src/computer.py b/src/computer.py index 3da57b0..681914b 100644 --- a/src/computer.py +++ b/src/computer.py @@ -1,8 +1,11 @@ -import pyautogui +from wsl import pyautogui_client from PIL import Image import io import base64 import time +from wsl import screenshot as sc + +pyautogui = pyautogui_client.PyAutoGUIClient() class ComputerControl: def __init__(self): @@ -45,7 +48,8 @@ def perform_action(self, action): raise ValueError(f"Unsupported action: {action_type}") def take_screenshot(self): - screenshot = pyautogui.screenshot() + #screenshot = pyautogui.screenshot() + screenshot = sc.screenshot() ai_screenshot = self.resize_for_ai(screenshot) buffered = io.BytesIO() ai_screenshot.save(buffered, format="PNG") diff --git a/wsl/README.md b/wsl/README.md new file mode 100644 index 0000000..207dc98 --- /dev/null +++ b/wsl/README.md @@ -0,0 +1,40 @@ +# Running in WSL + + +## Start pyautogui server in windows + +1. Install python in windows +2. Install Flask, flask-cors and pyautogui +3. Run pyautogui_server.py in windows + + +You'll need to change screen resolution to a scale of 1280x800, +or change the resolution in `computer.py` to match your screen ratio. + +## Test the screenshot function + +The `screenshot.py` function will take a screenshot of the +screen and save it in the windows "Pictures/Screenshots" folder. + +## Test moving the pointer + +The `test_move_pointer.py` function will move the pointer to the +center of the screen. + +## Run the agent + +You will need to get the IP address of your windows machine. Usually it is +`192.168.x.x`. You will also need to get an API key from the Anthropic website. +Note that you should not change `/etc/resolv.conf` in WSL, as it will break the +network connection. 
class PyAutoGUIClient:
    """HTTP client that mirrors a small subset of the ``pyautogui`` API.

    Every method forwards the call to the Flask server defined in
    ``pyautogui_server.py`` and returns the decoded JSON reply (``size``
    converts the reply to a tuple of ints).

    Args:
        base_url: Address of the server.  When ``None`` the module-level
            ``ADDRESS`` (taken from ``PYAUTOGUI_SERVER_ADDRESS``) is used.
            A missing ``http://``/``https://`` scheme is added and a trailing
            slash is removed, for both the explicit and the default value.
        timeout: Seconds to wait for the server on each request.  Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Raises:
        requests.RequestException: from any method, when the server cannot
            be reached, does not answer in time, or answers with an HTTP
            error status.
    """

    # Generous enough for normal UI actions, but finite so that an
    # unreachable server does not hang the caller forever.
    DEFAULT_TIMEOUT = 30

    def __init__(self, base_url=None, timeout=DEFAULT_TIMEOUT):
        if base_url is None:
            base_url = ADDRESS
        # Accept bare "host:port" values, as shown in the README.
        if not base_url.startswith(("http://", "https://")):
            base_url = f"http://{base_url}"
        # Paths below start with "/", so drop a trailing one to avoid "//".
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _request(self, method, path, payload=None, extra_wait=0):
        """Send one request and return the decoded JSON body.

        ``extra_wait`` extends the timeout for actions that are expected to
        take a while on the server side (slow mouse moves, slow typing).
        """
        timeout = None if self.timeout is None else self.timeout + extra_wait
        response = requests.request(
            method, f"{self.base_url}{path}", json=payload, timeout=timeout
        )
        # Surface server-side failures instead of trying to parse an error page.
        response.raise_for_status()
        return response.json()

    def size(self):
        """Return the server's screen size as ``(width, height)`` ints."""
        data = self._request("GET", "/screen/size")
        print(f"Screen size: {data}")
        return int(data['width']), int(data['height'])

    def position(self):
        """Return the server's JSON reply describing the pointer position."""
        data = self._request("GET", "/mouse/position")
        print(f"Mouse position: {data}")
        return data

    def moveTo(self, x, y, duration=0):
        """Move the pointer to ``(x, y)``, taking ``duration`` seconds."""
        result = self._request(
            "POST",
            "/mouse/move",
            {"x": x, "y": y, "duration": duration},
            extra_wait=duration or 0,
        )
        print(f"Moving mouse to: {x}, {y}")
        return result

    def click(self, x=None, y=None, button='left'):
        """Click ``button``; coordinates are sent only when both are given."""
        payload = {"button": button}
        if x is not None and y is not None:
            payload.update({"x": x, "y": y})
            print(f"Clicking at: {x}, {y}")
        return self._request("POST", "/mouse/click", payload)

    def write(self, text, interval=0):
        """Type ``text`` with ``interval`` seconds between key presses."""
        print(f"Writing text: {text}")
        return self._request(
            "POST",
            "/keyboard/write",
            {"text": text, "interval": interval},
            extra_wait=(interval or 0) * len(text),
        )

    def press(self, key):
        """Press ``key``; the server treats ``a+b`` as a hotkey combination."""
        print(f"Pressing key: {key}")
        return self._request("POST", "/keyboard/press", {"key": key})

    def screenshot(self):
        """Ask the server to take a screenshot and return its JSON reply.

        The reply is whatever the server sends back, not image data.
        """
        return self._request("GET", "/screen/screenshot")
if __name__ == '__main__':
    # Listen on all interfaces: the README has the client connect to the
    # Windows machine's network address rather than to loopback.
    #
    # Debug mode is deliberately not forced on.  With debug enabled Flask
    # serves the Werkzeug interactive debugger, which can execute arbitrary
    # Python, and on 0.0.0.0 that would be offered to every host that can
    # reach this port.  Set FLASK_DEBUG=1 in the environment to opt in
    # during local development.
    #
    # NOTE(review): the endpoints are unauthenticated and drive the mouse
    # and keyboard; restrict access to this port with the host firewall.
    app.run(host='0.0.0.0')
"""Manual smoke test: move the pointer to the centre of the screen.

Requires a running ``pyautogui_server.py``; run this file as a script.
"""
from pyautogui_client import PyAutoGUIClient


def main():
    """Query the screen size and move the pointer to its centre."""
    client = PyAutoGUIClient()

    # PyAutoGUIClient.size() returns a (width, height) tuple of ints.
    width, height = client.size()

    # The client mirrors pyautogui's naming, so the method is moveTo().
    response = client.moveTo(width // 2, height // 2, duration=1.0)
    print(response)


# Guarded so that importing this module (for example during test
# collection) does not send requests to the server.
if __name__ == "__main__":
    main()
+windows.resize(500,500) +windows.move(100,100) +windows.show() +sys.exit(app.exec())