@@ -93,15 +93,15 @@ def smooth_move_to(x, y, duration=1.2):
93
93
94
94
class ComputerTool (BaseAnthropicTool ):
95
95
"""
96
- A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer .
96
+ A tool that allows the agent to interact with the primary monitor's screen, keyboard, and mouse.
97
97
The tool parameters are defined by Anthropic and are not editable.
98
98
"""
99
99
100
100
name : Literal ["computer" ] = "computer"
101
101
api_type : Literal ["computer_20241022" ] = "computer_20241022"
102
102
width : int
103
103
height : int
104
- display_num : int | None
104
+ display_num : None # Simplified to always be None since we're only using primary display
105
105
106
106
_screenshot_delay = 2.0
107
107
_scaling_enabled = True
@@ -122,17 +122,8 @@ def to_params(self) -> BetaToolComputerUse20241022Param:
122
122
123
123
def __init__ (self ):
124
124
super ().__init__ ()
125
-
126
125
self .width , self .height = pyautogui .size ()
127
-
128
- if (display_num := os .getenv ("DISPLAY_NUM" )) is not None :
129
- self .display_num = int (display_num )
130
- self ._display_prefix = f"DISPLAY=:{ self .display_num } "
131
- else :
132
- self .display_num = None
133
- self ._display_prefix = ""
134
-
135
- self .xdotool = f"{ self ._display_prefix } xdotool"
126
+ self .display_num = None
136
127
137
128
async def __call__ (
138
129
self ,
@@ -230,7 +221,6 @@ def normalize_key(key):
230
221
231
222
async def screenshot (self ):
232
223
"""Take a screenshot of the current screen and return the base64 encoded image."""
233
- # Use a user-writable directory for temporary files
234
224
temp_dir = Path (tempfile .gettempdir ())
235
225
path = temp_dir / f"screenshot_{ uuid4 ().hex } .png"
236
226
@@ -241,9 +231,12 @@ async def screenshot(self):
241
231
x , y = self .scale_coordinates (
242
232
ScalingSource .COMPUTER , self .width , self .height
243
233
)
244
- await self .shell (
245
- f"convert { path } -resize { x } x{ y } ! { path } " , take_screenshot = False
246
- )
234
+ # Use PIL directly instead of shell convert command
235
+ from PIL import Image
236
+
237
+ with Image .open (path ) as img :
238
+ img = img .resize ((x , y ), Image .Resampling .LANCZOS )
239
+ img .save (path )
247
240
248
241
if path .exists ():
249
242
base64_image = base64 .b64encode (path .read_bytes ()).decode ()
0 commit comments