Skip to content

Commit 53cd77b

Browse files
committed
Cross platform fixes
1 parent 6a5a703 commit 53cd77b

File tree

1 file changed

+9
-16
lines changed

1 file changed

+9
-16
lines changed

interpreter/computer_use/tools/computer.py

Lines changed: 9 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -93,15 +93,15 @@ def smooth_move_to(x, y, duration=1.2):
9393

9494
class ComputerTool(BaseAnthropicTool):
9595
"""
96-
A tool that allows the agent to interact with the screen, keyboard, and mouse of the current computer.
96+
A tool that allows the agent to interact with the primary monitor's screen, keyboard, and mouse.
9797
The tool parameters are defined by Anthropic and are not editable.
9898
"""
9999

100100
name: Literal["computer"] = "computer"
101101
api_type: Literal["computer_20241022"] = "computer_20241022"
102102
width: int
103103
height: int
104-
display_num: int | None
104+
display_num: None # Simplified to always be None since we're only using primary display
105105

106106
_screenshot_delay = 2.0
107107
_scaling_enabled = True
@@ -122,17 +122,8 @@ def to_params(self) -> BetaToolComputerUse20241022Param:
122122

123123
def __init__(self):
124124
super().__init__()
125-
126125
self.width, self.height = pyautogui.size()
127-
128-
if (display_num := os.getenv("DISPLAY_NUM")) is not None:
129-
self.display_num = int(display_num)
130-
self._display_prefix = f"DISPLAY=:{self.display_num} "
131-
else:
132-
self.display_num = None
133-
self._display_prefix = ""
134-
135-
self.xdotool = f"{self._display_prefix}xdotool"
126+
self.display_num = None
136127

137128
async def __call__(
138129
self,
@@ -230,7 +221,6 @@ def normalize_key(key):
230221

231222
async def screenshot(self):
232223
"""Take a screenshot of the current screen and return the base64 encoded image."""
233-
# Use a user-writable directory for temporary files
234224
temp_dir = Path(tempfile.gettempdir())
235225
path = temp_dir / f"screenshot_{uuid4().hex}.png"
236226

@@ -241,9 +231,12 @@ async def screenshot(self):
241231
x, y = self.scale_coordinates(
242232
ScalingSource.COMPUTER, self.width, self.height
243233
)
244-
await self.shell(
245-
f"convert {path} -resize {x}x{y}! {path}", take_screenshot=False
246-
)
234+
# Use PIL directly instead of shell convert command
235+
from PIL import Image
236+
237+
with Image.open(path) as img:
238+
img = img.resize((x, y), Image.Resampling.LANCZOS)
239+
img.save(path)
247240

248241
if path.exists():
249242
base64_image = base64.b64encode(path.read_bytes()).decode()

0 commit comments

Comments
 (0)