Skip to content

Commit e31a8dd

Browse files
abrichr and claude committed
Improve OmniParser integration with strict validation
- Add allow_no_parser flag to make it explicit when running without OmniParser
- Fail by default if OmniParser server is not available
- Update README with clear instructions for OmniParser configuration
- Add TODO for future Anthropic ComputerUse integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent b854f42 commit e31a8dd

File tree

3 files changed

+74
-16
lines changed

3 files changed

+74
-16
lines changed

omnimcp/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,36 @@ omnimcp server
6969
# Run in debug mode to visualize screen elements
7070
omnimcp debug
7171

72+
# Connect to a remote OmniParser server
73+
omnimcp cli --server-url=https://your-omniparser-server.example.com
74+
75+
# Allow running even if OmniParser isn't available (limited functionality)
76+
omnimcp cli --allow-no-parser
77+
7278
# With additional options
7379
omnimcp cli --use-normalized-coordinates
7480
omnimcp debug --debug-dir=/path/to/debug/folder
7581
```
7682

83+
### OmniParser Configuration
84+
85+
OmniMCP requires access to an OmniParser server for analyzing screenshots:
86+
87+
1. **Use a Remote OmniParser Server** (Recommended)
88+
```bash
89+
omnimcp cli --server-url=https://your-omniparser-server.example.com
90+
```
91+
92+
2. **Use the Default Local Server**
93+
- OmniMCP will try to connect to `http://localhost:8000` by default
94+
- This requires running an OmniParser server locally
95+
96+
By default, OmniMCP will fail if it can't connect to an OmniParser server. Use the `--allow-no-parser` flag to run with limited functionality when no parser is available.
97+
98+
### TODO: Anthropic ComputerUse Integration
99+
100+
Future versions may integrate with Anthropic's ComputerUse system, which provides an official way for Claude to interact with computers through a virtualized desktop environment.
101+
77102
## Features
78103

79104
- Visual UI analysis with OmniParser

openadapt/omnimcp.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -302,19 +302,22 @@ def __init__(
302302
self,
303303
server_url: Optional[str] = None,
304304
claude_api_key: Optional[str] = None,
305-
use_normalized_coordinates: bool = False
305+
use_normalized_coordinates: bool = False,
306+
allow_no_parser: bool = False
306307
):
307308
"""Initialize OmniMCP.
308309
309310
Args:
310311
server_url: URL of OmniParser server
311312
claude_api_key: API key for Claude (overrides config)
312313
use_normalized_coordinates: If True, use normalized (0-1) coordinates
314+
allow_no_parser: If True, continue even if OmniParser is not available
313315
"""
314316
self.omniparser = OmniParserProvider(server_url)
315317
self.visual_state = VisualState()
316318
self.claude_api_key = claude_api_key or config.ANTHROPIC_API_KEY
317319
self.use_normalized_coordinates = use_normalized_coordinates
320+
self.allow_no_parser = allow_no_parser
318321

319322
# Initialize controllers for keyboard and mouse
320323
self.keyboard_controller = keyboard.Controller()
@@ -329,6 +332,12 @@ def __init__(
329332
if not self.omniparser.is_available():
330333
logger.info("OmniParser not available, attempting to deploy...")
331334
self.omniparser.deploy()
335+
336+
# Check again after deployment attempt
337+
if not self.omniparser.is_available() and not allow_no_parser:
338+
raise RuntimeError(
339+
"OmniParser server is not available. Please ensure it's running or use --allow-no-parser flag."
340+
)
332341

333342
def update_visual_state(self) -> VisualState:
334343
"""Take screenshot and update visual state using OmniParser.
@@ -339,16 +348,31 @@ def update_visual_state(self) -> VisualState:
339348
# Take screenshot
340349
screenshot = utils.take_screenshot()
341350

342-
# Convert to bytes
343-
img_byte_arr = io.BytesIO()
344-
screenshot.save(img_byte_arr, format='PNG')
345-
img_bytes = img_byte_arr.getvalue()
346-
347-
# Parse with OmniParser
348-
result = self.omniparser.parse_screenshot(img_bytes)
351+
# Update the screenshot in visual state regardless of parser availability
352+
self.visual_state.screenshot = screenshot
353+
self.visual_state.timestamp = time.time()
349354

350-
# Update visual state
351-
self.visual_state.update_from_omniparser(result, screenshot)
355+
# If OmniParser is available, use it to analyze the screenshot
356+
if self.omniparser.is_available():
357+
# Convert to bytes
358+
img_byte_arr = io.BytesIO()
359+
screenshot.save(img_byte_arr, format='PNG')
360+
img_bytes = img_byte_arr.getvalue()
361+
362+
# Parse with OmniParser
363+
result = self.omniparser.parse_screenshot(img_bytes)
364+
365+
# Update visual state
366+
self.visual_state.update_from_omniparser(result, screenshot)
367+
elif not self.allow_no_parser:
368+
# If parser not available and not allowed to continue without it, raise error
369+
raise RuntimeError(
370+
"OmniParser server is not available. Cannot update visual state."
371+
)
372+
else:
373+
# If parser not available but allowed to continue, log warning
374+
logger.warning("OmniParser not available. Visual state will have no UI elements.")
375+
self.visual_state.elements = []
352376

353377
return self.visual_state
354378

openadapt/run_omnimcp.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ def cli(
5959
server_url=None,
6060
claude_api_key=None,
6161
use_normalized_coordinates=False,
62-
debug_dir=None
62+
debug_dir=None,
63+
allow_no_parser=False
6364
):
6465
"""Run OmniMCP in CLI mode.
6566
@@ -77,12 +78,14 @@ def cli(
7778
claude_api_key: Claude API key (if not provided, uses value from config.py)
7879
use_normalized_coordinates: Use normalized (0-1) coordinates instead of pixels
7980
debug_dir: Directory to save debug visualizations
81+
allow_no_parser: If True, continue even if OmniParser is not available
8082
"""
8183
# Create OmniMCP instance
8284
omnimcp = OmniMCP(
8385
server_url=server_url,
8486
claude_api_key=claude_api_key, # Will use config.ANTHROPIC_API_KEY if None
85-
use_normalized_coordinates=use_normalized_coordinates
87+
use_normalized_coordinates=use_normalized_coordinates,
88+
allow_no_parser=allow_no_parser
8689
)
8790

8891
# Handle debug directory if specified
@@ -107,7 +110,8 @@ def server(
107110
server_url=None,
108111
claude_api_key=None,
109112
use_normalized_coordinates=False,
110-
debug_dir=None
113+
debug_dir=None,
114+
allow_no_parser=False
111115
):
112116
"""Run OmniMCP as an MCP server.
113117
@@ -127,12 +131,14 @@ def server(
127131
claude_api_key: Claude API key (if not provided, uses value from config.py)
128132
use_normalized_coordinates: Use normalized (0-1) coordinates instead of pixels
129133
debug_dir: Directory to save debug visualizations
134+
allow_no_parser: If True, continue even if OmniParser is not available
130135
"""
131136
# Create OmniMCP instance
132137
omnimcp = OmniMCP(
133138
server_url=server_url,
134139
claude_api_key=claude_api_key, # Will use config.ANTHROPIC_API_KEY if None
135-
use_normalized_coordinates=use_normalized_coordinates
140+
use_normalized_coordinates=use_normalized_coordinates,
141+
allow_no_parser=allow_no_parser
136142
)
137143

138144
# Handle debug directory if specified
@@ -157,7 +163,8 @@ def debug(
157163
server_url=None,
158164
claude_api_key=None,
159165
use_normalized_coordinates=False,
160-
debug_dir=None
166+
debug_dir=None,
167+
allow_no_parser=False
161168
):
162169
"""Run OmniMCP in debug mode.
163170
@@ -174,12 +181,14 @@ def debug(
174181
claude_api_key: Claude API key (if not provided, uses value from config.py)
175182
use_normalized_coordinates: Use normalized (0-1) coordinates instead of pixels
176183
debug_dir: Directory to save debug visualizations
184+
allow_no_parser: If True, continue even if OmniParser is not available
177185
"""
178186
# Create OmniMCP instance
179187
omnimcp = OmniMCP(
180188
server_url=server_url,
181189
claude_api_key=claude_api_key, # Will use config.ANTHROPIC_API_KEY if None
182-
use_normalized_coordinates=use_normalized_coordinates
190+
use_normalized_coordinates=use_normalized_coordinates,
191+
allow_no_parser=allow_no_parser
183192
)
184193

185194
# Create debug directory if not specified

0 commit comments

Comments
 (0)