Skip to content

Commit c435c4a

Browse files
committed
WIP: Move core functionality to omnimcp package
This is a work-in-progress commit that: 1. Moves OmniMCP, OmniParser adapter, and MCP server to omnimcp package 2. Updates imports and dependencies to match new structure 3. Adds Computer Use integration (loop.py) as a demo 4. Updates setup.py to include the new entry points Still TODO: - Ensure all imports from OpenAdapt are minimal (just utils.py) - Finish testing the OmniParser + MCP integration - Clean up any remaining references to OpenAdapt
1 parent 9cec405 commit c435c4a

File tree

10 files changed

+793
-18
lines changed

10 files changed

+793
-18
lines changed

omnimcp/README.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ omnimcp server
7070
# Run in debug mode to visualize screen elements
7171
omnimcp debug
7272

73+
# Run Computer Use mode (Anthropic's official Computer Use integration)
74+
computer-use
75+
7376
# Connect to a remote OmniParser server
7477
omnimcp cli --server-url=https://your-omniparser-server.example.com
7578

@@ -85,6 +88,12 @@ omnimcp cli --auto-deploy-parser=False
8588
# With additional options
8689
omnimcp cli --use-normalized-coordinates
8790
omnimcp debug --debug-dir=/path/to/debug/folder
91+
92+
# Computer Use with specific model
93+
computer-use --model=claude-3-opus-20240229
94+
95+
# Computer Use with auto-deploy of OmniParser
96+
computer-use --auto-deploy-parser --skip-confirmation
8897
```
8998

9099
### OmniParser Configuration
@@ -159,8 +168,10 @@ OmniMCP uses code from the OpenAdapt repository but with a minimal set of depend
159168
160169
- `omnimcp/pyproject.toml`: Minimal dependency list
161170
- `omnimcp/setup.py`: Setup script that adds OpenAdapt to the Python path
162-
- Original modules from OpenAdapt:
163-
- `openadapt/omnimcp.py`: Core functionality
164-
- `openadapt/run_omnimcp.py`: CLI interface
165-
- `openadapt/adapters/omniparser.py`: OmniParser integration
166-
- `openadapt/mcp/`: Model Control Protocol implementation
171+
- `omnimcp/omnimcp/` package:
172+
- `omnimcp/omnimcp/omnimcp.py`: Core OmniMCP functionality
173+
- `omnimcp/omnimcp/run_omnimcp.py`: CLI interface
174+
- `omnimcp/omnimcp/computer_use.py`: Computer Use integration
175+
- `omnimcp/omnimcp/pathing.py`: Python path configuration
176+
- `omnimcp/omnimcp/adapters/omniparser.py`: OmniParser client and provider
177+
- `omnimcp/omnimcp/mcp/server.py`: Model Context Protocol server implementation
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Adapters for OmniMCP."""
2+
3+
from omnimcp.adapters.omniparser import OmniParserProvider, OmniParserClient
4+
5+
__all__ = ["OmniParserProvider", "OmniParserClient"]
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@
77
import io
88
from typing import Dict, List, Any, Optional
99

10+
from loguru import logger
1011
import requests
1112
from PIL import Image
1213

13-
from openadapt.custom_logger import logger
14-
1514

1615
class OmniParserClient:
1716
"""Client for the OmniParser API."""

omnimcp/omnimcp/computer_use.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
"""Anthropic Computer Use integration for OmniMCP.
2+
3+
This module provides helpers for running Anthropic's Computer Use Docker container
4+
with proper configuration for use with Claude.
5+
6+
Usage:
7+
------
8+
# Run Computer Use with default settings
9+
python -m omnimcp.computer_use
10+
11+
# Run with specific API key
12+
python -m omnimcp.computer_use --api-key=your_api_key
13+
14+
# Run with custom screen size
15+
python -m omnimcp.computer_use --width=1280 --height=800
16+
"""
17+
18+
import os
19+
import platform
20+
import subprocess
21+
import sys
22+
23+
import fire
24+
from loguru import logger
25+
26+
# Import pathing first to ensure OpenAdapt is in the path
27+
from . import pathing
28+
from openadapt.config import config
29+
30+
31+
def ensure_docker_installed():
    """Verify that the Docker CLI is installed and can be executed.

    Runs ``docker --version`` and logs the version string it prints.

    Returns:
        True if the command ran and exited with status 0, False otherwise.
    """
    try:
        result = subprocess.run(
            ["docker", "--version"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError (no `docker` on PATH) as well as
        # other launch failures such as PermissionError, so a broken install
        # is reported here instead of surfacing as a traceback.
        logger.error("Docker is not installed or not in the PATH. Please install Docker to use Computer Use.")
        return False

    logger.info(f"Docker is installed: {result.stdout.strip()}")
    return True
45+
46+
47+
def get_home_dir():
    """Return the current user's home directory as a string.

    Delegates to ``os.path.expanduser("~")`` so the same call works on every
    platform the standard library supports.
    """
    home_path = os.path.expanduser("~")
    return home_path
50+
51+
52+
def run_computer_use(
    api_key: str = None,
    width: int = 1024,
    height: int = 768,
    api_provider: str = "anthropic",
    model: str = "claude-3-sonnet-20240229"
):
    """Run Anthropic's Computer Use Docker container.

    The container is started in the foreground with ``docker run -it``; this
    call blocks until the container exits or the user presses Ctrl+C.

    Args:
        api_key: Anthropic API key (uses config.ANTHROPIC_API_KEY if not provided)
        width: Screen width for the virtual desktop
        height: Screen height for the virtual desktop
        api_provider: API provider (anthropic, bedrock, or vertex)
        model: Claude model to use

    Returns:
        The exit code of the ``docker run`` process, 0 if the run was
        interrupted with Ctrl+C, 1 if launching the process raised an
        exception, or None if Docker is unavailable or no API key is set.
    """
    # Local import: only needed to render the command for the log line.
    import shlex

    if not ensure_docker_installed():
        return

    # Get API key from config if not provided
    actual_api_key = api_key or config.ANTHROPIC_API_KEY
    if not actual_api_key or actual_api_key == "<ANTHROPIC_API_KEY>":
        logger.error("Anthropic API key not set in config or as parameter")
        return

    # Define the Docker image
    docker_image = "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"

    # Environment for the container. "-e ANTHROPIC_API_KEY" without a value
    # tells docker to copy the variable from its own environment, so the key
    # never appears on the command line (process listings, logs).
    env_args = [
        "-e", "ANTHROPIC_API_KEY",
        "-e", f"API_PROVIDER={api_provider}",
        "-e", f"WIDTH={width}",
        "-e", f"HEIGHT={height}",
        "-e", f"CLAUDE_MODEL={model}",
    ]
    docker_env = dict(os.environ, ANTHROPIC_API_KEY=str(actual_api_key))

    # Set up volume mounts
    home_dir = get_home_dir()
    volume_args = [
        "-v", f"{home_dir}/.anthropic:/home/computeruse/.anthropic",
    ]

    # Set up port mappings
    port_args = [
        "-p", "5900:5900",  # VNC
        "-p", "8501:8501",  # Streamlit
        "-p", "6080:6080",  # noVNC
        "-p", "8080:8080",  # Combined interface
    ]

    # Build the command as an argument list: no shell is involved, so values
    # containing spaces or shell metacharacters are passed through verbatim.
    docker_cmd = [
        "docker", "run", "-it",
        *env_args,
        *volume_args,
        *port_args,
        docker_image,
    ]

    # Log the command; it contains no API key because the key travels via the
    # child process environment instead of argv.
    safe_cmd = " ".join(shlex.quote(arg) for arg in docker_cmd)
    logger.info(f"Running Docker command: {safe_cmd}")

    # Print instructions for the user
    print("\n" + "="*80)
    print("Starting Anthropic Computer Use Docker container")
    print("="*80)
    print("\nOnce the container is running, open your browser to:")
    print(" Main interface: http://localhost:8080")
    print(" Streamlit only: http://localhost:8501")
    print(" Desktop view: http://localhost:6080/vnc.html")
    print("\nPress Ctrl+C to stop the container\n")

    try:
        # Run the Docker container interactively
        process = subprocess.run(docker_cmd, env=docker_env)
        return process.returncode
    except KeyboardInterrupt:
        logger.info("Docker container interrupted by user")
        return 0
    except Exception as e:
        logger.error(f"Error running Docker container: {e}")
        return 1
132+
133+
134+
def main():
    """Command-line entry point: expose ``run_computer_use`` through Fire.

    Fire maps command-line flags such as ``--width`` or ``--api-key`` onto the
    keyword arguments of ``run_computer_use``.
    """
    entry_point = run_computer_use
    fire.Fire(entry_point)


# Allow running the module directly: python -m omnimcp.computer_use
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)