1- """Entry point for OmniMCP CLI."""
1+ """Entry point for OmniMCP CLI.
2+
3+ This module provides a command-line interface for OmniMCP, allowing you to run
4+ it in various modes (CLI, MCP server, debug visualizations).
5+ """
6+
7+ import datetime
8+ import fire
9+ import os
10+ from loguru import logger
211
312# Setup path to include OpenAdapt modules
413from . import pathing
14+ from .omnimcp import OmniMCP
15+ from .config import config
16+
17+
# Format used for every timestamp embedded in a debug directory or file name.
_TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"

# How many detected elements ``debug`` lists as examples, and how many
# characters of each element's content are shown before it is truncated.
_MAX_EXAMPLE_ELEMENTS = 5
_MAX_CONTENT_PREVIEW = 50


def _timestamp():
    """Return the current local time formatted with ``_TIMESTAMP_FORMAT``."""
    return datetime.datetime.now().strftime(_TIMESTAMP_FORMAT)


def _build_omnimcp(
    server_url,
    claude_api_key,
    use_normalized_coordinates,
    allow_no_parser,
    auto_deploy_parser,
    skip_confirmation,
):
    """Construct the ``OmniMCP`` instance shared by every runner mode."""
    return OmniMCP(
        server_url=server_url,
        # Per the original note: OmniMCP is expected to fall back to
        # config.ANTHROPIC_API_KEY when this is None.
        claude_api_key=claude_api_key,
        use_normalized_coordinates=use_normalized_coordinates,
        allow_no_parser=allow_no_parser,
        auto_deploy_parser=auto_deploy_parser,
        skip_confirmation=skip_confirmation,
    )


def _save_initial_debug(omnimcp, debug_dir):
    """Refresh the visual state and save it as ``initial_state_<ts>.png``."""
    os.makedirs(debug_dir, exist_ok=True)

    logger.info(f"Saving debug visualization to {debug_dir}")
    debug_path = os.path.join(debug_dir, f"initial_state_{_timestamp()}.png")
    omnimcp.update_visual_state()
    omnimcp.save_visual_debug(debug_path)


def _log_coordinate_mode(use_normalized_coordinates):
    """Log which coordinate system the runner was asked to use."""
    mode = "normalized (0-1)" if use_normalized_coordinates else "absolute (pixels)"
    logger.info(f"Coordinate mode: {mode}")


def _preview(content):
    """Return ``content`` cut to ``_MAX_CONTENT_PREVIEW`` chars plus ``...``."""
    if len(content) > _MAX_CONTENT_PREVIEW:
        return content[:_MAX_CONTENT_PREVIEW] + "..."
    return content


class OmniMCPRunner:
    """OmniMCP runner with different modes of operation.

    Each public method is a mode (``cli``, ``server``, ``debug``) and all three
    accept the same set of options.
    """

    def cli(
        self,
        server_url=None,
        claude_api_key=None,
        use_normalized_coordinates=False,
        debug_dir=None,
        allow_no_parser=False,
        auto_deploy_parser=True,
        skip_confirmation=False
    ):
        """Run OmniMCP in CLI mode.

        In CLI mode, you can enter natural language commands directly in the terminal.
        OmniMCP will:
        1. Take a screenshot
        2. Analyze it with OmniParser to identify UI elements
        3. Use Claude to decide what action to take based on your command
        4. Execute the action (click, type, etc.)

        This mode is convenient for testing and doesn't require Claude Desktop.

        Args:
            server_url: URL of the OmniParser server
            claude_api_key: Claude API key (if not provided, uses value from config.py)
            use_normalized_coordinates: Use normalized (0-1) coordinates instead of pixels
            debug_dir: Directory to save debug visualizations
            allow_no_parser: If True, continue even if OmniParser is not available
            auto_deploy_parser: If True, attempt to deploy OmniParser if not available (default: True)
            skip_confirmation: If True, skip user confirmation for OmniParser deployment
        """
        omnimcp = _build_omnimcp(
            server_url=server_url,
            claude_api_key=claude_api_key,
            use_normalized_coordinates=use_normalized_coordinates,
            allow_no_parser=allow_no_parser,
            auto_deploy_parser=auto_deploy_parser,
            skip_confirmation=skip_confirmation,
        )

        # An initial snapshot is only written when a debug directory was given.
        if debug_dir:
            _save_initial_debug(omnimcp, debug_dir)

        logger.info("Starting OmniMCP in CLI mode")
        _log_coordinate_mode(use_normalized_coordinates)

        # Run CLI interaction loop
        omnimcp.run_interactive()

    def server(
        self,
        server_url=None,
        claude_api_key=None,
        use_normalized_coordinates=False,
        debug_dir=None,
        allow_no_parser=False,
        auto_deploy_parser=True,
        skip_confirmation=False
    ):
        """Run OmniMCP as an MCP server.

        In server mode, OmniMCP provides UI automation tools to Claude through the
        Model Context Protocol. The server exposes tools for:
        1. Getting the current screen state with UI elements
        2. Finding UI elements by description
        3. Clicking on elements or coordinates
        4. Typing text and pressing keys

        To use with Claude Desktop:
        1. Configure Claude Desktop to use this server
        2. Ask Claude to perform UI tasks

        Args:
            server_url: URL of the OmniParser server
            claude_api_key: Claude API key (if not provided, uses value from config.py)
            use_normalized_coordinates: Use normalized (0-1) coordinates instead of pixels
            debug_dir: Directory to save debug visualizations
            allow_no_parser: If True, continue even if OmniParser is not available
            auto_deploy_parser: If True, attempt to deploy OmniParser if not available (default: True)
            skip_confirmation: If True, skip user confirmation for OmniParser deployment
        """
        omnimcp = _build_omnimcp(
            server_url=server_url,
            claude_api_key=claude_api_key,
            use_normalized_coordinates=use_normalized_coordinates,
            allow_no_parser=allow_no_parser,
            auto_deploy_parser=auto_deploy_parser,
            skip_confirmation=skip_confirmation,
        )

        # An initial snapshot is only written when a debug directory was given.
        if debug_dir:
            _save_initial_debug(omnimcp, debug_dir)

        logger.info("Starting OmniMCP Model Context Protocol server")
        _log_coordinate_mode(use_normalized_coordinates)

        # Run MCP server
        omnimcp.run_mcp_server()

    def debug(
        self,
        server_url=None,
        claude_api_key=None,
        use_normalized_coordinates=False,
        debug_dir=None,
        allow_no_parser=False,
        auto_deploy_parser=True,
        skip_confirmation=False
    ):
        """Run OmniMCP in debug mode.

        Debug mode takes a screenshot, analyzes it with OmniParser, and saves
        a visualization showing the detected UI elements with their descriptions.

        This is useful for:
        - Understanding what UI elements OmniParser detects
        - Debugging issues with element detection
        - Fine-tuning OmniParser integration

        Args:
            server_url: URL of the OmniParser server
            claude_api_key: Claude API key (if not provided, uses value from config.py)
            use_normalized_coordinates: Use normalized (0-1) coordinates instead of pixels
            debug_dir: Directory to save debug visualizations; when omitted, a
                timestamped directory under ``~/omnimcp_debug`` is used
            allow_no_parser: If True, continue even if OmniParser is not available
            auto_deploy_parser: If True, attempt to deploy OmniParser if not available (default: True)
            skip_confirmation: If True, skip user confirmation for OmniParser deployment
        """
        omnimcp = _build_omnimcp(
            server_url=server_url,
            claude_api_key=claude_api_key,
            use_normalized_coordinates=use_normalized_coordinates,
            allow_no_parser=allow_no_parser,
            auto_deploy_parser=auto_deploy_parser,
            skip_confirmation=skip_confirmation,
        )

        # A single timestamp names both the default directory and the image
        # file, so the two can never disagree across a second boundary.
        timestamp = _timestamp()
        if not debug_dir:
            debug_dir = os.path.join(
                os.path.expanduser("~"), "omnimcp_debug", f"debug_{timestamp}"
            )

        os.makedirs(debug_dir, exist_ok=True)
        logger.info(f"Saving debug visualization to {debug_dir}")

        debug_path = os.path.join(debug_dir, f"screen_state_{timestamp}.png")

        # Update visual state and save debug
        logger.info("Taking screenshot and analyzing with OmniParser...")
        omnimcp.update_visual_state()
        omnimcp.save_visual_debug(debug_path)
        logger.info(f"Saved debug visualization to {debug_path}")

        # Report how many elements were detected, with a few examples.
        elements = omnimcp.visual_state.elements
        num_elements = len(elements)
        logger.info(f"Detected {num_elements} UI elements")

        if num_elements > 0:
            logger.info("Example elements:")
            for index, element in enumerate(elements[:_MAX_EXAMPLE_ELEMENTS], start=1):
                content = _preview(element.content)
                logger.info(
                    f" {index}. '{content}' at "
                    f"({element.x1},{element.y1},{element.x2},{element.y2})"
                )

            if num_elements > _MAX_EXAMPLE_ELEMENTS:
                logger.info(
                    f" ... and {num_elements - _MAX_EXAMPLE_ELEMENTS} more elements"
                )
207+
208+
def main():
    """Expose ``OmniMCPRunner`` as a command-line program through Python Fire."""
    fire.Fire(component=OmniMCPRunner)
5212
6- # Import from OpenAdapt module
7- from openadapt .run_omnimcp import main
8213
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments