Skip to content

Commit fd224b0

Browse files
authored
refactor: Introduce AgentExecutor for core loop orchestration
2 parents 1776cdf + a8c54db commit fd224b0

File tree

8 files changed

+1210
-818
lines changed

8 files changed

+1210
-818
lines changed

README.md

Lines changed: 99 additions & 345 deletions
Large diffs are not rendered by default.

cli.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# cli.py
2+
3+
"""
4+
Command-line interface for running OmniMCP agent tasks using AgentExecutor.
5+
"""
6+
7+
import platform
8+
import sys
9+
import time
10+
11+
import fire
12+
13+
# Import necessary components from the project
14+
from omnimcp.agent_executor import AgentExecutor
15+
from omnimcp.config import config
16+
from omnimcp.core import plan_action_for_ui
17+
from omnimcp.input import InputController, _pynput_error # Check pynput import status
18+
from omnimcp.omniparser.client import OmniParserClient
19+
from omnimcp.omnimcp import VisualState
20+
from omnimcp.utils import (
21+
logger,
22+
draw_bounding_boxes,
23+
draw_action_highlight,
24+
NSScreen, # Check for AppKit on macOS
25+
)
26+
27+
28+
# Default configuration
29+
# Default for run()'s `output_dir`: base directory for saved run artifacts.
DEFAULT_OUTPUT_DIR = "runs"
30+
# Default for run()'s `max_steps`: maximum number of steps to attempt.
DEFAULT_MAX_STEPS = 10
31+
# Default for run()'s `goal`: natural-language goal used when none is given.
DEFAULT_GOAL = "Open calculator and compute 5 * 9"
32+
33+
34+
def run(
35+
goal: str = DEFAULT_GOAL,
36+
max_steps: int = DEFAULT_MAX_STEPS,
37+
output_dir: str = DEFAULT_OUTPUT_DIR,
38+
):
39+
"""
40+
Runs the OmniMCP agent to achieve a specified goal.
41+
42+
Args:
43+
goal: The natural language goal for the agent.
44+
max_steps: Maximum number of steps to attempt.
45+
output_dir: Base directory to save run artifacts (timestamped subdirs).
46+
"""
47+
# --- Initial Checks ---
48+
logger.info("--- OmniMCP CLI ---")
49+
logger.info("Performing initial checks...")
50+
success = True
51+
52+
# 1. API Key Check
53+
if not config.ANTHROPIC_API_KEY:
54+
logger.critical(
55+
"❌ ANTHROPIC_API_KEY not found in config or .env file. LLM planning requires this."
56+
)
57+
success = False
58+
else:
59+
logger.info("✅ ANTHROPIC_API_KEY found.")
60+
61+
# 2. pynput Check
62+
if _pynput_error:
63+
logger.critical(
64+
f"❌ Input control library (pynput) failed to load: {_pynput_error}"
65+
)
66+
logger.critical(
67+
" Real action execution will not work. Is it installed and prerequisites met (e.g., display server)?"
68+
)
69+
success = False
70+
else:
71+
logger.info("✅ Input control library (pynput) loaded.")
72+
73+
# 3. macOS Scaling Check
74+
if platform.system() == "darwin":
75+
if not NSScreen:
76+
logger.warning(
77+
"⚠️ AppKit (pyobjc-framework-Cocoa) not found or failed to import."
78+
)
79+
logger.warning(
80+
" Coordinate scaling for Retina displays may be incorrect. Install with 'uv pip install pyobjc-framework-Cocoa'."
81+
)
82+
else:
83+
logger.info("✅ AppKit found for macOS scaling.")
84+
85+
if not success:
86+
logger.error("Prerequisite checks failed. Exiting.")
87+
sys.exit(1)
88+
89+
# --- Component Initialization ---
90+
logger.info("\nInitializing components...")
91+
try:
92+
# OmniParser Client (handles deployment if URL not set)
93+
parser_client = OmniParserClient(
94+
server_url=config.OMNIPARSER_URL, auto_deploy=(not config.OMNIPARSER_URL)
95+
)
96+
logger.info(f" - OmniParserClient ready (URL: {parser_client.server_url})")
97+
98+
# Perception Component
99+
visual_state = VisualState(parser_client=parser_client)
100+
logger.info(" - VisualState (Perception) ready.")
101+
102+
# Execution Component
103+
controller = InputController()
104+
logger.info(" - InputController (Execution) ready.")
105+
106+
# Planner Function (already imported)
107+
logger.info(" - LLM Planner function ready.")
108+
109+
# Visualization Functions (already imported)
110+
logger.info(" - Visualization functions ready.")
111+
112+
except ImportError as e:
113+
logger.critical(
114+
f"❌ Component initialization failed due to missing dependency: {e}"
115+
)
116+
logger.critical(
117+
" Ensure all requirements are installed (`uv pip install -e .`)"
118+
)
119+
sys.exit(1)
120+
except Exception as e:
121+
logger.critical(f"❌ Component initialization failed: {e}", exc_info=True)
122+
sys.exit(1)
123+
124+
# --- Agent Executor Initialization ---
125+
logger.info("\nInitializing Agent Executor...")
126+
try:
127+
agent_executor = AgentExecutor(
128+
perception=visual_state,
129+
planner=plan_action_for_ui,
130+
execution=controller,
131+
box_drawer=draw_bounding_boxes,
132+
highlighter=draw_action_highlight,
133+
)
134+
logger.success("✅ Agent Executor initialized successfully.")
135+
except Exception as e:
136+
logger.critical(f"❌ Agent Executor initialization failed: {e}", exc_info=True)
137+
sys.exit(1)
138+
139+
# --- User Confirmation & Start ---
140+
print("\n" + "=" * 60)
141+
print(" WARNING: This script WILL take control of your mouse and keyboard!")
142+
print(f" TARGET OS: {platform.system()}")
143+
print(" Please ensure no sensitive information is visible on screen.")
144+
print(" To stop execution manually: Move mouse RAPIDLY to a screen corner")
145+
print(" OR press Ctrl+C in the terminal.")
146+
print("=" * 60 + "\n")
147+
for i in range(5, 0, -1):
148+
print(f"Starting in {i}...", end="\r")
149+
time.sleep(1)
150+
print("Starting agent run now! ")
151+
152+
# --- Run the Agent ---
153+
overall_success = False
154+
try:
155+
overall_success = agent_executor.run(
156+
goal=goal,
157+
max_steps=max_steps,
158+
output_base_dir=output_dir,
159+
)
160+
except KeyboardInterrupt:
161+
logger.warning("\nExecution interrupted by user (Ctrl+C).")
162+
sys.exit(1)
163+
except Exception as run_e:
164+
logger.critical(
165+
f"\nAn unexpected error occurred during the agent run: {run_e}",
166+
exc_info=True,
167+
)
168+
sys.exit(1)
169+
finally:
170+
# Optional: Add cleanup here if needed (e.g., stopping parser server)
171+
logger.info(
172+
"Reminder: If using auto-deploy, stop the parser server with "
173+
"'python -m omnimcp.omniparser.server stop' when finished."
174+
)
175+
176+
# --- Exit ---
177+
if overall_success:
178+
logger.success("\nAgent run finished successfully (goal achieved).")
179+
sys.exit(0)
180+
else:
181+
logger.error(
182+
"\nAgent run finished unsuccessfully (goal not achieved or error occurred)."
183+
)
184+
sys.exit(1)
185+
186+
187+
# Script entry point: hand `run` to fire.Fire when this file is executed directly.
if __name__ == "__main__":
188+
fire.Fire(run)

0 commit comments

Comments
 (0)