Skip to content

Commit 0763da3

Browse files
authored
fix(cli): add ci_mode to cli.py
- add ci_mode to cli.py - save log to run dir - add config.LOG_DIR/RUN_OUTPUT_DIR/ANTHROPIC_DEFAULT_MODEL - restore comment - add setup_run_logging - update .gitignore
2 parents 65b8d57 + 1e745b5 commit 0763da3

File tree

7 files changed

+127
-46
lines changed

7 files changed

+127
-46
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,3 +48,7 @@ jobs:
4848
env:
4949
ANTHROPIC_API_KEY: "ci_dummy_key"
5050
run: uv run pytest tests/
51+
52+
# --- 8. Smoke test cli.py
53+
- name: Run CLI Smoke Test (--help)
54+
run: uv run python cli.py --help

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,6 @@
44
omnimcp.egg-info/
55
omnimcp.log
66
__pycache__
7+
runs/
8+
logs/
9+
images/*/

cli.py

Lines changed: 44 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
#!/usr/bin/env python
12
# cli.py
23

34
"""
@@ -10,20 +11,7 @@
1011

1112
import fire
1213

13-
# Import necessary components from the project
14-
from omnimcp.agent_executor import AgentExecutor
15-
from omnimcp.config import config
16-
from omnimcp.core import plan_action_for_ui
17-
from omnimcp.input import InputController, _pynput_error # Check pynput import status
18-
from omnimcp.omniparser.client import OmniParserClient
19-
from omnimcp.omnimcp import VisualState
20-
from omnimcp.utils import (
21-
logger,
22-
draw_bounding_boxes,
23-
draw_action_highlight,
24-
NSScreen, # Check for AppKit on macOS
25-
)
26-
14+
from omnimcp.utils import logger
2715

2816
# Default configuration
2917
DEFAULT_OUTPUT_DIR = "runs"
@@ -35,6 +23,7 @@ def run(
3523
goal: str = DEFAULT_GOAL,
3624
max_steps: int = DEFAULT_MAX_STEPS,
3725
output_dir: str = DEFAULT_OUTPUT_DIR,
26+
ci_mode: bool = False,
3827
):
3928
"""
4029
Runs the OmniMCP agent to achieve a specified goal.
@@ -43,9 +32,34 @@ def run(
4332
goal: The natural language goal for the agent.
4433
max_steps: Maximum number of steps to attempt.
4534
output_dir: Base directory to save run artifacts (timestamped subdirs).
35+
ci_mode: Run in CI mode (skips API validation and actual execution).
4636
"""
4737
# --- Initial Checks ---
4838
logger.info("--- OmniMCP CLI ---")
39+
40+
# Skip import-time checks if we're in CI mode
41+
if ci_mode:
42+
logger.info("Running in CI mode - skipping credential checks and execution")
43+
return 0
44+
45+
# Delay imports to avoid credential checks at import time
46+
try:
47+
# Import necessary components from the project
48+
from omnimcp.config import config
49+
from omnimcp.input import InputController, _pynput_error
50+
from omnimcp.agent_executor import AgentExecutor
51+
from omnimcp.core import plan_action_for_ui
52+
from omnimcp.omniparser.client import OmniParserClient
53+
from omnimcp.visual_state import VisualState
54+
from omnimcp.utils import (
55+
draw_bounding_boxes,
56+
draw_action_highlight,
57+
NSScreen, # Check for AppKit on macOS
58+
)
59+
except ImportError as e:
60+
logger.critical(f"Required dependency not found: {e}")
61+
return 1
62+
4963
logger.info("Performing initial checks...")
5064
success = True
5165

@@ -84,7 +98,7 @@ def run(
8498

8599
if not success:
86100
logger.error("Prerequisite checks failed. Exiting.")
87-
sys.exit(1)
101+
return 1
88102

89103
# --- Component Initialization ---
90104
logger.info("\nInitializing components...")
@@ -116,10 +130,10 @@ def run(
116130
logger.critical(
117131
" Ensure all requirements are installed (`uv pip install -e .`)"
118132
)
119-
sys.exit(1)
133+
return 1
120134
except Exception as e:
121135
logger.critical(f"❌ Component initialization failed: {e}", exc_info=True)
122-
sys.exit(1)
136+
return 1
123137

124138
# --- Agent Executor Initialization ---
125139
logger.info("\nInitializing Agent Executor...")
@@ -134,7 +148,7 @@ def run(
134148
logger.success("✅ Agent Executor initialized successfully.")
135149
except Exception as e:
136150
logger.critical(f"❌ Agent Executor initialization failed: {e}", exc_info=True)
137-
sys.exit(1)
151+
return 1
138152

139153
# --- User Confirmation & Start ---
140154
print("\n" + "=" * 60)
@@ -159,13 +173,13 @@ def run(
159173
)
160174
except KeyboardInterrupt:
161175
logger.warning("\nExecution interrupted by user (Ctrl+C).")
162-
sys.exit(1)
176+
return 1
163177
except Exception as run_e:
164178
logger.critical(
165179
f"\nAn unexpected error occurred during the agent run: {run_e}",
166180
exc_info=True,
167181
)
168-
sys.exit(1)
182+
return 1
169183
finally:
170184
# Optional: Add cleanup here if needed (e.g., stopping parser server)
171185
logger.info(
@@ -176,13 +190,20 @@ def run(
176190
# --- Exit ---
177191
if overall_success:
178192
logger.success("\nAgent run finished successfully (goal achieved).")
179-
sys.exit(0)
193+
return 0
180194
else:
181195
logger.error(
182196
"\nAgent run finished unsuccessfully (goal not achieved or error occurred)."
183197
)
184-
sys.exit(1)
198+
return 1
199+
200+
201+
def main():
202+
"""Main entry point that handles Fire's return code conversion."""
203+
result = fire.Fire(run)
204+
if isinstance(result, int):
205+
sys.exit(result)
185206

186207

187208
if __name__ == "__main__":
188-
fire.Fire(run)
209+
main()

omnimcp/__init__.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,45 @@
1-
# omnimcp/__init__.py
2-
31
import sys
42
import os
5-
63
from loguru import logger
74

85
from omnimcp.config import config
96

10-
log_dir = "logs"
11-
os.makedirs(log_dir, exist_ok=True)
12-
# Define file path using a format string recognized by loguru's sink
13-
log_file_path = os.path.join(log_dir, "run_{time:YYYY-MM-DD_HH-mm-ss}.log")
7+
# Remove default handler
8+
logger.remove()
149

15-
logger.remove() # Remove default handler to configure levels precisely
16-
# Log INFO and above to stderr
10+
# Add stderr handler (keep this functionality)
1711
logger.add(sys.stderr, level=config.LOG_LEVEL.upper() if config.LOG_LEVEL else "INFO")
18-
# Log DEBUG and above to a rotating file
19-
logger.add(
20-
log_file_path, rotation="50 MB", level="DEBUG", encoding="utf8", enqueue=True
21-
) # enqueue for async safety
2212

23-
logger.info("Logger configured.")
24-
# You might want to set LOG_LEVEL=DEBUG in your .env file now
13+
14+
# Define a function to configure run-specific logging
15+
def setup_run_logging(run_dir=None):
16+
"""
17+
Configure additional logging for a specific run.
18+
19+
Args:
20+
run_dir: Directory to store run-specific logs. If None, logs go to default logs directory.
21+
22+
Returns:
23+
The log file path
24+
"""
25+
# Determine log file location
26+
if run_dir:
27+
os.makedirs(run_dir, exist_ok=True)
28+
log_file_path = os.path.join(run_dir, "run.log")
29+
else:
30+
log_dir = config.LOG_DIR or "logs"
31+
os.makedirs(log_dir, exist_ok=True)
32+
log_file_path = os.path.join(log_dir, "run_{time:YYYY-MM-DD_HH-mm-ss}.log")
33+
34+
# Add run-specific log handler
35+
logger.add(
36+
log_file_path, rotation="50 MB", level="DEBUG", encoding="utf8", enqueue=True
37+
)
38+
39+
logger.info(f"Run logging configured. Log path: {log_file_path}")
40+
return log_file_path
41+
42+
43+
# Set up default logging (for non-run use)
44+
if not config.DISABLE_DEFAULT_LOGGING:
45+
setup_run_logging()

omnimcp/agent_executor.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,9 @@
88
from PIL import Image
99

1010

11-
# Used for type hinting if Protocol is simple:
12-
from .types import LLMActionPlan, UIElement
13-
from .utils import (
11+
from omnimcp import config, setup_run_logging
12+
from omnimcp.types import LLMActionPlan, UIElement
13+
from omnimcp.utils import (
1414
denormalize_coordinates,
1515
draw_action_highlight,
1616
draw_bounding_boxes,
@@ -194,10 +194,16 @@ def _execute_scroll(
194194

195195
# Comparison Note:
196196
# This `run` method implements an explicit, sequential perceive-plan-act loop.
197-
# Alternative agent architectures exist... (rest of comment remains same)
197+
# Alternative agent architectures exist, such as:
198+
# - ReAct (Reasoning-Acting): Where the LLM explicitly decides between
199+
# reasoning steps and action steps.
200+
# - Callback-driven: Where UI events or timers might trigger agent actions.
201+
# - More complex state machines or graph-based execution flows.
202+
# This simple sequential loop provides a clear baseline. Future work might explore
203+
# these alternatives for more complex or reactive tasks.
198204

199205
def run(
200-
self, goal: str, max_steps: int = 10, output_base_dir: str = "runs"
206+
self, goal: str, max_steps: int = 10, output_base_dir: Optional[str] = None
201207
) -> bool:
202208
"""
203209
Runs the main perceive-plan-act loop to achieve the goal.
@@ -206,16 +212,28 @@ def run(
206212
goal: The natural language goal for the agent.
207213
max_steps: Maximum number of steps to attempt.
208214
output_base_dir: Base directory to save run artifacts (timestamped).
215+
If None, uses config.RUN_OUTPUT_DIR.
209216
210217
Returns:
211218
True if the goal was achieved, False otherwise (error or max steps reached).
212219
"""
220+
221+
# Use configured output dir if none provided
222+
if output_base_dir is None:
223+
output_base_dir = config.RUN_OUTPUT_DIR
224+
213225
run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
214226
run_output_dir = os.path.join(output_base_dir, run_timestamp)
227+
215228
try:
216229
os.makedirs(run_output_dir, exist_ok=True)
230+
231+
# Configure run-specific logging
232+
log_path = setup_run_logging(run_output_dir)
233+
217234
logger.info(f"Starting agent run. Goal: '{goal}'")
218235
logger.info(f"Saving outputs to: {run_output_dir}")
236+
logger.info(f"Run log file: {log_path}")
219237
except OSError as e:
220238
logger.error(f"Failed to create output directory {run_output_dir}: {e}")
221239
return False

omnimcp/completions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
)
3333

3434
MAX_RETRIES = 3
35-
DEFAULT_MODEL = "claude-3-7-sonnet-20250219"
35+
DEFAULT_MODEL = config.ANTHROPIC_DEFAULT_MODEL or "claude-3-7-sonnet-20250219"
3636

3737

3838
@retry(

omnimcp/config.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@ class OmniMCPConfig(BaseSettings):
1414

1515
# Claude API configuration
1616
ANTHROPIC_API_KEY: Optional[str] = None
17+
ANTHROPIC_DEFAULT_MODEL: str = "claude-3-7-sonnet-20250219"
18+
# ANTHROPIC_DEFAULT_MODEL: str = "claude-3-haiku-20240307"
1719

1820
# Auto-shutdown OmniParser after 60min inactivity
1921
INACTIVITY_TIMEOUT_MINUTES: int = 60
@@ -29,13 +31,25 @@ class OmniMCPConfig(BaseSettings):
2931
# OmniParser deployment configuration
3032
PROJECT_NAME: str = "omniparser"
3133
REPO_URL: str = "https://github.com/microsoft/OmniParser.git"
32-
AWS_EC2_AMI: str = "ami-06835d15c4de57810"
34+
# AWS_EC2_AMI: str = "ami-06835d15c4de57810"
35+
AWS_EC2_AMI: str = (
36+
"ami-04631c7d8811d9bae" # Official AWS DLAMI Base Ubuntu 22.04 (G6 Compatible)
37+
)
3338
AWS_EC2_DISK_SIZE: int = 128 # GB
34-
AWS_EC2_INSTANCE_TYPE: str = "g4dn.xlarge" # (T4 16GB $0.526/hr x86_64)
39+
# AWS_EC2_INSTANCE_TYPE: str = "g4dn.xlarge" # (T4 16GB $0.526/hr x86_64)
40+
AWS_EC2_INSTANCE_TYPE: str = "g6.xlarge" # (L4 24GB $0.805/hr x86_64)
41+
# AWS_EC2_INSTANCE_TYPE: str = "p3.2xlarge" # (V100 16GB $3.06/hr x86_64)
3542
AWS_EC2_USER: str = "ubuntu"
3643
PORT: int = 8000 # FastAPI port
3744
COMMAND_TIMEOUT: int = 600 # 10 minutes
3845

46+
# Logging configuration
47+
LOG_DIR: Optional[str] = "logs"
48+
DISABLE_DEFAULT_LOGGING: bool = False
49+
50+
# Run output configuration
51+
RUN_OUTPUT_DIR: str = "runs"
52+
3953
# Debug settings
4054
# DEBUG: bool = False
4155
LOG_LEVEL: str = "INFO"

0 commit comments

Comments
 (0)