Wire AgentLoop into JSON executor (AC_run_agent) and MCP (ac_run_agent)

JE-Chen · JE-Chen · commit 1ca504e1a303 · 2026-05-24T23:34:46.000+08:00
diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py
@@ -474,6 +474,63 @@ def _presence_clear() -> Dict[str, Any]:
     return {"cleared": True}
 
 
+def _run_agent(goal: str,
+               backend: str = "anthropic",
+               max_steps: int = 25,
+               wall_seconds: float = 300.0,
+               model: Optional[str] = None,
+               max_tokens: int = 1024) -> Dict[str, Any]:
+    """Executor adapter: run the closed-loop ``AgentLoop`` against ``goal``.
+
+    ``backend`` picks the production backend ("anthropic" or "openai",
+    ignoring case and surrounding whitespace; falsy means "anthropic");
+    ``model`` overrides that backend's default model. ``max_steps`` and
+    ``wall_seconds`` go to ``AgentBudget``. The raw Anthropic computer-use
+    path stays available via :func:`_computer_use` / ``AC_computer_use``.
+
+    Returns a dict with ``succeeded``, ``elapsed_s``, ``final_message``
+    and ``steps``; raises ``ValueError`` for an unknown ``backend``.
+    """
+    from je_auto_control.utils.agent import AgentBudget, AgentLoop
+    from je_auto_control.utils.agent.backends import (
+        AnthropicAgentBackend, OpenAIAgentBackend,
+    )
+    from je_auto_control.utils.tool_use_schema import (
+        export_anthropic_tools, export_openai_tools,
+    )
+    name = (backend or "anthropic").strip().lower()
+    if name == "anthropic":
+        backend_cls, default_model = AnthropicAgentBackend, "claude-opus-4-7"
+        tools = export_anthropic_tools()
+    elif name == "openai":
+        backend_cls, default_model = OpenAIAgentBackend, "gpt-4o"
+        tools = export_openai_tools()
+    else:
+        raise ValueError(f"unknown agent backend: {backend!r}")
+    backend_obj = backend_cls(
+        tools=tools, model=model or default_model, max_tokens=int(max_tokens),
+    )
+    budget = AgentBudget(
+        max_steps=int(max_steps), wall_seconds=float(wall_seconds),
+    )
+    result = AgentLoop(backend_obj, budget=budget).run(goal)
+    return {
+        "succeeded": bool(result.succeeded),
+        "elapsed_s": float(result.elapsed_s),
+        "final_message": result.final_message,
+        "steps": [
+            {
+                "index": step.index,
+                "tool": step.tool,
+                "arguments": step.arguments,
+                "error": step.error,
+                "stop_reason": step.stop_reason,
+            }
+            for step in result.steps
+        ],
+    }
+
+
 def _computer_use(goal: str,
                   display_width_px: Optional[int] = None,
                   display_height_px: Optional[int] = None,
@@ -1478,6 +1535,9 @@ def __init__(self):
             # Computer-use (Anthropic computer_20250124 closed-loop agent)
             "AC_computer_use": _computer_use,
 
+            # Generic plan→act→verify→retry agent loop (Anthropic / OpenAI)
+            "AC_run_agent": _run_agent,
+
             # Cross-host DAG orchestrator
             "AC_run_dag": _run_dag,
 
diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py
@@ -762,6 +762,26 @@ def computer_use_tools() -> List[MCPTool]:
             handler=h.computer_use,
             annotations=DESTRUCTIVE,
         ),
+        MCPTool(
+            name="ac_run_agent",
+            description=("Drive the generic plan→act→verify→retry "
+                         "AgentLoop against goal. backend='anthropic' "
+                         "uses tool-use messages; 'openai' uses the "
+                         "Responses API. Returns {succeeded, "
+                         "final_message, elapsed_s, steps[]}. Requires "
+                         "the matching SDK + API key."),
+            input_schema=schema({
+                "goal": {"type": "string"},
+                "backend": {"type": "string",
+                             "enum": ["anthropic", "openai"]},
+                "max_steps": {"type": "integer"},
+                "wall_seconds": {"type": "number"},
+                "model": {"type": "string"},
+                "max_tokens": {"type": "integer"},
+            }, required=["goal"]),
+            handler=h.run_agent,
+            annotations=DESTRUCTIVE,
+        ),
     ]
 
 
diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py
@@ -1093,6 +1093,21 @@ def computer_use(goal: str,
     return result_to_dict(result)
 
 
+def run_agent(goal: str,
+              backend: str = "anthropic",
+              max_steps: int = 25,
+              wall_seconds: float = 300.0,
+              model: Optional[str] = None,
+              max_tokens: int = 1024) -> Dict[str, Any]:
+    """MCP handler: relay ``goal`` and its options to the executor adapter."""
+    from je_auto_control.utils.executor.action_executor import _run_agent
+    steps, secs, tokens = int(max_steps), float(wall_seconds), int(max_tokens)
+    return _run_agent(
+        goal=goal, backend=backend, model=model,
+        max_steps=steps, wall_seconds=secs, max_tokens=tokens,
+    )
+
+
 # === Scheduler / triggers / hotkey daemon ===================================
 
 def _job_to_dict(job: Any) -> Dict[str, Any]:
diff --git a/test/unit_test/headless/test_agent_executor_mcp_wiring.py b/test/unit_test/headless/test_agent_executor_mcp_wiring.py
@@ -0,0 +1,80 @@
+"""Wire-up tests for ``AC_run_agent`` + ``ac_run_agent``.
+
+The closed-loop AgentLoop already has direct-API tests in
+``test_agent_loop.py``. These tests cover the new executor + MCP
+adapters: they verify both surfaces register, dispatch to AgentLoop,
+and faithfully return the structured result. A ``FakeAgentBackend``
+is patched in so the tests never hit a real LLM.
+"""
+from __future__ import annotations
+
+from typing import Any, Dict, List
+
+from je_auto_control.utils.agent import FakeAgentBackend
+
+
+def _stub_backend_factory(decisions: List[Dict[str, Any]]):
+    """Return a backend-constructor stand-in that ignores all arguments."""
+    def build_fake(*_ignored_args, **_ignored_kwargs):
+        return FakeAgentBackend(decisions)
+    return build_fake
+
+
+def _patch_backends(monkeypatch, decisions):
+    """Swap both production backend classes for a FakeAgentBackend factory."""
+    import je_auto_control.utils.agent.backends as backends_pkg
+    from je_auto_control.utils.agent import agent_loop as loop_mod
+    fake_ctor = _stub_backend_factory(decisions)
+    for attr in ("AnthropicAgentBackend", "OpenAIAgentBackend"):
+        monkeypatch.setattr(backends_pkg, attr, fake_ctor)
+    # Neutralise the screenshot helper so the loop never tries to capture
+    # a real frame on the CI runner.
+    monkeypatch.setattr(loop_mod, "_default_screenshot", lambda: None)
+
+
+def test_executor_registers_ac_run_agent():
+    from je_auto_control.utils.executor.action_executor import executor
+    assert "AC_run_agent" in executor.known_commands()  # registration only
+
+
+def test_mcp_registry_exposes_ac_run_agent():
+    """The default MCP tool registry must list ``ac_run_agent``."""
+    import je_auto_control.utils.mcp_server.tools as mcp_tools
+    registry = mcp_tools.build_default_tool_registry()
+    registered = [entry.name for entry in registry]
+    assert "ac_run_agent" in registered
+
+
+def test_executor_path_runs_agent_loop(monkeypatch):
+    """``_run_agent`` must relay the stubbed loop outcome to its caller."""
+    # The script holds a single stop decision and no tool call, so nothing
+    # here asks the loop to dispatch a real AC_* command.
+    script = [{"stop": True, "message": "done by stub"}]
+    _patch_backends(monkeypatch, script)
+    from je_auto_control.utils.executor.action_executor import _run_agent
+    outcome = _run_agent(
+        goal="probe", backend="anthropic", max_steps=2, wall_seconds=5.0,
+    )
+    assert outcome["succeeded"] is True
+    assert outcome["final_message"] == "done by stub"
+    assert len(outcome["steps"]) == 1
+
+
+def test_mcp_handler_round_trips(monkeypatch):
+    """The MCP ``run_agent`` handler must surface the stubbed agent result."""
+    script = [{"stop": True, "message": "mcp-ok"}]
+    _patch_backends(monkeypatch, script)
+    from je_auto_control.utils.mcp_server.tools._handlers import run_agent
+    # backend="openai" routes through the adapter's second backend branch.
+    reply = run_agent(
+        goal="probe-mcp", backend="openai", max_steps=2, wall_seconds=5.0,
+    )
+    assert reply["succeeded"] is True
+    assert reply["final_message"] == "mcp-ok"
+
+
+def test_unknown_backend_raises():
+    import pytest
+    from je_auto_control.utils.executor.action_executor import _run_agent
+    with pytest.raises(ValueError, match="unknown agent backend"):
+        _run_agent(goal="x", backend="bogus")  # neither anthropic nor openai