Skip to content

Commit d66d74d

Browse files
authored
Merge branch 'main' into fix-duplicate-tool-result-error
2 parents 029ebc4 + 9f521a4 commit d66d74d

File tree

32 files changed

+848
-253
lines changed

32 files changed

+848
-253
lines changed

.agents/skills/run-eval.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ curl -X POST \
3232

3333
**Key parameters:**
3434
- `benchmark`: `swebench`, `swebenchmultimodal`, `gaia`, `swtbench`, `commit0`, `multiswebench`
35-
- `eval_limit`: `1`, `50`, `100`, `200`, `500`
35+
- `eval_limit`: Any positive integer (e.g., `1`, `10`, `50`, `200`)
3636
- `model_ids`: See `.github/run-eval/resolve_model_config.py` for available models
3737
- `benchmarks_branch`: Use feature branch from the benchmarks repo to test benchmark changes before merging
3838

.github/run-eval/resolve_model_config.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@
3737
"kimi-k2-thinking": {
3838
"id": "kimi-k2-thinking",
3939
"display_name": "Kimi K2 Thinking",
40-
"llm_config": {"model": "litellm_proxy/moonshot/kimi-k2-thinking"},
40+
"llm_config": {
41+
"model": "litellm_proxy/moonshot/kimi-k2-thinking",
42+
"temperature": 1.0,
43+
},
4144
},
4245
# https://www.kimi.com/blog/kimi-k2-5.html
4346
"kimi-k2.5": {
@@ -93,17 +96,26 @@
9396
"gemini-3-pro": {
9497
"id": "gemini-3-pro",
9598
"display_name": "Gemini 3 Pro",
96-
"llm_config": {"model": "litellm_proxy/gemini-3-pro-preview"},
99+
"llm_config": {
100+
"model": "litellm_proxy/gemini-3-pro-preview",
101+
"temperature": 0.0,
102+
},
97103
},
98104
"gemini-3-flash": {
99105
"id": "gemini-3-flash",
100106
"display_name": "Gemini 3 Flash",
101-
"llm_config": {"model": "litellm_proxy/gemini-3-flash-preview"},
107+
"llm_config": {
108+
"model": "litellm_proxy/gemini-3-flash-preview",
109+
"temperature": 0.0,
110+
},
102111
},
103112
"gemini-3.1-pro": {
104113
"id": "gemini-3.1-pro",
105114
"display_name": "Gemini 3.1 Pro",
106-
"llm_config": {"model": "litellm_proxy/gemini-3.1-pro-preview"},
115+
"llm_config": {
116+
"model": "litellm_proxy/gemini-3.1-pro-preview",
117+
"temperature": 0.0,
118+
},
107119
},
108120
"gpt-5.2": {
109121
"id": "gpt-5.2",
@@ -126,7 +138,10 @@
126138
"minimax-m2": {
127139
"id": "minimax-m2",
128140
"display_name": "MiniMax M2",
129-
"llm_config": {"model": "litellm_proxy/minimax/minimax-m2"},
141+
"llm_config": {
142+
"model": "litellm_proxy/minimax/minimax-m2",
143+
"temperature": 0.0,
144+
},
130145
},
131146
"minimax-m2.5": {
132147
"id": "minimax-m2.5",
@@ -140,7 +155,10 @@
140155
"minimax-m2.1": {
141156
"id": "minimax-m2.1",
142157
"display_name": "MiniMax M2.1",
143-
"llm_config": {"model": "litellm_proxy/minimax/MiniMax-M2.1"},
158+
"llm_config": {
159+
"model": "litellm_proxy/minimax/MiniMax-M2.1",
160+
"temperature": 0.0,
161+
},
144162
},
145163
"deepseek-v3.2-reasoner": {
146164
"id": "deepseek-v3.2-reasoner",
@@ -151,7 +169,8 @@
151169
"id": "qwen-3-coder",
152170
"display_name": "Qwen 3 Coder",
153171
"llm_config": {
154-
"model": "litellm_proxy/fireworks_ai/qwen3-coder-480b-a35b-instruct"
172+
"model": "litellm_proxy/fireworks_ai/qwen3-coder-480b-a35b-instruct",
173+
"temperature": 0.0,
155174
},
156175
},
157176
"nemotron-3-nano-30b": {
@@ -167,6 +186,7 @@
167186
"display_name": "GLM-4.7",
168187
"llm_config": {
169188
"model": "litellm_proxy/openrouter/z-ai/glm-4.7",
189+
"temperature": 0.0,
170190
# OpenRouter glm-4.7 is text-only despite LiteLLM reporting vision support
171191
"disable_vision": True,
172192
},
@@ -176,24 +196,34 @@
176196
"display_name": "GLM-5",
177197
"llm_config": {
178198
"model": "litellm_proxy/openrouter/z-ai/glm-5",
199+
"temperature": 0.0,
179200
# OpenRouter glm-5 is text-only despite LiteLLM reporting vision support
180201
"disable_vision": True,
181202
},
182203
},
183204
"qwen3-coder-next": {
184205
"id": "qwen3-coder-next",
185206
"display_name": "Qwen3 Coder Next",
186-
"llm_config": {"model": "litellm_proxy/openrouter/qwen/qwen3-coder-next"},
207+
"llm_config": {
208+
"model": "litellm_proxy/openrouter/qwen/qwen3-coder-next",
209+
"temperature": 0.0,
210+
},
187211
},
188212
"qwen3-coder-30b-a3b-instruct": {
189213
"id": "qwen3-coder-30b-a3b-instruct",
190214
"display_name": "Qwen3 Coder 30B A3B Instruct",
191-
"llm_config": {"model": "litellm_proxy/Qwen3-Coder-30B-A3B-Instruct"},
215+
"llm_config": {
216+
"model": "litellm_proxy/Qwen3-Coder-30B-A3B-Instruct",
217+
"temperature": 0.0,
218+
},
192219
},
193220
"gpt-oss-20b": {
194221
"id": "gpt-oss-20b",
195222
"display_name": "GPT OSS 20B",
196-
"llm_config": {"model": "litellm_proxy/gpt-oss-20b"},
223+
"llm_config": {
224+
"model": "litellm_proxy/gpt-oss-20b",
225+
"temperature": 0.0,
226+
},
197227
},
198228
}
199229

.github/workflows/run-eval.yml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,10 @@ on:
3232
default: false
3333
type: boolean
3434
eval_limit:
35-
description: Number of instances to run
35+
description: Number of instances to run (any positive integer)
3636
required: false
3737
default: '1'
38-
type: choice
39-
options:
40-
- '1'
41-
- '100'
42-
- '50'
43-
- '200'
44-
- '500'
38+
type: string
4539
model_ids:
4640
description: Comma-separated model IDs to evaluate. Must be keys of MODELS in resolve_model_config.py. Defaults to first model in that
4741
dict.
@@ -138,6 +132,14 @@ jobs:
138132
with:
139133
python-version: '3.13'
140134

135+
- name: Validate eval_limit
136+
if: github.event_name == 'workflow_dispatch'
137+
run: |
138+
if ! [[ "${{ github.event.inputs.eval_limit }}" =~ ^[1-9][0-9]*$ ]]; then
139+
echo "Error: eval_limit must be a positive integer, got: ${{ github.event.inputs.eval_limit }}"
140+
exit 1
141+
fi
142+
141143
- name: Validate SDK reference (semantic version check)
142144
if: github.event_name == 'workflow_dispatch'
143145
env:

examples/01_standalone_sdk/25_agent_delegation.py

Lines changed: 56 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99

1010
import os
1111

12-
from pydantic import SecretStr
13-
1412
from openhands.sdk import (
1513
LLM,
1614
Agent,
@@ -26,30 +24,26 @@
2624
DelegateTool,
2725
DelegationVisualizer,
2826
)
29-
from openhands.tools.preset.default import get_default_tools
27+
from openhands.tools.preset.default import get_default_tools, register_builtins_agents
3028

3129

3230
ONLY_RUN_SIMPLE_DELEGATION = False
3331

3432
logger = get_logger(__name__)
3533

3634
# Configure LLM and agent
37-
# You can get an API key from https://app.all-hands.dev/settings/api-keys
38-
api_key = os.getenv("LLM_API_KEY")
39-
assert api_key is not None, "LLM_API_KEY environment variable is not set."
40-
model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
4135
llm = LLM(
42-
model=model,
43-
api_key=SecretStr(api_key),
36+
model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"),
37+
api_key=os.getenv("LLM_API_KEY"),
4438
base_url=os.environ.get("LLM_BASE_URL", None),
4539
usage_id="agent",
4640
)
4741

4842
cwd = os.getcwd()
4943

50-
register_tool("DelegateTool", DelegateTool)
51-
tools = get_default_tools(enable_browser=False)
52-
tools.append(Tool(name="DelegateTool"))
44+
tools = get_default_tools(enable_browser=True)
45+
tools.append(Tool(name=DelegateTool.name))
46+
register_builtins_agents()
5347

5448
main_agent = Agent(
5549
llm=llm,
@@ -61,7 +55,7 @@
6155
visualizer=DelegationVisualizer(name="Delegator"),
6256
)
6357

64-
task_message = (
58+
conversation.send_message(
6559
"Forget about coding. Let's switch to travel planning. "
6660
"Let's plan a trip to London. I have two issues I need to solve: "
6761
"Lodging: what are the best areas to stay at while keeping budget in mind? "
@@ -72,7 +66,6 @@
7266
"They should keep it short. After getting the results, merge both analyses "
7367
"into a single consolidated report.\n\n"
7468
)
75-
conversation.send_message(task_message)
7669
conversation.run()
7770

7871
conversation.send_message(
@@ -81,18 +74,57 @@
8174
conversation.run()
8275

8376
# Report cost for simple delegation example
84-
cost_1 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
85-
print(f"EXAMPLE_COST (simple delegation): {cost_1}")
77+
cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost
78+
print(f"EXAMPLE_COST (simple delegation): {cost_simple}")
8679

8780
print("Simple delegation example done!", "\n" * 20)
8881

89-
90-
# -------- Agent Delegation Second Part: User-Defined Agent Types --------
91-
9282
if ONLY_RUN_SIMPLE_DELEGATION:
83+
# For CI: always emit the EXAMPLE_COST marker before exiting.
84+
print(f"EXAMPLE_COST: {cost_simple}")
9385
exit(0)
9486

9587

88+
# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) --------
89+
90+
main_agent = Agent(
91+
llm=llm,
92+
tools=[Tool(name=DelegateTool.name)],
93+
)
94+
conversation = Conversation(
95+
agent=main_agent,
96+
workspace=cwd,
97+
visualizer=DelegationVisualizer(name="Delegator (builtins)"),
98+
)
99+
100+
builtin_task_message = (
101+
"Demonstrate SDK built-in sub-agent types. "
102+
"1) Spawn an 'explore' sub-agent and ask it to list the markdown files in "
103+
"openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each "
104+
"built-in agent type is for (based on the file contents). "
105+
"2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the "
106+
"terminal and return the exact output. "
107+
"3) Merge both results into a short report. "
108+
"Do not use internet access."
109+
)
110+
111+
print("=" * 100)
112+
print("Demonstrating built-in agent delegation (explore + bash)...")
113+
print("=" * 100)
114+
115+
conversation.send_message(builtin_task_message)
116+
conversation.run()
117+
118+
# Report cost for builtin agent types example
119+
cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost
120+
print(f"EXAMPLE_COST (builtin agents): {cost_builtin}")
121+
122+
print("Built-in agent delegation example done!", "\n" * 20)
123+
124+
125+
# -------- Agent Delegation Third Part: User-Defined Agent Types --------
126+
127+
96128
def create_lodging_planner(llm: LLM) -> Agent:
97129
"""Create a lodging planner focused on London stays."""
98130
skills = [
@@ -190,10 +222,12 @@ def create_activities_planner(llm: LLM) -> Agent:
190222
conversation.run()
191223

192224
# Report cost for user-defined agent types example
193-
cost_2 = conversation.conversation_stats.get_combined_metrics().accumulated_cost
194-
print(f"EXAMPLE_COST (user-defined agents): {cost_2}")
225+
cost_user_defined = (
226+
conversation.conversation_stats.get_combined_metrics().accumulated_cost
227+
)
228+
print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}")
195229

196230
print("All done!")
197231

198232
# Full example cost report for CI workflow
199-
print(f"EXAMPLE_COST: {cost_1 + cost_2}")
233+
print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}")

openhands-agent-server/openhands/agent_server/git_router.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,74 @@
44
import logging
55
from pathlib import Path
66

7-
from fastapi import APIRouter
7+
from fastapi import APIRouter, Query
88

99
from openhands.agent_server.server_details_router import update_last_execution_time
1010
from openhands.sdk.git.git_changes import get_git_changes
1111
from openhands.sdk.git.git_diff import get_git_diff
1212
from openhands.sdk.git.models import GitChange, GitDiff
13+
from openhands.sdk.utils.deprecation import deprecated
1314

1415

1516
git_router = APIRouter(prefix="/git", tags=["Git"])
1617
logger = logging.getLogger(__name__)
1718

1819

19-
@git_router.get("/changes/{path:path}")
20-
async def git_changes(
21-
path: Path,
22-
) -> list[GitChange]:
20+
async def _get_git_changes(path: str) -> list[GitChange]:
21+
"""Internal helper to get git changes for a given path."""
2322
update_last_execution_time()
2423
loop = asyncio.get_running_loop()
25-
changes = await loop.run_in_executor(None, get_git_changes, path)
26-
return changes
24+
return await loop.run_in_executor(None, get_git_changes, Path(path))
2725

2826

29-
@git_router.get("/diff/{path:path}")
30-
async def git_diff(
31-
path: Path,
32-
) -> GitDiff:
27+
async def _get_git_diff(path: str) -> GitDiff:
28+
"""Internal helper to get git diff for a given path."""
3329
update_last_execution_time()
3430
loop = asyncio.get_running_loop()
35-
changes = await loop.run_in_executor(None, get_git_diff, path)
36-
return changes
31+
return await loop.run_in_executor(None, get_git_diff, Path(path))
32+
33+
34+
@git_router.get("/changes")
35+
async def git_changes_query(
36+
path: str = Query(..., description="The git repository path"),
37+
) -> list[GitChange]:
38+
"""Get git changes using query parameter (preferred method)."""
39+
return await _get_git_changes(path)
40+
41+
42+
@git_router.get("/changes/{path:path}")
43+
@deprecated(
44+
deprecated_in="1.15.0",
45+
removed_in="1.20.0",
46+
details=(
47+
"Use the /git/changes endpoint with a query parameter for the path "
48+
"instead of a path parameter. This allows for better handling of "
49+
"complex paths and is more consistent with other endpoints."
50+
),
51+
)
52+
async def git_changes_path(path: str) -> list[GitChange]:
53+
"""Get git changes using path parameter (legacy, for backwards compatibility)."""
54+
return await _get_git_changes(path)
55+
56+
57+
@git_router.get("/diff")
58+
async def git_diff_query(
59+
path: str = Query(..., description="The file path to get diff for"),
60+
) -> GitDiff:
61+
"""Get git diff using query parameter (preferred method)."""
62+
return await _get_git_diff(path)
63+
64+
65+
@git_router.get("/diff/{path:path}")
66+
@deprecated(
67+
deprecated_in="1.15.0",
68+
removed_in="1.20.0",
69+
details=(
70+
"Use the /git/diff endpoint with a query parameter for the path "
71+
"instead of a path parameter. This allows for better handling of "
72+
"complex paths and is more consistent with other endpoints."
73+
),
74+
)
75+
async def git_diff_path(path: str) -> GitDiff:
76+
"""Get git diff using path parameter (legacy, for backwards compatibility)."""
77+
return await _get_git_diff(path)

0 commit comments

Comments (0)