Add support for EXTENSIONS_REF environment variable (#2607)

juanmichelini · openhands-agent · web-flow · commit d2a77abc79bf · 2026-04-09T20:13:46.000Z
Co-authored-by: openhands <openhands@all-hands.dev>
diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml
@@ -63,6 +63,11 @@ on:
                 required: false
                 default: main
                 type: string
+            extensions_branch:
+                description: Extensions repo branch to use (for testing feature branches with skills/plugins)
+                required: false
+                default: main
+                type: string
             instance_ids:
                 description: >-
                     Comma-separated instance IDs to evaluate.
@@ -157,6 +162,7 @@ jobs:
                   echo "reason: ${{ github.event.inputs.reason || 'N/A' }}"
                   echo "eval_branch: ${{ github.event.inputs.eval_branch || 'main' }}"
                   echo "benchmarks_branch: ${{ github.event.inputs.benchmarks_branch || 'main' }}"
+                  echo "extensions_branch: ${{ github.event.inputs.extensions_branch || 'main' }}"
                   echo "instance_ids: ${{ github.event.inputs.instance_ids || 'N/A' }}"
                   echo "num_infer_workers: ${{ github.event.inputs.num_infer_workers || '(default)' }}"
                   echo "num_eval_workers: ${{ github.event.inputs.num_eval_workers || '(default)' }}"
@@ -341,6 +347,7 @@ jobs:
                   EVAL_WORKFLOW: ${{ env.EVAL_WORKFLOW }}
                   EVAL_BRANCH: ${{ github.event.inputs.eval_branch || 'main' }}
                   BENCHMARKS_BRANCH: ${{ github.event.inputs.benchmarks_branch || 'main' }}
+                  EXTENSIONS_BRANCH: ${{ github.event.inputs.extensions_branch || 'main' }}
                   BENCHMARK: ${{ github.event.inputs.benchmark || 'swebench' }}
                   TRIGGER_REASON: ${{ github.event.inputs.reason }}
                   PR_NUMBER: ${{ steps.params.outputs.pr_number }}
@@ -357,7 +364,7 @@ jobs:
                   # Normalize instance_ids: strip all spaces
                   INSTANCE_IDS=$(printf '%s' "$INSTANCE_IDS" | tr -d ' ')
 
-                  echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, tool preset: $TOOL_PRESET)"
+                  echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, extensions branch: $EXTENSIONS_BRANCH, tool preset: $TOOL_PRESET)"
                   PAYLOAD=$(jq -n \
                     --arg sdk "$SDK_SHA" \
                     --arg sdk_run_id "${{ github.run_id }}" \
@@ -367,6 +374,7 @@ jobs:
                     --arg reason "$TRIGGER_REASON" \
                     --arg pr "$PR_NUMBER" \
                     --arg benchmarks "$BENCHMARKS_BRANCH" \
+                    --arg extensions "$EXTENSIONS_BRANCH" \
                     --arg benchmark "$BENCHMARK" \
                     --arg instance_ids "$INSTANCE_IDS" \
                     --arg num_infer_workers "$NUM_INFER_WORKERS" \
@@ -377,7 +385,7 @@ jobs:
                     --arg agent_type "$AGENT_TYPE" \
                     --arg partial_archive_url "$PARTIAL_ARCHIVE_URL" \
                     --arg triggered_by "$TRIGGERED_BY" \
-                    '{ref: $ref, inputs: {sdk_commit: $sdk, sdk_workflow_run_id: $sdk_run_id, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, partial_archive_url: $partial_archive_url, triggered_by: $triggered_by}}')
+                    '{ref: $ref, inputs: {sdk_commit: $sdk, sdk_workflow_run_id: $sdk_run_id, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, extensions_branch: $extensions, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, partial_archive_url: $partial_archive_url, triggered_by: $triggered_by}}')
                   RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \
                     -H "Authorization: token $PAT_TOKEN" \
                     -H "Accept: application/vnd.github+json" \
diff --git a/openhands-sdk/openhands/sdk/context/skills/skill.py b/openhands-sdk/openhands/sdk/context/skills/skill.py
@@ -1,5 +1,6 @@
 import io
 import json
+import os
 import re
 from pathlib import Path
 from typing import Annotated, ClassVar, Literal, Union
@@ -891,7 +892,9 @@ def load_project_skills(work_dir: str | Path) -> list[Skill]:
 
 # Public skills repository configuration
 PUBLIC_SKILLS_REPO = "https://github.com/OpenHands/extensions"
-PUBLIC_SKILLS_BRANCH = "main"
+# Branch is overridable via the EXTENSIONS_REF environment variable, read once at
+# import time (lets evaluation/benchmarks workflows test feature branches).
+PUBLIC_SKILLS_BRANCH = os.environ.get("EXTENSIONS_REF", "main")
 DEFAULT_MARKETPLACE_PATH = "marketplaces/default.json"
 
 
diff --git a/tests/sdk/context/skill/test_extensions_ref.py b/tests/sdk/context/skill/test_extensions_ref.py
@@ -0,0 +1,91 @@
+"""Tests for EXTENSIONS_REF environment variable support.
+
+These tests use subprocess to run each test in an isolated Python process,
+avoiding module state pollution that would affect other tests.
+"""
+
+import subprocess
+import sys
+
+
+def _run_in_subprocess(test_code: str, env_extra: dict | None = None) -> None:
+    """Run test code in a subprocess with the given environment variables."""
+    import os
+
+    env = os.environ.copy()
+    if env_extra:
+        env.update(env_extra)
+
+    result = subprocess.run(
+        [sys.executable, "-c", test_code],
+        env=env,
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        raise AssertionError(
+            f"Subprocess test failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"
+        )
+
+
+def test_extensions_ref_default():
+    """PUBLIC_SKILLS_BRANCH should default to 'main' when EXTENSIONS_REF is not set."""
+    code = """
+import os
+if "EXTENSIONS_REF" in os.environ:
+    del os.environ["EXTENSIONS_REF"]
+from openhands.sdk.context.skills.skill import PUBLIC_SKILLS_BRANCH
+assert PUBLIC_SKILLS_BRANCH == "main", (
+    f"Expected 'main' but got '{PUBLIC_SKILLS_BRANCH}'"
+)
+"""
+    _run_in_subprocess(code)
+
+
+def test_extensions_ref_custom_branch():
+    """PUBLIC_SKILLS_BRANCH should use EXTENSIONS_REF when set."""
+    code = """
+from openhands.sdk.context.skills.skill import PUBLIC_SKILLS_BRANCH
+assert PUBLIC_SKILLS_BRANCH == "feature-branch", (
+    f"Expected 'feature-branch' but got '{PUBLIC_SKILLS_BRANCH}'"
+)
+"""
+    _run_in_subprocess(code, {"EXTENSIONS_REF": "feature-branch"})
+
+
+def test_extensions_ref_with_load_public_skills():
+    """load_public_skills should respect EXTENSIONS_REF environment variable."""
+    code = """
+from unittest import mock
+from openhands.sdk.context.skills.skill import (
+    PUBLIC_SKILLS_BRANCH,
+    load_public_skills,
+)
+assert PUBLIC_SKILLS_BRANCH == "test-branch", (
+    f"Expected 'test-branch' but got '{PUBLIC_SKILLS_BRANCH}'"
+)
+with mock.patch(
+    "openhands.sdk.context.skills.skill.update_skills_repository"
+) as mock_update:
+    mock_update.return_value = None
+    load_public_skills()
+    mock_update.assert_called_once()
+    call_args = mock_update.call_args
+    # branch is 2nd positional arg: (repo_url, branch, cache_dir)
+    assert call_args[0][1] == "test-branch", (
+        f"Expected branch='test-branch' but got {call_args[0][1]}"
+    )
+"""
+    _run_in_subprocess(code, {"EXTENSIONS_REF": "test-branch"})
+
+
+def test_extensions_ref_empty_string():
+    """Empty EXTENSIONS_REF is passed through as '' (no fallback to 'main')."""
+    code = """
+from openhands.sdk.context.skills.skill import PUBLIC_SKILLS_BRANCH
+# Empty string returns empty string per os.environ.get behavior
+assert PUBLIC_SKILLS_BRANCH == "", (
+    f"Expected '' but got '{PUBLIC_SKILLS_BRANCH}'"
+)
+"""
+    _run_in_subprocess(code, {"EXTENSIONS_REF": ""})