diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml index 8bf0eb4212..68a2e96eb9 100644 --- a/.github/workflows/run-eval.yml +++ b/.github/workflows/run-eval.yml @@ -63,6 +63,11 @@ on: required: false default: main type: string + extensions_branch: + description: Extensions repo branch to use (for testing feature branches with skills/plugins) + required: false + default: main + type: string instance_ids: description: >- Comma-separated instance IDs to evaluate. @@ -157,6 +162,7 @@ jobs: echo "reason: ${{ github.event.inputs.reason || 'N/A' }}" echo "eval_branch: ${{ github.event.inputs.eval_branch || 'main' }}" echo "benchmarks_branch: ${{ github.event.inputs.benchmarks_branch || 'main' }}" + echo "extensions_branch: ${{ github.event.inputs.extensions_branch || 'main' }}" echo "instance_ids: ${{ github.event.inputs.instance_ids || 'N/A' }}" echo "num_infer_workers: ${{ github.event.inputs.num_infer_workers || '(default)' }}" echo "num_eval_workers: ${{ github.event.inputs.num_eval_workers || '(default)' }}" @@ -341,6 +347,7 @@ jobs: EVAL_WORKFLOW: ${{ env.EVAL_WORKFLOW }} EVAL_BRANCH: ${{ github.event.inputs.eval_branch || 'main' }} BENCHMARKS_BRANCH: ${{ github.event.inputs.benchmarks_branch || 'main' }} + EXTENSIONS_BRANCH: ${{ github.event.inputs.extensions_branch || 'main' }} BENCHMARK: ${{ github.event.inputs.benchmark || 'swebench' }} TRIGGER_REASON: ${{ github.event.inputs.reason }} PR_NUMBER: ${{ steps.params.outputs.pr_number }} @@ -357,7 +364,7 @@ jobs: # Normalize instance_ids: strip all spaces INSTANCE_IDS=$(printf '%s' "$INSTANCE_IDS" | tr -d ' ') - echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, tool preset: $TOOL_PRESET)" + echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, extensions branch: $EXTENSIONS_BRANCH, tool 
preset: $TOOL_PRESET)" PAYLOAD=$(jq -n \ --arg sdk "$SDK_SHA" \ --arg sdk_run_id "${{ github.run_id }}" \ @@ -367,6 +374,7 @@ jobs: --arg reason "$TRIGGER_REASON" \ --arg pr "$PR_NUMBER" \ --arg benchmarks "$BENCHMARKS_BRANCH" \ + --arg extensions "$EXTENSIONS_BRANCH" \ --arg benchmark "$BENCHMARK" \ --arg instance_ids "$INSTANCE_IDS" \ --arg num_infer_workers "$NUM_INFER_WORKERS" \ @@ -377,7 +385,7 @@ jobs: --arg agent_type "$AGENT_TYPE" \ --arg partial_archive_url "$PARTIAL_ARCHIVE_URL" \ --arg triggered_by "$TRIGGERED_BY" \ - '{ref: $ref, inputs: {sdk_commit: $sdk, sdk_workflow_run_id: $sdk_run_id, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, partial_archive_url: $partial_archive_url, triggered_by: $triggered_by}}') + '{ref: $ref, inputs: {sdk_commit: $sdk, sdk_workflow_run_id: $sdk_run_id, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, extensions_branch: $extensions, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, partial_archive_url: $partial_archive_url, triggered_by: $triggered_by}}') RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \ -H "Authorization: token $PAT_TOKEN" \ -H "Accept: application/vnd.github+json" \ diff --git a/openhands-sdk/openhands/sdk/context/skills/skill.py b/openhands-sdk/openhands/sdk/context/skills/skill.py index c402ffe688..d431dbf781 100644 --- 
a/openhands-sdk/openhands/sdk/context/skills/skill.py +++ b/openhands-sdk/openhands/sdk/context/skills/skill.py @@ -1,5 +1,6 @@ import io import json +import os import re from pathlib import Path from typing import Annotated, ClassVar, Literal, Union @@ -891,7 +892,9 @@ def load_project_skills(work_dir: str | Path) -> list[Skill]: # Public skills repository configuration PUBLIC_SKILLS_REPO = "https://github.com/OpenHands/extensions" -PUBLIC_SKILLS_BRANCH = "main" +# Allow overriding the branch via EXTENSIONS_REF environment variable +# (used by evaluation/benchmarks workflows to test feature branches) +PUBLIC_SKILLS_BRANCH = os.environ.get("EXTENSIONS_REF", "main") DEFAULT_MARKETPLACE_PATH = "marketplaces/default.json" diff --git a/tests/sdk/context/skill/test_extensions_ref.py b/tests/sdk/context/skill/test_extensions_ref.py new file mode 100644 index 0000000000..612135fb4f --- /dev/null +++ b/tests/sdk/context/skill/test_extensions_ref.py @@ -0,0 +1,91 @@ +"""Tests for EXTENSIONS_REF environment variable support. + +These tests use subprocess to run each test in an isolated Python process, +avoiding module state pollution that would affect other tests. 
+""" + +import subprocess +import sys + + +def _run_in_subprocess(test_code: str, env_extra: dict | None = None) -> None: + """Run test code in a subprocess with the given environment variables.""" + import os + + env = os.environ.copy() + if env_extra: + env.update(env_extra) + + result = subprocess.run( + [sys.executable, "-c", test_code], + env=env, + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise AssertionError( + f"Subprocess test failed:\nstdout: {result.stdout}\nstderr: {result.stderr}" + ) + + +def test_extensions_ref_default(): + """PUBLIC_SKILLS_BRANCH should default to 'main' when EXTENSIONS_REF is not set.""" + code = """ +import os +if "EXTENSIONS_REF" in os.environ: + del os.environ["EXTENSIONS_REF"] +from openhands.sdk.context.skills.skill import PUBLIC_SKILLS_BRANCH +assert PUBLIC_SKILLS_BRANCH == "main", ( + f"Expected 'main' but got '{PUBLIC_SKILLS_BRANCH}'" +) +""" + _run_in_subprocess(code) + + +def test_extensions_ref_custom_branch(): + """PUBLIC_SKILLS_BRANCH should use EXTENSIONS_REF when set.""" + code = """ +from openhands.sdk.context.skills.skill import PUBLIC_SKILLS_BRANCH +assert PUBLIC_SKILLS_BRANCH == "feature-branch", ( + f"Expected 'feature-branch' but got '{PUBLIC_SKILLS_BRANCH}'" +) +""" + _run_in_subprocess(code, {"EXTENSIONS_REF": "feature-branch"}) + + +def test_extensions_ref_with_load_public_skills(): + """load_public_skills should respect EXTENSIONS_REF environment variable.""" + code = """ +from unittest import mock +from openhands.sdk.context.skills.skill import ( + PUBLIC_SKILLS_BRANCH, + load_public_skills, +) +assert PUBLIC_SKILLS_BRANCH == "test-branch", ( + f"Expected 'test-branch' but got '{PUBLIC_SKILLS_BRANCH}'" +) +with mock.patch( + "openhands.sdk.context.skills.skill.update_skills_repository" +) as mock_update: + mock_update.return_value = None + load_public_skills() + mock_update.assert_called_once() + call_args = mock_update.call_args + # branch is 2nd positional arg: (repo_url, 
branch, cache_dir) + assert call_args[0][1] == "test-branch", ( + f"Expected branch='test-branch' but got {call_args[0][1]}" + ) +""" + _run_in_subprocess(code, {"EXTENSIONS_REF": "test-branch"}) + + +def test_extensions_ref_empty_string(): + """Empty EXTENSIONS_REF is used as-is (empty branch); it does NOT fall back to 'main'.""" + code = """ +from openhands.sdk.context.skills.skill import PUBLIC_SKILLS_BRANCH +# Empty string returns empty string per os.environ.get behavior +assert PUBLIC_SKILLS_BRANCH == "", ( + f"Expected '' but got '{PUBLIC_SKILLS_BRANCH}'" +) +""" + _run_in_subprocess(code, {"EXTENSIONS_REF": ""})