Skip to content

Commit c333aed

Browse files
merge: update settings schema branch with main
- merge latest main into the SDK settings-schema branch
- type settings metadata for pyright compatibility
- bump openhands-sdk to 1.15.0 for API breakage policy

Co-authored-by: openhands <openhands@all-hands.dev>
2 parents b69f7ce + a901efa commit c333aed

31 files changed

Lines changed: 1309 additions & 1941 deletions

File tree

.github/actions/pr-review/action.yml

Lines changed: 0 additions & 157 deletions
This file was deleted.

.github/run-eval/resolve_model_config.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,9 +330,16 @@ def check_model(
330330
**kwargs,
331331
)
332332

333-
content = response.choices[0].message.content if response.choices else None
333+
response_content = (
334+
response.choices[0].message.content if response.choices else None
335+
)
336+
reasoning_content = (
337+
getattr(response.choices[0].message, "reasoning_content", None)
338+
if response.choices
339+
else None
340+
)
334341

335-
if content:
342+
if response_content or reasoning_content:
336343
return True, f"✓ {display_name}: OK"
337344
else:
338345
# Check if there's any other data in the response for diagnostics

.github/workflows/pr-review-by-openhands.yml

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@
22
name: PR Review by OpenHands
33

44
on:
5-
# TEMPORARY MITIGATION (Clinejection hardening)
6-
#
7-
# We temporarily avoid `pull_request_target` here. We'll restore it after the PR review
8-
# workflow is fully hardened for untrusted execution.
5+
# Use pull_request so workflow changes can be validated in PRs.
6+
# This workflow requires secrets, so the job only runs for same-repo PRs.
7+
# It runs when:
8+
# 1. A new PR is opened (non-draft), OR
9+
# 2. A draft PR is marked as ready for review, OR
10+
# 3. A maintainer adds the 'review-this' label, OR
11+
# 4. A maintainer requests openhands-agent or all-hands-bot as a reviewer
12+
# Adding labels and requesting reviewers still requires write access.
913
pull_request:
1014
types: [opened, ready_for_review, labeled, review_requested]
1115

@@ -16,38 +20,32 @@ permissions:
1620

1721
jobs:
1822
pr-review:
19-
# Note: fork PRs will not have access to repository secrets under `pull_request`.
20-
# Skip forks to avoid noisy failures until we restore a hardened `pull_request_target` flow.
23+
# Run when one of the following conditions is met:
24+
# 1. A new non-draft PR is opened by a non-first-time contributor, OR
25+
# 2. A draft PR is converted to ready for review by a non-first-time contributor, OR
26+
# 3. 'review-this' label is added, OR
27+
# 4. openhands-agent or all-hands-bot is requested as a reviewer
28+
# Note: PRs whose author association is FIRST_TIME_CONTRIBUTOR or NONE are not reviewed automatically; they require a manual trigger via the 'review-this' label or a reviewer request.
2129
if: |
22-
github.event.pull_request.head.repo.full_name == github.repository &&
23-
(
24-
(github.event.action == 'opened' && github.event.pull_request.draft == false) ||
25-
github.event.action == 'ready_for_review' ||
26-
(github.event.action == 'labeled' && github.event.label.name == 'review-this') ||
27-
(
28-
github.event.action == 'review_requested' &&
29-
(
30-
github.event.requested_reviewer.login == 'openhands-agent' ||
31-
github.event.requested_reviewer.login == 'all-hands-bot'
32-
)
33-
)
30+
github.event.pull_request.head.repo.full_name == github.repository && (
31+
(github.event.action == 'opened' && github.event.pull_request.draft == false && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' && github.event.pull_request.author_association != 'NONE') ||
32+
(github.event.action == 'ready_for_review' && github.event.pull_request.author_association != 'FIRST_TIME_CONTRIBUTOR' && github.event.pull_request.author_association != 'NONE') ||
33+
github.event.label.name == 'review-this' ||
34+
github.event.requested_reviewer.login == 'openhands-agent' ||
35+
github.event.requested_reviewer.login == 'all-hands-bot'
3436
)
3537
concurrency:
3638
group: pr-review-${{ github.event.pull_request.number }}
3739
cancel-in-progress: true
3840
runs-on: ubuntu-24.04
3941
steps:
4042
- name: Run PR Review
41-
uses: OpenHands/software-agent-sdk/.github/actions/pr-review@main
43+
uses: OpenHands/extensions/plugins/pr-review@main
4244
with:
43-
# LLM model(s) to use. Can be comma-separated for A/B testing
44-
# - one model will be randomly selected per review
4545
llm-model: litellm_proxy/claude-sonnet-4-5-20250929
4646
llm-base-url: https://llm-proxy.app.all-hands.dev
4747
# Review style: roasted (other option: standard)
4848
review-style: roasted
49-
# Use the PR's head commit SHA to test SDK changes on the SDK repo itself
50-
sdk-version: ${{ github.event.pull_request.head.sha }}
5149
llm-api-key: ${{ secrets.LLM_API_KEY }}
5250
github-token: ${{ secrets.ALLHANDS_BOT_GITHUB_PAT }}
5351
lmnr-api-key: ${{ secrets.LMNR_SKILLS_API_KEY }}
Lines changed: 18 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
---
22
name: PR Review Evaluation
33

4-
# This workflow runs when a PR is merged or closed to evaluate how well
5-
# the review agent's comments were addressed.
6-
#
7-
# It creates an evaluation trace in Laminar that can be processed by a
8-
# signal to determine review effectiveness.
4+
# This workflow evaluates how well PR review comments were addressed.
5+
# It runs when a PR is closed to assess review effectiveness.
96
#
10-
# Prerequisites:
11-
# - PR must have been reviewed by pr-review-by-openhands.yml first
12-
# - Trace info artifact must exist from the review workflow
7+
# Security note: pull_request_target is safe here because:
8+
# 1. Only triggers on PR close (not on code changes)
9+
# 2. Does not check out PR code; it only downloads artifacts from trusted workflow runs
10+
# 3. Runs evaluation scripts from the extensions repo, not from the PR
1311

1412
on:
1513
pull_request_target:
@@ -21,17 +19,13 @@ permissions:
2119

2220
jobs:
2321
evaluate:
24-
# Only run if:
25-
# 1. This is a merged PR, AND
26-
# 2. The PR was previously reviewed (has the trace artifact)
2722
runs-on: ubuntu-24.04
2823
env:
2924
PR_NUMBER: ${{ github.event.pull_request.number }}
3025
REPO_NAME: ${{ github.repository }}
3126
PR_MERGED: ${{ github.event.pull_request.merged }}
27+
3228
steps:
33-
# Note: actions/download-artifact@v5 only works within the same workflow run.
34-
# We use dawidd6/action-download-artifact to download from a different workflow.
3529
- name: Download review trace artifact
3630
id: download-trace
3731
uses: dawidd6/action-download-artifact@v6
@@ -43,8 +37,6 @@ jobs:
4337
search_artifacts: true
4438
if_no_artifact_found: warn
4539

46-
# Check if the trace file actually exists (the artifact download may
47-
# succeed but with no matching artifact, only issuing a warning)
4840
- name: Check if trace file exists
4941
id: check-trace
5042
run: |
@@ -53,53 +45,41 @@ jobs:
5345
echo "Found trace file for PR #$PR_NUMBER"
5446
else
5547
echo "trace_exists=false" >> $GITHUB_OUTPUT
56-
echo "No trace file found for PR #$PR_NUMBER"
57-
echo "This PR may not have been reviewed by the agent, skipping evaluation"
48+
echo "No trace file found for PR #$PR_NUMBER - skipping evaluation"
5849
fi
5950
60-
- name: Checkout software-agent-sdk repository
51+
# Always check out the main branch for security; script changes cannot be tested in PRs
52+
- name: Checkout extensions repository
6153
if: steps.check-trace.outputs.trace_exists == 'true'
6254
uses: actions/checkout@v5
6355
with:
64-
repository: OpenHands/software-agent-sdk
65-
path: software-agent-sdk
56+
repository: OpenHands/extensions
57+
path: extensions
6658

6759
- name: Set up Python
6860
if: steps.check-trace.outputs.trace_exists == 'true'
6961
uses: actions/setup-python@v6
7062
with:
71-
python-version: '3.13'
72-
73-
- name: Install uv
74-
if: steps.check-trace.outputs.trace_exists == 'true'
75-
uses: astral-sh/setup-uv@v7
76-
with:
77-
enable-cache: true
63+
python-version: '3.12'
7864

7965
- name: Install dependencies
8066
if: steps.check-trace.outputs.trace_exists == 'true'
81-
run: |
82-
# Install lmnr SDK for Laminar integration
83-
uv pip install --system lmnr
67+
run: pip install lmnr
8468

8569
- name: Run evaluation
8670
if: steps.check-trace.outputs.trace_exists == 'true'
8771
env:
72+
# Script expects LMNR_PROJECT_API_KEY; org secret is named LMNR_SKILLS_API_KEY
8873
LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_SKILLS_API_KEY }}
8974
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
9075
run: |
91-
# Copy trace info to working directory
92-
cp trace-info/laminar_trace_info.json .
93-
94-
# Run the evaluation script
95-
uv run python software-agent-sdk/examples/03_github_workflows/02_pr_review/evaluate_review.py
76+
python extensions/plugins/pr-review/scripts/evaluate_review.py \
77+
--trace-file trace-info/laminar_trace_info.json
9678
9779
- name: Upload evaluation logs
9880
uses: actions/upload-artifact@v5
9981
if: always() && steps.check-trace.outputs.trace_exists == 'true'
10082
with:
10183
name: pr-review-evaluation-${{ github.event.pull_request.number }}
102-
path: |
103-
*.log
104-
*.json
84+
path: '*.log'
10585
retention-days: 30

.github/workflows/run-eval.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ on:
1919
- swebench
2020
- swtbench
2121
- commit0
22-
- multiswebench
2322
- swebenchmultimodal
2423
sdk_ref:
2524
description: SDK commit/ref to evaluate (must be a semantic version like v1.0.0 unless 'Allow unreleased branches' is checked)

0 commit comments

Comments
 (0)