Set temperature to 0.0 for claude-opus-4-7 #64
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
name: QA Changes Evaluation [experimental]

# This workflow evaluates how well QA validation performed.
# It runs when a PR is closed to assess QA effectiveness.
#
# Security note: pull_request_target is safe here because this workflow
# never checks out or executes PR code. It only:
#   1. Downloads artifacts produced by a trusted workflow run
#   2. Runs evaluation scripts from the extensions repo (at the ref pinned
#      in the "Checkout extensions repository" step below)

on:
  pull_request_target:
    types: [closed]

# Read-only token: nothing in this workflow writes to the repository or PR.
permissions:
  contents: read
  pull-requests: read

jobs:
  evaluate:
    runs-on: ubuntu-24.04
    env:
      # PR_NUMBER is used by the log messages in "Check if trace file exists".
      # REPO_NAME and PR_MERGED are not referenced by any step in this file;
      # presumably the evaluation script reads them - TODO confirm.
      PR_NUMBER: ${{ github.event.pull_request.number }}
      REPO_NAME: ${{ github.repository }}
      PR_MERGED: ${{ github.event.pull_request.merged }}
    steps:
      # Best effort: a missing artifact only warns and never fails the job
      # (continue-on-error + if_no_artifact_found). The next step decides
      # whether there is anything to evaluate.
      - name: Download QA trace artifact
        id: download-trace
        uses: dawidd6/action-download-artifact@v20
        continue-on-error: true
        with:
          workflow: qa-changes-by-openhands.yml
          name: qa-changes-trace-${{ github.event.pull_request.number }}
          path: trace-info
          search_artifacts: true
          if_no_artifact_found: warn

      # Publishes the `trace_exists` output ('true' / 'false') that gates
      # every remaining step.
      - name: Check if trace file exists
        id: check-trace
        run: |
          if [ -f "trace-info/laminar_trace_info.json" ]; then
            echo "trace_exists=true" >> "$GITHUB_OUTPUT"
            echo "Found trace file for PR #$PR_NUMBER"
          else
            echo "trace_exists=false" >> "$GITHUB_OUTPUT"
            echo "No trace file found for PR #$PR_NUMBER - skipping evaluation"
          fi

      # EXPERIMENTAL: pinned to feature branch while qa-changes plugin is in
      # development. Switch to @main (and remove ref:) once the plugin is
      # merged.
      # NOTE(review): a branch ref is mutable, and "Run evaluation" executes a
      # script from this checkout with secrets in its environment - consider
      # pinning to a commit SHA.
      - name: Checkout extensions repository
        if: steps.check-trace.outputs.trace_exists == 'true'
        uses: actions/checkout@v6
        with:
          repository: OpenHands/extensions
          ref: feat/qa-changes-plugin
          path: extensions
          # No later step runs git, so the checkout has no reason to keep
          # its auth token available after it finishes.
          persist-credentials: false

      - name: Set up Python
        if: steps.check-trace.outputs.trace_exists == 'true'
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      # NOTE(review): lmnr is installed unpinned; consider pinning a version
      # for reproducible runs.
      - name: Install dependencies
        if: steps.check-trace.outputs.trace_exists == 'true'
        run: pip install lmnr

      - name: Run evaluation
        if: steps.check-trace.outputs.trace_exists == 'true'
        env:
          # Script expects LMNR_PROJECT_API_KEY; org secret is named
          # LMNR_SKILLS_API_KEY
          LMNR_PROJECT_API_KEY: ${{ secrets.LMNR_SKILLS_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python extensions/plugins/qa-changes/scripts/evaluate_qa_changes.py \
            --trace-file trace-info/laminar_trace_info.json

      # always() keeps the upload running even when "Run evaluation" fails.
      # The '*.log' files are presumably written to the workspace root by the
      # evaluation script - TODO confirm.
      - name: Upload evaluation logs
        uses: actions/upload-artifact@v7
        if: always() && steps.check-trace.outputs.trace_exists == 'true'
        with:
          name: qa-changes-evaluation-${{ github.event.pull_request.number }}
          path: '*.log'
          retention-days: 30