# Scheduled Tutorial Evaluation #30
# Note: this file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review it, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Evaluates the tutorials on a weekly schedule, on demand, and for pull requests.
name: Scheduled Tutorial Evaluation

on:
  # Friday 5am PST / 6am PDT (13:00 UTC)
  schedule:
    - cron: '0 13 * * 5'

  # Manual trigger for testing/re-runs
  workflow_dispatch: {}

  # PR evaluation (requires approval)
  pull_request_target:
    types:
      - opened
      - synchronize
      - reopened
jobs:
  # Runs the agent-driven tutorial walkthrough once per tutorial, inside that
  # tutorial's devcontainer. Up to three attempts are made; a run counts as
  # passing only when the newest report.md contains a "## STATUS: SUCCESS" line.
  evaluate:
    runs-on: ubuntu-latest
    # Scheduled runs use their own environment; every other trigger shares one.
    environment: ${{ github.event_name == 'schedule' && 'tutorial-evaluation-scheduled' || 'tutorial-evaluation' }}
    # Least privilege: the steps below only read repositories and upload
    # artifacts, so the GITHUB_TOKEN never needs write access in this job.
    permissions:
      contents: read
    strategy:
      # One failing tutorial must not cancel the evaluation of the others.
      fail-fast: false
      matrix:
        tutorial: [getting-started, curbside-pickup, building-comfort, absence-of-change, risky-containers]
        # Per-tutorial settings: devcontainer config, docs folder in the docs
        # repo, and the tutorial folder in this repo.
        include:
          - tutorial: getting-started
            devcontainer: .devcontainer/devcontainer.json
            docs_path: docs/content/getting-started
            learning_path: tutorial/getting-started
          - tutorial: curbside-pickup
            devcontainer: .devcontainer/curbside-pickup/devcontainer.json
            docs_path: docs/content/tutorials/curbside-pickup
            learning_path: tutorial/curbside-pickup
          - tutorial: building-comfort
            devcontainer: .devcontainer/building-comfort/devcontainer.json
            docs_path: docs/content/tutorials/building-comfort
            learning_path: tutorial/building-comfort
          - tutorial: absence-of-change
            devcontainer: .devcontainer/absence-of-change/devcontainer.json
            docs_path: docs/content/tutorials/absence-of-change
            learning_path: tutorial/absence-of-change
          - tutorial: risky-containers
            devcontainer: .devcontainer/risky-containers/devcontainer.json
            docs_path: docs/content/tutorials/risky-containers
            learning_path: tutorial/risky-containers
    steps:
      - name: Checkout this repo
        uses: actions/checkout@v4
        with:
          # For PR events evaluate the PR's head commit; otherwise the triggering commit.
          ref: ${{ github.event.pull_request.head.sha || github.sha }}
          # Later steps run PR-supplied content and an agent with --allow-all-tools,
          # so do not leave the GITHUB_TOKEN stored in the checkout's git config.
          persist-credentials: false

      - name: Checkout docs repo
        uses: actions/checkout@v4
        with:
          repository: drasi-project/docs
          ref: main
          path: drasi-docs
          persist-credentials: false

      - name: Copy tutorial docs and prompt
        # Matrix values are passed through the environment and quoted rather than
        # being interpolated directly into the shell script.
        env:
          DOCS_PATH: ${{ matrix.docs_path }}
          LEARNING_PATH: ${{ matrix.learning_path }}
        run: |
          mkdir -p "$LEARNING_PATH/tutorial-docs"
          cp -r "drasi-docs/$DOCS_PATH"/* "$LEARNING_PATH/tutorial-docs"
          cp .github/prompts/tutorial-evaluation.md "$LEARNING_PATH/prompt.md"

      # Attempt 1. continue-on-error keeps the job alive so a retry can follow;
      # later steps look at `steps.attempt1.outcome` to decide whether to retry.
      - name: Run evaluation (attempt 1)
        id: attempt1
        continue-on-error: true
        uses: devcontainers/ci@v0.3
        env:
          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
          DRASI_TUTORIAL_EVALUATION: "true"
        with:
          configFile: ${{ matrix.devcontainer }}
          push: never
          runCmd: |
            # The agent's own exit code is ignored (`|| true`, including the
            # 1200 s timeout); success is judged only from the report below.
            timeout 1200 copilot -p "$(cat prompt.md)" \
              --allow-all-tools \
              --allow-all-paths \
              --deny-tool 'fetch' \
              --deny-tool 'websearch' \
              --deny-tool 'githubRepo' \
              --deny-tool 'shell(curl *)' \
              --deny-tool 'shell(wget *)' \
              --deny-tool 'shell(nc *)' \
              --deny-tool 'shell(ssh *)' \
              --deny-tool 'shell(scp *)' \
              --deny-tool 'shell(telnet *)' \
              --deny-tool 'shell(ftp *)' \
              --deny-tool 'shell(rm -rf /*)' \
              --deny-tool 'shell(dd *)' \
              --allow-url localhost \
              --allow-url 127.0.0.1 \
              --model gemini-3-pro-preview || true
            # Newest report = last path in sort order among evaluation-* directories.
            REPORT=$(find . -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
            # Strip a UTF-8 BOM so a STATUS heading on line 1 still matches `^##`
            # (the final status check performs the same normalisation).
            if [ -n "$REPORT" ]; then sed -i '1s/^\xEF\xBB\xBF//' "$REPORT" || true; fi
            if [ -z "$REPORT" ] || ! grep -qiE "^##\s*STATUS:\s*SUCCESS" "$REPORT"; then
              exit 1
            fi

      - name: Clean up before retry (attempt 2)
        if: steps.attempt1.outcome == 'failure'
        run: |
          echo "Attempt 1 failed. Removing all containers and images to force a fresh devcontainer build..."
          docker rm -f $(docker ps -aq) 2>/dev/null || true
          docker system prune -af --volumes 2>/dev/null || true
          sleep 10

      # Attempt 2: same command and model as attempt 1, on a freshly built container.
      - name: Run evaluation (attempt 2)
        id: attempt2
        continue-on-error: true
        if: steps.attempt1.outcome == 'failure'
        uses: devcontainers/ci@v0.3
        env:
          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
          DRASI_TUTORIAL_EVALUATION: "true"
        with:
          configFile: ${{ matrix.devcontainer }}
          push: never
          runCmd: |
            timeout 1200 copilot -p "$(cat prompt.md)" \
              --allow-all-tools \
              --allow-all-paths \
              --deny-tool 'fetch' \
              --deny-tool 'websearch' \
              --deny-tool 'githubRepo' \
              --deny-tool 'shell(curl *)' \
              --deny-tool 'shell(wget *)' \
              --deny-tool 'shell(nc *)' \
              --deny-tool 'shell(ssh *)' \
              --deny-tool 'shell(scp *)' \
              --deny-tool 'shell(telnet *)' \
              --deny-tool 'shell(ftp *)' \
              --deny-tool 'shell(rm -rf /*)' \
              --deny-tool 'shell(dd *)' \
              --allow-url localhost \
              --allow-url 127.0.0.1 \
              --model gemini-3-pro-preview || true
            REPORT=$(find . -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
            # Strip a UTF-8 BOM so a STATUS heading on line 1 still matches `^##`.
            if [ -n "$REPORT" ]; then sed -i '1s/^\xEF\xBB\xBF//' "$REPORT" || true; fi
            if [ -z "$REPORT" ] || ! grep -qiE "^##\s*STATUS:\s*SUCCESS" "$REPORT"; then
              exit 1
            fi

      - name: Clean up before retry (attempt 3)
        if: steps.attempt2.outcome == 'failure'
        run: |
          echo "Attempt 2 failed. Removing all containers and images to force a fresh devcontainer build..."
          docker rm -f $(docker ps -aq) 2>/dev/null || true
          docker system prune -af --volumes 2>/dev/null || true
          sleep 10

      # Attempt 3: final try with a different model. There is no continue-on-error
      # here, so a failure of this step fails the job.
      - name: Run evaluation (attempt 3)
        id: attempt3
        if: steps.attempt2.outcome == 'failure'
        uses: devcontainers/ci@v0.3
        env:
          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
          DRASI_TUTORIAL_EVALUATION: "true"
        with:
          configFile: ${{ matrix.devcontainer }}
          push: never
          runCmd: |
            timeout 1200 copilot -p "$(cat prompt.md)" \
              --allow-all-tools \
              --allow-all-paths \
              --deny-tool 'fetch' \
              --deny-tool 'websearch' \
              --deny-tool 'githubRepo' \
              --deny-tool 'shell(curl *)' \
              --deny-tool 'shell(wget *)' \
              --deny-tool 'shell(nc *)' \
              --deny-tool 'shell(ssh *)' \
              --deny-tool 'shell(scp *)' \
              --deny-tool 'shell(telnet *)' \
              --deny-tool 'shell(ftp *)' \
              --deny-tool 'shell(rm -rf /*)' \
              --deny-tool 'shell(dd *)' \
              --allow-url localhost \
              --allow-url 127.0.0.1 \
              --model claude-opus-4.6 || true
            REPORT=$(find . -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
            # Strip a UTF-8 BOM so a STATUS heading on line 1 still matches `^##`.
            if [ -n "$REPORT" ]; then sed -i '1s/^\xEF\xBB\xBF//' "$REPORT" || true; fi
            if [ -z "$REPORT" ] || ! grep -qiE "^##\s*STATUS:\s*SUCCESS" "$REPORT"; then
              exit 1
            fi

      # Final verdict for the job, evaluated on the runner from the newest report.
      # Runs even when an earlier step failed so the report is always printed.
      - name: Check evaluation status
        if: always()
        env:
          LEARNING_PATH: ${{ matrix.learning_path }}
        run: |
          REPORT=$(find "$LEARNING_PATH" -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
          if [ -z "$REPORT" ]; then
            echo "::error::No report.md found in evaluation directory"
            exit 1
          fi
          echo "Found report: $REPORT"
          echo "--- Report Contents ---"
          cat "$REPORT"
          echo "--- End Report ---"
          # Normalise the file before matching: drop a UTF-8 BOM and CRLF line endings.
          sed -i '1s/^\xEF\xBB\xBF//' "$REPORT"
          sed -i 's/\r$//' "$REPORT"
          if grep -qiE "^##\s*STATUS:\s*SUCCESS" "$REPORT"; then
            echo "::notice::Tutorial evaluation PASSED"
            exit 0
          elif grep -qiE "^##\s*STATUS:\s*FAILURE" "$REPORT"; then
            echo "::error::Tutorial evaluation FAILED"
            exit 1
          else
            echo "::error::No STATUS found in report.md"
            echo "First 5 lines of report:"
            # cat -A makes invisible characters visible for debugging.
            head -5 "$REPORT" | cat -A
            exit 1
          fi

      - name: Upload evaluation results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          # Suffix is the PR number for PR events, otherwise the run id.
          name: evaluation-results-${{ matrix.tutorial }}-${{ github.event.pull_request.number || github.run_id }}
          path: |
            ${{ matrix.learning_path }}/evaluation-*/
            ${{ matrix.learning_path }}/tutorial-docs/
            ${{ matrix.learning_path }}/prompt.md

  # Opens a summary issue when any `evaluate` job failed. Skipped for PR runs.
  create-issue-on-failure:
    needs: evaluate
    runs-on: ubuntu-latest
    if: failure() && github.event_name != 'pull_request_target'
    # The script lists this run's jobs and artifacts and creates an issue.
    permissions:
      actions: read
      issues: write
    steps:
      - name: Create GitHub Issue
        uses: actions/github-script@v7
        with:
          script: |
            const { owner, repo } = context.repo;
            const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;
            const artifactsUrl = `${runUrl}#artifacts`;
            const today = new Date().toISOString().split('T')[0];

            // 1. Get all jobs for this run to analyze failures and retries.
            //    `filter: 'latest'` returns only the latest attempt of each job if
            //    the run was re-run from the UI. Paginated so a larger matrix is
            //    never truncated to the first page.
            const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, {
              owner,
              repo,
              run_id: context.runId,
              filter: 'latest',
              per_page: 100,
            });

            // 2. Keep only failed 'evaluate' jobs.
            //    The job name format is "evaluate ({tutorial}, ...)" due to the matrix.
            const failedJobs = jobs.filter(
              (job) => job.name.startsWith('evaluate') && job.conclusion === 'failure',
            );
            if (failedJobs.length === 0) {
              console.log("No failed jobs found (or they are not 'evaluate' jobs). Skipping issue creation.");
              return;
            }

            // 3. Get artifacts so each tutorial can link to its own results.
            const artifacts = await github.paginate(github.rest.actions.listWorkflowRunArtifacts, {
              owner,
              repo,
              run_id: context.runId,
              per_page: 100,
            });

            // Artifact name pattern: evaluation-results-{tutorial}-{run_id}.
            // Links to the artifact's page in the GitHub UI; falls back to the run's
            // generic artifacts anchor when no artifact was uploaded for the tutorial.
            const findArtifactUrl = (tutorialName) => {
              const artifact = artifacts.find((a) =>
                a.name.startsWith(`evaluation-results-${tutorialName}-`));
              return artifact ? `${runUrl}/artifacts/${artifact.id}` : artifactsUrl;
            };

            // The full step name is matched on purpose: the "Clean up before retry
            // (attempt N)" steps also contain "attempt N" and come earlier in the
            // job, so a bare substring match would pick up the cleanup step instead.
            const findAttemptStep = (job, number) =>
              (job.steps ?? []).find((s) => s.name.includes(`Run evaluation (attempt ${number})`));

            // 4. Analyze each failed job to build the report.
            let reportBody = `## Scheduled Tutorial Evaluation Failed\n\n**Run ID**: [${context.runId}](${runUrl})\n**Date**: ${today}\n\n`;

            for (const job of failedJobs) {
              // Matrix job names look like:
              //   "evaluate (getting-started, .devcontainer/devcontainer.json, ...)"
              // Only the first entry, the tutorial name, is wanted.
              const nameMatch = job.name.match(/\(([^,]+)/);
              const tutorialName = nameMatch ? nameMatch[1].trim() : "unknown";
              reportBody += `### ❌ Tutorial: ${tutorialName}\n`;

              // Attempt 1 is reported whenever its step exists; attempts 2 and 3
              // are reported only when they were not skipped.
              const attempts = [1, 2, 3]
                .map((number) => ({ number, step: findAttemptStep(job, number) }))
                .filter(({ number, step }) => step && (number === 1 || step.conclusion !== 'skipped'));
              const attemptsCount = attempts.length > 0 ? attempts[attempts.length - 1].number : 0;

              reportBody += `**Status**: Failed after ${attemptsCount} attempt(s).\n`;
              reportBody += `**Artifacts**: [Download Results](${findArtifactUrl(tutorialName)})\n\n`;
              reportBody += `<details><summary>Attempt Details</summary>\n\n`;
              for (const { number, step } of attempts) {
                const conclusion = step.conclusion ? step.conclusion.toUpperCase() : 'UNKNOWN';
                reportBody += `- **Attempt ${number}**: ${conclusion}\n`;
              }
              reportBody += `</details>\n\n---\n`;
            }

            reportBody += `\n*This issue was automatically created by the scheduled tutorial evaluation workflow.*`;

            // 5. Create the issue.
            await github.rest.issues.create({
              owner,
              repo,
              title: `Scheduled tutorial evaluation failed - ${today}`,
              body: reportBody,
              labels: ['tutorial-failure', 'automated'],
            });