Skip to content

Scheduled Tutorial Evaluation #30

Scheduled Tutorial Evaluation

Scheduled Tutorial Evaluation #30

# Workflow identity and the three events that start it.
name: Scheduled Tutorial Evaluation
on:
  schedule:
  # Friday 5am PST / 6am PDT (13:00 UTC)
  - cron: '0 13 * * 5'
  # Manual trigger for testing/re-runs
  workflow_dispatch: {}
  # PR evaluation (requires approval)
  # NOTE(review): pull_request_target runs in the base repository's context;
  # the approval gate is presumably enforced by the job's environment
  # protection rules - confirm they are configured.
  pull_request_target:
    types:
    - opened
    - synchronize
    - reopened
jobs:
  # Runs one evaluation job per tutorial listed in the matrix.
  evaluate:
    runs-on: ubuntu-latest
    # Scheduled runs use their own environment; every other trigger
    # (manual dispatch, pull requests) uses 'tutorial-evaluation'.
    environment: ${{ github.event_name == 'schedule' && 'tutorial-evaluation-scheduled' || 'tutorial-evaluation' }}
    # Least privilege: this job checks out and executes pull-request head code
    # under pull_request_target, so the GITHUB_TOKEN is limited to read-only
    # repository access instead of inheriting the repository default.
    permissions:
      contents: read
    strategy:
      # Keep evaluating the remaining tutorials when one of them fails.
      fail-fast: false
      matrix:
        tutorial:
        - getting-started
        - curbside-pickup
        - building-comfort
        - absence-of-change
        - risky-containers
        # Each include entry attaches the per-tutorial paths to the matching
        # `tutorial` value above:
        #   devcontainer  - devcontainer.json handed to devcontainers/ci
        #   docs_path     - tutorial docs directory inside the docs repo checkout
        #   learning_path - tutorial directory inside this repo
        include:
        - tutorial: getting-started
          devcontainer: .devcontainer/devcontainer.json
          docs_path: docs/content/getting-started
          learning_path: tutorial/getting-started
        - tutorial: curbside-pickup
          devcontainer: .devcontainer/curbside-pickup/devcontainer.json
          docs_path: docs/content/tutorials/curbside-pickup
          learning_path: tutorial/curbside-pickup
        - tutorial: building-comfort
          devcontainer: .devcontainer/building-comfort/devcontainer.json
          docs_path: docs/content/tutorials/building-comfort
          learning_path: tutorial/building-comfort
        - tutorial: absence-of-change
          devcontainer: .devcontainer/absence-of-change/devcontainer.json
          docs_path: docs/content/tutorials/absence-of-change
          learning_path: tutorial/absence-of-change
        - tutorial: risky-containers
          devcontainer: .devcontainer/risky-containers/devcontainer.json
          docs_path: docs/content/tutorials/risky-containers
          learning_path: tutorial/risky-containers
steps:
# Check out the code under evaluation: the pull request head commit for PR
# events, otherwise the commit that triggered the run.
- name: Checkout this repo
  uses: actions/checkout@v4
  with:
    ref: ${{ github.event.pull_request.head.sha || github.sha }}
    # Do not leave the GITHUB_TOKEN in .git/config: later steps run an agent
    # with --allow-all-tools/--allow-all-paths over this workspace.
    persist-credentials: false
# Fetch the published tutorial documentation next to the workspace.
- name: Checkout docs repo
  uses: actions/checkout@v4
  with:
    repository: drasi-project/docs
    ref: main
    path: drasi-docs
    persist-credentials: false
# Stage the docs and the evaluation prompt inside the tutorial directory.
# The prompt is taken from the ref checked out above, so on PR events it is
# the pull request's version of the file.
- name: Copy tutorial docs and prompt
  env:
    # Passed through the environment and quoted below rather than being
    # interpolated into the script text.
    LEARNING_PATH: ${{ matrix.learning_path }}
    DOCS_PATH: ${{ matrix.docs_path }}
  run: |
    mkdir -p "${LEARNING_PATH}/tutorial-docs"
    cp -r "drasi-docs/${DOCS_PATH}"/* "${LEARNING_PATH}/tutorial-docs"
    cp .github/prompts/tutorial-evaluation.md "${LEARNING_PATH}/prompt.md"
# First evaluation pass. continue-on-error keeps the job going so the retry
# steps can inspect this step's outcome.
- name: Run evaluation (attempt 1)
  id: attempt1
  uses: devcontainers/ci@v0.3
  continue-on-error: true
  # NOTE(review): presumably the devcontainer configuration forwards these
  # variables into the container - confirm.
  env:
    COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
    DRASI_TUTORIAL_EVALUATION: "true"
  with:
    configFile: ${{ matrix.devcontainer }}
    push: never
    runCmd: |
      # Run the agent for at most 1200 seconds. Its exit status is ignored
      # (|| true); the report written by the run decides pass/fail below.
      timeout 1200 copilot -p "$(cat prompt.md)" \
        --allow-all-tools \
        --allow-all-paths \
        --deny-tool 'fetch' \
        --deny-tool 'websearch' \
        --deny-tool 'githubRepo' \
        --deny-tool 'shell(curl *)' \
        --deny-tool 'shell(wget *)' \
        --deny-tool 'shell(nc *)' \
        --deny-tool 'shell(ssh *)' \
        --deny-tool 'shell(scp *)' \
        --deny-tool 'shell(telnet *)' \
        --deny-tool 'shell(ftp *)' \
        --deny-tool 'shell(rm -rf /*)' \
        --deny-tool 'shell(dd *)' \
        --allow-url localhost \
        --allow-url 127.0.0.1 \
        --model gemini-3-pro-preview || true
      # Pick the report whose path sorts last among evaluation-*/ directories.
      latest_report=$(find . -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
      # No report at all counts as a failed attempt.
      [ -n "$latest_report" ] || exit 1
      # So does a report without a "## STATUS: SUCCESS" heading.
      grep -qiE "^##\s*STATUS:\s*SUCCESS" "$latest_report" || exit 1
# Runs only when attempt 1 failed: clears Docker state before the second attempt.
- name: Clean up before retry (attempt 2)
  if: ${{ steps.attempt1.outcome == 'failure' }}
  run: |
    echo "Attempt 1 failed. Removing all containers and images to force a fresh devcontainer build..."
    # Both commands are best-effort: errors are silenced and never fail the step.
    docker rm -f $(docker ps -aq) 2>/dev/null || true
    docker system prune -af --volumes 2>/dev/null || true
    # Short pause before the next attempt starts.
    sleep 10
# Second evaluation pass, executed only when attempt 1 failed. It uses the same
# command and model as attempt 1 and is likewise allowed to fail so that
# attempt 3 can run.
- name: Run evaluation (attempt 2)
  id: attempt2
  if: ${{ steps.attempt1.outcome == 'failure' }}
  uses: devcontainers/ci@v0.3
  continue-on-error: true
  # NOTE(review): presumably the devcontainer configuration forwards these
  # variables into the container - confirm.
  env:
    COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
    DRASI_TUTORIAL_EVALUATION: "true"
  with:
    configFile: ${{ matrix.devcontainer }}
    push: never
    runCmd: |
      # Run the agent for at most 1200 seconds. Its exit status is ignored
      # (|| true); the report written by the run decides pass/fail below.
      timeout 1200 copilot -p "$(cat prompt.md)" \
        --allow-all-tools \
        --allow-all-paths \
        --deny-tool 'fetch' \
        --deny-tool 'websearch' \
        --deny-tool 'githubRepo' \
        --deny-tool 'shell(curl *)' \
        --deny-tool 'shell(wget *)' \
        --deny-tool 'shell(nc *)' \
        --deny-tool 'shell(ssh *)' \
        --deny-tool 'shell(scp *)' \
        --deny-tool 'shell(telnet *)' \
        --deny-tool 'shell(ftp *)' \
        --deny-tool 'shell(rm -rf /*)' \
        --deny-tool 'shell(dd *)' \
        --allow-url localhost \
        --allow-url 127.0.0.1 \
        --model gemini-3-pro-preview || true
      # Pick the report whose path sorts last among evaluation-*/ directories.
      latest_report=$(find . -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
      # No report at all counts as a failed attempt.
      [ -n "$latest_report" ] || exit 1
      # So does a report without a "## STATUS: SUCCESS" heading.
      grep -qiE "^##\s*STATUS:\s*SUCCESS" "$latest_report" || exit 1
# Runs only when attempt 2 failed: clears Docker state before the final attempt.
- name: Clean up before retry (attempt 3)
  if: ${{ steps.attempt2.outcome == 'failure' }}
  run: |
    echo "Attempt 2 failed. Removing all containers and images to force a fresh devcontainer build..."
    # Both commands are best-effort: errors are silenced and never fail the step.
    docker rm -f $(docker ps -aq) 2>/dev/null || true
    docker system prune -af --volumes 2>/dev/null || true
    # Short pause before the next attempt starts.
    sleep 10
# Final evaluation pass, executed only when attempt 2 failed. Unlike the first
# two attempts it switches to the claude-opus-4.6 model and has no
# continue-on-error, so a failure here fails the step itself.
- name: Run evaluation (attempt 3)
  id: attempt3
  if: ${{ steps.attempt2.outcome == 'failure' }}
  uses: devcontainers/ci@v0.3
  # NOTE(review): presumably the devcontainer configuration forwards these
  # variables into the container - confirm.
  env:
    COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
    DRASI_TUTORIAL_EVALUATION: "true"
  with:
    configFile: ${{ matrix.devcontainer }}
    push: never
    runCmd: |
      # Run the agent for at most 1200 seconds. Its exit status is ignored
      # (|| true); the report written by the run decides pass/fail below.
      timeout 1200 copilot -p "$(cat prompt.md)" \
        --allow-all-tools \
        --allow-all-paths \
        --deny-tool 'fetch' \
        --deny-tool 'websearch' \
        --deny-tool 'githubRepo' \
        --deny-tool 'shell(curl *)' \
        --deny-tool 'shell(wget *)' \
        --deny-tool 'shell(nc *)' \
        --deny-tool 'shell(ssh *)' \
        --deny-tool 'shell(scp *)' \
        --deny-tool 'shell(telnet *)' \
        --deny-tool 'shell(ftp *)' \
        --deny-tool 'shell(rm -rf /*)' \
        --deny-tool 'shell(dd *)' \
        --allow-url localhost \
        --allow-url 127.0.0.1 \
        --model claude-opus-4.6 || true
      # Pick the report whose path sorts last among evaluation-*/ directories.
      latest_report=$(find . -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
      # No report at all counts as a failed attempt.
      [ -n "$latest_report" ] || exit 1
      # So does a report without a "## STATUS: SUCCESS" heading.
      grep -qiE "^##\s*STATUS:\s*SUCCESS" "$latest_report" || exit 1
# Final verdict for the job: locate the report under the tutorial directory,
# print it to the log, and pass only when it declares "## STATUS: SUCCESS".
# Runs regardless of how the attempt steps ended.
- name: Check evaluation status
  if: ${{ always() }}
  run: |
    # The report whose path sorts last among evaluation-*/ directories wins.
    latest_report=$(find ${{ matrix.learning_path }} -name "report.md" -path "*/evaluation-*/*" | sort -r | head -1)
    if [ -z "$latest_report" ]; then
      echo "::error::No report.md found in evaluation directory"
      exit 1
    fi

    echo "Found report: $latest_report"
    echo "--- Report Contents ---"
    cat "$latest_report"
    echo "--- End Report ---"

    # Normalise the file in place before matching: drop a leading UTF-8 BOM
    # and trailing carriage returns.
    sed -i '1s/^\xEF\xBB\xBF//' "$latest_report"
    sed -i 's/\r$//' "$latest_report"

    if grep -qiE "^##\s*STATUS:\s*SUCCESS" "$latest_report"; then
      echo "::notice::Tutorial evaluation PASSED"
      exit 0
    fi

    if grep -qiE "^##\s*STATUS:\s*FAILURE" "$latest_report"; then
      echo "::error::Tutorial evaluation FAILED"
      exit 1
    fi

    # Neither status heading is present: show the first lines with
    # non-printing characters made visible to help diagnose the format.
    echo "::error::No STATUS found in report.md"
    echo "First 5 lines of report:"
    head -5 "$latest_report" | cat -A
    exit 1
# Publish the evaluation output, the staged docs, and the prompt as a run
# artifact, even when earlier steps failed. The artifact name ends with the
# pull request number when there is one, otherwise with the run id.
- name: Upload evaluation results
  if: ${{ always() }}
  uses: actions/upload-artifact@v4
  with:
    name: evaluation-results-${{ matrix.tutorial }}-${{ github.event.pull_request.number || github.run_id }}
    path: |
      ${{ matrix.learning_path }}/evaluation-*/
      ${{ matrix.learning_path }}/tutorial-docs/
      ${{ matrix.learning_path }}/prompt.md
# Opens a GitHub issue summarising every failed tutorial evaluation of this run.
# Skipped for pull_request_target runs.
create-issue-on-failure:
  needs: evaluate
  runs-on: ubuntu-latest
  if: failure() && github.event_name != 'pull_request_target'
  # The script only lists this run's jobs/artifacts and creates an issue.
  permissions:
    actions: read
    issues: write
  steps:
  - name: Create GitHub Issue
    uses: actions/github-script@v7
    with:
      script: |
        const { owner, repo } = context.repo;
        const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;
        const artifactsUrl = `${runUrl}#artifacts`;
        const today = new Date().toISOString().split('T')[0];

        // 1. Get all jobs for this run to analyze failures and retries.
        const { data: { jobs } } = await github.rest.actions.listJobsForWorkflowRun({
          owner,
          repo,
          run_id: context.runId,
          filter: 'latest', // latest attempt of each job if the run was re-run from the UI
          per_page: 100,
        });

        // 2. Keep only failed matrix jobs; their names start with "evaluate".
        const failedJobs = jobs.filter(
          job => job.name.startsWith('evaluate') && job.conclusion === 'failure'
        );
        if (failedJobs.length === 0) {
          console.log("No failed jobs found (or they are not 'evaluate' jobs). Skipping issue creation.");
          return;
        }

        // 3. Get artifacts so each tutorial can link to its own results.
        const { data: { artifacts } } = await github.rest.actions.listWorkflowRunArtifacts({
          owner,
          repo,
          run_id: context.runId,
          per_page: 100,
        });

        // Artifact name pattern: evaluation-results-{tutorial}-{run_id}.
        // Link to the artifact's page under the run (a web UI URL, not an
        // API download link); fall back to the run's artifacts section when
        // no artifact matches the tutorial.
        const findArtifactUrl = (tutorialName) => {
          const artifact = artifacts.find(
            a => a.name.startsWith(`evaluation-results-${tutorialName}-`)
          );
          return artifact ? `${runUrl}/artifacts/${artifact.id}` : artifactsUrl;
        };

        const conclusionLabel = (step) =>
          step.conclusion ? step.conclusion.toUpperCase() : 'UNKNOWN';

        // 4. Analyze each failed job to build the report.
        let reportBody = `## Scheduled Tutorial Evaluation Failed\n\n**Run ID**: [${context.runId}](${runUrl})\n**Date**: ${today}\n\n`;
        for (const job of failedJobs) {
          // Matrix job names look like "evaluate (getting-started, ...)" or
          // "evaluate (getting-started)". Stop at the first comma OR closing
          // parenthesis so a single-value name does not capture the ")".
          const nameMatch = job.name.match(/\(([^,)]+)/);
          const tutorialName = nameMatch ? nameMatch[1].trim() : "unknown";
          reportBody += `### ❌ Tutorial: ${tutorialName}\n`;

          // Match the evaluation steps by exact name. A substring match on
          // "attempt 2"/"attempt 3" would hit the earlier
          // "Clean up before retry (attempt N)" steps instead.
          const steps = job.steps || [];
          const attempts = [1, 2, 3]
            .map(n => ({ n, step: steps.find(s => s.name === `Run evaluation (attempt ${n})`) }))
            // Attempt 1 is listed whenever it exists; retries only if they ran.
            .filter(({ n, step }) => step && (n === 1 || step.conclusion !== 'skipped'));
          const attemptsCount = attempts.length ? attempts[attempts.length - 1].n : 0;

          reportBody += `**Status**: Failed after ${attemptsCount} attempt(s).\n`;
          reportBody += `**Artifacts**: [Download Results](${findArtifactUrl(tutorialName)})\n\n`;
          reportBody += `<details><summary>Attempt Details</summary>\n\n`;
          for (const { n, step } of attempts) {
            reportBody += `- **Attempt ${n}**: ${conclusionLabel(step)}\n`;
          }
          reportBody += `</details>\n\n---\n`;
        }
        reportBody += `\n*This issue was automatically created by the scheduled tutorial evaluation workflow.*`;

        // 5. Create the issue.
        await github.rest.issues.create({
          owner,
          repo,
          title: `Scheduled tutorial evaluation failed - ${today}`,
          body: reportBody,
          labels: ['tutorial-failure', 'automated']
        });