Scheduled Tutorial Evaluation #30

Workflow file for this run

.github/workflows/tutorial-evaluation-scheduled.yml at b34c3db

	# Workflow identity and triggers. Three entry points: a weekly cron,
	# a manual dispatch, and pull requests (via pull_request_target).
	name: Scheduled Tutorial Evaluation

	on:
	  schedule:
	    - cron: '0 13 * * 5'  # Friday 5am PST / 6am PDT (13:00 UTC)
	  workflow_dispatch: {}  # Manual trigger for testing/re-runs
	  # pull_request_target runs in the context of the base repository, so
	  # secrets are available to the job even for PRs from forks.
	  pull_request_target:  # PR evaluation (requires approval)
	    types: [opened, synchronize, reopened]

	jobs:
	  # One matrix leg per tutorial. Each leg carries the devcontainer config,
	  # the docs folder (inside the docs repo) and the tutorial folder (inside
	  # this repo) that the steps below refer to via `matrix.*`.
	  evaluate:
	    runs-on: ubuntu-latest
	    # Scheduled runs use their own environment; manual and PR runs share
	    # 'tutorial-evaluation'.
	    # NOTE(review): the trigger comment says PR runs require approval;
	    # presumably that is enforced by protection rules on this environment.
	    environment: ${{ github.event_name == 'schedule' && 'tutorial-evaluation-scheduled' || 'tutorial-evaluation' }}
	    # Least privilege: the steps in this job only read repository contents
	    # with GITHUB_TOKEN (the Copilot CLI is given its own PAT). This matters
	    # because pull_request_target would otherwise hand a PR-triggered run
	    # the repository's default token permissions.
	    permissions:
	      contents: read
	    strategy:
	      # Keep evaluating the other tutorials when one of them fails.
	      fail-fast: false
	      matrix:
	        tutorial:
	          - getting-started
	          - curbside-pickup
	          - building-comfort
	          - absence-of-change
	          - risky-containers
	        # Each entry below extends the matching `tutorial` value with its
	        # paths; no additional matrix legs are created.
	        include:
	          - tutorial: getting-started
	            devcontainer: .devcontainer/devcontainer.json
	            docs_path: docs/content/getting-started
	            learning_path: tutorial/getting-started
	          - tutorial: curbside-pickup
	            devcontainer: .devcontainer/curbside-pickup/devcontainer.json
	            docs_path: docs/content/tutorials/curbside-pickup
	            learning_path: tutorial/curbside-pickup
	          - tutorial: building-comfort
	            devcontainer: .devcontainer/building-comfort/devcontainer.json
	            docs_path: docs/content/tutorials/building-comfort
	            learning_path: tutorial/building-comfort
	          - tutorial: absence-of-change
	            devcontainer: .devcontainer/absence-of-change/devcontainer.json
	            docs_path: docs/content/tutorials/absence-of-change
	            learning_path: tutorial/absence-of-change
	          - tutorial: risky-containers
	            devcontainer: .devcontainer/risky-containers/devcontainer.json
	            docs_path: docs/content/tutorials/risky-containers
	            learning_path: tutorial/risky-containers

	steps:
	- name: Checkout this repo
	uses: actions/checkout@v4
	with:
	ref: ${{ github.event.pull_request.head.sha \|\| github.sha }}

	- name: Checkout docs repo
	uses: actions/checkout@v4
	with:
	repository: drasi-project/docs
	ref: main
	path: drasi-docs

	- name: Copy tutorial docs and prompt
	run: \|
	mkdir -p ${{ matrix.learning_path }}/tutorial-docs
	cp -r drasi-docs/${{ matrix.docs_path }}/* ${{ matrix.learning_path }}/tutorial-docs
	cp .github/prompts/tutorial-evaluation.md ${{ matrix.learning_path }}/prompt.md

	- name: Run evaluation (attempt 1)
	id: attempt1
	continue-on-error: true
	uses: devcontainers/ci@v0.3
	env:
	COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
	DRASI_TUTORIAL_EVALUATION: "true"
	with:
	configFile: ${{ matrix.devcontainer }}
	push: never
	runCmd: \|
	timeout 1200 copilot -p "$(cat prompt.md)" \
	--allow-all-tools \
	--allow-all-paths \
	--deny-tool 'fetch' \
	--deny-tool 'websearch' \
	--deny-tool 'githubRepo' \
	--deny-tool 'shell(curl *)' \
	--deny-tool 'shell(wget *)' \
	--deny-tool 'shell(nc *)' \
	--deny-tool 'shell(ssh *)' \
	--deny-tool 'shell(scp *)' \
	--deny-tool 'shell(telnet *)' \
	--deny-tool 'shell(ftp *)' \
	--deny-tool 'shell(rm -rf /*)' \
	--deny-tool 'shell(dd *)' \
	--allow-url localhost \
	--allow-url 127.0.0.1 \
	--model gemini-3-pro-preview \|\| true
	REPORT=$(find . -name "report.md" -path "/evaluation-/*" \| sort -r \| head -1)
	if [ -z "$REPORT" ] \|\| ! grep -qiE "^##\sSTATUS:\sSUCCESS" "$REPORT"; then
	exit 1
	fi

	- name: Clean up before retry (attempt 2)
	if: steps.attempt1.outcome == 'failure'
	run: \|
	echo "Attempt 1 failed. Removing all containers and images to force a fresh devcontainer build..."
	docker rm -f $(docker ps -aq) 2>/dev/null \|\| true
	docker system prune -af --volumes 2>/dev/null \|\| true
	sleep 10

	- name: Run evaluation (attempt 2)
	id: attempt2
	continue-on-error: true
	if: steps.attempt1.outcome == 'failure'
	uses: devcontainers/ci@v0.3
	env:
	COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
	DRASI_TUTORIAL_EVALUATION: "true"
	with:
	configFile: ${{ matrix.devcontainer }}
	push: never
	runCmd: \|
	timeout 1200 copilot -p "$(cat prompt.md)" \
	--allow-all-tools \
	--allow-all-paths \
	--deny-tool 'fetch' \
	--deny-tool 'websearch' \
	--deny-tool 'githubRepo' \
	--deny-tool 'shell(curl *)' \
	--deny-tool 'shell(wget *)' \
	--deny-tool 'shell(nc *)' \
	--deny-tool 'shell(ssh *)' \
	--deny-tool 'shell(scp *)' \
	--deny-tool 'shell(telnet *)' \
	--deny-tool 'shell(ftp *)' \
	--deny-tool 'shell(rm -rf /*)' \
	--deny-tool 'shell(dd *)' \
	--allow-url localhost \
	--allow-url 127.0.0.1 \
	--model gemini-3-pro-preview \|\| true
	REPORT=$(find . -name "report.md" -path "/evaluation-/*" \| sort -r \| head -1)
	if [ -z "$REPORT" ] \|\| ! grep -qiE "^##\sSTATUS:\sSUCCESS" "$REPORT"; then
	exit 1
	fi

	- name: Clean up before retry (attempt 3)
	if: steps.attempt2.outcome == 'failure'
	run: \|
	echo "Attempt 2 failed. Removing all containers and images to force a fresh devcontainer build..."
	docker rm -f $(docker ps -aq) 2>/dev/null \|\| true
	docker system prune -af --volumes 2>/dev/null \|\| true
	sleep 10

	- name: Run evaluation (attempt 3)
	id: attempt3
	if: steps.attempt2.outcome == 'failure'
	uses: devcontainers/ci@v0.3
	env:
	COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT_TOKEN }}
	DRASI_TUTORIAL_EVALUATION: "true"
	with:
	configFile: ${{ matrix.devcontainer }}
	push: never
	runCmd: \|
	timeout 1200 copilot -p "$(cat prompt.md)" \
	--allow-all-tools \
	--allow-all-paths \
	--deny-tool 'fetch' \
	--deny-tool 'websearch' \
	--deny-tool 'githubRepo' \
	--deny-tool 'shell(curl *)' \
	--deny-tool 'shell(wget *)' \
	--deny-tool 'shell(nc *)' \
	--deny-tool 'shell(ssh *)' \
	--deny-tool 'shell(scp *)' \
	--deny-tool 'shell(telnet *)' \
	--deny-tool 'shell(ftp *)' \
	--deny-tool 'shell(rm -rf /*)' \
	--deny-tool 'shell(dd *)' \
	--allow-url localhost \
	--allow-url 127.0.0.1 \
	--model claude-opus-4.6 \|\| true
	REPORT=$(find . -name "report.md" -path "/evaluation-/*" \| sort -r \| head -1)
	if [ -z "$REPORT" ] \|\| ! grep -qiE "^##\sSTATUS:\sSUCCESS" "$REPORT"; then
	exit 1
	fi

	- name: Check evaluation status
	if: always()
	run: \|
	REPORT=$(find ${{ matrix.learning_path }} -name "report.md" -path "/evaluation-/*" \| sort -r \| head -1)

	if [ -z "$REPORT" ]; then
	echo "::error::No report.md found in evaluation directory"
	exit 1
	fi

	echo "Found report: $REPORT"
	echo "--- Report Contents ---"
	cat "$REPORT"
	echo "--- End Report ---"

	sed -i '1s/^\xEF\xBB\xBF//' "$REPORT"
	sed -i 's/\r$//' "$REPORT"

	if grep -qiE "^##\sSTATUS:\sSUCCESS" "$REPORT"; then
	echo "::notice::Tutorial evaluation PASSED"
	exit 0
	elif grep -qiE "^##\sSTATUS:\sFAILURE" "$REPORT"; then
	echo "::error::Tutorial evaluation FAILED"
	exit 1
	else
	echo "::error::No STATUS found in report.md"
	echo "First 5 lines of report:"
	head -5 "$REPORT" \| cat -A
	exit 1
	fi

	- name: Upload evaluation results
	uses: actions/upload-artifact@v4
	if: always()
	with:
	name: evaluation-results-${{ matrix.tutorial }}-${{ github.event.pull_request.number \|\| github.run_id }}
	path: \|
	${{ matrix.learning_path }}/evaluation-*/
	${{ matrix.learning_path }}/tutorial-docs/
	${{ matrix.learning_path }}/prompt.md

	create-issue-on-failure:
	needs: evaluate
	runs-on: ubuntu-latest
	if: failure() && github.event_name != 'pull_request_target'
	steps:
	- name: Create GitHub Issue
	uses: actions/github-script@v7
	with:
	script: \|
	const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
	const artifactsUrl = `${runUrl}#artifacts`;

	// 1. Get all jobs for this run to analyze failures and retries
	const jobs = await github.rest.actions.listJobsForWorkflowRun({
	owner: context.repo.owner,
	repo: context.repo.repo,
	run_id: context.runId,
	filter: 'latest', // get the latest attempt of the job if it was re-run from UI
	});

	// 2. Filter for failed 'evaluate' jobs
	// The job name format is "evaluate ({tutorial}, ...)" due to matrix
	const evalJobs = jobs.data.jobs.filter(job => job.name.startsWith('evaluate'));
	const failedJobs = evalJobs.filter(job => job.conclusion === 'failure');

	if (failedJobs.length === 0) {
	console.log("No failed jobs found (or they are not 'evaluate' jobs). Skipping issue creation.");
	return;
	}

	// 3. Get artifacts to find download links
	const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
	owner: context.repo.owner,
	repo: context.repo.repo,
	run_id: context.runId,
	});

	// Helper to find artifact URL for a tutorial
	const findArtifactUrl = (tutorialName) => {
	// Artifact name pattern: evaluation-results-{tutorial}-{run_id}
	// We match somewhat loosely to be robust
	const artifact = artifacts.data.artifacts.find(a => a.name.includes(tutorialName));
	// We return the generic artifacts page anchor if specific one not found,
	// as direct download links often expire or require API auth headers not suitable for markdown.
	// GitHub UI link is safer:
	return artifact ? artifactsUrl : artifactsUrl;
	};

	// 4. Analyze each failed job to build the report
	let reportBody = `## Scheduled Tutorial Evaluation Failed\n\nRun ID: [${context.runId}](${runUrl})\nDate: ${new Date().toISOString().split('T')[0]}\n\n`;

	for (const job of failedJobs) {
	// Extract tutorial name
	// Matrix job names look like: "evaluate (getting-started, .devcontainer/devcontainer.json, ...)"
	// We just want the first part "getting-started"
	const nameMatch = job.name.match(/\(([^,]+)/);
	const tutorialName = nameMatch ? nameMatch[1].trim() : "unknown";

	reportBody += `### ❌ Tutorial: ${tutorialName}\n`;

	// Analyze steps to determine retry count
	// Steps are: "Run evaluation (attempt 1)", "Run evaluation (attempt 2)", "Run evaluation (attempt 3)"
	const attempt1 = job.steps.find(s => s.name.includes('attempt 1'));
	const attempt2 = job.steps.find(s => s.name.includes('attempt 2'));
	const attempt3 = job.steps.find(s => s.name.includes('attempt 3'));

	let attemptsCount = 0;
	let lastStatus = "Unknown";

	if (attempt1) {
	attemptsCount = 1;
	lastStatus = attempt1.conclusion;
	}
	if (attempt2 && (attempt2.conclusion !== 'skipped')) {
	attemptsCount = 2;
	lastStatus = attempt2.conclusion;
	}
	if (attempt3 && (attempt3.conclusion !== 'skipped')) {
	attemptsCount = 3;
	lastStatus = attempt3.conclusion;
	}

	reportBody += `Status: Failed after ${attemptsCount} attempt(s).\n`;
	reportBody += `Artifacts: [Download Results](${findArtifactUrl(tutorialName)})\n\n`;

	reportBody += `<details><summary>Attempt Details</summary>\n\n`;
	if (attempt1) reportBody += `- Attempt 1: ${attempt1.conclusion ? attempt1.conclusion.toUpperCase() : 'UNKNOWN'}\n`;
	if (attempt2 && attempt2.conclusion !== 'skipped') reportBody += `- Attempt 2: ${attempt2.conclusion ? attempt2.conclusion.toUpperCase() : 'UNKNOWN'}\n`;
	if (attempt3 && attempt3.conclusion !== 'skipped') reportBody += `- Attempt 3: ${attempt3.conclusion ? attempt3.conclusion.toUpperCase() : 'UNKNOWN'}\n`;
	reportBody += `</details>\n\n---\n`;
	}

	reportBody += `\nThis issue was automatically created by the scheduled tutorial evaluation workflow.`;

	// 5. Create the issue
	await github.rest.issues.create({
	owner: context.repo.owner,
	repo: context.repo.repo,
	title: `Scheduled tutorial evaluation failed - ${new Date().toISOString().split('T')[0]}`,
	body: reportBody,
	labels: ['tutorial-failure', 'automated']
	});

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Scheduled Tutorial Evaluation #30

Workflow file

Scheduled Tutorial Evaluation #30

Uh oh!

Workflow file for this run