🎼 Conductor Health Check #509

Workflow file for this run

.github/workflows/conductor.yml at ea7c276

	# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json

	name: 🎼 Conductor Health Check

	# Triggers: a daily schedule, manual dispatch, and pushes / pull requests
	# that touch the conductor configuration or this workflow file itself.
	on:
	  schedule:
	    # 09:00 UTC every day; the activity check step may downgrade this to a
	    # weekly or monthly cadence depending on repository activity.
	    - cron: '0 9 * * *'
	  workflow_dispatch:
	  push:
	    branches:
	      - main
	      - master
	    paths:
	      - '.conductor/**'
	      - '.github/workflows/conductor.yml'
	  pull_request:
	    types:
	      - opened
	      - synchronize
	      - reopened
	    paths:
	      - '.conductor/**'
	      - '.github/workflows/conductor.yml'

	# All runs share one group, so a newly triggered run cancels any run that
	# is still in progress.
	concurrency:
	  group: conductor-health-check
	  cancel-in-progress: true

	# Token scopes requested by this workflow.
	permissions:
	  contents: read
	  issues: write
	  pull-requests: write

	jobs:
	  health-check:
	    name: System Health Check
	    runs-on: ubuntu-latest
	    # Skip runs triggered by the GitHub Actions bot or Dependabot (prevents
	    # recursion) and runs whose head commit message carries a [skip ci] /
	    # [ci skip] marker.
	    # The expression must be a block scalar: two of its lines start with
	    # `!`, which YAML would otherwise read as a tag indicator.
	    if: |
	      github.actor != 'github-actions[bot]' &&
	      github.actor != 'dependabot[bot]' &&
	      !contains(github.event.head_commit.message, '[skip ci]') &&
	      !contains(github.event.head_commit.message, '[ci skip]')

	    steps:
	# Decides whether the remaining steps should run at all.
	# Outputs:
	#   should_run    - 'true' / 'false'; every later step is gated on this
	#   days_inactive - whole days since the most recent PR, commit or issue update
	#   summary_type  - 'daily', 'weekly' or 'monthly'
	- name: Check repository activity
	  id: activity_check
	  uses: actions/github-script@v7
	  with:
	    github-token: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	    script: |
	      const now = new Date();
	      const { owner, repo } = context.repo;

	      // Scheduled runs are skipped when the most recent completed run of
	      // this workflow was created less than 30 minutes ago.
	      if (context.eventName === 'schedule') {
	        const { data: runs } = await github.rest.actions.listWorkflowRuns({
	          owner,
	          repo,
	          workflow_id: 'conductor.yml',
	          status: 'completed',
	          per_page: 1,
	        });

	        if (runs.workflow_runs.length > 0) {
	          const lastRun = new Date(runs.workflow_runs[0].created_at);
	          const minutesSinceLastRun = (now - lastRun) / (1000 * 60);
	          if (minutesSinceLastRun < 30) {
	            console.log(`Skipping - last run was ${minutesSinceLastRun} minutes ago`);
	            core.setOutput('should_run', false);
	            return;
	          }
	        }
	      }

	      // Fetch the single most recently updated PR, commit and issue in parallel.
	      const [pulls, commits, issues] = await Promise.all([
	        github.rest.pulls.list({
	          owner,
	          repo,
	          state: 'all',
	          sort: 'updated',
	          direction: 'desc',
	          per_page: 1,
	        }),
	        github.rest.repos.listCommits({
	          owner,
	          repo,
	          per_page: 1,
	        }),
	        github.rest.issues.listForRepo({
	          owner,
	          repo,
	          state: 'all',
	          sort: 'updated',
	          direction: 'desc',
	          per_page: 1,
	        }),
	      ]);

	      // The committer entry may be absent on a commit; fall back to the
	      // author date instead of throwing on a null dereference.
	      const latestCommit = commits.data[0]?.commit;
	      const activityStamps = [
	        pulls.data[0]?.updated_at,
	        latestCommit?.committer?.date ?? latestCommit?.author?.date,
	        issues.data[0]?.updated_at,
	      ];

	      // Most recent activity across all three sources. It stays at the
	      // epoch when the repository has no PRs, commits or issues.
	      let lastActivity = new Date(0);
	      for (const stamp of activityStamps) {
	        if (!stamp) continue;
	        const when = new Date(stamp);
	        if (when > lastActivity) lastActivity = when;
	      }

	      const daysSinceActivity = Math.floor((now - lastActivity) / (1000 * 60 * 60 * 24));

	      // Only scheduled runs are throttled; manual dispatches, pushes and
	      // pull requests always run as a 'daily' summary.
	      let shouldRun = true;
	      let summaryType = 'daily';

	      if (context.eventName === 'schedule') {
	        // The cron trigger is defined in UTC, so the calendar checks use
	        // the UTC getters rather than the runner's local time zone.
	        if (daysSinceActivity > 14) {
	          // More than 14 days without activity: monthly summary, 1st of the month only
	          shouldRun = now.getUTCDate() === 1;
	          summaryType = 'monthly';
	        } else if (daysSinceActivity > 3) {
	          // More than 3 days without activity: weekly summary, Mondays only
	          shouldRun = now.getUTCDay() === 1;
	          summaryType = 'weekly';
	        }
	      }

	      console.log(`Event: ${context.eventName}`);
	      console.log(`Days since last activity: ${daysSinceActivity}`);
	      console.log(`Should run: ${shouldRun}`);
	      console.log(`Summary type: ${summaryType}`);

	      core.setOutput('should_run', shouldRun);
	      core.setOutput('days_inactive', daysSinceActivity);
	      core.setOutput('summary_type', summaryType);

	# Fetch the repository so the .conductor scripts are available to later steps.
	- name: Checkout repository
	  uses: actions/checkout@v4
	  if: steps.activity_check.outputs.should_run == 'true'

	# Provide the Python interpreter used by every .conductor script below.
	# v5 replaces v4, which runs on the deprecated Node 16 action runtime; the
	# `python-version` input is unchanged between the two majors.
	- name: Setup Python
	  if: steps.activity_check.outputs.should_run == 'true'
	  uses: actions/setup-python@v5
	  with:
	    python-version: '3.11'

	# Install the Python packages needed by the conductor scripts.
	# `python -m pip` guarantees the packages land in the interpreter selected
	# by the Setup Python step rather than whichever `pip` is first on PATH.
	# NOTE(review): the PyPI package `jq` appears to be the Python binding, not
	# the `jq` command-line tool that "Check for critical issues" pipes into.
	# Confirm the scripts import it; otherwise it can be dropped.
	- name: Install dependencies
	  if: steps.activity_check.outputs.should_run == 'true'
	  run: |
	    python -m pip install pyyaml jq

	# Authenticate the GitHub CLI with CONDUCTOR_GITHUB_TOKEN.
	# The token reaches the script through the environment and is piped to gh
	# on stdin, so it is never written to the workspace (the previous
	# token.txt would have been left on disk if `gh auth login` failed) and is
	# not expanded into the script text.
	- name: Setup GitHub CLI with proper token
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    # IMPORTANT: deliberately not named GH_TOKEN, which conflicts with gh auth login
	    CONDUCTOR_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    # Use CONDUCTOR_GITHUB_TOKEN for all operations
	    printf '%s\n' "$CONDUCTOR_TOKEN" | gh auth login --with-token
	    gh auth status

	# Create every label the conductor system relies on, leaving existing
	# labels untouched. Each entry is "name|hex colour|description".
	- name: Ensure required labels exist
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    labels=(
	      "conductor:task|0e8a16|Tasks for AI agents"
	      "conductor:status|1d76db|System status tracking"
	      "conductor:in-progress|fbca04|Task being worked on"
	      "conductor:blocked|d93f0b|Task is blocked"
	      "conductor:archived|c5def5|Completed and archived"
	      "conductor:alert|e11d21|System health alert"
	      "conductor:init|7057ff|Initialization task for discovery"
	      "code-review|5319e7|Code review task for pull requests"
	      "needs-review|fbca04|PR needs code review"
	      "skip-review|c5def5|Skip AI code review"
	      "effort:small|76d7c4|Small effort task"
	      "effort:medium|f39c12|Medium effort task"
	      "effort:large|e74c3c|Large effort task"
	      "priority:low|c5def5|Low priority"
	      "priority:medium|fbca04|Medium priority"
	      "priority:high|e11d21|High priority"
	      "priority:critical|b60205|Critical priority - urgent"
	      "skill:frontend|7057ff|Frontend development"
	      "skill:backend|008672|Backend development"
	      "skill:devops|0052cc|DevOps and infrastructure"
	      "skill:ml|ff6b6b|Machine learning"
	    )

	    # List the existing label names once instead of once per label, with
	    # an explicit limit so the default page size cannot hide labels.
	    # A failed listing degrades to "nothing exists yet" (best effort).
	    existing_labels="$(gh label list --limit 1000 --json name --jq '.[].name' || true)"

	    for label_info in "${labels[@]}"; do
	      # Split on pipe characters
	      IFS='|' read -r name color description <<< "$label_info"

	      # Exact, whole-line, fixed-string match: a prefix match would treat
	      # e.g. "code-review-bot" as if it were "code-review".
	      if grep -Fxq -- "$name" <<< "$existing_labels"; then
	        continue
	      fi

	      # Label creation stays best effort, but a failure is reported
	      # instead of being swallowed.
	      gh label create "$name" --color "$color" --description "$description" \
	        || echo "::warning::Could not create label '${name}'"
	    done

	# Run the configuration validator. A non-zero exit is reported as a
	# message rather than failing the job (deliberate best effort).
	- name: Validate configuration
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    python .conductor/scripts/validate-config.py || echo "Config validation completed with warnings"

	# Run the dependency checker with the same best-effort failure handling.
	- name: Check system dependencies
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    python .conductor/scripts/dependency-check.py || echo "Dependency check completed with warnings"

	# Run the health check for the cadence chosen by the activity check step.
	# The summary type is handed over through the environment and quoted, so
	# the step output is never expanded directly into the shell script text.
	- name: Run health check
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	    DAYS_INACTIVE: ${{ steps.activity_check.outputs.days_inactive }}
	    SUMMARY_TYPE: ${{ steps.activity_check.outputs.summary_type }}
	  run: |
	    python .conductor/scripts/health-check.py --summary-type "$SUMMARY_TYPE" || echo "Health check completed with warnings"

	# Refresh the system status via update-status.py, passing --no-comment.
	# A non-zero exit is reported as a message rather than failing the job.
	- name: Update system status
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    python .conductor/scripts/update-status.py --no-comment || echo "Status update completed with warnings"

	# Write the generated summary into the step summary file of this run.
	# The redirect target is quoted so the path cannot be word-split.
	- name: Generate status summary
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    python .conductor/scripts/generate-summary.py > "$GITHUB_STEP_SUMMARY" || echo "Summary generation completed"

	# Run the stale-work cleanup script; failures do not fail the job.
	- name: Clean up stale work
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    python .conductor/scripts/cleanup-stale.py || echo "Cleanup completed"

	# Run the archive script with --max-age 7; failures do not fail the job.
	- name: Archive completed tasks
	  if: steps.activity_check.outputs.should_run == 'true'
	  env:
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    python .conductor/scripts/archive-completed.py --max-age 7 || echo "Archival completed"

	# Flags the run as critical when more than 3 stale agents are reported or
	# the health score is below 0.5.
	# Output: critical_issues - '1' when either condition holds, else '0'.
	- name: Check for critical issues
	  if: steps.activity_check.outputs.should_run == 'true'
	  id: critical_check
	  env:
	    # Supplied through the environment instead of being expanded into the
	    # script text in front of each command.
	    GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	  run: |
	    # Check if there are any critical system issues
	    CRITICAL_ISSUES=0

	    # Check for high number of stale agents
	    STALE_COUNT=$(python .conductor/scripts/health-check.py --json 2>/dev/null | jq -r '.stale_agents // 0' 2>/dev/null || echo "0")
	    # jq prints nothing (and still succeeds) when the script produced no
	    # output; an empty or non-integer value would make `-gt` error out.
	    [[ "$STALE_COUNT" =~ ^[0-9]+$ ]] || STALE_COUNT=0
	    if [ "$STALE_COUNT" -gt 3 ]; then
	      echo "⚠️ High stale agent count: $STALE_COUNT"
	      CRITICAL_ISSUES=1
	    fi

	    # Check system health score
	    HEALTH_SCORE=$(python .conductor/scripts/update-status.py --json 2>/dev/null | jq -r '.health_score // 0' 2>/dev/null || echo "0")
	    # bc prints 1 when the comparison holds; any bc failure counts as "not low".
	    if (( $(echo "$HEALTH_SCORE < 0.5" | bc -l 2>/dev/null || echo 0) )); then
	      echo "⚠️ Low health score: $HEALTH_SCORE"
	      CRITICAL_ISSUES=1
	    fi

	    echo "critical_issues=$CRITICAL_ISSUES" >> "$GITHUB_OUTPUT"

	# Opens a 'conductor:alert' issue when the critical check flagged a
	# problem, unless an open alert already exists or the repository has been
	# inactive for two or more days.
	- name: Create issue for critical problems
	  if: steps.activity_check.outputs.should_run == 'true' && steps.critical_check.outputs.critical_issues == '1'
	  uses: actions/github-script@v7
	  with:
	    github-token: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }}
	    script: |
	      const { owner, repo } = context.repo;

	      // Stop alerting after 48h of inactivity. A value that fails to
	      // parse yields NaN, which compares false and therefore never alerts.
	      const daysInactive = Number.parseInt('${{ steps.activity_check.outputs.days_inactive }}', 10);
	      if (!(daysInactive < 2)) {
	        return;
	      }

	      // Check if similar issue already exists
	      const existingIssues = await github.rest.issues.listForRepo({
	        owner,
	        repo,
	        labels: 'conductor:alert',
	        state: 'open',
	      });
	      if (existingIssues.data.length > 0) {
	        return;
	      }

	      const title = '🚨 Conductor System Health Alert';

	      // Built line by line so the rendered Markdown does not depend on how
	      // this script happens to be indented inside the workflow file.
	      const body = [
	        '## 🎼 System Health Alert',
	        '',
	        'The automated health check has detected critical issues with the Code Conductor system.',
	        '',
	        '### Issues Detected',
	        '- High number of stale agents',
	        '- Low system health score',
	        '- Potential system performance degradation',
	        '',
	        '### Recommended Actions',
	        '1. Review stale agent cleanup: `python .conductor/scripts/cleanup-stale.py`',
	        '2. Check system status: `python .conductor/scripts/update-status.py`',
	        '3. Validate configuration: `python .conductor/scripts/validate-config.py`',
	        '4. Review recent activity logs',
	        '',
	        '### System Status',
	        `Generated by: ${context.workflow} #${context.runNumber}`,
	        `Timestamp: ${new Date().toISOString()}`,
	        '',
	        '_This issue was created automatically by the health monitoring system._',
	      ].join('\n');

	      await github.rest.issues.create({
	        owner,
	        repo,
	        title,
	        body,
	        labels: ['conductor:alert', 'priority:high'],
	      });

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

🎼 Conductor Health Check #509

Workflow file

🎼 Conductor Health Check #509

Uh oh!

Workflow file for this run