🎼 Conductor Health Check #480
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json | |
| # Health monitoring for the Conductor files under .conductor/: runs on a schedule, on demand, and when those files change. | |
| name: 🎼 Conductor Health Check | |
| on: | |
| # Manual runs | |
| workflow_dispatch: | |
| # Every day at 09:00 UTC; the activity check step decides whether a given run reports daily, weekly or monthly | |
| schedule: | |
| - cron: '0 9 * * *' | |
| # Pushes to main/master that touch Conductor files or this workflow | |
| push: | |
| branches: | |
| - main | |
| - master | |
| paths: ['.conductor/**', '.github/workflows/conductor.yml'] | |
| # Pull request updates that touch the same paths | |
| pull_request: | |
| types: | |
| - opened | |
| - synchronize | |
| - reopened | |
| paths: ['.conductor/**', '.github/workflows/conductor.yml'] | |
| # A single shared group: a newly triggered run cancels the run that is still in progress | |
| concurrency: | |
| cancel-in-progress: true | |
| group: conductor-health-check | |
| permissions: | |
| contents: read | |
| issues: write | |
| pull-requests: write | |
| jobs: | |
| health-check: | |
| runs-on: ubuntu-latest | |
| name: System Health Check | |
| # Do not run for bot actors (prevents the workflow from re-triggering itself) | |
| # or for head commits whose message carries a skip-CI marker | |
| if: | | |
| !contains(github.event.head_commit.message, '[skip ci]') && | |
| !contains(github.event.head_commit.message, '[ci skip]') && | |
| github.actor != 'dependabot[bot]' && | |
| github.actor != 'github-actions[bot]' | |
| steps: | |
| - name: Check repository activity | |
| id: activity_check | |
| uses: actions/github-script@v7 | |
| with: | |
| github-token: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| script: | | |
| // Decides whether the remaining steps run and which summary cadence applies. | |
| // Outputs: should_run (boolean), days_inactive (whole days), summary_type (daily, weekly or monthly). | |
| // When a scheduled run is skipped by the 30-minute guard only should_run is set. | |
| const MS_PER_MINUTE = 60 * 1000; | |
| const MS_PER_DAY = 24 * 60 * MS_PER_MINUTE; | |
| const { owner, repo } = context.repo; | |
| const isScheduled = context.eventName === 'schedule'; | |
| const now = new Date(); | |
| // Skip if this is a scheduled run within 30 minutes of the last completed run | |
| if (isScheduled) { | |
| const { data: runs } = await github.rest.actions.listWorkflowRuns({ | |
| owner, | |
| repo, | |
| workflow_id: 'conductor.yml', | |
| status: 'completed', | |
| per_page: 1, | |
| }); | |
| const [lastCompleted] = runs.workflow_runs; | |
| if (lastCompleted) { | |
| const minutesSinceLastRun = (now - new Date(lastCompleted.created_at)) / MS_PER_MINUTE; | |
| if (minutesSinceLastRun < 30) { | |
| console.log(`Skipping - last run was ${minutesSinceLastRun} minutes ago`); | |
| core.setOutput('should_run', false); | |
| return; | |
| } | |
| } | |
| } | |
| // Fetch the most recently updated PR, the newest commit and the most recently updated issue in parallel | |
| const newestFirst = { owner, repo, state: 'all', sort: 'updated', direction: 'desc', per_page: 1 }; | |
| const [pulls, pushes, issues] = await Promise.all([ | |
| github.rest.pulls.list({ ...newestFirst }), | |
| github.rest.repos.listCommits({ owner, repo, per_page: 1 }), | |
| github.rest.issues.listForRepo({ ...newestFirst }), | |
| ]); | |
| // The committer entry of a commit may be missing, so fall back to the author date instead of throwing | |
| const newestCommit = pushes.data[0]?.commit; | |
| const timestamps = [ | |
| pulls.data[0]?.updated_at, | |
| newestCommit?.committer?.date ?? newestCommit?.author?.date, | |
| issues.data[0]?.updated_at, | |
| ]; | |
| // Keep the latest timestamp; the epoch stands for "no activity found". | |
| // An unparseable value yields an Invalid Date, which never compares greater and is therefore ignored. | |
| let lastActivity = new Date(0); | |
| for (const stamp of timestamps) { | |
| if (!stamp) continue; | |
| const when = new Date(stamp); | |
| if (when > lastActivity) lastActivity = when; | |
| } | |
| const daysSinceActivity = Math.floor((now - lastActivity) / MS_PER_DAY); | |
| // Push, pull request and manual runs always proceed with a daily summary; only scheduled runs are throttled | |
| let shouldRun = true; | |
| let summaryType = 'daily'; | |
| if (isScheduled) { | |
| // UTC accessors are used because the cron schedule is defined in UTC | |
| if (daysSinceActivity > 14) { | |
| // Monthly summary if no activity for 2+ weeks: only on the 1st of the month | |
| shouldRun = now.getUTCDate() === 1; | |
| summaryType = 'monthly'; | |
| } else if (daysSinceActivity > 3) { | |
| // Weekly summary if no activity for 3+ days: only on Mondays | |
| shouldRun = now.getUTCDay() === 1; | |
| summaryType = 'weekly'; | |
| } | |
| } | |
| console.log(`Event: ${context.eventName}`); | |
| console.log(`Days since last activity: ${daysSinceActivity}`); | |
| console.log(`Should run: ${shouldRun}`); | |
| console.log(`Summary type: ${summaryType}`); | |
| core.setOutput('should_run', shouldRun); | |
| core.setOutput('days_inactive', daysSinceActivity); | |
| core.setOutput('summary_type', summaryType); | |
| # Fetch the repository so the .conductor scripts invoked by later steps are present | |
| - name: Checkout repository | |
| uses: actions/checkout@v4 | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| - name: Setup Python | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| # v5 runs on the Node 20 action runtime, in line with checkout@v4 and github-script@v7 used by this job | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: '3.11' | |
| # Python packages for the .conductor scripts | |
| # NOTE(review): "jq" on PyPI is a Python binding, not the jq command-line tool called by the critical-issues step - confirm the scripts actually import it | |
| - name: Install dependencies | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| run: pip install pyyaml jq | |
| - name: Setup GitHub CLI with proper token | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| env: | |
| # Deliberately NOT named GH_TOKEN, which conflicts with gh auth login (see the note in the script) | |
| CONDUCTOR_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: | | |
| # Use CONDUCTOR_GITHUB_TOKEN for all operations | |
| # IMPORTANT: Do not set GH_TOKEN here as it conflicts with gh auth login | |
| # The token reaches gh over stdin from the environment, so it is never expanded into | |
| # the script text and never written to a file that a failed login could leave behind | |
| printf '%s\n' "$CONDUCTOR_TOKEN" | gh auth login --with-token | |
| gh auth status | |
| - name: Ensure required labels exist | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| run: | | |
| # Create labels if they don't exist. Best effort: a failed lookup or create never fails the step. | |
| # Entry format: name|color|description | |
| labels=( | |
| "conductor:task|0e8a16|Tasks for AI agents" | |
| "conductor:status|1d76db|System status tracking" | |
| "conductor:in-progress|fbca04|Task being worked on" | |
| "conductor:blocked|d93f0b|Task is blocked" | |
| "conductor:archived|c5def5|Completed and archived" | |
| "conductor:alert|e11d21|System health alert" | |
| "conductor:init|7057ff|Initialization task for discovery" | |
| "code-review|5319e7|Code review task for pull requests" | |
| "needs-review|fbca04|PR needs code review" | |
| "skip-review|c5def5|Skip AI code review" | |
| "effort:small|76d7c4|Small effort task" | |
| "effort:medium|f39c12|Medium effort task" | |
| "effort:large|e74c3c|Large effort task" | |
| "priority:low|c5def5|Low priority" | |
| "priority:medium|fbca04|Medium priority" | |
| "priority:high|e11d21|High priority" | |
| "priority:critical|b60205|Critical priority - urgent" | |
| "skill:frontend|7057ff|Frontend development" | |
| "skill:backend|008672|Backend development" | |
| "skill:devops|0052cc|DevOps and infrastructure" | |
| "skill:ml|ff6b6b|Machine learning" | |
| ) | |
| # Look the existing names up once instead of once per label. | |
| # --limit is raised because gh label list only returns the first 30 labels by default. | |
| existing_labels=$(gh label list --limit 1000 --json name --jq '.[].name' || true) | |
| for label_info in "${labels[@]}"; do | |
| # Split on pipe characters | |
| IFS='|' read -r name color description <<< "$label_info" | |
| # -F -x: literal, whole-line comparison, so a label is not treated as present | |
| # just because a longer label name starts with the same text | |
| if ! grep -Fxq -- "${name}" <<< "${existing_labels}"; then | |
| gh label create "${name}" --color "${color}" --description "${description}" || true | |
| fi | |
| done | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| # Advisory check: a non-zero exit from the script only prints the message below and the job carries on | |
| - name: Validate configuration | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: python .conductor/scripts/validate-config.py || echo "Config validation completed with warnings" | |
| # Advisory check: a failing dependency script is reported as a warning message rather than failing the job | |
| - name: Check system dependencies | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: python .conductor/scripts/dependency-check.py || echo "Dependency check completed with warnings" | |
| - name: Run health check | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| run: | | |
| # The summary type is read from the environment and quoted instead of being expanded into the script text. | |
| # A non-zero exit from the script only prints the warning message; the job continues. | |
| python .conductor/scripts/health-check.py --summary-type "$SUMMARY_TYPE" || echo "Health check completed with warnings" | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| DAYS_INACTIVE: ${{ steps.activity_check.outputs.days_inactive }} | |
| SUMMARY_TYPE: ${{ steps.activity_check.outputs.summary_type }} | |
| # Runs the status script with --no-comment; a failure is downgraded to a printed warning | |
| - name: Update system status | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: python .conductor/scripts/update-status.py --no-comment || echo "Status update completed with warnings" | |
| - name: Generate status summary | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| run: | | |
| # The script's stdout becomes this step's job summary. Append (>>) and quote the path, as the | |
| # job-summary convention expects. The fallback echo goes to the step log, not to the summary. | |
| python .conductor/scripts/generate-summary.py >> "$GITHUB_STEP_SUMMARY" || echo "Summary generation completed" | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| # Best effort: the cleanup script's exit status never fails the job | |
| - name: Clean up stale work | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: python .conductor/scripts/cleanup-stale.py || echo "Cleanup completed" | |
| # Best effort archival; --max-age 7 is handed to the script unchanged (presumably days - confirm against archive-completed.py) | |
| - name: Archive completed tasks | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| env: | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: python .conductor/scripts/archive-completed.py --max-age 7 || echo "Archival completed" | |
| - name: Check for critical issues | |
| if: steps.activity_check.outputs.should_run == 'true' | |
| id: critical_check | |
| env: | |
| # Supplied once through the step environment instead of being expanded into each command line | |
| GH_TOKEN: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| run: | | |
| # Sets the critical_issues output to 1 when either check below trips, otherwise 0 | |
| CRITICAL_ISSUES=0 | |
| # Check for high number of stale agents (falls back to 0 when the script or jq fails) | |
| STALE_COUNT=$(python .conductor/scripts/health-check.py --json 2>/dev/null | jq -r '.stale_agents // 0' 2>/dev/null || echo "0") | |
| # Only compare when the value is a plain integer, so empty or garbled output cannot make the test itself error out | |
| if [[ "$STALE_COUNT" =~ ^[0-9]+$ ]] && [ "$STALE_COUNT" -gt 3 ]; then | |
| echo "⚠️ High stale agent count: $STALE_COUNT" | |
| CRITICAL_ISSUES=1 | |
| fi | |
| # Check system health score (a missing score, or a failing script, is reported as 0 and therefore counts as low) | |
| HEALTH_SCORE=$(python .conductor/scripts/update-status.py --json 2>/dev/null | jq -r '.health_score // 0' 2>/dev/null || echo "0") | |
| # awk performs the fractional comparison, so the check does not depend on bc being installed; | |
| # output that is not a plain decimal number is skipped rather than compared | |
| if [[ "$HEALTH_SCORE" =~ ^-?[0-9]+([.][0-9]+)?$ ]] && awk -v score="$HEALTH_SCORE" 'BEGIN { exit !(score < 0.5) }'; then | |
| echo "⚠️ Low health score: $HEALTH_SCORE" | |
| CRITICAL_ISSUES=1 | |
| fi | |
| echo "critical_issues=$CRITICAL_ISSUES" >> "$GITHUB_OUTPUT" | |
| - name: Create issue for critical problems | |
| if: steps.activity_check.outputs.should_run == 'true' && steps.critical_check.outputs.critical_issues == '1' | |
| uses: actions/github-script@v7 | |
| with: | |
| github-token: ${{ secrets.CONDUCTOR_GITHUB_TOKEN }} | |
| script: | | |
| // Opens a single 'conductor:alert' issue when the critical check tripped, the repository | |
| // has been active within the last two days, and no alert issue is already open. | |
| const title = '🚨 Conductor System Health Alert'; | |
| const body = ` | |
| ## 🎼 System Health Alert | |
| The automated health check has detected critical issues with the Code Conductor system. | |
| ### Issues Detected | |
| This alert is raised when at least one of the following is detected: | |
| - High number of stale agents | |
| - Low system health score | |
| - Potential system performance degradation | |
| ### Recommended Actions | |
| 1. Review stale agent cleanup: \`python .conductor/scripts/cleanup-stale.py\` | |
| 2. Check system status: \`python .conductor/scripts/update-status.py\` | |
| 3. Validate configuration: \`python .conductor/scripts/validate-config.py\` | |
| 4. Review recent activity logs | |
| ### System Status | |
| Generated by: ${context.workflow} #${context.runNumber} | |
| Timestamp: ${new Date().toISOString()} | |
| _This issue was created automatically by the health monitoring system._ | |
| `; | |
| // Stop alerting after 48h of inactivity. An empty or non-numeric days_inactive output | |
| // parses to NaN, which is rejected explicitly so that no alert is raised on bad input. | |
| const daysInactive = Number.parseInt('${{ steps.activity_check.outputs.days_inactive }}', 10); | |
| const shouldAlert = Number.isInteger(daysInactive) && daysInactive < 2; | |
| if (shouldAlert) { | |
| // Look for an open alert only when one could actually be created; a single result is enough to decide | |
| const { data: openAlerts } = await github.rest.issues.listForRepo({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| labels: 'conductor:alert', | |
| state: 'open', | |
| per_page: 1, | |
| }); | |
| if (openAlerts.length === 0) { | |
| await github.rest.issues.create({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| title, | |
| body, | |
| labels: ['conductor:alert', 'priority:high'], | |
| }); | |
| } | |
| } | |