Update CI Dashboard Data #602

Workflow file for this run

name: Update CI Dashboard Data
on:
schedule:
# Run every 3 hours
- cron: '0 */3 * * *'
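# Note: GitHub Actions cron is evaluated in UTC, and scheduled runs can
# start several minutes late when runners are under load.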
workflow_dispatch:
# Manual trigger (for "Refresh Now" button)
inputs:
reason:
description: 'Reason for manual refresh'
required: false
default: 'Manual refresh'
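# NOTE: 'reason' is informational only; no step below reads inputs.reason.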
jobs:
update-data:
runs-on: ubuntu-latest
outputs:
new_failures: ${{ steps.process.outputs.new_failures }}
notifications: ${{ steps.process.outputs.notifications }}
steps:
- name: Checkout dashboard repo
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
- name: Install dependencies
run: npm install
- name: Load config
run: |
# Using the repo-local config.yaml for now; print it for the run log
echo "Using local config.yaml"
cat config.yaml
- name: Fetch workflow runs and jobs
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Fetch recent nightly workflow runs (last 10 days)
echo "Fetching nightly workflow runs..."
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/kata-containers/kata-containers/actions/workflows/ci-nightly.yaml/runs?created=>$(date -d '10 days ago' +%Y-%m-%d)" \
--jq '.workflow_runs' | jq -s 'add // []' > nightly-runs.json
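# Illustrative record shape in nightly-runs.json (fields from the Actions
# REST API workflow-run object; trimmed, values are placeholders):
# [ { "id": 123456789, "run_attempt": 1,
#     "created_at": "2024-01-01T00:00:00Z", "conclusion": "failure", ... } ]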
echo "Found $(jq 'length' nightly-runs.json) nightly runs"
# For each nightly run, fetch ALL jobs (with pagination)
echo "Fetching jobs for each run..."
echo '[]' > all-jobs.json
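# Cap at the 15 most recent runs (.[0:15]) to bound API calls; the
# 10-day window above can return more than that.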
for run_id in $(jq -r '.[0:15] | .[].id' nightly-runs.json); do
echo "Fetching jobs for run $run_id..."
# Use filter=all to get jobs from ALL attempts (not just latest)
# This is critical for tracking flaky tests that fail then pass on retry
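# With filter=all the same job name can appear once per attempt, e.g.
# (illustrative): attempt 1 -> conclusion "failure", attempt 2 -> "success".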
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/kata-containers/kata-containers/actions/runs/$run_id/jobs?per_page=100&filter=all" \
--jq '.jobs[]' | \
jq -s --arg run_id "$run_id" '[.[] | . + {workflow_run_id: $run_id}]' > run-jobs.json
echo " Found $(jq 'length' run-jobs.json) jobs"
# Merge
jq -s 'add' all-jobs.json run-jobs.json > temp-jobs.json
mv temp-jobs.json all-jobs.json
done
# Wrap the merged job array in the {"jobs": [...]} envelope used downstream
echo '{"jobs":' > raw-runs.json
cat all-jobs.json >> raw-runs.json
echo '}' >> raw-runs.json
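# Resulting raw-runs.json (illustrative; note workflow_run_id is a string
# because jq --arg always binds string values):
# {"jobs": [ {"id": 1, "name": "run-k8s-tests", "conclusion": "failure",
#             "workflow_run_id": "123456789", ...}, ... ]}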
echo "Fetched $(jq '.jobs | length' raw-runs.json) jobs total"
# Show found jobs
echo "Jobs found:"
jq '.jobs[0:30] | .[] | {name: .name, conclusion: .conclusion, started_at: .started_at}' raw-runs.json
# Fetch logs for ALL failed jobs to extract test failure details
echo ""
echo "Fetching logs for failed jobs..."
mkdir -p job-logs
failed_count=$(jq -r '.jobs[] | select(.conclusion == "failure") | .id' raw-runs.json | wc -l)
echo "Found $failed_count failed jobs to fetch logs for"
for job_id in $(jq -r '.jobs[] | select(.conclusion == "failure") | .id' raw-runs.json); do
echo "Fetching logs for job $job_id..."
# GitHub logs API returns a 302 redirect to a signed URL
# Use curl with -L to follow redirects and get the actual log content
curl -sL \
-H "Authorization: token $GH_TOKEN" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/kata-containers/kata-containers/actions/jobs/$job_id/logs" \
-o "job-logs/$job_id.log" 2>&1
# Check if we got actual log content (not an error message)
if [ -f "job-logs/$job_id.log" ]; then
size=$(wc -c < "job-logs/$job_id.log")
echo " Log file size: $size bytes"
# Check if it's actually log content (should be > 1KB and contain common log patterns)
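# Typical bats/TAP lines matched below (test names illustrative):
#   ok 1 Kata runtime boots
#   not ok 2 Pod networking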
if [ "$size" -lt 1000 ]; then
echo " ⚠️ WARNING: Log file seems too small, might be an error response"
echo " Content preview:"
head -3 "job-logs/$job_id.log" | sed 's/^/ /'
elif ! grep -q "not ok\|ok \|TAP\|bats\|Running" "job-logs/$job_id.log" 2>/dev/null; then
echo " ⚠️ WARNING: Log doesn't contain expected TAP/bats output patterns"
echo " First 10 lines:"
head -10 "job-logs/$job_id.log" | sed 's/^/ /'
else
echo " ✓ Log appears valid (contains TAP/bats patterns)"
# Count "not ok" lines for quick verification
# grep -c prints "0" itself when nothing matches (while exiting 1), so
# use || true; appending `|| echo "0"` would yield a two-line "0\n0" value
not_ok_count=$(grep -c "not ok" "job-logs/$job_id.log" 2>/dev/null || true)
echo " Found $not_ok_count 'not ok' lines"
fi
else
echo " ✗ Failed to create log file"
fi
done
echo "Log files fetched: $(ls job-logs/ 2>/dev/null | wc -l)"
echo "Total log size: $(du -sh job-logs/ 2>/dev/null | cut -f1)"
- name: Fetch s390x nightly workflow runs and jobs
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Fetch s390x nightly workflow runs (last 10 days)
echo "Fetching s390x nightly workflow runs..."
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/kata-containers/kata-containers/actions/workflows/ci-nightly-s390x.yaml/runs?created=>$(date -d '10 days ago' +%Y-%m-%d)" \
--jq '.workflow_runs' | jq -s 'add // []' > s390x-runs.json
echo "Found $(jq 'length' s390x-runs.json) s390x nightly runs"
# Fetch jobs for each run
echo '[]' > s390x-jobs.json
for run_id in $(jq -r '.[0:15] | .[].id' s390x-runs.json); do
echo "Fetching jobs for s390x run $run_id..."
# Use filter=all to get jobs from ALL attempts
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/kata-containers/kata-containers/actions/runs/$run_id/jobs?per_page=100&filter=all" \
--jq '.jobs[]' | \
jq -s --arg run_id "$run_id" '[.[] | . + {workflow_run_id: $run_id, source_workflow: "ci-nightly-s390x"}]' > run-jobs.json
echo " Found $(jq 'length' run-jobs.json) jobs"
jq -s 'add' s390x-jobs.json run-jobs.json > temp-jobs.json
mv temp-jobs.json s390x-jobs.json
done
echo "Fetched $(jq 'length' s390x-jobs.json) s390x jobs total"
# Merge s390x jobs into all-jobs.json
jq -s 'add' all-jobs.json s390x-jobs.json > temp-jobs.json
mv temp-jobs.json all-jobs.json
# Recreate raw-runs.json with merged data
echo '{"jobs":' > raw-runs.json
cat all-jobs.json >> raw-runs.json
echo '}' >> raw-runs.json
echo "Total jobs after s390x merge: $(jq '.jobs | length' raw-runs.json)"
- name: Fetch required tests from gatekeeper
run: |
# Fetch the authoritative list of required tests
curl -sL "https://raw.githubusercontent.com/kata-containers/kata-containers/refs/heads/main/tools/testing/gatekeeper/required-tests.yaml" \
-o required-tests.yaml
echo "Downloaded required-tests.yaml ($(wc -c < required-tests.yaml) bytes)"
- name: Fetch CoCo Charts E2E test data
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "Fetching CoCo Charts E2E workflow runs (scheduled only)..."
# Fetch recent scheduled workflow runs from confidential-containers/charts
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/confidential-containers/charts/actions/workflows/e2e-tests.yaml/runs?event=schedule&created=>$(date -d '10 days ago' +%Y-%m-%d)" \
--jq '.workflow_runs' | jq -s 'add // []' > coco-charts-runs.json
echo "Found $(jq 'length' coco-charts-runs.json) CoCo Charts scheduled runs"
# Fetch jobs for each run
echo '[]' > coco-charts-jobs.json
for run_id in $(jq -r '.[0:15] | .[].id' coco-charts-runs.json); do
echo "Fetching jobs for CoCo Charts run $run_id..."
# Use filter=all to get jobs from ALL attempts
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/confidential-containers/charts/actions/runs/$run_id/jobs?per_page=100&filter=all" \
--jq '.jobs[]' | \
jq -s --arg run_id "$run_id" '[.[] | . + {workflow_run_id: $run_id, source_repo: "confidential-containers/charts"}]' > run-jobs.json
echo " Found $(jq 'length' run-jobs.json) jobs"
jq -s 'add' coco-charts-jobs.json run-jobs.json > temp-jobs.json
mv temp-jobs.json coco-charts-jobs.json
done
echo "Fetched $(jq 'length' coco-charts-jobs.json) CoCo Charts jobs total"
# Fetch logs for failed CoCo Charts jobs
echo ""
echo "Fetching logs for failed CoCo Charts jobs..."
mkdir -p coco-charts-logs
for job_id in $(jq -r '.[] | select(.conclusion == "failure") | .id' coco-charts-jobs.json); do
echo "Fetching logs for CoCo Charts job $job_id..."
curl -sL \
-H "Authorization: token $GH_TOKEN" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/confidential-containers/charts/actions/jobs/$job_id/logs" \
-o "coco-charts-logs/$job_id.log" 2>&1
if [ -f "coco-charts-logs/$job_id.log" ]; then
size=$(wc -c < "coco-charts-logs/$job_id.log")
echo " Log file size: $size bytes"
# Check for Go test FAIL patterns
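# Example line matched (standard `go test` output; test name illustrative):
#   --- FAIL: TestChartInstall (12.34s)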
fail_count=$(grep -c "FAIL:" "coco-charts-logs/$job_id.log" 2>/dev/null || true)  # grep -c prints 0 on no match; || echo would add a second "0"
echo " Found $fail_count 'FAIL:' lines (Go test failures)"
fi
done
echo "CoCo Charts log files fetched: $(ls coco-charts-logs/ 2>/dev/null | wc -l)"
- name: Fetch CoCo Cloud API Adaptor E2E test data
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "Fetching Cloud API Adaptor daily E2E workflow runs (scheduled only)..."
# Fetch recent scheduled workflow runs from confidential-containers/cloud-api-adaptor
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/confidential-containers/cloud-api-adaptor/actions/workflows/daily-e2e-tests.yaml/runs?event=schedule&created=>$(date -d '10 days ago' +%Y-%m-%d)" \
--jq '.workflow_runs' | jq -s 'add // []' > coco-caa-runs.json
echo "Found $(jq 'length' coco-caa-runs.json) CAA scheduled runs"
# Fetch jobs for each run
echo '[]' > coco-caa-jobs.json
for run_id in $(jq -r '.[0:15] | .[].id' coco-caa-runs.json); do
echo "Fetching jobs for CAA run $run_id..."
# Use filter=all to get jobs from ALL attempts
gh api \
-H "Accept: application/vnd.github+json" \
--paginate \
"repos/confidential-containers/cloud-api-adaptor/actions/runs/$run_id/jobs?per_page=100&filter=all" \
--jq '.jobs[]' | \
jq -s --arg run_id "$run_id" '[.[] | . + {workflow_run_id: $run_id, source_repo: "confidential-containers/cloud-api-adaptor"}]' > run-jobs.json
echo " Found $(jq 'length' run-jobs.json) jobs"
jq -s 'add' coco-caa-jobs.json run-jobs.json > temp-jobs.json
mv temp-jobs.json coco-caa-jobs.json
done
echo "Fetched $(jq 'length' coco-caa-jobs.json) CAA jobs total"
# Fetch logs for failed CAA jobs
echo ""
echo "Fetching logs for failed CAA jobs..."
mkdir -p coco-caa-logs
for job_id in $(jq -r '.[] | select(.conclusion == "failure") | .id' coco-caa-jobs.json); do
echo "Fetching logs for CAA job $job_id..."
curl -sL \
-H "Authorization: token $GH_TOKEN" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/confidential-containers/cloud-api-adaptor/actions/jobs/$job_id/logs" \
-o "coco-caa-logs/$job_id.log" 2>&1
if [ -f "coco-caa-logs/$job_id.log" ]; then
size=$(wc -c < "coco-caa-logs/$job_id.log")
echo " Log file size: $size bytes"
# Check for Go test FAIL patterns
fail_count=$(grep -c "FAIL:" "coco-caa-logs/$job_id.log" 2>/dev/null || true)  # grep -c prints 0 itself on no match
echo " Found $fail_count 'FAIL:' lines (Go test failures)"
fi
done
echo "CAA log files fetched: $(ls coco-caa-logs/ 2>/dev/null | wc -l)"
- name: Process data
id: process
run: |
# Process raw data into dashboard format using config
# Also outputs new failures for notifications
node scripts/process-data.js
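# Assumed shape of notifications.json, inferred from the jq filters in the
# notify-slack job below (not from the script itself):
# [ { "type": "new_failure", "section": "...", "test_name": "...",
#     "error": "...", "run_url": "...",
#     "maintainer_contacts": [ { "slack_id": "U...", "workspace": "cncf" } ] },
#   { "type": "recovery", "section": "...", "maintainer_contacts": [ ... ] } ]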
# Check if there are new failures to notify about
if [ -f notifications.json ]; then
echo "new_failures=true" >> $GITHUB_OUTPUT
echo "notifications=$(cat notifications.json | jq -c)" >> $GITHUB_OUTPUT
else
echo "new_failures=false" >> $GITHUB_OUTPUT
fi
- name: Commit updated data
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git add data.json
if git diff --staged --quiet; then
echo "No changes to commit"
else
git commit -m "Update dashboard data [$(date -u +%Y-%m-%dT%H:%M:%SZ)]"
# Stash any unstaged changes (temp files from processing)
git stash --include-untracked || true
# Rebase to avoid conflicts with other concurrent jobs
git pull --rebase origin main
git push
# Drop stash (we don't need the temp files)
git stash drop || true
fi
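# Note: the rebase above narrows but does not eliminate the race between
# overlapping scheduled and manual runs; a workflow-level `concurrency`
# group would serialize them.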
notify-slack:
needs: update-data
if: needs.update-data.outputs.new_failures == 'true'
runs-on: ubuntu-latest
steps:
- name: Checkout dashboard repo
uses: actions/checkout@v4
- name: Load config
run: |
# Placeholder: config.yaml is already present from the checkout above
echo "Using local config.yaml"
- name: Send DM to maintainers for failures
env:
# All workspace tokens
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
SLACK_BOT_TOKEN_NVIDIA: ${{ secrets.SLACK_BOT_TOKEN_NVIDIA }}
SLACK_BOT_TOKEN_CNCF: ${{ secrets.SLACK_BOT_TOKEN_CNCF }}
SLACK_BOT_TOKEN_INTEL: ${{ secrets.SLACK_BOT_TOKEN_INTEL }}
NOTIFICATIONS: ${{ needs.update-data.outputs.notifications }}
run: |
# Function to get token for a workspace
get_token() {
local workspace=$1
case "$workspace" in
"nvidia") echo "$SLACK_BOT_TOKEN_NVIDIA" ;;
"cncf") echo "$SLACK_BOT_TOKEN_CNCF" ;;
"intel") echo "$SLACK_BOT_TOKEN_INTEL" ;;
*) echo "$SLACK_BOT_TOKEN" ;; # default
esac
}
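# Usage: token=$(get_token "cncf") resolves to $SLACK_BOT_TOKEN_CNCF;
# unknown workspaces fall back to the default $SLACK_BOT_TOKEN.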
# Send direct messages to maintainers for new failures
echo "$NOTIFICATIONS" | jq -c '.[] | select(.type == "new_failure")' | while read -r notification; do
section=$(echo "$notification" | jq -r '.section')
test_name=$(echo "$notification" | jq -r '.test_name')
error=$(echo "$notification" | jq -r '.error')
run_url=$(echo "$notification" | jq -r '.run_url')
# Process each maintainer with their workspace
echo "$notification" | jq -c '.maintainer_contacts[]' 2>/dev/null | while read -r contact; do
slack_id=$(echo "$contact" | jq -r '.slack_id')
workspace=$(echo "$contact" | jq -r '.workspace // "default"')
if [ -n "$slack_id" ] && [ "$slack_id" != "null" ]; then
token=$(get_token "$workspace")
if [ -n "$token" ]; then
echo "Sending DM to $slack_id in workspace $workspace about $test_name"
curl -s -X POST "https://slack.com/api/chat.postMessage" \
-H "Authorization: Bearer $token" \
-H "Content-Type: application/json" \
-d @- <<EOF
{
"channel": "${slack_id}",
"blocks": [
{
"type": "header",
"text": {
"type": "plain_text",
"text": "🔴 Nightly Test Failure",
"emoji": true
}
},
{
"type": "section",
"fields": [
{
"type": "mrkdwn",
"text": "*Section:*\n${section}"
},
{
"type": "mrkdwn",
"text": "*Test:*\n\`${test_name}\`"
}
]
},
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "*Failed Step:*\n${error}"
}
},
{
"type": "actions",
"elements": [
{
"type": "button",
"text": {
"type": "plain_text",
"text": "🔗 View Run",
"emoji": true
},
"url": "${run_url}",
"style": "danger"
},
{
"type": "button",
"text": {
"type": "plain_text",
"text": "📊 Dashboard",
"emoji": true
},
"url": "https://kata-containers.github.io/ci-dashboard/"
}
]
}
]
}
EOF
sleep 1 # Throttle: Slack's chat.postMessage allows roughly one message per second per channel
else
echo "No token configured for workspace: $workspace"
fi
fi
done
done
- name: Send recovery DMs to maintainers
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
SLACK_BOT_TOKEN_NVIDIA: ${{ secrets.SLACK_BOT_TOKEN_NVIDIA }}
SLACK_BOT_TOKEN_CNCF: ${{ secrets.SLACK_BOT_TOKEN_CNCF }}
SLACK_BOT_TOKEN_INTEL: ${{ secrets.SLACK_BOT_TOKEN_INTEL }}
NOTIFICATIONS: ${{ needs.update-data.outputs.notifications }}
run: |
get_token() {
local workspace=$1
case "$workspace" in
"nvidia") echo "$SLACK_BOT_TOKEN_NVIDIA" ;;
"cncf") echo "$SLACK_BOT_TOKEN_CNCF" ;;
"intel") echo "$SLACK_BOT_TOKEN_INTEL" ;;
*) echo "$SLACK_BOT_TOKEN" ;;
esac
}
# Send DMs for section recovery
echo "$NOTIFICATIONS" | jq -c '.[] | select(.type == "recovery")' | while read -r notification; do
section=$(echo "$notification" | jq -r '.section')
echo "$notification" | jq -c '.maintainer_contacts[]' 2>/dev/null | while read -r contact; do
slack_id=$(echo "$contact" | jq -r '.slack_id')
workspace=$(echo "$contact" | jq -r '.workspace // "default"')
if [ -n "$slack_id" ] && [ "$slack_id" != "null" ]; then
token=$(get_token "$workspace")
if [ -n "$token" ]; then
curl -s -X POST "https://slack.com/api/chat.postMessage" \
-H "Authorization: Bearer $token" \
-H "Content-Type: application/json" \
-d @- <<EOF
{
"channel": "${slack_id}",
"text": "☀️ *${section}* is back to 100% passing!"
}
EOF
fi
fi
done
done