Skip to content

Commit fba518d

Browse files
chrisbbreuer and claude committed
fix: resolve all logical issues in Discord health monitoring
- discord-notify: wrap entire action in function so jq/curl failures never crash parent workflow (was only protecting curl, not jq)
- health-monitor: aws s3 ls --max-items is invalid, use s3api head-bucket
- health-monitor: guard against null/empty API responses in stale sync check (was computing age from epoch 0 → false positive every 30min)
- health-monitor: extract last 3 chars of curl -w output for clean HTTP code
- build-package/release: map cancelled builds to yellow/warning, not red/failure
- update-pantry/update-packages: add fallback values for empty step outputs
- sync-binaries/build-versions: filter out skipped jobs from Discord fields (mutually exclusive jobs showed "skipped" which looks like an error)
- health-monitor: use jq '// empty' to handle null workflow run timestamps

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e501705 commit fba518d

File tree

8 files changed

+114
-78
lines changed

8 files changed

+114
-78
lines changed

.github/actions/discord-notify/action.yml

Lines changed: 41 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -35,35 +35,46 @@ runs:
3535
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
3636
WORKFLOW_NAME: ${{ github.workflow }}
3737
run: |
38-
# Map status to color
39-
case "$STATUS" in
40-
success) COLOR=3066993 ;; # green
41-
warning) COLOR=16776960 ;; # yellow
42-
failure) COLOR=15158332 ;; # red
43-
*) COLOR=10181046 ;; # purple/unknown
44-
esac
38+
# Entire block is wrapped so notification errors never fail the parent workflow
39+
_send_discord() {
40+
# Map status to color
41+
case "$STATUS" in
42+
success) COLOR=3066993 ;; # green
43+
warning) COLOR=16776960 ;; # yellow
44+
failure) COLOR=15158332 ;; # red
45+
*) COLOR=10181046 ;; # purple/unknown
46+
esac
4547
46-
# Build fields JSON — append clickable workflow run link
47-
FIELDS_WITH_LINK=$(echo "$FIELDS" | jq -c \
48-
--arg url "$RUN_URL" \
49-
--arg name "$WORKFLOW_NAME" \
50-
'. + [{"name": "Workflow Run", "value": ("[" + $name + "](" + $url + ")"), "inline": false}]')
48+
# Validate fields JSON, fall back to empty array
49+
if ! echo "$FIELDS" | jq -e '.' >/dev/null 2>&1; then
50+
FIELDS='[]'
51+
fi
5152
52-
# Build payload safely via jq (handles all escaping)
53-
jq -nc \
54-
--arg title "$TITLE" \
55-
--arg description "$DESCRIPTION" \
56-
--argjson color "$COLOR" \
57-
--argjson fields "$FIELDS_WITH_LINK" \
58-
'{
59-
embeds: [{
60-
title: $title,
61-
description: $description,
62-
color: $color,
63-
fields: $fields,
64-
timestamp: (now | todate)
65-
}]
66-
}' | curl -sS -X POST \
67-
-H "Content-Type: application/json" \
68-
--data-binary @- \
69-
"$WEBHOOK_URL" || true
53+
# Append clickable workflow run link
54+
FIELDS_WITH_LINK=$(echo "$FIELDS" | jq -c \
55+
--arg url "$RUN_URL" \
56+
--arg name "$WORKFLOW_NAME" \
57+
'. + [{"name": "Workflow Run", "value": ("[" + $name + "](" + $url + ")"), "inline": false}]')
58+
59+
# Build payload safely via jq (handles all escaping)
60+
PAYLOAD=$(jq -nc \
61+
--arg title "$TITLE" \
62+
--arg description "$DESCRIPTION" \
63+
--argjson color "$COLOR" \
64+
--argjson fields "$FIELDS_WITH_LINK" \
65+
'{
66+
embeds: [{
67+
title: $title,
68+
description: $description,
69+
color: $color,
70+
fields: $fields,
71+
timestamp: (now | todate)
72+
}]
73+
}')
74+
75+
curl -sS -X POST \
76+
-H "Content-Type: application/json" \
77+
-d "$PAYLOAD" \
78+
"$WEBHOOK_URL"
79+
}
80+
_send_discord || echo "::warning::Discord notification failed (non-fatal)"

.github/workflows/build-package.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -234,13 +234,13 @@ jobs:
234234
- uses: actions/checkout@v6
235235
- uses: ./.github/actions/discord-notify
236236
with:
237-
title: "Build Package — ${{ needs.build.result == 'success' && 'Success' || 'Failed' }}"
237+
title: "Build Package — ${{ needs.build.result == 'success' && 'Success' || needs.build.result == 'cancelled' && 'Cancelled' || 'Failed' }}"
238238
description: "Packages: ${{ github.event.inputs.packages }}"
239-
status: ${{ needs.build.result == 'success' && 'success' || 'failure' }}
239+
status: ${{ needs.build.result == 'success' && 'success' || needs.build.result == 'cancelled' && 'warning' || 'failure' }}
240240
fields: |
241241
[
242-
{"name": "Platform", "value": "${{ github.event.inputs.platform }}", "inline": true},
243-
{"name": "Force", "value": "${{ github.event.inputs.force }}", "inline": true},
242+
{"name": "Platform", "value": "${{ github.event.inputs.platform || 'both' }}", "inline": true},
243+
{"name": "Force", "value": "${{ github.event.inputs.force || 'true' }}", "inline": true},
244244
{"name": "Actor", "value": "${{ github.actor }}", "inline": true}
245245
]
246246
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

.github/workflows/build-versions.yml

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -437,15 +437,24 @@ jobs:
437437
runs-on: ubuntu-latest
438438
steps:
439439
- uses: actions/checkout@v6
440+
- name: Build fields
441+
id: fields
442+
run: |
443+
FIELDS="["
444+
FIRST=true
445+
for pair in "Build Targeted:${{ needs.build-targeted.result }}" "Build Batches:${{ needs.build.result }}"; do
446+
NAME="${pair%%:*}"
447+
VAL="${pair##*:}"
448+
[ "$VAL" = "skipped" ] && continue
449+
[ "$FIRST" = "true" ] && FIRST=false || FIELDS="$FIELDS,"
450+
FIELDS="$FIELDS{\"name\":\"$NAME\",\"value\":\"$VAL\",\"inline\":true}"
451+
done
452+
FIELDS="$FIELDS,{\"name\":\"Max Versions\",\"value\":\"${{ github.event.inputs.max-versions || '5' }}\",\"inline\":true}]"
453+
echo "json=$FIELDS" >> "$GITHUB_OUTPUT"
440454
- uses: ./.github/actions/discord-notify
441455
with:
442456
title: Build Versions — Failed
443457
description: Multi-version build failed
444458
status: failure
445-
fields: |
446-
[
447-
{"name": "Build Targeted", "value": "${{ needs.build-targeted.result }}", "inline": true},
448-
{"name": "Build Batches", "value": "${{ needs.build.result }}", "inline": true},
449-
{"name": "Max Versions", "value": "${{ github.event.inputs.max-versions || '5' }}", "inline": true}
450-
]
459+
fields: ${{ steps.fields.outputs.json }}
451460
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

.github/workflows/health-monitor.yml

Lines changed: 31 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -19,16 +19,17 @@ jobs:
1919
- name: Check registry health
2020
id: registry
2121
run: |
22+
# Use a temp file to isolate the HTTP code from error output
2223
HTTP_CODE=$(curl -sS -o /tmp/health-response -w '%{http_code}' \
23-
--max-time 15 --retry 2 --retry-delay 5 \
24-
'https://registry.stacksjs.com/health' 2>/dev/null) || HTTP_CODE="000"
24+
--max-time 15 \
25+
'https://registry.stacksjs.com/health' 2>/dev/null) || true
26+
# Extract only the last 3 chars (the HTTP code) in case of any prefix noise
27+
HTTP_CODE="${HTTP_CODE: -3}"
2528
echo "http_code=$HTTP_CODE" >> "$GITHUB_OUTPUT"
2629
if [ "$HTTP_CODE" = "200" ]; then
2730
echo "status=healthy" >> "$GITHUB_OUTPUT"
2831
else
29-
BODY=$(cat /tmp/health-response 2>/dev/null | head -c 200 || echo "no response")
3032
echo "status=unhealthy" >> "$GITHUB_OUTPUT"
31-
echo "body=$BODY" >> "$GITHUB_OUTPUT"
3233
fi
3334
3435
- name: Check S3 bucket accessibility
@@ -38,7 +39,7 @@ jobs:
3839
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
3940
AWS_REGION: us-east-1
4041
run: |
41-
if aws s3 ls s3://pantry-registry --max-items 1 >/dev/null 2>&1; then
42+
if aws s3api head-bucket --bucket pantry-registry 2>/dev/null; then
4243
echo "status=accessible" >> "$GITHUB_OUTPUT"
4344
else
4445
echo "status=inaccessible" >> "$GITHUB_OUTPUT"
@@ -73,27 +74,31 @@ jobs:
7374
# Check update-pantry — should run every 20min, alert if >2h stale
7475
LAST_PANTRY=$(gh api \
7576
"repos/${{ github.repository }}/actions/workflows/update-pantry.yml/runs?status=success&per_page=1" \
76-
--jq '.workflow_runs[0].updated_at' 2>/dev/null || echo "")
77-
if [ -n "$LAST_PANTRY" ]; then
78-
PANTRY_TS=$(date -u -d "$LAST_PANTRY" +%s 2>/dev/null || echo "0")
79-
PANTRY_AGE=$(( (NOW - PANTRY_TS) / 3600 ))
80-
if [ "$PANTRY_AGE" -gt 2 ]; then
81-
STALE_ITEMS="update-pantry (${PANTRY_AGE}h ago)"
77+
--jq '.workflow_runs[0].updated_at // empty' 2>/dev/null || echo "")
78+
if [ -n "$LAST_PANTRY" ] && [ "$LAST_PANTRY" != "null" ]; then
79+
PANTRY_TS=$(date -u -d "$LAST_PANTRY" +%s 2>/dev/null || echo "")
80+
if [ -n "$PANTRY_TS" ] && [ "$PANTRY_TS" -gt 0 ] 2>/dev/null; then
81+
PANTRY_AGE=$(( (NOW - PANTRY_TS) / 3600 ))
82+
if [ "$PANTRY_AGE" -gt 2 ]; then
83+
STALE_ITEMS="update-pantry (${PANTRY_AGE}h ago)"
84+
fi
8285
fi
8386
fi
8487
8588
# Check sync-binaries — runs every 6h, alert if >12h stale
8689
LAST_SYNC=$(gh api \
8790
"repos/${{ github.repository }}/actions/workflows/sync-binaries.yml/runs?status=success&per_page=1" \
88-
--jq '.workflow_runs[0].updated_at' 2>/dev/null || echo "")
89-
if [ -n "$LAST_SYNC" ]; then
90-
SYNC_TS=$(date -u -d "$LAST_SYNC" +%s 2>/dev/null || echo "0")
91-
SYNC_AGE=$(( (NOW - SYNC_TS) / 3600 ))
92-
if [ "$SYNC_AGE" -gt 12 ]; then
93-
if [ -n "$STALE_ITEMS" ]; then
94-
STALE_ITEMS="$STALE_ITEMS, sync-binaries (${SYNC_AGE}h ago)"
95-
else
96-
STALE_ITEMS="sync-binaries (${SYNC_AGE}h ago)"
91+
--jq '.workflow_runs[0].updated_at // empty' 2>/dev/null || echo "")
92+
if [ -n "$LAST_SYNC" ] && [ "$LAST_SYNC" != "null" ]; then
93+
SYNC_TS=$(date -u -d "$LAST_SYNC" +%s 2>/dev/null || echo "")
94+
if [ -n "$SYNC_TS" ] && [ "$SYNC_TS" -gt 0 ] 2>/dev/null; then
95+
SYNC_AGE=$(( (NOW - SYNC_TS) / 3600 ))
96+
if [ "$SYNC_AGE" -gt 12 ]; then
97+
if [ -n "$STALE_ITEMS" ]; then
98+
STALE_ITEMS="$STALE_ITEMS, sync-binaries (${SYNC_AGE}h ago)"
99+
else
100+
STALE_ITEMS="sync-binaries (${SYNC_AGE}h ago)"
101+
fi
97102
fi
98103
fi
99104
fi
@@ -141,9 +146,9 @@ jobs:
141146
status: failure
142147
fields: |
143148
[
144-
{"name": "Registry", "value": "${{ steps.registry.outputs.status }} (HTTP ${{ steps.registry.outputs.http_code }})", "inline": true},
145-
{"name": "S3 Bucket", "value": "${{ steps.s3.outputs.status }}", "inline": true},
146-
{"name": "Failures (6h)", "value": "${{ steps.failures.outputs.count }}", "inline": true},
149+
{"name": "Registry", "value": "${{ steps.registry.outputs.status || 'unknown' }} (HTTP ${{ steps.registry.outputs.http_code || '?' }})", "inline": true},
150+
{"name": "S3 Bucket", "value": "${{ steps.s3.outputs.status || 'unknown' }}", "inline": true},
151+
{"name": "Failures (6h)", "value": "${{ steps.failures.outputs.count || '?' }}", "inline": true},
147152
{"name": "Stale Syncs", "value": "${{ steps.stale.outputs.items || 'none' }}", "inline": false}
148153
]
149154
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
@@ -171,8 +176,8 @@ jobs:
171176
status: success
172177
fields: |
173178
[
174-
{"name": "Registry", "value": "${{ steps.registry.outputs.status }}", "inline": true},
175-
{"name": "S3 Bucket", "value": "${{ steps.s3.outputs.status }}", "inline": true},
176-
{"name": "Failures (6h)", "value": "${{ steps.failures.outputs.count }}", "inline": true}
179+
{"name": "Registry", "value": "${{ steps.registry.outputs.status || 'unknown' }}", "inline": true},
180+
{"name": "S3 Bucket", "value": "${{ steps.s3.outputs.status || 'unknown' }}", "inline": true},
181+
{"name": "Failures (6h)", "value": "${{ steps.failures.outputs.count || '0' }}", "inline": true}
177182
]
178183
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

.github/workflows/release.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,9 +86,9 @@ jobs:
8686
- uses: actions/checkout@v6
8787
- uses: ./.github/actions/discord-notify
8888
with:
89-
title: "Release — ${{ needs.pantry.result == 'success' && 'Published' || 'Failed' }}"
89+
title: "Release — ${{ needs.pantry.result == 'success' && 'Published' || needs.pantry.result == 'cancelled' && 'Cancelled' || 'Failed' }}"
9090
description: "Tag: ${{ github.ref_name }}"
91-
status: ${{ needs.pantry.result == 'success' && 'success' || 'failure' }}
91+
status: ${{ needs.pantry.result == 'success' && 'success' || needs.pantry.result == 'cancelled' && 'warning' || 'failure' }}
9292
fields: |
9393
[
9494
{"name": "Tag", "value": "${{ github.ref_name }}", "inline": true},

.github/workflows/sync-binaries.yml

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -538,15 +538,25 @@ jobs:
538538
runs-on: ubuntu-latest
539539
steps:
540540
- uses: actions/checkout@v6
541+
- name: Build fields
542+
id: fields
543+
run: |
544+
# Only include jobs that actually ran (not skipped)
545+
FIELDS="["
546+
FIRST=true
547+
for pair in "Sync Pre-built:${{ needs.sync-prebuilt.result }}" "Build Targeted:${{ needs.build-targeted.result }}" "Build Batches:${{ needs.build.result }}"; do
548+
NAME="${pair%%:*}"
549+
VAL="${pair##*:}"
550+
[ "$VAL" = "skipped" ] && continue
551+
[ "$FIRST" = "true" ] && FIRST=false || FIELDS="$FIELDS,"
552+
FIELDS="$FIELDS{\"name\":\"$NAME\",\"value\":\"$VAL\",\"inline\":true}"
553+
done
554+
FIELDS="$FIELDS]"
555+
echo "json=$FIELDS" >> "$GITHUB_OUTPUT"
541556
- uses: ./.github/actions/discord-notify
542557
with:
543558
title: Sync Binaries — Failed
544559
description: One or more sync/build jobs failed
545560
status: failure
546-
fields: |
547-
[
548-
{"name": "Sync Pre-built", "value": "${{ needs.sync-prebuilt.result }}", "inline": true},
549-
{"name": "Build Targeted", "value": "${{ needs.build-targeted.result }}", "inline": true},
550-
{"name": "Build Batches", "value": "${{ needs.build.result }}", "inline": true}
551-
]
561+
fields: ${{ steps.fields.outputs.json }}
552562
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

.github/workflows/update-packages.yml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -393,7 +393,8 @@ jobs:
393393
status: failure
394394
fields: |
395395
[
396-
{"name": "Fetch Result", "value": "${{ steps.fetch-packages.outputs.fetch_result && 'received' || 'missing' }}", "inline": true},
397-
{"name": "Safe to Commit", "value": "${{ steps.deletion-check.outputs.safe_to_commit || 'n/a' }}", "inline": true}
396+
{"name": "Fetch Success", "value": "${{ steps.commit-message.outputs.should_commit || 'unknown' }}", "inline": true},
397+
{"name": "Safe to Commit", "value": "${{ steps.deletion-check.outputs.safe_to_commit || 'not reached' }}", "inline": true},
398+
{"name": "Updated", "value": "${{ steps.commit-message.outputs.total_updated || '?' }}", "inline": true}
398399
]
399400
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

.github/workflows/update-pantry.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ jobs:
206206
status: failure
207207
fields: |
208208
[
209-
{"name": "Update Success", "value": "${{ steps.update-pantry.outputs.success }}", "inline": true},
210-
{"name": "Changes", "value": "${{ steps.update-pantry.outputs.total_changes }}", "inline": true}
209+
{"name": "Update Success", "value": "${{ steps.update-pantry.outputs.success || 'unknown' }}", "inline": true},
210+
{"name": "Changes", "value": "${{ steps.update-pantry.outputs.total_changes || '?' }}", "inline": true}
211211
]
212212
webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

0 commit comments

Comments
 (0)