Skip to content

Commit 892c685

Browse files
committed
Test health check post workflow
1 parent e9109d4 commit 892c685

File tree

2 files changed

+193
-0
lines changed

2 files changed

+193
-0
lines changed

.github/workflows/health_check.yml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Health Check
#
# Writes a (currently hard-coded, fake) health report as JSON to the file named
# by the `health_check_file` repository variable and uploads it as the artifact
# named by the `health_check_artifact` repository variable. The job finally
# fails if the health check step itself failed.
name: Health Check

on:
  # Run the workflow test on push events
  push:
  # Run the main workflow on workflow_dispatch or schedule
  workflow_dispatch:
  schedule:
    # Every 5 minutes
    - cron: '*/5 * * * *'

jobs:
  health_check:
    runs-on: ubuntu-latest
    strategy:
      # Let every environment finish even if another one fails.
      fail-fast: false
      matrix:
        # Push events only exercise a single "local" leg; every other trigger
        # fans out over dev, stage and prod.
        environment: ${{fromJson(github.event_name == 'push' && '["local"]' || '["dev","stage","prod"]')}}

    steps:
      - uses: actions/checkout@v4

      - name: Fake health check
        id: health_check
        # Keep the job going when this step fails so the report is still
        # uploaded; the final "Exit" step turns the recorded failure back into
        # a failed job. Without this, a failure here would skip every later
        # step and the `outcome` test in "Exit" could never be true.
        continue-on-error: true
        shell: bash
        run: |
          # The JSON below contains no shell metacharacters, so the unquoted
          # heredoc delimiter writes it out verbatim.
          cat <<EOF > "${{ vars.health_check_file }}"
          {
            "version": {
              "url": "http://nginx/__version__",
              "data": {
                "target": "development",
                "version": "",
                "source": "https://github.com/mozilla/addons-server",
                "commit": "",
                "build": "",
                "python": "3.12",
                "django": "4.2",
                "addons-linter": "7.8.0"
              }
            },
            "heartbeat": {
              "url": "http://nginx/__heartbeat__",
              "data": {
                "memcache": {
                  "state": true,
                  "status": ""
                },
                "libraries": {
                  "state": true,
                  "status": ""
                },
                "elastic": {
                  "state": true,
                  "status": ""
                },
                "path": {
                  "state": false,
                  "status": "Everyone has their own path to walk. THis path is not your path"
                },
                "database": {
                  "state": true,
                  "status": ""
                }
              }
            },
            "monitors": {
              "url": "http://nginx/services/__heartbeat__",
              "data": {
                "rabbitmq": {
                  "state": true,
                  "status": ""
                },
                "signer": {
                  "state": true,
                  "status": ""
                },
                "remotesettings": {
                  "state": false,
                  "status": "The remote is out of batteries. Please try turning on the television using the button on the console."
                },
                "cinder": {
                  "state": false,
                  "status": "Failed to connect to cinder server: Http is conflarbulated due to overriding falpark"
                }
              }
            }
          }
          EOF

      - name: Upload metadata artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ vars.health_check_artifact }}
          path: ${{ vars.health_check_file }}
          retention-days: 90
          # Every matrix leg uploads under the same artifact name, and
          # upload-artifact@v4 fails when that name already exists in the run.
          # Overwriting keeps the name stable (the last leg to finish wins).
          # NOTE(review): one artifact per environment would preserve all
          # reports, but anything downloading this artifact by name would have
          # to change with it.
          overwrite: true

      - name: Exit
        shell: bash
        run: |
          # `outcome` is the step result before continue-on-error is applied,
          # so a failed health check still fails the job here.
          if [[ "${{ steps.health_check.outcome }}" == "failure" ]]; then
            exit 1
          fi
104+
105+
106+
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Health Check Completed
#
# Runs after every completed "Health Check" run. It downloads that run's
# report artifact, decides whether a notification is warranted (failure,
# recovery, or a periodic "normal" message), renders Slack blocks from the
# report and posts them.
name: Health Check Completed

on:
  workflow_run:
    workflows: [Health Check]
    types: [completed]

permissions:
  # checkout needs to read the repository; download-artifact and `gh run list`
  # need to read workflow runs and their artifacts.
  contents: read
  actions: read

jobs:
  health_check_notification:
    runs-on: ubuntu-latest

    steps:
      # ./scripts/health_check_blocks.py is run from the workspace below, so
      # the repository must be checked out. This has to happen before the
      # artifact download: checkout cleans the workspace.
      - uses: actions/checkout@v4

      - name: Download metadata artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ vars.health_check_artifact }}
          # `path` is the destination *directory*; the report ends up inside
          # it under its own file name (handled in "Create message blocks").
          path: ${{ vars.health_check_file }}
          run-id: ${{ github.event.workflow_run.id }}
          # A token is required to download artifacts from another workflow run.
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check if we should send a notification
        id: check
        shell: bash
        env:
          # The gh CLI needs a token and a repository to query.
          GH_TOKEN: ${{ github.token }}
          GH_REPO: ${{ github.repository }}
          # Event data is passed through the environment instead of being
          # interpolated into the script, so values containing spaces (the
          # workflow name) or shell metacharacters stay intact.
          RUN_ID: ${{ github.event.workflow_run.id }}
          RUN_CREATED_AT: ${{ github.event.workflow_run.created_at }}
          RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }}
          RUN_EVENT: ${{ github.event.workflow_run.event }}
          RUN_WORKFLOW: ${{ github.event.workflow_run.name }}
        run: |
          current_time=$(date -d "$RUN_CREATED_AT" +%s)
          current_hour=$(date -d "$RUN_CREATED_AT" +%H)
          current_status="$RUN_CONCLUSION"

          # The run that triggered this workflow is itself completed, so it is
          # normally the newest entry in the list. Fetch two runs and drop the
          # triggering one to get the run that preceded it. The result is an
          # empty object when there is no earlier run.
          last_workflow_run=$(gh run list \
            --status completed \
            --limit 2 \
            --event "$RUN_EVENT" \
            --workflow "$RUN_WORKFLOW" \
            --json databaseId,conclusion,createdAt \
            --jq "map(select(.databaseId != ${RUN_ID})) | .[0] // {}"
          )
          last_created_at=$(jq -r '.createdAt // empty' <<< "$last_workflow_run")
          last_status=$(jq -r '.conclusion // empty' <<< "$last_workflow_run")

          # With no earlier run, treat the previous run as infinitely old.
          last_time=0
          if [[ -n "$last_created_at" ]]; then
            last_time=$(date -d "$last_created_at" +%s)
          fi

          result="skip"
          # If the current workflow is failing, notify failure
          if [[ "$current_status" == "failure" ]]; then
            result="failure"
          # If the current workflow is success...
          elif [[ "$current_status" == "success" ]]; then
            # if the last workflow was a failure, notify recovery
            if [[ "$last_status" == "failure" ]]; then
              result="recovery"
            # if the last run was >24 hours ago and the current hour is 11, notify normal
            # NOTE(review): this measures the time since the previous *run*,
            # not since the previous notification; confirm that is intended.
            elif [[ $((current_time - last_time)) -gt 86400 && "$current_hour" == "11" ]]; then
              result="normal"
            fi
          fi

          echo "result=${result}" >> "$GITHUB_OUTPUT"
          cat "$GITHUB_OUTPUT"

      - name: Create message blocks
        if: steps.check.outputs.result != 'skip'
        id: blocks
        shell: bash
        env:
          HEALTH_CHECK_FILE: ${{ vars.health_check_file }}
        run: |
          # download-artifact extracts into a directory named by `path`, so the
          # report is at <path>/<file name> when that directory exists.
          health_check_input="$HEALTH_CHECK_FILE"
          if [[ -d "$health_check_input" ]]; then
            health_check_input="$health_check_input/$(basename "$health_check_input")"
          fi

          # Create the message blocks file
          health_check_blocks_file="health_check_blocks.json"
          ./scripts/health_check_blocks.py \
            --input "$health_check_input" \
            --output "$health_check_blocks_file"

          # Multiline output needs to use a delimiter to be passed to
          # the GITHUB_OUTPUT file. The content is quoted so it is written
          # verbatim (no word splitting or glob expansion).
          printf 'blocks<<EOF\n%s\nEOF\n' "$(cat "$health_check_blocks_file")" >> "$GITHUB_OUTPUT"
          cat "$GITHUB_OUTPUT"

      - uses: mozilla/addons/.github/actions/slack@main
        # Send whenever the check step asked for any kind of notification
        # (failure, recovery or normal); it never emits 'true'.
        if: steps.check.outputs.result != 'skip'
        with:
          slack_token: ${{ secrets.SLACK_TOKEN }}
          # Don't unfurl links or media. The payload is kept strict JSON: no
          # comments and no trailing comma inside the object.
          # NOTE(review): toJson() of a string output yields a JSON *string*;
          # confirm the slack action expects "blocks" in that form.
          payload: |
            {
              "channel": "${{ secrets.SLACK_ADDONS_PRODUCTION_CHANNEL }}",
              "blocks": ${{ toJson(steps.blocks.outputs.blocks) }},
              "unfurl_links": false,
              "unfurl_media": false
            }
85+
86+
87+

0 commit comments

Comments
 (0)