44
# Command tracing is opt-in: VERBOSE defaults to 0, and any other numeric value
# turns on `set -x`.
# NOTE(review): tracing prints every expanded command, including the
# Authorization headers passed to curl further down — avoid enabling it where
# logs are shared.
VERBOSE="${VERBOSE:-0}"
if [[ "${VERBOSE}" -ne "0" ]]; then
  set -x
fi

# List of required environment variables
# NOTE(review): VERCEL_API_KEY is only read by force_redeploy_registry, whose
# call is commented out at the bottom of this script — confirm whether it
# still needs to be required.
required_vars=(
  "INSTATUS_API_KEY"
  "INSTATUS_PAGE_ID"
  "INSTATUS_COMPONENT_ID"
  "VERCEL_API_KEY"
)

# Check if each required variable is set
for var in "${required_vars[@]}"; do
  # ${!var:-} looks up the variable whose *name* is stored in $var; the ":-"
  # default makes an unset variable expand to an empty string instead of
  # raising an error.
  if [[ -z "${!var:-}" ]]; then
    # Diagnostics go to stderr so they are not mixed into regular output.
    echo "Error: Environment variable '$var' is not set." >&2
    exit 1
  fi
done

# Base URL of the registry to probe; can be overridden from the environment.
REGISTRY_BASE_URL="${REGISTRY_BASE_URL:-https://registry.coder.com}"
@@ -31,38 +31,38 @@ declare -a failures=()
3131
# Collect all module directories containing a main.tf file
# find prints paths such as ./<dir>/main.tf (hidden paths are excluded); cut
# keeps the second "/"-separated field and sort -u removes duplicates.
# Entries are read line by line so a name containing spaces stays one element
# instead of being word-split.
while IFS= read -r path; do
  modules+=("${path}")
done < <(find . -maxdepth 2 -not -path '*/.*' -type f -name main.tf | cut -d '/' -f 2 | sort -u)

echo "Checking modules: ${modules[*]}"

# Function to update the component status on Instatus
#
# Arguments: $1 - value sent as the "status" field of the JSON body (this
#                 script passes OPERATIONAL, PARTIALOUTAGE or MAJOROUTAGE)
# Globals:   INSTATUS_PAGE_ID, INSTATUS_COMPONENT_ID, INSTATUS_API_KEY (read)
# Outputs:   whatever curl prints (the request is not made with --silent)
# Returns:   curl's exit status
update_component_status() {
  local component_status=$1
  # see https://instatus.com/help/api/components
  curl -X PUT "https://api.instatus.com/v1/${INSTATUS_PAGE_ID}/components/${INSTATUS_COMPONENT_ID}" \
    -H "Authorization: Bearer ${INSTATUS_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "{\"status\": \"${component_status}\"}"
}

4949# Function to create an incident
5050create_incident () {
51- local incident_name=" Degraded Service"
52- local message=" The following modules are experiencing issues:\n"
53- for i in " ${! failures[@]} " ; do
54- message+=" $(( i + 1 )) . ${failures[$i]} \n"
55- done
56-
57- component_status=" PARTIALOUTAGE"
58- if (( ${# failures[@]} == ${# modules[@]} )) ; then
59- component_status=" MAJOROUTAGE"
60- fi
61- # see https://instatus.com/help/api/incidents
62- incident_id=$( curl -s -X POST " https://api.instatus.com/v1/$INSTATUS_PAGE_ID /incidents" \
63- -H " Authorization: Bearer $INSTATUS_API_KEY " \
64- -H " Content-Type: application/json" \
65- -d " {
51+ local incident_name=" Degraded Service"
52+ local message=" The following modules are experiencing issues:\n"
53+ for i in " ${! failures[@]} " ; do
54+ message+=" $(( i + 1 )) . ${failures[$i]} \n"
55+ done
56+
57+ component_status=" PARTIALOUTAGE"
58+ if (( ${# failures[@]} == ${# modules[@]} )) ; then
59+ component_status=" MAJOROUTAGE"
60+ fi
61+ # see https://instatus.com/help/api/incidents
62+ incident_id=$( curl -s -X POST " https://api.instatus.com/v1/$INSTATUS_PAGE_ID /incidents" \
63+ -H " Authorization: Bearer $INSTATUS_API_KEY " \
64+ -H " Content-Type: application/json" \
65+ -d " {
6666 \" name\" : \" $incident_name \" ,
6767 \" message\" : \" $message \" ,
6868 \" components\" : [\" $INSTATUS_COMPONENT_ID \" ],
@@ -76,129 +76,129 @@ create_incident() {
7676 ]
7777 }" | jq -r ' .id' )
7878
79- echo " Created incident with ID: $incident_id "
79+ echo " Created incident with ID: $incident_id "
8080}
8181
# Function to check for existing unresolved incidents
#
# Globals: INSTATUS_PAGE_ID, INSTATUS_API_KEY (read)
# Outputs: one status line on stdout, listing the unresolved incident IDs
#          when there are any
# Returns: 0 if at least one incident ID was extracted, 1 otherwise
check_existing_incident() {
  # Fetch the latest incidents with status not equal to "RESOLVED"
  # Declared separately from the assignment so `local` does not mask the exit
  # status of the pipeline.
  local unresolved_incidents
  unresolved_incidents=$(curl -s -X GET "https://api.instatus.com/v1/${INSTATUS_PAGE_ID}/incidents" \
    -H "Authorization: Bearer ${INSTATUS_API_KEY}" \
    -H "Content-Type: application/json" | jq -r '.incidents[] | select(.status != "RESOLVED") | .id')

  if [[ -n "${unresolved_incidents}" ]]; then
    echo "Unresolved incidents found: ${unresolved_incidents}"
    return 0 # Indicate that there are unresolved incidents
  else
    echo "No unresolved incidents found."
    return 1 # Indicate that no unresolved incidents exist
  fi
}

# Re-deploys the most recent production deployment of the registry on Vercel.
#
# The function exits the whole script with status 1 (after printing guidance)
# when no previous deployment can be found, when the latest deployment is less
# than 2 hours old, when the latest deployment is not in the READY state, or
# when the redeploy request itself fails.
#
# Globals: VERCEL_API_KEY (read)
# Outputs: progress and guidance messages on stdout
# Returns: 0 when the redeploy request succeeded
force_redeploy_registry() {
  # These are not secret values; safe to just expose directly in script
  local VERCEL_TEAM_SLUG="codercom"
  local VERCEL_TEAM_ID="team_tGkWfhEGGelkkqUUm9nXq17r"
  local VERCEL_APP="registry"

  local latest_res
  latest_res=$(
    curl "https://api.vercel.com/v6/deployments?app=${VERCEL_APP}&limit=1&slug=${VERCEL_TEAM_SLUG}&teamId=${VERCEL_TEAM_ID}&target=production&state=BUILDING,INITIALIZING,QUEUED,READY" \
      --fail \
      --silent \
      --header "Authorization: Bearer ${VERCEL_API_KEY}" \
      --header "Content-Type: application/json"
  )

  # If we have zero deployments, something is VERY wrong. Make the whole
  # script exit with a non-zero status code
  # jq prints "null" when the field is missing and prints nothing at all when
  # the response body is empty (e.g. the request failed), so both cases are
  # treated as "no deployment found".
  local latest_id
  latest_id=$(echo "${latest_res}" | jq -r '.deployments[0].uid')
  if [[ -z "${latest_id}" || "${latest_id}" == "null" ]]; then
    echo "Unable to pull any previous deployments for redeployment"
    echo "Please redeploy the latest deployment manually in Vercel."
    echo "https://vercel.com/codercom/registry/deployments"
    exit 1
  fi

  # createdAt is divided by 1000 and floored before being compared with
  # `date +%s`, which is in seconds.
  local latest_date_ts_seconds
  latest_date_ts_seconds=$(echo "${latest_res}" | jq -r '.deployments[0].createdAt/1000|floor')
  local current_date_ts_seconds
  current_date_ts_seconds="$(date +%s)"
  local max_redeploy_interval_seconds=7200 # 2 hours
  if ((current_date_ts_seconds - latest_date_ts_seconds < max_redeploy_interval_seconds)); then
    echo "The registry was deployed less than 2 hours ago."
    echo "Not automatically re-deploying the registry."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "https://vercel.com/codercom/registry/deployments"
    exit 1
  fi

  local latest_deployment_state
  latest_deployment_state="$(echo "${latest_res}" | jq -r '.deployments[0].state')"
  if [[ "${latest_deployment_state}" != "READY" ]]; then
    echo "Last deployment was not in READY state. Skipping redeployment."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "https://vercel.com/codercom/registry/deployments"
    exit 1
  fi

  echo "============================================================="
  echo "!!! Redeploying registry with deployment ID: ${latest_id} !!!"
  echo "============================================================="

  if ! curl -X POST "https://api.vercel.com/v13/deployments?forceNew=1&skipAutoDetectionConfirmation=1&slug=${VERCEL_TEAM_SLUG}&teamId=${VERCEL_TEAM_ID}" \
    --fail \
    --header "Authorization: Bearer ${VERCEL_API_KEY}" \
    --header "Content-Type: application/json" \
    --data-raw "{ \"deploymentId\": \"${latest_id}\", \"name\": \"${VERCEL_APP}\", \"target\": \"production\" }"; then
    echo "DEPLOYMENT FAILED! Please check the Vercel dashboard for more information."
    echo "https://vercel.com/codercom/registry/deployments"
    exit 1
  fi
}

# Check each module's accessibility
# Each module page is requested with a HEAD request (following redirects, with
# up to 3 retries); anything other than a final HTTP 200 marks the module as
# failed and sets the overall status to 1.
for module in "${modules[@]}"; do
  # Trim leading/trailing whitespace from module name
  module=$(echo "${module}" | xargs)
  url="${REGISTRY_BASE_URL}/modules/${module}"
  printf "=== Checking module %s at %s\n" "${module}" "${url}"
  # The response body is discarded; only the status code written by
  # --write-out is captured.
  status_code=$(curl --output /dev/null --head --silent --fail --location "${url}" --retry 3 --write-out "%{http_code}")
  if ((status_code != 200)); then
    printf "==> FAIL(%s)\n" "${status_code}"
    status=1
    failures+=("${module}")
  else
    printf "==> OK(%s)\n" "${status_code}"
  fi
done

# Determine overall status and update Instatus component
if ((status == 0)); then
  echo "All modules are operational."
  # Mark the Instatus component as operational
  update_component_status "OPERATIONAL"
else
  echo "The following modules have issues: ${failures[*]}"
  # check if all modules are down
  if ((${#failures[@]} == ${#modules[@]})); then
    update_component_status "MAJOROUTAGE"
  else
    update_component_status "PARTIALOUTAGE"
  fi

  # Check if there is an existing incident before creating a new one
  if ! check_existing_incident; then
    create_incident
  fi

  # If a module is down, force a redeployment to try getting things back online
  # ASAP
  # EDIT: registry.coder.com is no longer hosted on vercel
  # force_redeploy_registry
fi

# Propagate the overall result as the script's exit code
exit "${status}"
0 commit comments