Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.
Merged
74 changes: 70 additions & 4 deletions .github/scripts/check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
set -o pipefail
set -u

# Opt-in command tracing: VERBOSE defaults to 0, and any non-zero value
# switches on xtrace for the remainder of the script.
: "${VERBOSE:=0}"
if (( VERBOSE != 0 )); then
  set -x
fi

# Names of the environment variables this script requires; the check that
# follows iterates over this list. VERCEL_API_KEY is sent as the bearer token
# on the Vercel API calls; the INSTATUS_* values are presumably consumed by the
# Instatus helpers (confirm against their definitions).
required_vars=(
"INSTATUS_API_KEY"
"INSTATUS_PAGE_ID"
"INSTATUS_COMPONENT_ID"
"VERCEL_API_KEY"
)

# Check if each required variable is set
Expand All @@ -17,14 +23,20 @@ for var in "${required_vars[@]}"; do
fi
done

LATEST_REDEPLOY_FAILED="${LATEST_REDEPLOY_FAILED:-0}"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can remove this

# Refuse to run again when the previous run flagged a redeploy attempt.
# This is top-level code in an executed script, so it must use `exit`:
# `return` is only valid inside a function or a sourced file, and here it
# would merely print an error and let the script carry on.
if [[ "${LATEST_REDEPLOY_FAILED:-0}" -ne "0" ]]; then
  echo "Trying to re-run job when previous re-deploy failed" >&2
  exit 1
fi

# Base URL of the registry; taken from the environment when set, otherwise
# defaults to https://registry.coder.com.
REGISTRY_BASE_URL="${REGISTRY_BASE_URL:-https://registry.coder.com}"

# Overall result; used as the script's exit code, with 0 reported as
# "all modules are operational".
status=0
# Module directory names to check (one entry per directory with a main.tf).
declare -a modules=()
# Modules found to have issues; listed in the failure output.
declare -a failures=()

# Collect all module directories containing a main.tf file (hidden paths are
# skipped, and only directories directly under the current one are considered).
# Names are read line by line so they are never word-split or glob-expanded.
while IFS= read -r path; do
  modules+=("${path}")
done < <(
  find . -maxdepth 2 -not -path '*/.*' -type f -name main.tf \
    | cut -d '/' -f 2 \
    | sort -u
)

Expand All @@ -45,7 +57,7 @@ create_incident() {
local incident_name="Testing Instatus"
local message="The following modules are experiencing issues:\n"
for i in "${!failures[@]}"; do
message+="$(($i + 1)). ${failures[$i]}\n"
message+="$((i + 1)). ${failures[$i]}\n"
done

component_status="PARTIALOUTAGE"
Expand Down Expand Up @@ -74,6 +86,42 @@ create_incident() {
echo "$incident_id"
}

#######################################
# Ask Vercel to redeploy the most recent deployment of the registry app.
# Globals:   VERCEL_API_KEY (read) - bearer token for the Vercel API
# Arguments: none
# Outputs:   the HTTP status code of the redeploy request on stdout;
#            diagnostics on stderr
# Returns:   1 when no previous deployment could be determined
#######################################
force_redeploy_registry() {
  # These are not secret values; safe to just expose directly in script
  local VERCEL_TEAM_SLUG="codercom"
  local VERCEL_TEAM_ID="team_tGkWfhEGGelkkqUUm9nXq17r"
  local VERCEL_APP="registry"

  local latest_res
  latest_res=$(
    curl "https://api.vercel.com/v6/deployments?app=$VERCEL_APP&limit=1&slug=$VERCEL_TEAM_SLUG&teamId=$VERCEL_TEAM_ID" \
      --fail \
      --silent \
      -H "Authorization: Bearer $VERCEL_API_KEY" \
      -H "Content-Type: application/json"
  )

  # If we have zero deployments, something is VERY wrong. Signal failure to
  # the caller. -r strips the JSON quotes so the id can be embedded in the
  # request body below; an empty result (failed request or unparsable
  # response) is treated the same as a missing deployment.
  local latest_id
  latest_id=$(printf '%s' "${latest_res}" | jq -r '.deployments[0].uid')
  if [[ -z "${latest_id}" || "${latest_id}" == "null" ]]; then
    # stderr, because the caller captures stdout as the status code
    echo "Unable to pull any previous deployments for redeployment" >&2
    return 1
  fi

  # The response body is discarded; --write-out makes curl print just the
  # HTTP status code, which is what the caller compares against 200.
  local redeploy_status
  redeploy_status=$(
    curl -X POST "https://api.vercel.com/v13/deployments?forceNew=1&skipAutoDetectionConfirmation=1&slug=$VERCEL_TEAM_SLUG&teamId=$VERCEL_TEAM_ID" \
      --fail \
      --silent \
      --output "/dev/null" \
      --write-out '%{http_code}' \
      -H "Authorization: Bearer $VERCEL_API_KEY" \
      -H "Content-Type: application/json" \
      -d "{ \"deploymentId\": \"${latest_id}\" }"
  )

  echo "${redeploy_status}"
}

# Check each module's accessibility
for module in "${modules[@]}"; do
# Trim leading/trailing whitespace from module name
Expand All @@ -94,11 +142,13 @@ done
# Determine overall status and update Instatus component
if (( status == 0 )); then
echo "All modules are operational."
# Mark the Instatus component as operational
update_component_status "OPERATIONAL"

echo "LATEST_REDEPLOY_FAILED=0" >> "${GITHUB_ENV}"
else
echo "The following modules have issues: ${failures[*]}"
# check if all modules are down
# check if all modules are down
if (( ${#failures[@]} == ${#modules[@]} )); then
update_component_status "MAJOROUTAGE"
else
Expand All @@ -108,6 +158,22 @@ else
# Create a new incident
incident_id=$(create_incident)
echo "Created incident with ID: $incident_id"

# If a module is down, force a redeployment to try getting things back online
# ASAP
status_code=$(force_redeploy_registry)
# shellcheck disable=SC2181
if (( status_code == 200 )); then
echo "Reployment successful"
else
echo "Unable to redeploy automatically"
fi

# Update environment variable so that if automatic re-deployment fails, we
# don't keep running the script over and over again. Note that even if a
# re-deployment succeeds, that doesn't necessarily mean that everything is
# fully operational
echo "LATEST_REDEPLOY_FAILED=1" >> "${GITHUB_ENV}"
fi

exit "${status}"
1 change: 1 addition & 0 deletions .github/workflows/check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ jobs:
INSTATUS_API_KEY: ${{ secrets.INSTATUS_API_KEY }}
INSTATUS_PAGE_ID: ${{ secrets.INSTATUS_PAGE_ID }}
INSTATUS_COMPONENT_ID: ${{ secrets.INSTATUS_COMPONENT_ID }}
VERCEL_API_KEY: ${{ secrets.VERCEL_API_KEY }}