-
Notifications
You must be signed in to change notification settings - Fork 110
ci: seed preview auth in PR previews #2775
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
84b3495
69aa373
b9dedd0
83e402b
847df2d
60fb2a2
cca2bba
0b1d393
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
#!/usr/bin/env bash
#
# Preview auth bootstrap: runs the runtime database migrations and the auth
# seed for a PR preview environment.
#
# Always required:
#   API_URL, SPICEDB_PRESHARED_KEY, INKEEP_AGENTS_MANAGE_UI_USERNAME,
#   INKEEP_AGENTS_MANAGE_UI_PASSWORD, BETTER_AUTH_SECRET
# Optional:
#   RUN_DB_URL, SPICEDB_ENDPOINT - when either is empty, the missing value(s)
#     are read from Railway, which additionally requires RAILWAY_API_TOKEN,
#     RAILWAY_PROJECT_ID, RAILWAY_OUTPUT_SERVICE, RAILWAY_RUN_DB_URL_KEY,
#     RAILWAY_SPICEDB_ENDPOINT_KEY and PR_NUMBER.
#   TENANT_ID           - exported as "default" when unset or empty.
#   GITHUB_STEP_SUMMARY - when set, a short summary is appended to that file.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/preview/common.sh
source "${SCRIPT_DIR}/common.sh"

require_env_vars \
  API_URL \
  SPICEDB_PRESHARED_KEY \
  INKEEP_AGENTS_MANAGE_UI_USERNAME \
  INKEEP_AGENTS_MANAGE_UI_PASSWORD \
  BETTER_AUTH_SECRET

# First masking pass, before any command below can print these values.
# NOTE(review): RUN_DB_URL / SPICEDB_ENDPOINT may still be unset here;
# presumably mask_env_vars tolerates that - confirm in common.sh.
mask_env_vars \
  RUN_DB_URL \
  SPICEDB_ENDPOINT \
  SPICEDB_PRESHARED_KEY \
  INKEEP_AGENTS_MANAGE_UI_PASSWORD \
  BETTER_AUTH_SECRET

# Fall back to Railway only for the connection values the caller did not pass.
if [[ -z "${RUN_DB_URL:-}" || -z "${SPICEDB_ENDPOINT:-}" ]]; then
  require_env_vars \
    RAILWAY_API_TOKEN \
    RAILWAY_PROJECT_ID \
    RAILWAY_OUTPUT_SERVICE \
    RAILWAY_RUN_DB_URL_KEY \
    RAILWAY_SPICEDB_ENDPOINT_KEY \
    PR_NUMBER

  RAILWAY_ENV_NAME="$(pr_env_name "${PR_NUMBER}")"

  railway_link_service \
    "${RAILWAY_PROJECT_ID}" \
    "${RAILWAY_OUTPUT_SERVICE}" \
    "${RAILWAY_ENV_NAME}"

  [[ -n "${RUN_DB_URL:-}" ]] ||
    RUN_DB_URL="$(railway_extract_runtime_var "${RAILWAY_OUTPUT_SERVICE}" "${RAILWAY_ENV_NAME}" "${RAILWAY_RUN_DB_URL_KEY}")"

  [[ -n "${SPICEDB_ENDPOINT:-}" ]] ||
    SPICEDB_ENDPOINT="$(railway_extract_runtime_var "${RAILWAY_OUTPUT_SERVICE}" "${RAILWAY_ENV_NAME}" "${RAILWAY_SPICEDB_ENDPOINT_KEY}")"

  # Second masking pass: the values fetched from Railway are new.
  mask_env_vars RUN_DB_URL SPICEDB_ENDPOINT
fi

require_env_vars RUN_DB_URL SPICEDB_ENDPOINT

# Export under the names the pnpm tasks below are run with.
export INKEEP_AGENTS_API_URL="${API_URL}"
export INKEEP_AGENTS_RUN_DATABASE_URL="${RUN_DB_URL}"
export SPICEDB_ENDPOINT
export TENANT_ID="${TENANT_ID:-default}"

printf '%s\n' "::group::Run preview runtime migrations"
pnpm db:run:migrate
printf '%s\n' "::endgroup::"

printf '%s\n' "::group::Initialize preview auth"
pnpm db:auth:init
printf '%s\n' "::endgroup::"

if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
  printf '%s\n' \
    "## Preview Auth Bootstrap" \
    "- Tenant: \`${TENANT_ID}\`" \
    "- Admin email: \`${INKEEP_AGENTS_MANAGE_UI_USERNAME}\`" \
    "- Runtime migrations: \`pnpm db:run:migrate\`" \
    "- Auth seed: \`pnpm db:auth:init\`" \
    >> "${GITHUB_STEP_SUMMARY}"
fi
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,6 +44,57 @@ railway_env_exists_count() { | |
| "${output_path}" | ||
| } | ||
|
|
||
#######################################
# Link the Railway CLI to a project, service and environment.
# Arguments:
#   $1 - Railway project ID
#   $2 - service name
#   $3 - environment name
# Outputs:   the CLI's stdout is discarded; a failure message goes to stderr
# Returns:   0 when `railway link` succeeds, 1 otherwise
#######################################
railway_link_service() {
  local project_id="$1"
  local service="$2"
  local env_name="$3"
  local -a link_args=(
    --project "${project_id}"
    --service "${service}"
    --environment "${env_name}"
  )

  railway link "${link_args[@]}" >/dev/null && return 0

  echo "Failed to link Railway CLI to project ${project_id} service ${service} env ${env_name}." >&2
  return 1
}
|
|
||
| railway_extract_runtime_var() { | ||
| local service="$1" | ||
| local env_name="$2" | ||
| local key="$3" | ||
| local max_attempts="${4:-20}" | ||
| local sleep_seconds="${5:-2}" | ||
| local attempt="" | ||
| local value="" | ||
|
|
||
| for attempt in $(seq 1 "${max_attempts}"); do | ||
| value="$( | ||
| railway variable list \ | ||
| --service "${service}" \ | ||
| --environment "${env_name}" \ | ||
| --json | | ||
| jq -r --arg key "${key}" '.[$key] // empty' | ||
| )" | ||
|
|
||
| if [ -n "${value}" ] && ! printf '%s' "${value}" | grep -q '\$[{][{]'; then | ||
| printf '%s' "${value}" | ||
| return 0 | ||
| fi | ||
|
|
||
| if [ "${attempt}" -lt "${max_attempts}" ]; then | ||
| sleep "${sleep_seconds}" | ||
| fi | ||
| done | ||
|
Comment on lines
+100
to
+103
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🟠 MAJOR: Retry without jitter causes thundering herd
Issue: The retry loops in `railway_extract_runtime_var` and `railway_ensure_tcp_proxy` sleep for a fixed `sleep_seconds` between attempts, with no jitter.
Why: When multiple concurrent PRs experience Railway variable resolution delays, they'll all retry at synchronized intervals, creating a thundering herd pattern that can overwhelm Railway's API and cause cascading timeouts.
Fix: Add randomized jitter to the sleep duration:
if [ "${attempt}" -lt "${max_attempts}" ]; then
  # Add jitter: sleep_seconds * (0.5 to 1.5)
  jittered_sleep=$(awk "BEGIN {srand(); print ${sleep_seconds} * (0.5 + rand())}")
  sleep "${jittered_sleep}"
fi
Refs: |
||
|
|
||
| if [ -z "${value:-}" ]; then | ||
| echo "Missing runtime variable ${key} in Railway service ${service} for env ${env_name}." >&2 | ||
| else | ||
| echo "Runtime variable ${key} is unresolved (${value}) after waiting for Railway interpolation." >&2 | ||
| fi | ||
| return 1 | ||
| } | ||
|
|
||
| mask_env_vars() { | ||
| local var_name | ||
| for var_name in "$@"; do | ||
|
|
@@ -53,9 +104,161 @@ mask_env_vars() { | |
| done | ||
| } | ||
|
|
||
#######################################
# POST a GraphQL document to the Railway backboard API.
# Globals:   RAILWAY_API_TOKEN (read) - sent as the bearer token
# Arguments: $1 - GraphQL query or mutation text
# Outputs:   the response body on stdout
# Returns:   curl's exit status (-f turns HTTP error statuses into failures)
#######################################
railway_graphql() {
  local query="$1"
  local payload=""
  local -a request_headers=()

  # jq builds the {"query": ...} envelope so the text is JSON-escaped.
  payload="$(jq -nc --arg query "${query}" '{query: $query}')"

  # NOTE(review): the browser-like User-Agent/Origin/Referer headers are
  # presumably required by the endpoint - confirm before removing them.
  request_headers=(
    -H "Content-Type: application/json"
    -H "Authorization: Bearer ${RAILWAY_API_TOKEN}"
    -H "User-Agent: Mozilla/5.0"
    -H "Origin: https://railway.com"
    -H "Referer: https://railway.com/"
  )

  curl --connect-timeout 10 --max-time 30 -fsS \
    "${request_headers[@]}" \
    -d "${payload}" \
    https://backboard.railway.com/graphql/v2
}
|
|
||
#######################################
# Look up the ID of a Railway environment by name.
# Arguments:
#   $1 - Railway project ID
#   $2 - environment name to match exactly
# Outputs:   the ID of each environment whose name equals $2, one per line;
#            nothing when no environment matches
# Returns:   jq's exit status
#######################################
railway_environment_id() {
  local project_id="$1"
  local env_name="$2"
  local response=""
  local gql=""

  gql="$(cat <<EOF
query {
environments(projectId: "${project_id}") {
edges {
node {
id
name
}
}
}
}
EOF
)"

  response="$(railway_graphql "${gql}")"

  jq -r --arg env_name "${env_name}" \
    '.data.environments.edges[] | select(.node.name == $env_name) | .node.id' \
    <<< "${response}"
}
|
|
||
#######################################
# Look up a service ID by service name within one Railway environment.
# Arguments:
#   $1 - Railway environment ID
#   $2 - service name to match exactly
# Outputs:   the serviceId of each service instance whose serviceName equals
#            $2, one per line; nothing when no instance matches
# Returns:   jq's exit status
#######################################
railway_service_id_for_env() {
  local env_id="$1"
  local service_name="$2"
  local response=""
  local gql=""

  gql="$(cat <<EOF
query {
environment(id: "${env_id}") {
serviceInstances {
edges {
node {
serviceId
serviceName
}
}
}
}
}
EOF
)"

  response="$(railway_graphql "${gql}")"

  jq -r --arg service_name "${service_name}" \
    '.data.environment.serviceInstances.edges[] | select(.node.serviceName == $service_name) | .node.serviceId' \
    <<< "${response}"
}
|
|
||
#######################################
# Ensure a TCP proxy exists for a service port in a Railway environment and
# wait until Railway reports it as ACTIVE.
# Arguments:
#   $1 - Railway project ID
#   $2 - environment name
#   $3 - service name
#   $4 - application port (must be a JSON number; passed to jq --argjson)
#   $5 - maximum number of status polls (default 30)
#   $6 - seconds to sleep between polls (default 2)
# Outputs:   diagnostics on stderr when resolution or activation fails
# Returns:   0 once a proxy for the port has syncStatus ACTIVE;
#            1 when the environment or service cannot be resolved, or no
#            ACTIVE proxy is observed within the allowed polls
#######################################
railway_ensure_tcp_proxy() {
  local project_id="$1"
  local env_name="$2"
  local service_name="$3"
  local application_port="$4"
  local max_attempts="${5:-30}"
  local sleep_seconds="${6:-2}"
  local env_id=""
  local service_id=""
  local response=""
  local count=""
  local active=""
  local attempt=""

  env_id="$(railway_environment_id "${project_id}" "${env_name}")"
  if [[ -z "${env_id}" ]]; then
    echo "Unable to resolve Railway environment ID for ${env_name}." >&2
    return 1
  fi

  service_id="$(railway_service_id_for_env "${env_id}" "${service_name}")"
  if [[ -z "${service_id}" ]]; then
    echo "Unable to resolve Railway service ID for ${service_name} in ${env_name}." >&2
    return 1
  fi

  response="$(
    railway_graphql "$(cat <<EOF
query {
tcpProxies(environmentId: "${env_id}", serviceId: "${service_id}") {
id
domain
proxyPort
applicationPort
syncStatus
}
}
EOF
)"
  )"

  # Create a proxy only when the listing positively reports none for this
  # port; an unreadable listing must not trigger a duplicate create.
  count="$(jq -r --argjson application_port "${application_port}" '[.data.tcpProxies[] | select(.applicationPort == $application_port)] | length' <<< "${response}")"
  if [[ "${count}" == "0" ]]; then
    railway_graphql "$(cat <<EOF
mutation {
tcpProxyCreate(input: {
environmentId: "${env_id}"
serviceId: "${service_id}"
applicationPort: ${application_port}
}) {
id
}
}
EOF
)" >/dev/null
  fi

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    response="$(
      railway_graphql "$(cat <<EOF
query {
tcpProxies(environmentId: "${env_id}", serviceId: "${service_id}") {
applicationPort
syncStatus
}
}
EOF
)"
    )"

    active="$(jq -r --argjson application_port "${application_port}" '[.data.tcpProxies[] | select(.applicationPort == $application_port and .syncStatus == "ACTIVE")] | length' <<< "${response}")"

    # Only a positive integer proves an ACTIVE proxy was seen. An empty or
    # non-numeric result (failed request, jq error) used to compare as
    # `!= "0"` and was reported as success; treat it as "not ready" instead.
    if [[ "${active}" =~ ^[1-9][0-9]*$ ]]; then
      return 0
    fi

    if ((attempt < max_attempts)); then
      sleep "${sleep_seconds}"
    fi
  done

  echo "TCP proxy for ${service_name} in ${env_name} did not become ACTIVE." >&2
  return 1
}
|
|
||
#######################################
# Filter that redacts secrets from log text.
# Reads stdin and writes the redacted text to stdout. Redacts, in order:
#   1. everything after a postgres:// or postgresql:// scheme up to whitespace
#   2. the value after an upper-case NAME containing SECRET, KEY, TOKEN or
#      PASSWORD followed by ':' or '='
#   3. better-auth cookie values in Set-Cookie headers
#   4. any better-auth.<name>=<value> pair
#   5. Bearer tokens
# Returns:   sed's exit status
#######################################
redact_preview_logs() {
  # Bracket expressions deliberately use only POSIX classes. `\r` / `\n`
  # inside [...] are a GNU-only escape; other seds read them as the literal
  # characters '\', 'r' and 'n', which would stop redaction mid-secret.
  # [:space:] already covers CR and LF.
  sed -E \
    -e 's#(postgres(ql)?://)[^[:space:]]+#\1[REDACTED]#g' \
    -e 's#([A-Z_]*(SECRET|KEY|TOKEN|PASSWORD)[A-Z_]*[:=])[^[:space:]]+#\1[REDACTED]#g' \
    -e 's#([Ss]et-[Cc]ookie:[[:space:]]*better-auth[^=]*=)[^;[:space:]]+#\1[REDACTED]#g' \
    -e 's#(better-auth\.[^=]+=)[^;[:space:]]+#\1[REDACTED]#g' \
    -e 's#(Bearer )[A-Za-z0-9._-]+#\1[REDACTED]#g'
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💭 Consider: Add explicit timeouts to migration and auth init commands
Issue: The `pnpm db:run:migrate` and `pnpm db:auth:init` commands run without explicit timeouts. If a database connection hangs, the job will wait until the 20-minute job timeout.
Why: Database migrations during preview bootstrapping may encounter transient connection issues to Railway-hosted Postgres. Explicit timeouts provide faster feedback and clearer failure attribution.
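Fix: Consider wrapping with explicit timeouts, for example `timeout 5m pnpm db:run:migrate` and `timeout 5m pnpm db:auth:init` (durations are illustrative).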
This is optional — the job timeout is a backstop, but explicit command timeouts give faster feedback.