#! /usr/bin/env bash
# wait-for-hydra: Wait for a Hydra CI build to reach a terminal state.
#
# Supports two modes:
#   1. SSE mode (preferred): connects to hydra-github-bridge SSE endpoint
#      for real-time status updates. Requires HYDRA_STATUS_URL.
#   2. Poll mode (fallback): polls GitHub API with exponential backoff.
#
# Exit codes:
#   0 - build succeeded
@@ -20,6 +22,7 @@ set -euo pipefail
# Tunable parameters. Each keeps a value already present in the environment
# and otherwise falls back to the default shown here.
: "${JITTER:=30}"          # upper bound (seconds) of random jitter added to each poll delay
: "${TIMEOUT:=3600}"       # overall time budget in seconds; 0 disables the timeout check
: "${MAX_DELAY:=300}"      # cap (seconds) for the exponential backoff delay
: "${HYDRA_STATUS_URL:=}"  # base URL of the status bridge; empty selects poll mode only
2326
# --- Validation --------------------------------------------------------------
2528
@@ -37,6 +40,17 @@ HYDRA_JOB="${CHECK:-$STATUS}"
3740
# --- Helpers -----------------------------------------------------------------
3942
# Determine the conclusion/state from a terminal status string.
# Arguments: $1 - raw conclusion or state string
# Outputs:   "success", "failure", or an empty line (pending/unknown) on stdout
classify_status() {
  local raw="$1"
  case "$raw" in
    success) echo "success" ;;
    failure) echo "failure" ;;
    # Anything else is treated as not (yet) terminal.
    *) echo "" ;;
  esac
}
53+
4054# Check if we've exceeded the timeout.
4155check_timeout () {
4256 if [ " $TIMEOUT " -gt 0 ] && [ " $SECONDS " -ge " $TIMEOUT " ]; then
@@ -45,61 +59,220 @@ check_timeout() {
4559 fi
4660}
4761
# --- SSE Mode ----------------------------------------------------------------
63+
# Try to get the current status from the bridge's one-shot endpoint.
# The endpoint returns a JSON map keyed by check-run name:
#   { "required": { "conclusion": "success", ... }, "other-job": { ... } }
# Globals:   HYDRA_JOB (read)
# Arguments: $1 - URL of the one-shot status endpoint
# Outputs:   the conclusion string for HYDRA_JOB, or nothing if absent
# Returns:   1 if the HTTP request fails; 0 otherwise (jq errors are
#            deliberately swallowed so a malformed body reads as "no status")
sse_get_current_status() {
  local url="$1"
  local result
  result=$(curl -sf --max-time 10 "$url" 2>/dev/null) || return 1
  # printf rather than echo: the payload is arbitrary data.
  printf '%s\n' "$result" \
    | jq -r --arg job "$HYDRA_JOB" '.[$job].conclusion // empty' 2>/dev/null || true
}
74+
# Terminate the script if the given status string is terminal.
# Globals:   HYDRA_JOB (read)
# Arguments: $1 - raw conclusion/state string
#            $2 - provenance text shown in parentheses in the log line
# Returns:   0 when the status is not terminal; otherwise exits the script
#            with 0 (success) or 1 (failure).
_sse_exit_if_terminal() {
  local state="$1" origin="$2"
  case "$(classify_status "$state")" in
    success)
      echo "$HYDRA_JOB succeeded ($origin)"
      exit 0
      ;;
    failure)
      echo "$HYDRA_JOB failed ($origin)"
      exit 1
      ;;
  esac
  return 0
}

# Connect to the SSE stream and wait for a terminal event.
# Globals:   GITHUB_REPOSITORY, RELEVANT_SHA, HYDRA_STATUS_URL, HYDRA_JOB,
#            TIMEOUT (all read)
# Exits with 0 on success, 1 on failure, or returns 1 when the stream ends
# without a terminal status, to signal fallback to polling.
sse_wait() {
  local owner repo sha
  # Extract owner/repo from GITHUB_REPOSITORY (format: owner/repo)
  owner="${GITHUB_REPOSITORY%%/*}"
  repo="${GITHUB_REPOSITORY##*/}"
  sha="$RELEVANT_SHA"

  local base_url="${HYDRA_STATUS_URL%/}/status/${owner}/${repo}/${sha}"

  echo "SSE: Checking current status at ${base_url}"

  # One-shot check: build may already be done.
  local current_state
  current_state=$(sse_get_current_status "$base_url") || true
  if [ -n "$current_state" ]; then
    _sse_exit_if_terminal "$current_state" "from cached status"
    echo "SSE: Current status is '$current_state', connecting to event stream..."
  fi

  # Cap the SSE stream time to leave room for one-shot re-check and
  # polling fallback if the stream ends without delivering our event.
  # Reserve 2 minutes for fallback; minimum SSE time is 60 seconds.
  local sse_max_time
  if [ "$TIMEOUT" -gt 0 ]; then
    sse_max_time=$(( TIMEOUT - SECONDS - 120 ))
    if [ "$sse_max_time" -lt 60 ]; then
      sse_max_time=60
    fi
  else
    sse_max_time=86400  # no timeout: cap at 24h (matches cache TTL)
  fi

  # Track when we last did a one-shot re-check so we can poll the
  # cached endpoint periodically. CDN proxies (e.g. Cloudflare) may
  # buffer SSE events, so we re-check every 120s as a safety net.
  local last_recheck="$SECONDS"

  # Loop-scoped scratch variables, declared once so none leak as globals.
  local line read_rc poll_state data name conclusion

  echo "SSE: Connecting to ${base_url}/events (max ${sse_max_time}s, filtering for '$HYDRA_JOB')"

  # Stream SSE events. curl -N disables buffering.
  # Each event carries a single check-run: {"name":"...","conclusion":"..."}
  # We only act on events matching HYDRA_JOB.
  #
  # We use process substitution (< <(curl ...)) instead of a pipe (curl |
  # while) so the while loop runs in the current shell; otherwise `exit`
  # inside the loop would only terminate the subshell, not the script.
  while true; do
    check_timeout

    # Periodic one-shot re-check: catch status changes that the SSE
    # stream failed to deliver (CDN buffering, lost events, etc.).
    if [ $(( SECONDS - last_recheck )) -ge 120 ]; then
      last_recheck="$SECONDS"
      poll_state=$(sse_get_current_status "$base_url") || true
      if [ -n "$poll_state" ]; then
        _sse_exit_if_terminal "$poll_state" "from periodic re-check at ${SECONDS}s"
      fi
    fi

    # read -t 60: time out each read after 60s so check_timeout fires even
    # when the SSE stream is idle. Without this, read blocks indefinitely
    # and the script's TIMEOUT is never enforced. In bash, read -t returns
    # >128 on timeout, 1 on EOF.
    read_rc=0
    IFS= read -r -t 60 line || read_rc=$?
    if [ "$read_rc" -gt 128 ]; then
      # read timed out (no data for 60s). Loop to check_timeout.
      continue
    elif [ "$read_rc" -ne 0 ]; then
      # EOF: curl exited (connection drop or max-time reached).
      break
    fi

    case "$line" in
      "data: "*)
        data="${line#data: }"
        name=$(printf '%s\n' "$data" | jq -r '.name // empty' 2>/dev/null) || continue
        # Skip events for other check-runs.
        [ "$name" = "$HYDRA_JOB" ] || continue
        conclusion=$(printf '%s\n' "$data" | jq -r '.conclusion // empty' 2>/dev/null) || continue
        echo "SSE event: $name conclusion=$conclusion (${SECONDS}s elapsed)"
        _sse_exit_if_terminal "$conclusion" "via SSE"
        ;;
    esac
  done < <(curl -Nsf --max-time "$sse_max_time" \
    "${base_url}/events" 2>/dev/null)

  # SSE stream ended without a terminal event for HYDRA_JOB. This can
  # happen on connection drop, Cloudflare buffering, or curl max-time.
  # Before falling back to polling, do one final one-shot check: the
  # status may have changed while we were connected but the event was
  # lost in transit.
  echo "SSE: Stream ended after ${SECONDS}s, re-checking one-shot endpoint..."
  local final_state
  final_state=$(sse_get_current_status "$base_url") || true
  if [ -n "$final_state" ]; then
    _sse_exit_if_terminal "$final_state" "from one-shot re-check"
  fi

  echo "SSE: Falling back to polling..."
  return 1
}
211+
# --- Poll Mode ---------------------------------------------------------------
49213
# Query the GitHub API once for the current result of the watched job.
# Globals:   CHECK, STATUS, GITHUB_REPOSITORY, RELEVANT_SHA (all read)
# Outputs:   stdout - the last line produced by the query: the check-run
#                     conclusion when CHECK is set, otherwise the commit
#                     status state for STATUS
#            stderr - a debug line describing the query, kept off stdout so
#                     it does not pollute the captured result
poll_github() {
  if [ -n "$CHECK" ]; then
    echo "Querying: gh api repos/$GITHUB_REPOSITORY/commits/$RELEVANT_SHA/check-runs --paginate --jq '.check_runs[] | select(.name == \"$CHECK\") | .conclusion'" >&2
    # Use tail -1 to handle paginated results that may concatenate
    # multiple values; take the last (most recent) non-empty line.
    gh api "repos/$GITHUB_REPOSITORY/commits/$RELEVANT_SHA/check-runs" \
      --paginate \
      --jq ".check_runs[] | select(.name == \"$CHECK\") | .conclusion" \
      | tail -1
  else
    echo "Querying: gh api repos/$GITHUB_REPOSITORY/commits/$RELEVANT_SHA/status --paginate --jq '.statuses[] | select(.context == \"$STATUS\") | .state'" >&2
    gh api "repos/$GITHUB_REPOSITORY/commits/$RELEVANT_SHA/status" \
      --paginate \
      --jq ".statuses[] | select(.context == \"$STATUS\") | .state" \
      | tail -1
  fi
}
69231
# Poll GitHub until the watched job reaches a terminal result.
# Globals:   DELAY, JITTER, MAX_DELAY, HYDRA_JOB (all read)
# Never returns normally: exits 0 when poll_github reports "success" and
# 1 when it reports "failure". Any other value is treated as pending and
# retried after a delay; check_timeout runs at the top of every iteration.
poll_wait() {
  local iteration=0
  local current_delay="$DELAY"
  local conclusion wait_time

  while true; do
    check_timeout
    iteration=$(( iteration + 1 ))

    conclusion=$(poll_github)

    case "$conclusion" in
      success)
        echo "$HYDRA_JOB succeeded (iteration $iteration, ${SECONDS}s elapsed)"
        exit 0
        ;;
      failure)
        echo "$HYDRA_JOB failed (iteration $iteration, ${SECONDS}s elapsed)"
        exit 1
        ;;
      *)
        # Add 0..JITTER seconds of random jitter to the current delay.
        wait_time=$(( current_delay + RANDOM % (JITTER + 1) ))
        echo "$HYDRA_JOB pending (conclusion='$conclusion'). Iteration $iteration, ${SECONDS}s elapsed. Waiting ${wait_time}s..."
        sleep "$wait_time"

        # Exponential backoff: double the delay, cap at MAX_DELAY.
        current_delay=$(( current_delay * 2 ))
        if [ "$current_delay" -gt "$MAX_DELAY" ]; then
          current_delay="$MAX_DELAY"
        fi
        ;;
    esac
  done
}
266+
# --- Main --------------------------------------------------------------------

# Reset bash's elapsed-seconds counter so that TIMEOUT is measured from here.
SECONDS=0

echo "Waiting for $HYDRA_JOB on $RELEVANT_SHA (timeout=${TIMEOUT}s, max-delay=${MAX_DELAY}s)"

if [ -n "$HYDRA_STATUS_URL" ]; then
  # Try SSE mode first. sse_wait exits the script itself on a terminal
  # status and only returns (non-zero) to request the polling fallback.
  sse_wait || poll_wait
else
  poll_wait
fi
0 commit comments