Skip to content

Commit 54c5ad6

Browse files
feat(cron): add build status reconciliation cron job (#1641)
* feat(cron): add build status reconciliation cron job

  Adds a cron job that runs every 15 minutes to reconcile stale build_requests rows by querying the builder API directly.

  - For builds with a builder_job_id that have been stale for more than 5 minutes: fetches the real status from the builder, updates status and last_error, and records build time for completed builds.
  - For builds without a builder_job_id that are older than 1 hour: marks them as failed, since they were never submitted to the builder.

  This ensures the dashboard shows accurate build state even when the CLI disconnects before calling /build/status (e.g., because of network issues).

* fix: set reconcile queue consumer to run every minute
* fix: fail fast when BUILDER_URL or BUILDER_API_KEY is missing
* fix: skip recordBuildTime for unexpected platform values instead of defaulting to ios
* refactor: use Promise.allSettled instead of sequential loop, bump batch to 500
* fix: lint

---------

Co-authored-by: Martin Donadieu <martindonadieu@gmail.com>
1 parent 28df4d4 commit 54c5ad6

File tree

3 files changed

+238
-0
lines changed

3 files changed

+238
-0
lines changed
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import type { MiddlewareKeyVariables } from '../utils/hono.ts'
2+
import { Hono } from 'hono/tiny'
3+
import { BRES, middlewareAPISecret } from '../utils/hono.ts'
4+
import { cloudlog, cloudlogErr } from '../utils/logging.ts'
5+
import { recordBuildTime, supabaseAdmin } from '../utils/supabase.ts'
6+
import { getEnv } from '../utils/utils.ts'
7+
8+
/**
 * Shape of the JSON body read from the builder's `GET /jobs/{id}` endpoint
 * by the reconciliation handler in this file.
 */
interface BuilderStatusResponse {
  /** State of the build job as reported by the builder. */
  job: {
    /** Builder-side job status; copied verbatim into `build_requests.status`. */
    status: string
    /**
     * Start timestamp, or null when not available.
     * NOTE(review): presumably epoch milliseconds, since the handler divides the
     * completed/started difference by 1000 to obtain seconds — confirm with the builder API.
     */
    started_at: number | null
    /** Completion timestamp (same unit as `started_at`), or null when not available. */
    completed_at: number | null
    /** Error text reported by the builder, or null when there is none. */
    error: string | null
  }
  /** Machine details for the job; not read by the handler in this file. */
  machine: Record<string, unknown> | null
  /** Optional upload URL; not read by the handler in this file. */
  uploadUrl?: string
}
18+
19+
/**
 * Build statuses treated as final: rows already in one of these states are
 * excluded from the stale-build query, and build time is only recorded once
 * the builder reports one of them. Typed read-only because it is shared
 * module-level state.
 */
const TERMINAL_STATUSES: ReadonlySet<string> = new Set(['succeeded', 'failed', 'expired', 'released', 'cancelled'])
/** A non-terminal row is considered stale once `updated_at` is older than this many minutes. */
const STALE_THRESHOLD_MINUTES = 5
/** A stale row without a `builder_job_id` is marked failed once `created_at` is older than this many hours. */
const ORPHAN_THRESHOLD_HOURS = 1
/** Maximum number of stale rows fetched (oldest `updated_at` first) per run. */
const BATCH_LIMIT = 500
23+
24+
// Hono sub-application for the build status reconciliation trigger; the triggers
// index imports this `app` export and mounts it at '/cron_reconcile_build_status'.
export const app = new Hono<MiddlewareKeyVariables>()
25+
26+
/**
 * POST / — reconcile stale `build_requests` rows with the builder service.
 *
 * Selects up to BATCH_LIMIT rows whose status is not in TERMINAL_STATUSES and
 * whose `updated_at` is older than STALE_THRESHOLD_MINUTES (oldest first), then:
 *  - rows without a `builder_job_id` that were created more than
 *    ORPHAN_THRESHOLD_HOURS ago are marked `failed`;
 *  - rows with a `builder_job_id` get `status` / `last_error` copied from
 *    `GET {BUILDER_URL}/jobs/{id}`, and build time is recorded when the reported
 *    status is terminal and both timestamps are present.
 * Rows without a `builder_job_id` that are younger than the orphan cutoff are
 * left untouched (they still count towards `total` in the summary log).
 *
 * Per-row failures are logged and counted, never thrown; every code path
 * responds with `BRES`.
 */
app.post('/', middlewareAPISecret, async (c) => {
  const startTime = Date.now()
  const requestId = c.get('requestId')
  let reconciled = 0
  let orphaned = 0
  let errors = 0

  const supabase = supabaseAdmin(c)
  const builderUrl = getEnv(c, 'BUILDER_URL')
  const builderApiKey = getEnv(c, 'BUILDER_API_KEY')

  // Without builder credentials nothing can be reconciled: log and stop before touching the database.
  if (!builderUrl || !builderApiKey) {
    cloudlogErr({ requestId, message: 'Missing BUILDER_URL or BUILDER_API_KEY env var, skipping reconciliation' })
    return c.json(BRES)
  }

  const staleBefore = new Date(Date.now() - STALE_THRESHOLD_MINUTES * 60 * 1000).toISOString()
  const { data: staleBuilds, error: queryError } = await supabase
    .from('build_requests')
    .select('id, builder_job_id, app_id, owner_org, requested_by, platform, status, created_at')
    .not('status', 'in', `(${[...TERMINAL_STATUSES].join(',')})`)
    .lt('updated_at', staleBefore)
    .order('updated_at', { ascending: true })
    .limit(BATCH_LIMIT)

  if (queryError) {
    cloudlogErr({ requestId, message: 'Failed to query stale build_requests', error: queryError.message })
    return c.json(BRES)
  }

  if (!staleBuilds || staleBuilds.length === 0) {
    cloudlog({ requestId, message: 'No stale builds to reconcile' })
    return c.json(BRES)
  }

  cloudlog({ requestId, message: `Found ${staleBuilds.length} stale builds to reconcile` })

  const orphanCutoff = Date.now() - ORPHAN_THRESHOLD_HOURS * 60 * 60 * 1000
  const orphanBuilds = staleBuilds.filter(b => !b.builder_job_id && new Date(b.created_at).getTime() < orphanCutoff)
  const builderBuilds = staleBuilds.filter(b => !!b.builder_job_id)

  // Orphans: never submitted to the builder, so there is nothing to query — mark them failed.
  const orphanResults = await Promise.allSettled(
    orphanBuilds.map(async (build) => {
      const { error: updateError } = await supabase
        .from('build_requests')
        .update({
          status: 'failed',
          last_error: 'Build request was never submitted to builder',
          updated_at: new Date().toISOString(),
        })
        .eq('id', build.id)

      if (updateError)
        throw new Error(updateError.message)
    }),
  )

  // allSettled preserves input order, so `index` maps each result back to its build.
  orphanResults.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      orphaned++
      return
    }
    // Stringify the reason (as the builder branch does) so the message is not lost if the logger serializes to JSON.
    cloudlogErr({ requestId, message: 'Failed to mark orphan build as failed', buildId: orphanBuilds[index]?.id, error: String(result.reason) })
    errors++
  })

  const builderResults = await Promise.allSettled(
    builderBuilds.map(async (build) => {
      const jobId = build.builder_job_id
      // Not reachable after the filter above; narrows the type without a non-null assertion.
      if (!jobId)
        throw new Error('Build has no builder_job_id')

      // Escape the id so it can never alter the request path.
      const response = await fetch(`${builderUrl}/jobs/${encodeURIComponent(jobId)}`, {
        method: 'GET',
        headers: { 'x-api-key': builderApiKey },
      })

      if (!response.ok)
        throw new Error(`Builder status fetch failed: ${response.status}`)

      // Validate the shape before writing: a body without job.status must not
      // touch the row (it would bump updated_at without setting a real status).
      const builderJob = await response.json() as Partial<BuilderStatusResponse> | null
      const job = builderJob?.job
      if (!job || typeof job.status !== 'string' || job.status.length === 0)
        throw new Error('Builder status response is missing job.status')

      const jobStatus = job.status

      const { error: updateError } = await supabase
        .from('build_requests')
        .update({
          status: jobStatus,
          last_error: job.error || null,
          updated_at: new Date().toISOString(),
        })
        .eq('id', build.id)

      if (updateError)
        throw new Error(updateError.message)

      // Build time is only recorded for finished jobs that report both timestamps.
      if (!TERMINAL_STATUSES.has(jobStatus) || !job.started_at || !job.completed_at)
        return

      const buildTimeSeconds = Math.floor((job.completed_at - job.started_at) / 1000)

      if (build.platform !== 'ios' && build.platform !== 'android') {
        cloudlogErr({ requestId, message: 'Unexpected platform, skipping recordBuildTime', buildId: build.id, platform: build.platform })
        return
      }

      await recordBuildTime(
        c,
        build.owner_org,
        build.requested_by,
        jobId,
        build.platform,
        buildTimeSeconds,
      )
    }),
  )

  builderResults.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      reconciled++
      return
    }
    const failedBuild = builderBuilds[index]
    cloudlogErr({ requestId, message: 'Error reconciling build', buildId: failedBuild?.id, jobId: failedBuild?.builder_job_id, error: String(result.reason) })
    errors++
  })

  cloudlog({
    requestId,
    message: 'Build status reconciliation completed',
    duration_ms: Date.now() - startTime,
    total: staleBuilds.length,
    reconciled,
    orphaned,
    errors,
  })

  return c.json(BRES)
})

supabase/functions/triggers/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { app as credit_usage_alerts } from '../_backend/triggers/credit_usage_al
22
import { app as cron_clean_orphan_images } from '../_backend/triggers/cron_clean_orphan_images.ts'
33
import { app as cron_clear_versions } from '../_backend/triggers/cron_clear_versions.ts'
44
import { app as cron_email } from '../_backend/triggers/cron_email.ts'
5+
import { app as cron_reconcile_build_status } from '../_backend/triggers/cron_reconcile_build_status.ts'
56
import { app as cron_stat_app } from '../_backend/triggers/cron_stat_app.ts'
67
import { app as cron_stat_org } from '../_backend/triggers/cron_stat_org.ts'
78
import { app as cron_sync_sub } from '../_backend/triggers/cron_sync_sub.ts'
@@ -48,6 +49,7 @@ appGlobal.route('/cron_stat_org', cron_stat_org)
4849
appGlobal.route('/cron_sync_sub', cron_sync_sub)
4950
appGlobal.route('/cron_clear_versions', cron_clear_versions)
5051
appGlobal.route('/cron_clean_orphan_images', cron_clean_orphan_images)
52+
appGlobal.route('/cron_reconcile_build_status', cron_reconcile_build_status)
5153
appGlobal.route('/credit_usage_alerts', credit_usage_alerts)
5254
appGlobal.route('/on_organization_delete', on_organization_delete)
5355
appGlobal.route('/on_deploy_history_create', on_deploy_history_create)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
-- Queue that carries build status reconciliation jobs.
SELECT pgmq.create('cron_reconcile_build_status');

-- Producer task: enqueue a reconciliation job every 15 minutes.
INSERT INTO public.cron_tasks (
    name,
    description,
    task_type,
    target,
    batch_size,
    second_interval,
    minute_interval,
    hour_interval,
    run_at_hour,
    run_at_minute,
    run_at_second,
    run_on_dow,
    run_on_day
) VALUES (
    'reconcile_build_status',
    'Send build status reconciliation job to queue every 15 minutes',
    'queue',
    'cron_reconcile_build_status',
    null,
    null,
    15,
    null,
    null,
    null,
    0,
    null,
    null
)
-- Re-running the migration must leave the row exactly as specified above, so
-- every column set by the INSERT is synced: a pre-existing row cannot keep a
-- stale second/hour interval or run_at_* value next to the new minute interval.
ON CONFLICT (name) DO UPDATE SET
    description = EXCLUDED.description,
    task_type = EXCLUDED.task_type,
    target = EXCLUDED.target,
    batch_size = EXCLUDED.batch_size,
    second_interval = EXCLUDED.second_interval,
    minute_interval = EXCLUDED.minute_interval,
    hour_interval = EXCLUDED.hour_interval,
    run_at_hour = EXCLUDED.run_at_hour,
    run_at_minute = EXCLUDED.run_at_minute,
    run_at_second = EXCLUDED.run_at_second,
    run_on_dow = EXCLUDED.run_on_dow,
    run_on_day = EXCLUDED.run_on_day,
    updated_at = NOW();

-- Consumer task: process the reconciliation queue every minute.
INSERT INTO public.cron_tasks (
    name,
    description,
    task_type,
    target,
    batch_size,
    second_interval,
    minute_interval,
    hour_interval,
    run_at_hour,
    run_at_minute,
    run_at_second,
    run_on_dow,
    run_on_day
) VALUES (
    'reconcile_build_status_queue',
    'Process build status reconciliation queue',
    'function_queue',
    '["cron_reconcile_build_status"]',
    null,
    null,
    1,
    null,
    null,
    null,
    0,
    null,
    null
)
-- Same full-column sync as the producer task above.
ON CONFLICT (name) DO UPDATE SET
    description = EXCLUDED.description,
    task_type = EXCLUDED.task_type,
    target = EXCLUDED.target,
    batch_size = EXCLUDED.batch_size,
    second_interval = EXCLUDED.second_interval,
    minute_interval = EXCLUDED.minute_interval,
    hour_interval = EXCLUDED.hour_interval,
    run_at_hour = EXCLUDED.run_at_hour,
    run_at_minute = EXCLUDED.run_at_minute,
    run_at_second = EXCLUDED.run_at_second,
    run_on_dow = EXCLUDED.run_on_dow,
    run_on_day = EXCLUDED.run_on_day,
    updated_at = NOW();

0 commit comments

Comments
 (0)