11-- One-shot data migration to create/populate execution_data & cost, then drop legacy columns
22-- Safe on reruns and across differing prior schemas
3-
3+ -- Note: depending on the migration runner's timeouts, this migration might have to be run manually
44-- 1) Ensure execution_data exists (prefer rename if only metadata exists)
55DO $$
66BEGIN
@@ -24,19 +24,35 @@ DO $$
2424DECLARE
2525 v_batch_size integer := 500 ; -- keep batches small to avoid timeouts/spills
2626 v_rows_updated integer := 0 ;
27+ v_rows_selected integer := 0 ;
28+ v_last_id text := ' ' ;
29+ v_last_created_at timestamp := ' 1970-01-01 00:00:00' ;
2730BEGIN
31+ -- modest per-statement timeout; adjust based on observed per-batch runtime
32+ PERFORM set_config(' statement_timeout' , ' 180s' , true);
2833 LOOP
29- WITH RECURSIVE candidate AS (
30- SELECT id
31- FROM workflow_execution_logs
32- WHERE cost IS NULL
33- ORDER BY id
34- LIMIT v_batch_size
35- ),
34+ CREATE TEMP TABLE IF NOT EXISTS _tmp_candidate_ids(id text , created_at timestamp ) ON COMMIT DROP;
35+ TRUNCATE _tmp_candidate_ids;
36+ INSERT INTO _tmp_candidate_ids(id, created_at)
37+ SELECT id, created_at
38+ FROM workflow_execution_logs
39+ WHERE (created_at, id) > (v_last_created_at, v_last_id) AND cost IS NULL
40+ ORDER BY created_at, id
41+ LIMIT v_batch_size;
42+
43+ SELECT COUNT (* ) INTO v_rows_selected FROM _tmp_candidate_ids;
44+ EXIT WHEN v_rows_selected = 0 ;
45+ SELECT created_at, id
46+ INTO v_last_created_at, v_last_id
47+ FROM _tmp_candidate_ids
48+ ORDER BY created_at DESC , id DESC
49+ LIMIT 1 ;
50+
51+ WITH RECURSIVE
3652 spans AS (
3753 SELECT l .id , s .span
3854 FROM workflow_execution_logs l
39- JOIN candidate c ON c .id = l .id
55+ JOIN _tmp_candidate_ids c ON c .id = l .id
4056 LEFT JOIN LATERAL jsonb_array_elements(
4157 COALESCE(
4258 CASE
@@ -105,7 +121,7 @@ BEGIN
105121 NULLIF((l .execution_data - > ' tokenBreakdown' - >> ' prompt' )::numeric , 0 ) AS prompt,
106122 NULLIF((l .execution_data - > ' tokenBreakdown' - >> ' completion' )::numeric , 0 ) AS completion
107123 FROM workflow_execution_logs l
108- JOIN candidate c ON c .id = l .id
124+ JOIN _tmp_candidate_ids c ON c .id = l .id
109125 )
110126 UPDATE workflow_execution_logs AS l
111127 SET cost = jsonb_strip_nulls(
@@ -133,7 +149,7 @@ BEGIN
133149 WHERE l .id = agg .id ;
134150
135151 GET DIAGNOSTICS v_rows_updated = ROW_COUNT;
136- EXIT WHEN v_rows_updated = 0 ; -- no more rows to backfill
152+ -- no early exit on the update count: keep advancing the (created_at, id) cursor until a batch selects no rows
137153 END LOOP;
138154END $$;-- > statement-breakpoint
139155
0 commit comments