Skip to content

Commit 60a9a25

Browse files
Merge pull request #1009 from simstudioai/staging
Update migration file for record-keeping purposes
2 parents 5c56cbd + 52fa388 commit 60a9a25

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

apps/sim/db/migrations/0076_damp_vector.sql

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
-- One-shot data migration to create/populate execution_data & cost, then drop legacy columns
22
-- Safe on reruns and across differing prior schemas
3-
3+
-- Note: depending on runner timeouts, this migration might have to be run manually
44
-- 1) Ensure execution_data exists (prefer rename if only metadata exists)
55
DO $$
66
BEGIN
@@ -24,19 +24,35 @@ DO $$
2424
DECLARE
2525
v_batch_size integer := 500; -- keep batches small to avoid timeouts/spills
2626
v_rows_updated integer := 0;
27+
v_rows_selected integer := 0;
28+
v_last_id text := '';
29+
v_last_created_at timestamp := '1970-01-01 00:00:00';
2730
BEGIN
31+
-- modest per-statement timeout; adjust based on observed per-batch runtime
32+
PERFORM set_config('statement_timeout', '180s', true);
2833
LOOP
29-
WITH RECURSIVE candidate AS (
30-
SELECT id
31-
FROM workflow_execution_logs
32-
WHERE cost IS NULL
33-
ORDER BY id
34-
LIMIT v_batch_size
35-
),
34+
CREATE TEMP TABLE IF NOT EXISTS _tmp_candidate_ids(id text, created_at timestamp) ON COMMIT DROP;
35+
TRUNCATE _tmp_candidate_ids;
36+
INSERT INTO _tmp_candidate_ids(id, created_at)
37+
SELECT id, created_at
38+
FROM workflow_execution_logs
39+
WHERE (created_at, id) > (v_last_created_at, v_last_id) AND cost IS NULL
40+
ORDER BY created_at, id
41+
LIMIT v_batch_size;
42+
43+
SELECT COUNT(*) INTO v_rows_selected FROM _tmp_candidate_ids;
44+
EXIT WHEN v_rows_selected = 0;
45+
SELECT created_at, id
46+
INTO v_last_created_at, v_last_id
47+
FROM _tmp_candidate_ids
48+
ORDER BY created_at DESC, id DESC
49+
LIMIT 1;
50+
51+
WITH RECURSIVE
3652
spans AS (
3753
SELECT l.id, s.span
3854
FROM workflow_execution_logs l
39-
JOIN candidate c ON c.id = l.id
55+
JOIN _tmp_candidate_ids c ON c.id = l.id
4056
LEFT JOIN LATERAL jsonb_array_elements(
4157
COALESCE(
4258
CASE
@@ -105,7 +121,7 @@ BEGIN
105121
NULLIF((l.execution_data->'tokenBreakdown'->>'prompt')::numeric, 0) AS prompt,
106122
NULLIF((l.execution_data->'tokenBreakdown'->>'completion')::numeric, 0) AS completion
107123
FROM workflow_execution_logs l
108-
JOIN candidate c ON c.id = l.id
124+
JOIN _tmp_candidate_ids c ON c.id = l.id
109125
)
110126
UPDATE workflow_execution_logs AS l
111127
SET cost = jsonb_strip_nulls(
@@ -133,7 +149,7 @@ BEGIN
133149
WHERE l.id = agg.id;
134150

135151
GET DIAGNOSTICS v_rows_updated = ROW_COUNT;
136-
EXIT WHEN v_rows_updated = 0; -- no more rows to backfill
152+
-- no exit on v_rows_updated here: keep advancing the (created_at, id) cursor; the loop exits above once a batch selects no rows
137153
END LOOP;
138154
END $$;--> statement-breakpoint
139155

0 commit comments

Comments
 (0)