Skip to content

Commit e2cc7d3

Browse files
committed
Ensure we copy serialized components when importing causals
1 parent 71f18bd commit e2cc7d3

File tree

4 files changed

+241
-40
lines changed

4 files changed

+241
-40
lines changed

sql/2023-10-18-00-00_temp_entities.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ END;
9292
$$ LANGUAGE plpgsql;
9393

9494
-- Copies ALL dependencies of a causal into the codebase.
95+
--
96+
-- Note: This has been replaced in a later migration.
9597
CREATE OR REPLACE FUNCTION copy_causal_into_codebase(causal_id_to_copy INTEGER, from_codebase_user_id UUID, to_codebase_user_id UUID)
9698
RETURNS VOID AS $$
9799
DECLARE copied_hash TEXT;

sql/2024-09-00-00_sync_v2.sql

Lines changed: 0 additions & 39 deletions
This file was deleted.
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
-- Serialized form of a component, stored per user.
-- Exactly one row may exist for each (user_id, component_hash_id) pair.
CREATE TABLE serialized_components (
    -- The user whose codebase this serialized component belongs to.
    user_id           UUID        NOT NULL,
    -- The hash of the component that was serialized.
    component_hash_id INTEGER     NOT NULL,
    -- Row in `bytes` holding the serialized component.
    bytes_id          INTEGER     NOT NULL,
    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- bytes_id is carried in the primary-key index as a non-key (INCLUDE) column.
    PRIMARY KEY (user_id, component_hash_id) INCLUDE (bytes_id),

    -- Rows disappear together with their user or component hash ...
    FOREIGN KEY (user_id) REFERENCES users (id) ON DELETE CASCADE,
    FOREIGN KEY (component_hash_id) REFERENCES component_hashes (id) ON DELETE CASCADE,
    -- ... but a referenced bytes row is not cascaded away.
    FOREIGN KEY (bytes_id) REFERENCES bytes (id) ON DELETE NO ACTION
);
13+
14+
-- Serialized form of a namespace, keyed by its namespace (branch) hash.
-- Unlike serialized_components there is no user_id column: one row per hash.
CREATE TABLE serialized_namespaces (
    -- The hash of the namespace that was serialized.
    namespace_hash_id INTEGER     NOT NULL,
    -- Row in `bytes` holding the serialized namespace.
    bytes_id          INTEGER     NOT NULL,
    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- bytes_id is carried in the primary-key index as a non-key (INCLUDE) column.
    PRIMARY KEY (namespace_hash_id) INCLUDE (bytes_id),

    FOREIGN KEY (namespace_hash_id) REFERENCES branch_hashes (id) ON DELETE CASCADE,
    FOREIGN KEY (bytes_id) REFERENCES bytes (id) ON DELETE NO ACTION
);
24+
25+
-- Serialized form of a patch; one row per patch.
CREATE TABLE serialized_patches (
    -- The patch that was serialized.
    patch_id   INTEGER     NOT NULL,
    -- Row in `bytes` holding the serialized patch.
    bytes_id   INTEGER     NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- bytes_id is carried in the primary-key index as a non-key (INCLUDE) column.
    PRIMARY KEY (patch_id) INCLUDE (bytes_id),

    FOREIGN KEY (patch_id) REFERENCES patches (id) ON DELETE CASCADE,
    FOREIGN KEY (bytes_id) REFERENCES bytes (id) ON DELETE NO ACTION
);
32+
33+
-- Serialized form of a causal; one row per causal.
CREATE TABLE serialized_causals (
    -- The causal that was serialized.
    causal_id  INTEGER     NOT NULL,
    -- Row in `bytes` holding the serialized causal.
    bytes_id   INTEGER     NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- bytes_id is carried in the primary-key index as a non-key (INCLUDE) column.
    PRIMARY KEY (causal_id) INCLUDE (bytes_id),

    FOREIGN KEY (causal_id) REFERENCES causals (id) ON DELETE CASCADE,
    FOREIGN KEY (bytes_id) REFERENCES bytes (id) ON DELETE NO ACTION
);
40+
41+
-- Copies a causal, together with everything it transitively depends on, from one
-- user's codebase into another user's codebase.
--
-- Parameters:
--   causal_id_to_copy     - id of the root causal to copy.
--   from_codebase_user_id - user that currently owns the causal (source codebase).
--   to_codebase_user_id   - user that should receive the causal (target codebase).
--
-- What gets written for the target user:
--   * causal_ownership / namespace_ownership / patch_ownership rows,
--   * sandboxed_terms / sandboxed_types rows (bytes_id taken from the source user's rows),
--   * serialized_components rows (bytes_id taken from the source user's rows).
-- Every hash that was copied is then passed to remove_entity_from_temp for the target user.
--
-- If the root causal is already owned by the target user, or is not owned by the
-- source user, the seed row of the traversal is empty and nothing is copied.
CREATE OR REPLACE FUNCTION copy_causal_into_codebase(
    causal_id_to_copy INTEGER,
    from_codebase_user_id UUID,
    to_codebase_user_id UUID
)
RETURNS VOID
LANGUAGE plpgsql
AS $$
DECLARE
    hash_to_clear TEXT;
BEGIN
    -- Dependencies are gathered with recursive CTEs rather than recursive function calls:
    -- the recursive-function approach ran into the stack depth limit. Collecting everything
    -- in one query likely costs more memory, but it suits Postgres better and lets us
    -- dedupe hashes up-front so nothing is imported twice.
    FOR hash_to_clear IN
        WITH RECURSIVE transitive_causals(causal_id, causal_namespace_hash_id) AS (
            -- Seed: the requested causal, only when the source owns it and the target does not.
            SELECT root.id, root.namespace_hash_id
            FROM causals AS root
            WHERE root.id = causal_id_to_copy
              AND EXISTS (
                  SELECT 1
                  FROM causal_ownership AS src_own
                  WHERE src_own.causal_id = root.id
                    AND src_own.user_id = from_codebase_user_id
              )
              AND NOT EXISTS (
                  SELECT 1
                  FROM causal_ownership AS dst_own
                  WHERE dst_own.causal_id = root.id
                    AND dst_own.user_id = to_codebase_user_id
              )
            UNION
            -- A recursive CTE may mention its own name only once, so the self-reference
            -- is wrapped in a nested CTE that both branches below can read.
            (
                WITH frontier AS (
                    SELECT tc.causal_id, tc.causal_namespace_hash_id
                    FROM transitive_causals AS tc
                )
                -- Ancestors of causals found so far that the target does not own yet.
                SELECT ancestor.id, ancestor.namespace_hash_id
                FROM frontier AS f
                INNER JOIN causal_ancestors AS anc
                    ON anc.causal_id = f.causal_id
                INNER JOIN causals AS ancestor
                    ON ancestor.id = anc.ancestor_id
                WHERE NOT EXISTS (
                    SELECT 1
                    FROM causal_ownership AS dst_own
                    WHERE dst_own.causal_id = ancestor.id
                      AND dst_own.user_id = to_codebase_user_id
                )
                UNION
                -- Child causals of the namespaces found so far that the target does not own yet.
                SELECT child.id, child.namespace_hash_id
                FROM frontier AS f
                INNER JOIN namespace_children AS kids
                    ON kids.parent_namespace_hash_id = f.causal_namespace_hash_id
                INNER JOIN causals AS child
                    ON child.id = kids.child_causal_id
                WHERE NOT EXISTS (
                    SELECT 1
                    FROM causal_ownership AS dst_own
                    WHERE dst_own.causal_id = child.id
                      AND dst_own.user_id = to_codebase_user_id
                )
            )
        ),
        -- Namespaces of the collected causals that the target does not own yet.
        all_namespaces(namespace_hash_id) AS (
            SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id
            FROM transitive_causals AS tc
            WHERE NOT EXISTS (
                SELECT 1
                FROM namespace_ownership AS ns_own
                WHERE ns_own.namespace_hash_id = tc.causal_namespace_hash_id
                  AND ns_own.user_id = to_codebase_user_id
            )
        ),
        -- Patches referenced by those namespaces that the target does not own yet.
        all_patches(patch_id) AS (
            SELECT DISTINCT p.id
            FROM all_namespaces AS ns
            INNER JOIN namespace_patches AS nsp
                ON nsp.namespace_hash_id = ns.namespace_hash_id
            INNER JOIN patches AS p
                ON p.id = nsp.patch_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM patch_ownership AS p_own
                WHERE p_own.patch_id = p.id
                  AND p_own.user_id = to_codebase_user_id
            )
        ),
        -- Term components from which the transitive dependency walk starts.
        base_term_components(component_hash_id) AS (
            -- terms named directly in a namespace
            SELECT DISTINCT t.component_hash_id
            FROM all_namespaces AS ns
            INNER JOIN namespace_terms AS nst
                ON nst.namespace_hash_id = ns.namespace_hash_id
            INNER JOIN terms AS t
                ON t.id = nst.term_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_terms AS dst_term
                WHERE dst_term.term_id = t.id
                  AND dst_term.user_id = to_codebase_user_id
            )
            UNION
            -- terms that patches map to
            SELECT DISTINCT t.component_hash_id
            FROM all_patches AS pat
            INNER JOIN patch_term_mappings AS tmap
                ON tmap.patch_id = pat.patch_id
            INNER JOIN terms AS t
                ON t.id = tmap.to_term_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_terms AS dst_term
                WHERE dst_term.term_id = t.id
                  AND dst_term.user_id = to_codebase_user_id
            )
            UNION
            -- term metadata
            SELECT DISTINCT t.component_hash_id
            FROM all_namespaces AS ns
            INNER JOIN namespace_terms AS nst
                ON nst.namespace_hash_id = ns.namespace_hash_id
            INNER JOIN namespace_term_metadata AS md
                ON md.named_term = nst.id
            INNER JOIN terms AS t
                ON t.id = md.metadata_term_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_terms AS dst_term
                WHERE dst_term.term_id = t.id
                  AND dst_term.user_id = to_codebase_user_id
            )
            UNION
            -- type metadata (the metadata entries themselves are terms)
            SELECT DISTINCT t.component_hash_id
            FROM all_namespaces AS ns
            INNER JOIN namespace_types AS nsty
                ON nsty.namespace_hash_id = ns.namespace_hash_id
            INNER JOIN namespace_type_metadata AS md
                ON md.named_type = nsty.id
            INNER JOIN terms AS t
                ON t.id = md.metadata_term_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_terms AS dst_term
                WHERE dst_term.term_id = t.id
                  AND dst_term.user_id = to_codebase_user_id
            )
        ),
        -- Type components from which the transitive dependency walk starts.
        base_type_components(component_hash_id) AS (
            -- types named directly in a namespace
            SELECT DISTINCT ty.component_hash_id
            FROM all_namespaces AS ns
            INNER JOIN namespace_types AS nsty
                ON nsty.namespace_hash_id = ns.namespace_hash_id
            INNER JOIN types AS ty
                ON ty.id = nsty.type_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_types AS dst_type
                WHERE dst_type.type_id = ty.id
                  AND dst_type.user_id = to_codebase_user_id
            )
            UNION
            -- types reached through constructors named in a namespace
            SELECT DISTINCT ty.component_hash_id
            FROM all_namespaces AS ns
            INNER JOIN namespace_terms AS nst
                ON nst.namespace_hash_id = ns.namespace_hash_id
            INNER JOIN constructors AS ctor
                ON ctor.id = nst.constructor_id
            INNER JOIN types AS ty
                ON ty.id = ctor.type_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_types AS dst_type
                WHERE dst_type.type_id = ty.id
                  AND dst_type.user_id = to_codebase_user_id
            )
            UNION
            -- types that patches map to
            SELECT DISTINCT ty.component_hash_id
            FROM all_patches AS pat
            INNER JOIN patch_type_mappings AS tymap
                ON tymap.patch_id = pat.patch_id
            INNER JOIN types AS ty
                ON ty.id = tymap.to_type_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_types AS dst_type
                WHERE dst_type.type_id = ty.id
                  AND dst_type.user_id = to_codebase_user_id
            )
            UNION
            -- types reached through constructors that patches map to
            SELECT DISTINCT ty.component_hash_id
            FROM all_patches AS pat
            INNER JOIN patch_constructor_mappings AS cmap
                ON cmap.patch_id = pat.patch_id
            INNER JOIN constructors AS ctor
                ON ctor.id = cmap.to_constructor_id
            INNER JOIN types AS ty
                ON ty.id = ctor.type_id
            WHERE NOT EXISTS (
                SELECT 1
                FROM sandboxed_types AS dst_type
                WHERE dst_type.type_id = ty.id
                  AND dst_type.user_id = to_codebase_user_id
            )
        ),
        -- Every component reachable from the base term and type components.
        -- A component hash alone does not tell us whether it is a term or a type,
        -- so both kinds of reference are followed for every hash.
        transitive_components(component_hash_id) AS (
            SELECT DISTINCT base_terms.component_hash_id
            FROM base_term_components AS base_terms
            UNION
            SELECT DISTINCT base_types.component_hash_id
            FROM base_type_components AS base_types
            UNION
            (
                WITH frontier AS (
                    SELECT tc.component_hash_id
                    FROM transitive_components AS tc
                )
                -- Term dependencies. The join pulls in ALL terms of the component,
                -- not only the one that introduced the dependency.
                SELECT DISTINCT dep_ref.component_hash_id
                FROM frontier AS f
                INNER JOIN terms AS t
                    ON t.component_hash_id = f.component_hash_id
                INNER JOIN term_local_component_references AS dep_ref
                    ON dep_ref.term_id = t.id
                UNION
                -- Type dependencies. Likewise pulls in ALL types of the component.
                SELECT DISTINCT dep_ref.component_hash_id
                FROM frontier AS f
                INNER JOIN types AS ty
                    ON ty.component_hash_id = f.component_hash_id
                INNER JOIN type_local_component_references AS dep_ref
                    ON dep_ref.type_id = ty.id
            )
        ),
        -- Grant the target user ownership of the collected causals / namespaces / patches.
        copied_causals(causal_id) AS (
            INSERT INTO causal_ownership (user_id, causal_id)
            SELECT DISTINCT to_codebase_user_id, tc.causal_id
            FROM transitive_causals AS tc
            ON CONFLICT DO NOTHING
            RETURNING causal_id
        ),
        copied_namespaces(namespace_hash_id) AS (
            INSERT INTO namespace_ownership (user_id, namespace_hash_id)
            SELECT DISTINCT to_codebase_user_id, ns.namespace_hash_id
            FROM all_namespaces AS ns
            ON CONFLICT DO NOTHING
            RETURNING namespace_hash_id
        ),
        copied_patches(patch_id) AS (
            INSERT INTO patch_ownership (user_id, patch_id)
            SELECT DISTINCT to_codebase_user_id, pat.patch_id
            FROM all_patches AS pat
            ON CONFLICT DO NOTHING
            RETURNING patch_id
        ),
        -- The three INSERTs below are never read by the final SELECT; as data-modifying
        -- CTEs they are still executed by Postgres.
        -- Copy the source user's sandboxed terms for every collected component.
        copied_term_components AS (
            INSERT INTO sandboxed_terms (user_id, term_id, bytes_id)
            SELECT DISTINCT to_codebase_user_id, t.id, src.bytes_id
            FROM transitive_components AS tc
            INNER JOIN terms AS t
                ON t.component_hash_id = tc.component_hash_id
            INNER JOIN sandboxed_terms AS src
                ON src.term_id = t.id
            WHERE src.user_id = from_codebase_user_id
            ON CONFLICT DO NOTHING
        ),
        -- Copy the source user's sandboxed types for every collected component.
        copied_type_components AS (
            INSERT INTO sandboxed_types (user_id, type_id, bytes_id)
            SELECT DISTINCT to_codebase_user_id, ty.id, src.bytes_id
            FROM transitive_components AS tc
            INNER JOIN types AS ty
                ON ty.component_hash_id = tc.component_hash_id
            INNER JOIN sandboxed_types AS src
                ON src.type_id = ty.id
            WHERE src.user_id = from_codebase_user_id
            ON CONFLICT DO NOTHING
        ),
        -- Copy the source user's serialized components for every collected component.
        copied_serialized_components AS (
            INSERT INTO serialized_components (user_id, component_hash_id, bytes_id)
            SELECT DISTINCT to_codebase_user_id, src.component_hash_id, src.bytes_id
            FROM transitive_components AS tc
            INNER JOIN serialized_components AS src
                ON src.component_hash_id = tc.component_hash_id
               AND src.user_id = from_codebase_user_id
            ON CONFLICT DO NOTHING
        )
        -- Hashes of everything that was copied: causals, namespaces, patches ...
        SELECT c.hash
        FROM copied_causals AS done_causal
        INNER JOIN causals AS c
            ON c.id = done_causal.causal_id
        UNION ALL
        SELECT branch_hashes.base32
        FROM copied_namespaces AS done_ns
        INNER JOIN branch_hashes
            ON branch_hashes.id = done_ns.namespace_hash_id
        UNION ALL
        SELECT p.hash
        FROM copied_patches AS done_patch
        INNER JOIN patches AS p
            ON p.id = done_patch.patch_id
        UNION ALL
        -- ... plus collected components that the target still has recorded as a temp
        -- entity, or on either side of a missing-dependency row.
        SELECT component_hashes.base32
        FROM transitive_components AS tc
        INNER JOIN component_hashes
            ON component_hashes.id = tc.component_hash_id
        WHERE EXISTS (
                SELECT 1
                FROM temp_entity AS te
                WHERE te.user_id = to_codebase_user_id
                  AND te.hash = component_hashes.base32
              )
           OR EXISTS (
                SELECT 1
                FROM temp_entity_missing_dependency AS missing
                WHERE missing.user_id = to_codebase_user_id
                  AND (missing.dependent = component_hashes.base32
                       OR missing.dependency = component_hashes.base32)
              )
    LOOP
        PERFORM remove_entity_from_temp(to_codebase_user_id, hash_to_clear);
    END LOOP;
END;
$$;

sql/2025-01-24_migrate-serialized-entities.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ CREATE TABLE migrate_serialized_queue(
66
PRIMARY KEY (user_id, hash)
77
);
88

9-
-- Run this manually to populate the queue
9+
-- Run this manually to populate the queue
1010

1111
-- INSERT INTO migrate_serialized_queue (hash, user_id)
1212
-- SELECT c.hash, co.user_id

0 commit comments

Comments
 (0)