Skip to content

Commit 0f7e91d

Browse files
authored
Merge pull request #41 from unisoncomputing/syncv2/depth-sort
SyncV2 Depth tracking
2 parents 925749a + d0bdfbd commit 0f7e91d

File tree

9 files changed

+328
-11
lines changed

9 files changed

+328
-11
lines changed
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
-- We can track the maximum dependency depth of any sub-dag rooted at each entity.
2+
-- The depth of any entity is simply the maximum depth of any of its children plus one.
3+
-- This allows us to trivially sort entities into a valid dependency order without needing a complex topological
4+
-- sort at query time.
5+
6+
-- Unfortunately we can't use triggers for most of these since for some entities their depth is dependent on
7+
-- references which, due to foreign keys, must be inserted AFTER the entity itself. The depth calculation must run after all
8+
-- the entity's local references are inserted, but there's no way for us to trigger
9+
-- only when the LAST one of those is done, so we'd need to run this on every
10+
-- local reference insert, and remove the optimistic exit in the case where the row
11+
-- already exists, which is a big waste.
12+
--
13+
-- Instead we just run these functions manually after an entity's references are all inserted.
14+
15+
CREATE TABLE causal_depth (
16+
causal_id INTEGER PRIMARY KEY REFERENCES causals (id) ON DELETE CASCADE,
17+
depth INTEGER NOT NULL
18+
);
19+
20+
CREATE TABLE component_depth (
21+
component_hash_id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE,
22+
depth INTEGER NOT NULL
23+
);
24+
25+
CREATE TABLE namespace_depth (
26+
namespace_hash_id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE,
27+
depth INTEGER NOT NULL
28+
);
29+
30+
CREATE TABLE patch_depth (
31+
patch_id INTEGER PRIMARY KEY REFERENCES patches (id) ON DELETE CASCADE,
32+
depth INTEGER NOT NULL
33+
);
34+
35+
36+
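-- Depth update functions (called manually after an entity's references are inserted, rather than via triggers)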
37+
38+
CREATE OR REPLACE FUNCTION update_causal_depth(the_causal_id integer) RETURNS VOID AS $$
39+
DECLARE
40+
max_namespace_depth INTEGER;
41+
max_child_causal_depth INTEGER;
42+
the_namespace_hash_id INTEGER;
43+
BEGIN
44+
-- If there's already a depth entry for this causal, we're done.
45+
IF EXISTS (SELECT FROM causal_depth cd WHERE cd.causal_id = the_causal_id) THEN
46+
RETURN;
47+
END IF;
48+
49+
SELECT c.namespace_hash_id INTO the_namespace_hash_id
50+
FROM causals c
51+
WHERE c.id = the_causal_id;
52+
-- Find the max depth of the associated namespace
53+
-- Find the max depth of any child causal (i.e. any of this causal's ancestors in causal_ancestors)
54+
-- Set the depth of this causal to the max of those two plus one
55+
SELECT COALESCE(MAX(nd.depth), -1) INTO max_namespace_depth
56+
FROM namespace_depth nd
57+
WHERE nd.namespace_hash_id = the_namespace_hash_id;
58+
SELECT COALESCE(MAX(cd.depth), -1) INTO max_child_causal_depth
59+
FROM causal_depth cd
60+
JOIN causal_ancestors ca ON cd.causal_id = ca.ancestor_id
61+
WHERE ca.causal_id = the_causal_id;
62+
INSERT INTO causal_depth (causal_id, depth)
63+
VALUES (the_causal_id, GREATEST(max_namespace_depth, max_child_causal_depth) + 1);
64+
65+
RETURN;
66+
END;
67+
$$ LANGUAGE plpgsql;
68+
69+
CREATE OR REPLACE FUNCTION update_component_depth(the_component_hash_id integer) RETURNS VOID AS $$
70+
DECLARE
71+
max_referenced_component_depth INTEGER;
72+
BEGIN
73+
-- If there's already a depth entry for this component, we're done.
74+
IF EXISTS (SELECT FROM component_depth cd WHERE cd.component_hash_id = the_component_hash_id) THEN
75+
RETURN;
76+
END IF;
77+
-- Find the max depth of any component referenced by this component
78+
-- Set the depth of this component to that plus one
79+
SELECT COALESCE(MAX(refs.depth), -1) INTO max_referenced_component_depth
80+
FROM (
81+
( SELECT cd.depth AS depth
82+
FROM terms t
83+
JOIN term_local_component_references cr
84+
ON cr.term_id = t.id
85+
JOIN component_depth cd
86+
ON cd.component_hash_id = cr.component_hash_id
87+
WHERE t.component_hash_id = the_component_hash_id
88+
) UNION
89+
( SELECT cd.depth AS depth
90+
FROM types t
91+
JOIN type_local_component_references cr
92+
ON cr.type_id = t.id
93+
JOIN component_depth cd
94+
ON cd.component_hash_id = cr.component_hash_id
95+
WHERE t.component_hash_id = the_component_hash_id
96+
)
97+
) AS refs;
98+
INSERT INTO component_depth (component_hash_id, depth)
99+
VALUES (the_component_hash_id, max_referenced_component_depth + 1);
100+
RETURN;
101+
END;
102+
$$ LANGUAGE plpgsql;
103+
104+
CREATE OR REPLACE FUNCTION update_namespace_depth(the_namespace_hash_id integer) RETURNS VOID AS $$
105+
DECLARE
106+
max_child_causal_depth INTEGER;
107+
max_patch_depth INTEGER;
108+
max_referenced_component_depth INTEGER;
109+
BEGIN
110+
-- If there's already a depth entry for this namespace, we're done.
111+
IF EXISTS (SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = the_namespace_hash_id) THEN
112+
RETURN;
113+
END IF;
114+
-- Find the max depth of any child causal
115+
-- Find the max depth of any patch
116+
-- Find the max depth of any component referenced by a term, constructor, type, or term/type metadata in this namespace
117+
-- Set the depth of this namespace to the max of those plus one
118+
SELECT COALESCE(MAX(cd.depth), -1) INTO max_child_causal_depth
119+
FROM causal_depth cd
120+
JOIN namespace_children nc ON cd.causal_id = nc.child_causal_id
121+
WHERE nc.parent_namespace_hash_id = the_namespace_hash_id;
122+
SELECT COALESCE(MAX(pd.depth), -1) INTO max_patch_depth
123+
FROM patch_depth pd
124+
JOIN namespace_patches np ON pd.patch_id = np.patch_id
125+
WHERE np.namespace_hash_id = the_namespace_hash_id;
126+
SELECT COALESCE(MAX(depth), -1) INTO max_referenced_component_depth
127+
FROM (
128+
-- direct term references
129+
( SELECT cd.depth AS depth
130+
FROM namespace_terms nt
131+
JOIN terms t
132+
ON nt.term_id = t.id
133+
JOIN component_depth cd
134+
ON t.component_hash_id = cd.component_hash_id
135+
WHERE nt.namespace_hash_id = the_namespace_hash_id
136+
) UNION
137+
-- term metadata references
138+
( SELECT cd.depth AS depth
139+
FROM namespace_terms nt
140+
JOIN namespace_term_metadata ntm
141+
ON ntm.named_term = nt.id
142+
JOIN terms t
143+
ON ntm.metadata_term_id = t.id
144+
JOIN component_depth cd
145+
ON t.component_hash_id = cd.component_hash_id
146+
WHERE nt.namespace_hash_id = the_namespace_hash_id
147+
) UNION
148+
-- direct constructor references
149+
( SELECT cd.depth AS depth
150+
FROM namespace_terms nt
151+
JOIN constructors c
152+
ON c.id = nt.constructor_id
153+
JOIN types t
154+
ON c.type_id = t.id
155+
JOIN component_depth cd
156+
ON t.component_hash_id = cd.component_hash_id
157+
WHERE nt.namespace_hash_id = the_namespace_hash_id
158+
) UNION
159+
-- direct type references
160+
( SELECT cd.depth AS depth
161+
FROM namespace_types nt
162+
JOIN types t
163+
ON nt.type_id = t.id
164+
JOIN component_depth cd
165+
ON t.component_hash_id = cd.component_hash_id
166+
WHERE nt.namespace_hash_id = the_namespace_hash_id
167+
) UNION
168+
-- type metadata references
169+
( SELECT cd.depth AS depth
170+
FROM namespace_types nt
171+
JOIN namespace_type_metadata ntm
172+
ON ntm.named_type = nt.id
173+
JOIN terms t
174+
ON ntm.metadata_term_id = t.id
175+
JOIN component_depth cd
176+
ON t.component_hash_id = cd.component_hash_id
177+
WHERE nt.namespace_hash_id = the_namespace_hash_id
178+
)
179+
) AS refs;
180+
INSERT INTO namespace_depth (namespace_hash_id, depth)
181+
VALUES (the_namespace_hash_id, GREATEST(max_child_causal_depth, max_patch_depth, max_referenced_component_depth) + 1);
182+
183+
RETURN;
184+
END;
185+
$$ LANGUAGE plpgsql;
186+
187+
CREATE OR REPLACE FUNCTION update_patch_depth(the_patch_id integer) RETURNS VOID AS $$
188+
DECLARE
189+
max_referenced_component_depth INTEGER;
190+
BEGIN
191+
-- If there's already a depth entry for this patch, we're done.
192+
IF EXISTS (SELECT FROM patch_depth pd WHERE pd.patch_id = the_patch_id) THEN
193+
RETURN;
194+
END IF;
195+
-- Find the max depth of any term component referenced by a patch
196+
-- Find the max depth of any type component referenced by a patch
197+
-- Set the depth of this patch to that plus one
198+
199+
SELECT COALESCE(MAX(cd.depth), -1) INTO max_referenced_component_depth
200+
FROM (
201+
-- term references
202+
( SELECT t.component_hash_id AS component_hash_id
203+
FROM patch_term_mappings ptm
204+
JOIN terms t
205+
ON ptm.to_term_id = t.id
206+
WHERE ptm.patch_id = the_patch_id
207+
) UNION
208+
-- constructor mappings
209+
( SELECT t.component_hash_id AS component_hash_id
210+
FROM patch_constructor_mappings pcm
211+
JOIN constructors c
212+
ON pcm.to_constructor_id = c.id
213+
JOIN types t
214+
ON c.type_id = t.id
215+
WHERE pcm.patch_id = the_patch_id
216+
) UNION
217+
-- type references
218+
( SELECT t.component_hash_id AS component_hash_id
219+
FROM patch_type_mappings ptm
220+
JOIN types t
221+
ON ptm.to_type_id = t.id
222+
WHERE ptm.patch_id = the_patch_id
223+
)
224+
) AS refs JOIN component_depth cd
225+
ON cd.component_hash_id = refs.component_hash_id;
226+
INSERT INTO patch_depth (patch_id, depth)
227+
VALUES (the_patch_id, max_referenced_component_depth + 1);
228+
229+
RETURN;
230+
END;
231+
$$ LANGUAGE plpgsql;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
2+
CREATE TABLE unfinished_causal_depths (
3+
id INTEGER PRIMARY KEY REFERENCES causals (id) ON DELETE CASCADE
4+
);
5+
6+
CREATE TABLE unfinished_namespace_depths (
7+
id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE
8+
);
9+
10+
CREATE TABLE unfinished_patch_depths (
11+
id INTEGER PRIMARY KEY REFERENCES patches (id) ON DELETE CASCADE
12+
);
13+
14+
CREATE TABLE unfinished_component_depths (
15+
id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE
16+
);
17+
18+
CREATE TABLE unfinished_namespaces_working_set (
19+
id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE
20+
);
21+
22+
INSERT INTO unfinished_causal_depths (id)
23+
SELECT c.id
24+
FROM causals c
25+
WHERE NOT EXISTS (
26+
SELECT FROM causal_depth cd WHERE cd.causal_id = c.id
27+
) ON CONFLICT DO NOTHING;
28+
29+
INSERT INTO unfinished_namespace_depths (id)
30+
SELECT n.namespace_hash_id
31+
FROM namespaces n
32+
WHERE NOT EXISTS (
33+
SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = n.namespace_hash_id
34+
) ON CONFLICT DO NOTHING;
35+
36+
INSERT INTO unfinished_patch_depths (id)
37+
SELECT p.id
38+
FROM patches p
39+
WHERE NOT EXISTS (
40+
SELECT FROM patch_depth pd WHERE pd.patch_id = p.id
41+
) ON CONFLICT DO NOTHING;
42+
43+
INSERT INTO unfinished_component_depths (id)
44+
SELECT ch.id
45+
FROM component_hashes ch
46+
WHERE NOT EXISTS (
47+
SELECT FROM component_depth cd WHERE cd.component_hash_id = ch.id
48+
) ON CONFLICT DO NOTHING;
49+
50+
-- Afterwards: drop the temporary work tables once they are no longer needed.
51+
DROP TABLE unfinished_causal_depths;
52+
DROP TABLE unfinished_namespace_depths;
53+
DROP TABLE unfinished_patch_depths;
54+
DROP TABLE unfinished_component_depths;
55+
DROP TABLE unfinished_namespaces_working_set;

src/Share/BackgroundJobs.hs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ module Share.BackgroundJobs (startWorkers) where
33
import Ki.Unlifted qualified as Ki
44
import Share.BackgroundJobs.Monad (Background)
55
import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch
6-
import Share.BackgroundJobs.SerializedEntitiesMigration.Worker qualified as SerializedEntitiesMigration
76

87
-- | Kicks off all background workers.
98
startWorkers :: Ki.Scope -> Background ()
109
startWorkers scope = do
1110
DefnSearch.worker scope
12-
-- Temporary disable background diff jobs until the new diffing logic is done.
13-
-- ContributionDiffs.worker scope
14-
SerializedEntitiesMigration.worker scope
11+
12+
-- Temporarily disable background diff jobs until the new diffing logic is done.
13+
-- ContributionDiffs.worker scope
14+
-- SerializedEntitiesMigration.worker scope

src/Share/Postgres/Causal/Queries.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ savePgNamespace maySerialized mayBh b@(BranchFull.Branch {terms, types, patches,
642642
|]
643643
-- Note: this must be run AFTER inserting the namespace and all its children.
644644
execute_ [sql| SELECT save_namespace(#{bhId}) |]
645+
execute_ [sql| SELECT update_namespace_depth(#{bhId}) |]
645646

646647
saveSerializedNamespace :: (QueryM m) => BranchHashId -> CBORBytes TempEntity -> m ()
647648
saveSerializedNamespace bhId (CBORBytes bytes) = do
@@ -785,6 +786,7 @@ saveCausal maySerializedCausal mayCh bhId ancestorIds = do
785786
SELECT #{cId}, a.ancestor_id
786787
FROM ancestors a
787788
|]
789+
execute_ [sql| SELECT update_causal_depth(#{cId}) |]
788790
pure cId
789791

790792
saveSerializedCausal :: (QueryM m) => CausalId -> CBORBytes TempEntity -> m ()

src/Share/Postgres/Definitions/Queries.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,10 @@ saveEncodedTermComponent componentHash maySerialized elements = do
863863
SELECT defn_mappings.term_id, defn_mappings.local_index, defn_mappings.component_hash_id
864864
FROM defn_mappings
865865
|]
866+
execute_
867+
[sql|
868+
SELECT update_component_depth(#{componentHashId})
869+
|]
866870
pure termIds
867871

868872
saveTypeComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e ()
@@ -1013,6 +1017,10 @@ saveTypeComponent componentHash maySerialized elements = do
10131017
FROM defn_mappings
10141018
|]
10151019
saveConstructors (zip (toList typeIds) elements)
1020+
execute_
1021+
[sql|
1022+
SELECT update_component_depth(#{componentHashId})
1023+
|]
10161024
pure typeIds
10171025

10181026
-- | Efficiently resolve all pg Ids across selected Local Ids.

src/Share/Postgres/Patches/Queries.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ savePatch maySerialized patchHash PatchFull.Patch {termEdits, typeEdits} = do
233233
LEFT JOIN types to_type
234234
ON to_type.component_hash_id = to_type_component_hash_id AND to_type.component_index = to_type_component_index
235235
|]
236+
execute_ [sql| SELECT update_patch_depth(#{patchId}) |]
236237
pure patchId
237238
termsTable :: [(Maybe ComponentHashId, Maybe Int64 {- from comp index -}, Maybe TextId, Maybe ComponentHashId, Maybe Int64 {- to comp index -}, Maybe TextId, Maybe PatchFullTermEdit.Typing, Bool)]
238239
constructorsTable :: [(ComponentHashId, Int64 {- from comp index -}, Int64 {- from constr index -}, Maybe ComponentHashId, Maybe Int64 {- to comp index-}, Maybe Int64 {- to constr index -}, Maybe PatchFullTermEdit.Typing, Bool)]

src/Share/Web/UCM/SyncV2/Impl.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ batchSize :: Int32
4848
batchSize = 1000
4949

5050
streamSettings :: Hash32 -> Maybe SyncV2.BranchRef -> StreamInitInfo
51-
streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.Unsorted, numEntities = Nothing, rootCausalHash, rootBranchRef}
51+
streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.DependenciesFirst, numEntities = Nothing, rootCausalHash, rootBranchRef}
5252

5353
server :: Maybe UserId -> SyncV2.Routes WebAppServer
5454
server mayUserId =

0 commit comments

Comments
 (0)