Skip to content
231 changes: 231 additions & 0 deletions sql/2024-12-16-00-00_entity_depths.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
-- We can track the maximum dependency depth of any sub-dag rooted at each entity.
-- The depth of any entity is simply the maximum depth of any of its children plus one.
-- This allows us to trivially sort entities into a valid dependency order without needing a complex topological
-- sort at query time.

-- Unfortunately we can't use triggers for most of these. For some entities the depth depends on
-- references which, due to foreign keys, must be inserted AFTER the entity itself, so the depth
-- computation must run after all of the entity's local references are inserted. There's no way
-- for us to trigger only when the LAST one of those is done, so we'd need to run it on every
-- local reference insert and remove the optimistic exit in the case where the row
-- already exists, which is a big waste.
--
-- Instead we just run these functions manually after an entity's references are all inserted.

-- One depth table per entity kind. Each table holds at most one row per entity
-- (the entity id is the primary key) and the row is removed automatically when
-- the entity it references is deleted (ON DELETE CASCADE).

-- Depth of each causal, keyed by causals.id.
CREATE TABLE causal_depth (
causal_id INTEGER PRIMARY KEY REFERENCES causals (id) ON DELETE CASCADE,
depth INTEGER NOT NULL
);

-- Depth of each definition component, keyed by component_hashes.id.
CREATE TABLE component_depth (
component_hash_id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE,
depth INTEGER NOT NULL
);

-- Depth of each namespace, keyed by branch_hashes.id.
CREATE TABLE namespace_depth (
namespace_hash_id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE,
depth INTEGER NOT NULL
);

-- Depth of each patch, keyed by patches.id.
CREATE TABLE patch_depth (
patch_id INTEGER PRIMARY KEY REFERENCES patches (id) ON DELETE CASCADE,
depth INTEGER NOT NULL
);


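-- Depth-update functions (called manually after an entity's references are inserted, not installed as triggers)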

-- Records the depth of the given causal in causal_depth.
--
-- The depth is one more than the larger of:
--   * the recorded depth of the namespace the causal points at, and
--   * the greatest recorded depth among the causal's rows in causal_ancestors.
-- A dependency with no recorded depth contributes nothing; if none has one the
-- causal gets depth 0. Calling this for a causal that already has a depth row
-- is a no-op.
CREATE OR REPLACE FUNCTION update_causal_depth(the_causal_id integer) RETURNS VOID AS $$
DECLARE
  v_namespace_depth INTEGER;
  v_ancestor_depth INTEGER;
BEGIN
  -- Optimistic exit: a depth row is written once and never recomputed.
  PERFORM 1 FROM causal_depth existing_row WHERE existing_row.causal_id = the_causal_id;
  IF FOUND THEN
    RETURN;
  END IF;

  -- Depth of the causal's namespace, or -1 when no depth is recorded for it.
  v_namespace_depth := COALESCE(
    ( SELECT MAX(nd.depth)
        FROM causals c
        JOIN namespace_depth nd ON nd.namespace_hash_id = c.namespace_hash_id
       WHERE c.id = the_causal_id
    ), -1);

  -- Greatest depth among the causal's ancestors, or -1 when none is recorded.
  v_ancestor_depth := COALESCE(
    ( SELECT MAX(cd.depth)
        FROM causal_ancestors ca
        JOIN causal_depth cd ON cd.causal_id = ca.ancestor_id
       WHERE ca.causal_id = the_causal_id
    ), -1);

  INSERT INTO causal_depth (causal_id, depth)
    VALUES (the_causal_id, 1 + GREATEST(v_namespace_depth, v_ancestor_depth));
END;
$$ LANGUAGE plpgsql;

-- Records the depth of the given component in component_depth.
--
-- The depth is one more than the greatest recorded depth among the components
-- listed in term_local_component_references / type_local_component_references
-- for the terms and types belonging to this component. Referenced components
-- without a recorded depth are ignored; if none has one the component gets
-- depth 0. Calling this for a component that already has a depth row is a no-op.
CREATE OR REPLACE FUNCTION update_component_depth(the_component_hash_id integer) RETURNS VOID AS $$
DECLARE
  v_dependency_depth INTEGER;
BEGIN
  -- Optimistic exit: a depth row is written once and never recomputed.
  PERFORM 1 FROM component_depth existing_row WHERE existing_row.component_hash_id = the_component_hash_id;
  IF FOUND THEN
    RETURN;
  END IF;

  -- Collect every referenced component first, then look up their depths in one join.
  v_dependency_depth := COALESCE(
    ( SELECT MAX(cd.depth)
        FROM (
          -- components referenced by this component's terms
          SELECT cr.component_hash_id
            FROM terms t
            JOIN term_local_component_references cr ON cr.term_id = t.id
           WHERE t.component_hash_id = the_component_hash_id
          UNION
          -- components referenced by this component's types
          SELECT cr.component_hash_id
            FROM types t
            JOIN type_local_component_references cr ON cr.type_id = t.id
           WHERE t.component_hash_id = the_component_hash_id
        ) AS referenced
        JOIN component_depth cd ON cd.component_hash_id = referenced.component_hash_id
    ), -1);

  INSERT INTO component_depth (component_hash_id, depth)
    VALUES (the_component_hash_id, 1 + v_dependency_depth);
END;
$$ LANGUAGE plpgsql;

-- Records the depth of the given namespace in namespace_depth.
--
-- The depth is one more than the greatest recorded depth among:
--   * the namespace's child causals (namespace_children),
--   * the namespace's patches (namespace_patches), and
--   * the components behind its named terms, constructors and types, plus the
--     components of the metadata terms attached to those names.
-- Dependencies without a recorded depth are ignored; if none has one the
-- namespace gets depth 0. Calling this for a namespace that already has a
-- depth row is a no-op.
CREATE OR REPLACE FUNCTION update_namespace_depth(the_namespace_hash_id integer) RETURNS VOID AS $$
DECLARE
  v_child_depth INTEGER;
  v_patch_depth INTEGER;
  v_component_depth INTEGER;
BEGIN
  -- Optimistic exit: a depth row is written once and never recomputed.
  PERFORM 1 FROM namespace_depth existing_row WHERE existing_row.namespace_hash_id = the_namespace_hash_id;
  IF FOUND THEN
    RETURN;
  END IF;

  -- Greatest depth among child causals, or -1 when none is recorded.
  v_child_depth := COALESCE(
    ( SELECT MAX(cd.depth)
        FROM namespace_children nc
        JOIN causal_depth cd ON cd.causal_id = nc.child_causal_id
       WHERE nc.parent_namespace_hash_id = the_namespace_hash_id
    ), -1);

  -- Greatest depth among patches, or -1 when none is recorded.
  v_patch_depth := COALESCE(
    ( SELECT MAX(pd.depth)
        FROM namespace_patches np
        JOIN patch_depth pd ON pd.patch_id = np.patch_id
       WHERE np.namespace_hash_id = the_namespace_hash_id
    ), -1);

  -- Greatest depth among referenced components, or -1 when none is recorded.
  -- Collect every referenced component first, then look up their depths in one join.
  v_component_depth := COALESCE(
    ( SELECT MAX(cd.depth)
        FROM (
          -- direct term references
          SELECT t.component_hash_id
            FROM namespace_terms nt
            JOIN terms t ON t.id = nt.term_id
           WHERE nt.namespace_hash_id = the_namespace_hash_id
          UNION
          -- term metadata references
          SELECT t.component_hash_id
            FROM namespace_terms nt
            JOIN namespace_term_metadata ntm ON ntm.named_term = nt.id
            JOIN terms t ON t.id = ntm.metadata_term_id
           WHERE nt.namespace_hash_id = the_namespace_hash_id
          UNION
          -- direct constructor references (resolved to the constructor's type)
          SELECT t.component_hash_id
            FROM namespace_terms nt
            JOIN constructors c ON c.id = nt.constructor_id
            JOIN types t ON t.id = c.type_id
           WHERE nt.namespace_hash_id = the_namespace_hash_id
          UNION
          -- direct type references
          SELECT t.component_hash_id
            FROM namespace_types nt
            JOIN types t ON t.id = nt.type_id
           WHERE nt.namespace_hash_id = the_namespace_hash_id
          UNION
          -- type metadata references (metadata entries are terms)
          SELECT t.component_hash_id
            FROM namespace_types nt
            JOIN namespace_type_metadata ntm ON ntm.named_type = nt.id
            JOIN terms t ON t.id = ntm.metadata_term_id
           WHERE nt.namespace_hash_id = the_namespace_hash_id
        ) AS referenced
        JOIN component_depth cd ON cd.component_hash_id = referenced.component_hash_id
    ), -1);

  INSERT INTO namespace_depth (namespace_hash_id, depth)
    VALUES (the_namespace_hash_id, 1 + GREATEST(v_child_depth, v_patch_depth, v_component_depth));
END;
$$ LANGUAGE plpgsql;

-- Records the depth of the given patch in patch_depth.
--
-- The depth is one more than the greatest recorded depth among the components
-- of the terms, constructors (via their type) and types that the patch's
-- mappings point to (the to_* columns). Components without a recorded depth
-- are ignored; if none has one the patch gets depth 0. Calling this for a
-- patch that already has a depth row is a no-op.
CREATE OR REPLACE FUNCTION update_patch_depth(the_patch_id integer) RETURNS VOID AS $$
DECLARE
  v_component_depth INTEGER;
BEGIN
  -- Optimistic exit: a depth row is written once and never recomputed.
  PERFORM 1 FROM patch_depth existing_row WHERE existing_row.patch_id = the_patch_id;
  IF FOUND THEN
    RETURN;
  END IF;

  v_component_depth := COALESCE(
    ( WITH mapped_components AS (
        -- term references
        SELECT t.component_hash_id
          FROM patch_term_mappings ptm
          JOIN terms t ON t.id = ptm.to_term_id
         WHERE ptm.patch_id = the_patch_id
        UNION
        -- constructor mappings (resolved to the constructor's type)
        SELECT t.component_hash_id
          FROM patch_constructor_mappings pcm
          JOIN constructors c ON c.id = pcm.to_constructor_id
          JOIN types t ON t.id = c.type_id
         WHERE pcm.patch_id = the_patch_id
        UNION
        -- type references
        SELECT t.component_hash_id
          FROM patch_type_mappings ptm
          JOIN types t ON t.id = ptm.to_type_id
         WHERE ptm.patch_id = the_patch_id
      )
      SELECT MAX(cd.depth)
        FROM mapped_components mc
        JOIN component_depth cd ON cd.component_hash_id = mc.component_hash_id
    ), -1);

  INSERT INTO patch_depth (patch_id, depth)
    VALUES (the_patch_id, 1 + v_component_depth);
END;
$$ LANGUAGE plpgsql;
55 changes: 55 additions & 0 deletions sql/migration-helpers/2025-02-25_causal_depth_migration.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

-- Helper tables for the depth migration. Each holds a single id column that is
-- both primary key and a foreign key to the entity table, so an id is listed at
-- most once and disappears if the entity itself is deleted.

-- Ids of causals that have no causal_depth row yet (seeded below).
CREATE TABLE unfinished_causal_depths (
id INTEGER PRIMARY KEY REFERENCES causals (id) ON DELETE CASCADE
);

-- Ids of namespaces that have no namespace_depth row yet (seeded below).
CREATE TABLE unfinished_namespace_depths (
id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE
);

-- Ids of patches that have no patch_depth row yet (seeded below).
CREATE TABLE unfinished_patch_depths (
id INTEGER PRIMARY KEY REFERENCES patches (id) ON DELETE CASCADE
);

-- Ids of components that have no component_depth row yet (seeded below).
CREATE TABLE unfinished_component_depths (
id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE
);

-- NOTE(review): not populated anywhere in this script; presumably filled by
-- the migration job while it works through namespaces -- confirm.
CREATE TABLE unfinished_namespaces_working_set (
id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE
);

-- Seed the helper tables: list every entity that has no depth row yet.
-- Each statement is an anti-join against the matching *_depth table; the depth
-- table's key column is its primary key, so it is NULL only when no row matched.

-- Causals without a causal_depth row.
INSERT INTO unfinished_causal_depths (id)
  SELECT c.id
    FROM causals c
    LEFT JOIN causal_depth cd ON cd.causal_id = c.id
   WHERE cd.causal_id IS NULL
ON CONFLICT DO NOTHING;

-- Namespaces without a namespace_depth row.
INSERT INTO unfinished_namespace_depths (id)
  SELECT n.namespace_hash_id
    FROM namespaces n
    LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.namespace_hash_id
   WHERE nd.namespace_hash_id IS NULL
ON CONFLICT DO NOTHING;

-- Patches without a patch_depth row.
INSERT INTO unfinished_patch_depths (id)
  SELECT p.id
    FROM patches p
    LEFT JOIN patch_depth pd ON pd.patch_id = p.id
   WHERE pd.patch_id IS NULL
ON CONFLICT DO NOTHING;

-- Components without a component_depth row.
INSERT INTO unfinished_component_depths (id)
  SELECT ch.id
    FROM component_hashes ch
    LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id
   WHERE cd.component_hash_id IS NULL
ON CONFLICT DO NOTHING;

-- Afterwards: drop the migration helper tables.
-- NOTE(review): presumably meant to be run separately, once the depth backfill
-- has finished, rather than together with the statements above -- confirm.
DROP TABLE unfinished_causal_depths;
DROP TABLE unfinished_namespace_depths;
DROP TABLE unfinished_patch_depths;
DROP TABLE unfinished_component_depths;
DROP TABLE unfinished_namespaces_working_set;
8 changes: 4 additions & 4 deletions src/Share/BackgroundJobs.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ module Share.BackgroundJobs (startWorkers) where
import Ki.Unlifted qualified as Ki
import Share.BackgroundJobs.Monad (Background)
import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch
import Share.BackgroundJobs.SerializedEntitiesMigration.Worker qualified as SerializedEntitiesMigration

-- | Kicks off all background workers.
startWorkers :: Ki.Scope -> Background ()
startWorkers scope = do
DefnSearch.worker scope
-- Temporary disable background diff jobs until the new diffing logic is done.
-- ContributionDiffs.worker scope
SerializedEntitiesMigration.worker scope

-- Temporary disable background diff jobs until the new diffing logic is done.
-- ContributionDiffs.worker scope
-- SerializedEntitiesMigration.worker scope
2 changes: 2 additions & 0 deletions src/Share/Postgres/Causal/Queries.hs
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ savePgNamespace maySerialized mayBh b@(BranchFull.Branch {terms, types, patches,
|]
-- Note: this must be run AFTER inserting the namespace and all its children.
execute_ [sql| SELECT save_namespace(#{bhId}) |]
execute_ [sql| SELECT update_namespace_depth(#{bhId}) |]

saveSerializedNamespace :: (QueryM m) => BranchHashId -> CBORBytes TempEntity -> m ()
saveSerializedNamespace bhId (CBORBytes bytes) = do
Expand Down Expand Up @@ -785,6 +786,7 @@ saveCausal maySerializedCausal mayCh bhId ancestorIds = do
SELECT #{cId}, a.ancestor_id
FROM ancestors a
|]
execute_ [sql| SELECT update_causal_depth(#{cId}) |]
pure cId

saveSerializedCausal :: (QueryM m) => CausalId -> CBORBytes TempEntity -> m ()
Expand Down
8 changes: 8 additions & 0 deletions src/Share/Postgres/Definitions/Queries.hs
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,10 @@ saveEncodedTermComponent componentHash maySerialized elements = do
SELECT defn_mappings.term_id, defn_mappings.local_index, defn_mappings.component_hash_id
FROM defn_mappings
|]
execute_
[sql|
SELECT update_component_depth(#{componentHashId})
|]
pure termIds

saveTypeComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e ()
Expand Down Expand Up @@ -1013,6 +1017,10 @@ saveTypeComponent componentHash maySerialized elements = do
FROM defn_mappings
|]
saveConstructors (zip (toList typeIds) elements)
execute_
[sql|
SELECT update_component_depth(#{componentHashId})
|]
pure typeIds

-- | Efficiently resolve all pg Ids across selected Local Ids.
Expand Down
1 change: 1 addition & 0 deletions src/Share/Postgres/Patches/Queries.hs
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ savePatch maySerialized patchHash PatchFull.Patch {termEdits, typeEdits} = do
LEFT JOIN types to_type
ON to_type.component_hash_id = to_type_component_hash_id AND to_type.component_index = to_type_component_index
|]
execute_ [sql| SELECT update_patch_depth(#{patchId}) |]
pure patchId
termsTable :: [(Maybe ComponentHashId, Maybe Int64 {- from comp index -}, Maybe TextId, Maybe ComponentHashId, Maybe Int64 {- to comp index -}, Maybe TextId, Maybe PatchFullTermEdit.Typing, Bool)]
constructorsTable :: [(ComponentHashId, Int64 {- from comp index -}, Int64 {- from constr index -}, Maybe ComponentHashId, Maybe Int64 {- to comp index-}, Maybe Int64 {- to constr index -}, Maybe PatchFullTermEdit.Typing, Bool)]
Expand Down
2 changes: 1 addition & 1 deletion src/Share/Web/UCM/SyncV2/Impl.hs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ batchSize :: Int32
batchSize = 1000

streamSettings :: Hash32 -> Maybe SyncV2.BranchRef -> StreamInitInfo
streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.Unsorted, numEntities = Nothing, rootCausalHash, rootBranchRef}
streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.DependenciesFirst, numEntities = Nothing, rootCausalHash, rootBranchRef}

server :: Maybe UserId -> SyncV2.Routes WebAppServer
server mayUserId =
Expand Down
Loading