From d7d617f273654e30d08a84fb0b5f2fe2d6b0446a Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 5 Feb 2025 16:08:36 -0800 Subject: [PATCH 01/11] Revive entity depth sort --- sql/2024-12-16-00-00_entity_depths.sql | 226 ++++++++++++++++++++++ src/Share/Postgres/Causal/Queries.hs | 2 + src/Share/Postgres/Definitions/Queries.hs | 8 + src/Share/Postgres/Patches/Queries.hs | 1 + transcripts/sql/inserts.sql | 7 + 5 files changed, 244 insertions(+) create mode 100644 sql/2024-12-16-00-00_entity_depths.sql diff --git a/sql/2024-12-16-00-00_entity_depths.sql b/sql/2024-12-16-00-00_entity_depths.sql new file mode 100644 index 00000000..f3326ed1 --- /dev/null +++ b/sql/2024-12-16-00-00_entity_depths.sql @@ -0,0 +1,226 @@ +-- We can track the maximum dependency depth of any sub-dag rooted at each entity. +-- The depth of any entity is simply the maximum depth of any of its children plus one. +-- This allows us to trivially sort entities into a valid dependency order without needing a complex topological +-- sort at query time. + +-- Unfortunately we can't use triggers for most of these: for some entities the depth depends on +-- references which, due to foreign keys, must be inserted AFTER the entity itself. The depth calculation +-- must therefore run after all of the entity's local references are inserted, but there's no way for us to trigger +-- only when the LAST one of those is done. We would have to run it on every +-- local reference insert and remove the optimistic exit for the case where the row +-- already exists, which is a big waste. +-- +-- Instead we just run these functions manually after an entity's references are all inserted. 
+ +CREATE TABLE causal_depth ( + causal_id INTEGER PRIMARY KEY REFERENCES causals (id) ON DELETE CASCADE, + depth INTEGER NOT NULL +); + +CREATE TABLE component_depth ( + component_hash_id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE, + depth INTEGER NOT NULL +); + +CREATE TABLE namespace_depth ( + namespace_hash_id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE, + depth INTEGER NOT NULL +); + +CREATE TABLE patch_depth ( + patch_id INTEGER PRIMARY KEY REFERENCES patches (id) ON DELETE CASCADE, + depth INTEGER NOT NULL +); + + +-- Triggers + +CREATE OR REPLACE FUNCTION update_causal_depth(the_causal_id integer) RETURNS VOID AS $$ +DECLARE + max_namespace_depth INTEGER; + max_child_causal_depth INTEGER; + the_namespace_hash_id INTEGER; +BEGIN + -- If there's already a depth entry for this causal, we're done. + IF EXISTS (SELECT FROM causal_depth cd WHERE cd.causal_id = the_causal_id) THEN + RETURN; + END IF; + + SELECT c.namespace_hash_id INTO the_namespace_hash_id + FROM causals c + WHERE c.id = the_causal_id; + -- Find the max depth of the associated namespace + -- Find the max depth of any child causal + -- Set the depth of this causal to the max of those two plus one + SELECT COALESCE(MAX(nd.depth), -1) INTO max_namespace_depth + FROM namespace_depth nd + WHERE nd.namespace_hash_id = the_namespace_hash_id; + SELECT COALESCE(MAX(cd.depth), -1) INTO max_child_causal_depth + FROM causal_depth cd + JOIN causal_ancestors ca ON cd.causal_id = ca.ancestor_id + WHERE ca.causal_id = the_causal_id; + INSERT INTO causal_depth (causal_id, depth) + VALUES (the_causal_id, GREATEST(max_namespace_depth, max_child_causal_depth) + 1); + + RETURN; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION update_component_depth(the_component_hash_id integer) RETURNS VOID AS $$ +DECLARE + max_referenced_component_depth INTEGER; +BEGIN + RAISE NOTICE 'Updating component depth for %', the_component_hash_id; + -- If there's already a depth 
entry for this component, we're done. + IF EXISTS (SELECT FROM component_depth cd WHERE cd.component_hash_id = the_component_hash_id) THEN + RETURN; + END IF; + -- Find the max depth of any component referenced by this component + -- Set the depth of this component to that plus one + SELECT COALESCE(MAX(refs.depth), -1) INTO max_referenced_component_depth + FROM ( + ( SELECT cd.depth AS depth + FROM terms t + JOIN term_local_component_references cr + ON cr.term_id = t.id + JOIN component_depth cd + ON cd.component_hash_id = cr.component_hash_id + WHERE t.component_hash_id = the_component_hash_id + ) UNION + ( SELECT cd.depth AS depth + FROM types t + JOIN type_local_component_references cr + ON cr.type_id = t.id + JOIN component_depth cd + ON cd.component_hash_id = cr.component_hash_id + WHERE t.component_hash_id = the_component_hash_id + ) + ) AS refs; + INSERT INTO component_depth (component_hash_id, depth) + VALUES (the_component_hash_id, max_referenced_component_depth + 1); + RETURN; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION update_namespace_depth(the_namespace_hash_id integer) RETURNS VOID AS $$ +DECLARE + max_child_causal_depth INTEGER; + max_patch_depth INTEGER; + max_referenced_component_depth INTEGER; +BEGIN + -- If there's already a depth entry for this namespace, we're done. 
+ IF EXISTS (SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = the_namespace_hash_id) THEN + RETURN; + END IF; + -- Find the max depth of any child causal + -- Find the max depth of any patch + -- Find the max depth of any component referenced by a term, type, or term metadata in this namespace + -- Set the depth of this namespace to the max of those plus one + SELECT COALESCE(MAX(cd.depth), -1) INTO max_child_causal_depth + FROM causal_depth cd + JOIN namespace_children nc ON cd.causal_id = nc.child_causal_id + WHERE nc.parent_namespace_hash_id = the_namespace_hash_id; + SELECT COALESCE(MAX(pd.depth), -1) INTO max_patch_depth + FROM patch_depth pd + JOIN namespace_patches np ON pd.patch_id = np.patch_id + WHERE np.namespace_hash_id = the_namespace_hash_id; + SELECT COALESCE(MAX(depth), -1) INTO max_referenced_component_depth + FROM ( + -- direct term references + ( SELECT cd.depth AS depth + FROM component_depth cd + JOIN term_local_component_references cr + ON cd.component_hash_id = cr.component_hash_id + JOIN terms t + ON cr.term_id = t.id + JOIN namespace_terms nt + ON t.id = nt.term_id + WHERE nt.namespace_hash_id = the_namespace_hash_id + ) UNION + -- term metadata references + ( SELECT cd.depth AS depth + FROM component_depth cd + JOIN terms t + ON cd.component_hash_id = t.component_hash_id + JOIN namespace_term_metadata ntm + ON ntm.metadata_term_id = t.id + JOIN namespace_terms nt + ON ntm.named_term = nt.id + WHERE nt.namespace_hash_id = the_namespace_hash_id + ) UNION + -- direct constructor references + ( SELECT cd.depth AS depth + FROM component_depth cd + JOIN constructors c + ON cd.component_hash_id = c.constructor_type_component_hash_id + JOIN namespace_terms nt + ON c.id = nt.constructor_id + WHERE nt.namespace_hash_id = the_namespace_hash_id + ) UNION + -- direct type references + ( SELECT cd.depth AS depth + FROM component_depth cd + JOIN type_local_component_references cr + ON cd.component_hash_id = cr.component_hash_id + JOIN types t 
+ ON cr.type_id = t.id + JOIN namespace_types nt + ON t.id = nt.type_id + WHERE nt.namespace_hash_id = the_namespace_hash_id + ) UNION + -- type metadata references + ( SELECT cd.depth AS depth + FROM component_depth cd + JOIN terms t + ON cd.component_hash_id = t.component_hash_id + JOIN namespace_type_metadata ntm + ON ntm.metadata_term_id = t.id + JOIN namespace_types nt + ON ntm.named_type = nt.id + WHERE nt.namespace_hash_id = the_namespace_hash_id + ) + ) AS refs; + INSERT INTO namespace_depth (namespace_hash_id, depth) + VALUES (the_namespace_hash_id, GREATEST(max_child_causal_depth, max_patch_depth, max_referenced_component_depth) + 1); + + RETURN; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION update_patch_depth(the_patch_id integer) RETURNS VOID AS $$ +DECLARE + max_referenced_component_depth INTEGER; +BEGIN + -- If there's already a depth entry for this patch, we're done. + IF EXISTS (SELECT FROM patch_depth pd WHERE pd.patch_id = the_patch_id) THEN + RETURN; + END IF; + -- Find the max depth of any term component referenced by a patch + -- Find the max depth of any type component referenced by a patch + -- Set the depth of this patch to that plus one + + SELECT COALESCE(MAX(cd.depth), -1) INTO max_referenced_component_depth + FROM ( + -- term references + ( SELECT from_term_component_hash_id AS component_hash_id + FROM patch_term_mappings + WHERE patch_id = the_patch_id + ) UNION + -- constructor mappings + ( SELECT from_constructor_component_hash_id AS component_hash_id + FROM patch_constructor_mappings + WHERE patch_id = the_patch_id + ) UNION + -- type references + ( SELECT from_type_component_hash_id AS component_hash_id + FROM patch_type_mappings + WHERE patch_id = the_patch_id + ) + ) AS refs JOIN component_depth cd + ON cd.component_hash_id = refs.component_hash_id; + INSERT INTO patch_depth (patch_id, depth) + VALUES (the_patch_id, max_referenced_component_depth + 1); + + RETURN; +END; +$$ LANGUAGE plpgsql; diff --git 
a/src/Share/Postgres/Causal/Queries.hs b/src/Share/Postgres/Causal/Queries.hs index 98d9cc1a..6157ebbd 100644 --- a/src/Share/Postgres/Causal/Queries.hs +++ b/src/Share/Postgres/Causal/Queries.hs @@ -642,6 +642,7 @@ savePgNamespace maySerialized mayBh b@(BranchFull.Branch {terms, types, patches, |] -- Note: this must be run AFTER inserting the namespace and all its children. execute_ [sql| SELECT save_namespace(#{bhId}) |] + execute_ [sql| SELECT update_namespace_depth(#{bhId}) |] saveSerializedNamespace :: (QueryM m) => BranchHashId -> CBORBytes TempEntity -> m () saveSerializedNamespace bhId (CBORBytes bytes) = do @@ -785,6 +786,7 @@ saveCausal maySerializedCausal mayCh bhId ancestorIds = do SELECT #{cId}, a.ancestor_id FROM ancestors a |] + execute_ [sql| SELECT update_causal_depth(#{cId}) |] pure cId saveSerializedCausal :: (QueryM m) => CausalId -> CBORBytes TempEntity -> m () diff --git a/src/Share/Postgres/Definitions/Queries.hs b/src/Share/Postgres/Definitions/Queries.hs index cb3a9be9..ef3666c6 100644 --- a/src/Share/Postgres/Definitions/Queries.hs +++ b/src/Share/Postgres/Definitions/Queries.hs @@ -863,6 +863,10 @@ saveEncodedTermComponent componentHash maySerialized elements = do SELECT defn_mappings.term_id, defn_mappings.local_index, defn_mappings.component_hash_id FROM defn_mappings |] + execute_ + [sql| + SELECT update_component_depth(#{componentHashId}) + |] pure termIds saveTypeComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e () @@ -1013,6 +1017,10 @@ saveTypeComponent componentHash maySerialized elements = do FROM defn_mappings |] saveConstructors (zip (toList typeIds) elements) + execute_ + [sql| + SELECT update_component_depth(#{componentHashId}) + |] pure typeIds -- | Efficiently resolve all pg Ids across selected Local Ids. 
diff --git a/src/Share/Postgres/Patches/Queries.hs b/src/Share/Postgres/Patches/Queries.hs index 4a1f1f14..8c454eae 100644 --- a/src/Share/Postgres/Patches/Queries.hs +++ b/src/Share/Postgres/Patches/Queries.hs @@ -233,6 +233,7 @@ savePatch maySerialized patchHash PatchFull.Patch {termEdits, typeEdits} = do LEFT JOIN types to_type ON to_type.component_hash_id = to_type_component_hash_id AND to_type.component_index = to_type_component_index |] + execute_ [sql| SELECT update_patch_depth(#{patchId}) |] pure patchId termsTable :: [(Maybe ComponentHashId, Maybe Int64 {- from comp index -}, Maybe TextId, Maybe ComponentHashId, Maybe Int64 {- to comp index -}, Maybe TextId, Maybe PatchFullTermEdit.Typing, Bool)] constructorsTable :: [(ComponentHashId, Int64 {- from comp index -}, Int64 {- from constr index -}, Maybe ComponentHashId, Maybe Int64 {- to comp index-}, Maybe Int64 {- to constr index -}, Maybe PatchFullTermEdit.Typing, Bool)] diff --git a/transcripts/sql/inserts.sql b/transcripts/sql/inserts.sql index c7591a3f..510a8722 100644 --- a/transcripts/sql/inserts.sql +++ b/transcripts/sql/inserts.sql @@ -107,12 +107,19 @@ INSERT INTO namespaces(namespace_hash_id, contained_terms, deep_contained_terms, VALUES (0, 0, 0, 0, 0, 0, 0) ON CONFLICT DO NOTHING; +INSERT INTO namespace_depth(namespace_hash_id, depth) + VALUES (0, 0) + ON CONFLICT DO NOTHING; -- Initialize the empty causal INSERT INTO causals(id, hash, namespace_hash_id) VALUES (0, 'sg60bvjo91fsoo7pkh9gejbn0qgc95vra87ap6l5d35ri0lkaudl7bs12d71sf3fh6p23teemuor7mk1i9n567m50ibakcghjec5ajg', 0) ON CONFLICT DO NOTHING; +INSERT INTO causal_depth(causal_id, depth) + VALUES (0, 1) -- namespace depth (0) + 1, matching update_causal_depth + ON CONFLICT DO NOTHING; + -- Projects INSERT INTO projects ( id, From 882e10dbc52612f6f72bd26c38c4d8aee39f0989 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 6 Feb 2025 10:26:16 -0800 Subject: [PATCH 02/11] Fix bugs in depth generation --- sql/2024-12-16-00-00_entity_depths.sql | 74 ++++++++++++++------------ 
src/Share/Web/UCM/SyncV2/Impl.hs | 2 +- src/Share/Web/UCM/SyncV2/Queries.hs | 13 +++-- 3 files changed, 50 insertions(+), 39 deletions(-) diff --git a/sql/2024-12-16-00-00_entity_depths.sql b/sql/2024-12-16-00-00_entity_depths.sql index f3326ed1..a7a539cb 100644 --- a/sql/2024-12-16-00-00_entity_depths.sql +++ b/sql/2024-12-16-00-00_entity_depths.sql @@ -128,55 +128,53 @@ BEGIN FROM ( -- direct term references ( SELECT cd.depth AS depth - FROM component_depth cd - JOIN term_local_component_references cr - ON cd.component_hash_id = cr.component_hash_id + FROM namespace_terms nt JOIN terms t - ON cr.term_id = t.id - JOIN namespace_terms nt - ON t.id = nt.term_id + ON nt.term_id = t.id + JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id WHERE nt.namespace_hash_id = the_namespace_hash_id ) UNION -- term metadata references ( SELECT cd.depth AS depth - FROM component_depth cd - JOIN terms t - ON cd.component_hash_id = t.component_hash_id + FROM namespace_terms nt JOIN namespace_term_metadata ntm - ON ntm.metadata_term_id = t.id - JOIN namespace_terms nt ON ntm.named_term = nt.id + JOIN terms t + ON ntm.metadata_term_id = t.id + JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id WHERE nt.namespace_hash_id = the_namespace_hash_id ) UNION -- direct constructor references ( SELECT cd.depth AS depth - FROM component_depth cd + FROM namespace_terms nt JOIN constructors c - ON cd.component_hash_id = c.constructor_type_component_hash_id - JOIN namespace_terms nt ON c.id = nt.constructor_id + JOIN types t + ON c.type_id = t.id + JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id WHERE nt.namespace_hash_id = the_namespace_hash_id ) UNION -- direct type references ( SELECT cd.depth AS depth - FROM component_depth cd - JOIN type_local_component_references cr - ON cd.component_hash_id = cr.component_hash_id + FROM namespace_types nt JOIN types t - ON cr.type_id = t.id - JOIN namespace_types nt - ON t.id = nt.type_id + 
ON nt.type_id = t.id + JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id WHERE nt.namespace_hash_id = the_namespace_hash_id ) UNION -- type metadata references ( SELECT cd.depth AS depth - FROM component_depth cd - JOIN terms t - ON cd.component_hash_id = t.component_hash_id + FROM namespace_types nt JOIN namespace_type_metadata ntm - ON ntm.metadata_term_id = t.id - JOIN namespace_types nt ON ntm.named_type = nt.id + JOIN terms t + ON ntm.metadata_term_id = t.id + JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id WHERE nt.namespace_hash_id = the_namespace_hash_id ) ) AS refs; @@ -202,19 +200,27 @@ BEGIN SELECT COALESCE(MAX(cd.depth), -1) INTO max_referenced_component_depth FROM ( -- term references - ( SELECT from_term_component_hash_id AS component_hash_id - FROM patch_term_mappings - WHERE patch_id = the_patch_id + ( SELECT t.component_hash_id AS component_hash_id + FROM patch_term_mappings ptm + JOIN terms t + ON ptm.to_term_id = t.id + WHERE ptm.patch_id = the_patch_id ) UNION -- constructor mappings - ( SELECT from_constructor_component_hash_id AS component_hash_id - FROM patch_constructor_mappings - WHERE patch_id = the_patch_id + ( SELECT t.component_hash_id AS component_hash_id + FROM patch_constructor_mappings pcm + JOIN constructors c + ON pcm.to_constructor_id = c.id + JOIN types t + ON c.type_id = t.id + WHERE pcm.patch_id = the_patch_id ) UNION -- type references - ( SELECT from_type_component_hash_id AS component_hash_id - FROM patch_type_mappings - WHERE patch_id = the_patch_id + ( SELECT t.component_hash_id AS component_hash_id + FROM patch_type_mappings ptm + JOIN types t + ON ptm.to_type_id = t.id + WHERE ptm.patch_id = the_patch_id ) ) AS refs JOIN component_depth cd ON cd.component_hash_id = refs.component_hash_id; diff --git a/src/Share/Web/UCM/SyncV2/Impl.hs b/src/Share/Web/UCM/SyncV2/Impl.hs index 71ddaccf..f1a025e7 100644 --- a/src/Share/Web/UCM/SyncV2/Impl.hs +++ 
b/src/Share/Web/UCM/SyncV2/Impl.hs @@ -48,7 +48,7 @@ batchSize :: Int32 batchSize = 1000 streamSettings :: Hash32 -> Maybe SyncV2.BranchRef -> StreamInitInfo -streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.Unsorted, numEntities = Nothing, rootCausalHash, rootBranchRef} +streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.DependenciesFirst, numEntities = Nothing, rootCausalHash, rootBranchRef} server :: Maybe UserId -> SyncV2.Routes WebAppServer server mayUserId = diff --git a/src/Share/Web/UCM/SyncV2/Queries.hs b/src/Share/Web/UCM/SyncV2/Queries.hs index 02264982..aa92a777 100644 --- a/src/Share/Web/UCM/SyncV2/Queries.hs +++ b/src/Share/Web/UCM/SyncV2/Queries.hs @@ -173,30 +173,35 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = ref.component_hash_id) ) ) - (SELECT bytes.bytes, ch.base32 + (SELECT bytes.bytes, ch.base32, cd.depth FROM transitive_components tc JOIN serialized_components sc ON sc.user_id = #{ownerUserId} AND tc.component_hash_id = sc.component_hash_id JOIN bytes ON sc.bytes_id = bytes.id JOIN component_hashes ch ON tc.component_hash_id = ch.id + JOIN component_depth cd ON ch.id = cd.component_hash_id ) UNION ALL - (SELECT bytes.bytes, ap.patch_hash + (SELECT bytes.bytes, ap.patch_hash, pd.depth FROM all_patches ap JOIN serialized_patches sp ON ap.patch_id = sp.patch_id JOIN bytes ON sp.bytes_id = bytes.id + JOIN patch_depth pd ON ap.patch_id = pd.patch_id ) UNION ALL - (SELECT bytes.bytes, an.namespace_hash + (SELECT bytes.bytes, an.namespace_hash, nd.depth FROM all_namespaces an JOIN serialized_namespaces sn ON an.namespace_hash_id = sn.namespace_hash_id JOIN bytes ON sn.bytes_id = bytes.id + JOIN namespace_depth nd ON an.namespace_hash_id = nd.namespace_hash_id ) UNION ALL - (SELECT bytes.bytes, tc.causal_hash + (SELECT 
bytes.bytes, tc.causal_hash, cd.depth FROM transitive_causals tc JOIN serialized_causals sc ON tc.causal_id = sc.causal_id JOIN bytes ON sc.bytes_id = bytes.id + JOIN causal_depth cd ON tc.causal_id = cd.causal_id ) + ORDER BY depth ASC |] pure cursor From 81b9d4224ca12986a1cf5e8f0d1bb4f205b7e156 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 6 Feb 2025 13:23:35 -0800 Subject: [PATCH 03/11] Add entity depth migration queries --- share-api.cabal | 2 + src/Share/BackgroundJobs.hs | 4 +- .../EntityDepthMigration/Queries.hs | 233 ++++++++++++++++++ .../EntityDepthMigration/Worker.hs | 100 ++++++++ 4 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs create mode 100644 src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs diff --git a/share-api.cabal b/share-api.cabal index 6356c4f4..fce475a4 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -30,6 +30,8 @@ library Share.BackgroundJobs Share.BackgroundJobs.Diffs.ContributionDiffs Share.BackgroundJobs.Diffs.Queries + Share.BackgroundJobs.EntityDepthMigration.Queries + Share.BackgroundJobs.EntityDepthMigration.Worker Share.BackgroundJobs.Errors Share.BackgroundJobs.Monad Share.BackgroundJobs.Search.DefinitionSync diff --git a/src/Share/BackgroundJobs.hs b/src/Share/BackgroundJobs.hs index f5a533d3..6f050693 100644 --- a/src/Share/BackgroundJobs.hs +++ b/src/Share/BackgroundJobs.hs @@ -3,7 +3,6 @@ module Share.BackgroundJobs (startWorkers) where import Ki.Unlifted qualified as Ki import Share.BackgroundJobs.Monad (Background) import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch -import Share.BackgroundJobs.SerializedEntitiesMigration.Worker qualified as SerializedEntitiesMigration -- | Kicks off all background workers. startWorkers :: Ki.Scope -> Background () @@ -11,4 +10,5 @@ startWorkers scope = do DefnSearch.worker scope -- Temporary disable background diff jobs until the new diffing logic is done. 
-- ContributionDiffs.worker scope - SerializedEntitiesMigration.worker scope + -- SerializedEntitiesMigration.worker scope + EntityDepthMigration.worker scope diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs new file mode 100644 index 00000000..3142054e --- /dev/null +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs @@ -0,0 +1,233 @@ +module Share.BackgroundJobs.EntityDepthMigration.Queries + ( claimEntity, + claimComponent, + ) +where + +import Share.IDs +import Share.Postgres +import Share.Postgres (queryExpect1Col, queryVectorRows) +import Share.Postgres.IDs +import Unison.Hash32 + +updateComponentDepths :: Transaction e (Int64) +updateComponentDepths = do + queryExpect1Col + [sql| + WITH updatable_type_components AS ( + -- Find all component hashes (term or type components) which aren't missing depth info for any of their + -- dependencies. The single output column keeps its name, id. + SELECT ch.id + FROM component_hashes ch + LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id + -- Only recalculate ones which haven't been calculated yet. 
+ WHERE cd.depth IS NULL + -- Check that the component has a term or type + AND EXISTS ( + SELECT 1 + FROM terms t + WHERE t.component_hash_id = ch.id + UNION + SELECT 1 + FROM types t + WHERE t.component_hash_id = ch.id + ) -- Find only the ones that have all their dependency depths already calculated + AND NOT EXISTS ( + SELECT 1 + FROM terms t + JOIN term_local_component_references cr_sub ON cr_sub.term_id = t.id + LEFT JOIN component_depth cd ON cd.component_hash_id = cr_sub.component_hash_id + WHERE + t.component_hash_id = ch.id + AND cr_sub.term_id = t.id AND cd.depth IS NULL + UNION + SELECT 1 + FROM types t + JOIN type_local_component_references cr_sub ON cr_sub.type_id = t.id + LEFT JOIN component_depth cd ON cd.component_hash_id = cr_sub.component_hash_id + WHERE + t.component_hash_id = ch.id + AND cr_sub.type_id = t.id AND cd.depth IS NULL + ) + ), updated AS ( + SELECT update_component_depth(ch.id) + FROM updatable_type_components ch + ) SELECT COUNT(*) FROM updated + |] + +updatePatchDepths :: Transaction e Int64 +updatePatchDepths = do + queryExpect1Col + [sql| + WITH updatable_patches AS ( + -- Find all patches which aren't missing depth info for any of their + -- dependencies. + SELECT p.id + FROM patches p + LEFT JOIN patch_depth pd ON pd.patch_id = p.id + -- Only recalculate ones which haven't been calculated yet. 
+ WHERE pd.patch_id IS NULL + AND NOT EXISTS ( + SELECT 1 + FROM patch_term_mappings ptm + JOIN terms t + ON ptm.to_term_id = t.id + LEFT JOIN component_depth cd + ON cd.component_hash_id = t.component_hash_id + WHERE ptm.patch_id = p.id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM patch_constructor_mappings pcm + JOIN constructors c + ON pcm.to_constructor_id = c.id + JOIN types t + ON c.type_id = t.id + LEFT JOIN component_depth cd + ON cd.component_hash_id = t.component_hash_id + WHERE pcm.patch_id = p.id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM patch_type_mappings ptm + JOIN types t + ON ptm.to_type_id = t.id + LEFT JOIN component_depth cd + ON cd.component_hash_id = t.component_hash_id + WHERE ptm.patch_id = p.id + AND cd.depth IS NULL + ) + ), updated AS ( + SELECT update_patch_depth(p.id) + FROM updatable_patches p + ) SELECT COUNT(*) FROM updated + |] + +updateNamespaceDepths :: Transaction e Int64 +updateNamespaceDepths = do + queryExpect1Col + [sql| + WITH updatable_namespaces AS ( + -- Find all namespaces which aren't missing depth info for any of their + -- dependencies (child causals, patches, and referenced components). + SELECT n.namespace_hash_id + FROM namespaces n + LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.namespace_hash_id + -- Only recalculate ones which haven't been calculated yet. 
+ WHERE nd.depth IS NULL + AND NOT EXISTS ( + SELECT 1 + FROM namespace_children nc + LEFT JOIN causal_depth cd ON nc.child_causal_id = cd.causal_id + WHERE nc.parent_namespace_hash_id = n.namespace_hash_id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM namespace_patches np + LEFT JOIN patch_depth pd ON np.patch_id = pd.patch_id + WHERE np.namespace_hash_id = n.namespace_hash_id + AND pd.depth IS NULL + UNION + SELECT 1 + FROM namespace_terms nt + JOIN terms t + ON nt.term_id = t.id + LEFT JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id + WHERE nt.namespace_hash_id = n.namespace_hash_id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM namespace_terms nt + JOIN namespace_term_metadata ntm + ON ntm.named_term = nt.id + JOIN terms t + ON ntm.metadata_term_id = t.id + LEFT JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id + WHERE nt.namespace_hash_id = n.namespace_hash_id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM namespace_terms nt + JOIN constructors c + ON c.id = nt.constructor_id + JOIN types t + ON c.type_id = t.id + LEFT JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id + WHERE nt.namespace_hash_id = n.namespace_hash_id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM namespace_types nt + JOIN types t + ON nt.type_id = t.id + LEFT JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id + WHERE nt.namespace_hash_id = n.namespace_hash_id + AND cd.depth IS NULL + UNION + SELECT 1 + FROM namespace_types nt + JOIN namespace_type_metadata ntm + ON ntm.named_type = nt.id + JOIN terms t + ON ntm.metadata_term_id = t.id + LEFT JOIN component_depth cd + ON t.component_hash_id = cd.component_hash_id + WHERE nt.namespace_hash_id = n.namespace_hash_id + AND cd.depth IS NULL + ) + ), updated AS ( + SELECT update_namespace_depth(n.namespace_hash_id) + FROM updatable_namespaces n + ) SELECT COUNT(*) FROM updated + |] + +updateCausalDepths :: Transaction e Int64 +updateCausalDepths = do + queryExpect1Col + [sql| + WITH updatable_causals AS ( + -- Find all causals which aren't missing depth info for any 
of their + -- dependencies. + SELECT c.id + FROM causals c + LEFT JOIN causal_depth cd ON cd.causal_id = c.id + -- Only recalculate ones which haven't been calculated yet. + WHERE cd.depth IS NULL + AND EXISTS ( -- the namespace depth must already be calculated + SELECT 1 + FROM namespace_depth nd + WHERE nd.namespace_hash_id = c.namespace_hash_id + AND nd.depth IS NOT NULL + ) AND NOT EXISTS ( -- and no ancestor may still be missing its depth + SELECT 1 + FROM causal_ancestors ca + LEFT JOIN causal_depth cd + ON ca.ancestor_id = cd.causal_id + WHERE ca.causal_id = c.id + AND cd.depth IS NULL + ) + ), updated AS ( + SELECT update_causal_depth(c.id) + FROM updatable_causals c + ) SELECT COUNT(*) FROM updated + |] + +-- Sanity checks +-- +-- Should return no rows: +-- +-- SELECT ch.id +-- FROM component_hashes ch +-- LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id +-- WHERE cd.depth IS NULL +-- +-- Should match the number of components: +-- +-- SELECT COUNT(*) +-- FROM component_hashes ch +-- LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id +-- WHERE cd.depth IS NOT NULL diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs new file mode 100644 index 00000000..6a810494 --- /dev/null +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs @@ -0,0 +1,100 @@ +module Share.BackgroundJobs.EntityDepthMigration.Worker (worker) where + +import Data.ByteString.Lazy qualified as BL +import Ki.Unlifted qualified as Ki +import Share.BackgroundJobs.Monad (Background) +import Share.BackgroundJobs.SerializedEntitiesMigration.Queries qualified as Q +import Share.BackgroundJobs.Workers (newWorker) +import Share.Codebase qualified as Codebase +import Share.Codebase.Types (CodebaseEnv (..)) +import Share.Postgres +import Share.Postgres qualified as PG +import Share.Postgres.Causal.Queries qualified as CQ +import Share.Postgres.Definitions.Queries qualified as DefnQ +import Share.Postgres.Hashes.Queries qualified as HQ +import Share.Postgres.IDs +import Share.Postgres.Patches.Queries 
qualified as PQ +import Share.Postgres.Sync.Queries qualified as SQ +import Share.Prelude +import Share.Web.Authorization qualified as AuthZ +import U.Codebase.Sqlite.Entity qualified as Entity +import U.Codebase.Sqlite.TempEntity (TempEntity) +import Unison.Hash32 (Hash32) +import Unison.Hash32 qualified as Hash32 +import Unison.Sync.Common qualified as SyncCommon +import Unison.SyncV2.Types qualified as SyncV2 +import UnliftIO.Concurrent qualified as UnliftIO + +pollingIntervalSeconds :: Int +pollingIntervalSeconds = 10 + +worker :: Ki.Scope -> Background () +worker scope = do + authZReceipt <- AuthZ.backgroundJobAuthZ + newWorker scope "migration:serialised_components" $ forever do + gotResult <- processComponents authZReceipt + if gotResult + then pure () + else liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 + newWorker scope "migration:serialised_entities" $ forever do + gotResult <- processEntities authZReceipt + if gotResult + then pure () + else liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 + +processEntities :: AuthZ.AuthZReceipt -> Background Bool +processEntities !_authZReceipt = do + mayHash <- PG.runTransaction do + Q.claimEntity >>= \case + Nothing -> pure Nothing + Just (hash32, codebaseUserId) -> do + let codebaseEnv = CodebaseEnv codebaseUserId + Codebase.codebaseMToTransaction codebaseEnv $ do + entity <- SQ.expectEntity hash32 + let tempEntity = SyncCommon.entityToTempEntity id entity + saveUnsandboxedSerializedEntities hash32 tempEntity + pure (Just hash32) + case mayHash of + Just _hash -> do + pure True + Nothing -> pure False + +-- | Components must be handled separately since they're sandboxed to specific users. +-- NOTE: this process doesn't insert the row into serialized_components, you'll need to do that manually after the automated migration is finished. 
+processComponents :: AuthZ.AuthZReceipt -> Background Bool +processComponents !_authZReceipt = do + PG.runTransaction do + Q.claimComponent >>= \case + Nothing -> pure False + Just (componentHashId, userId) -> do + let codebaseEnv = CodebaseEnv userId + Codebase.codebaseMToTransaction codebaseEnv $ do + hash32 <- (Hash32.fromHash . unComponentHash) <$> HQ.expectComponentHashesOf id componentHashId + entity <- SQ.expectEntity hash32 + componentSummaryDigest <- HQ.expectComponentSummaryDigest componentHashId + let tempEntity = SyncCommon.entityToTempEntity id entity + let (SyncV2.CBORBytes bytes) = SyncV2.serialiseCBORBytes tempEntity + bytesId <- DefnQ.ensureBytesIdsOf id (BL.toStrict bytes) + execute_ + [sql| + INSERT INTO component_summary_digests_to_serialized_component_bytes_hash (component_hash_id, component_summary_digest, serialized_component_bytes_id) + VALUES (#{componentHashId}, #{componentSummaryDigest}, #{bytesId}) + ON CONFLICT DO NOTHING + |] + pure True + +saveUnsandboxedSerializedEntities :: Hash32 -> TempEntity -> Codebase.CodebaseM e () +saveUnsandboxedSerializedEntities hash entity = do + let serialised = SyncV2.serialiseCBORBytes entity + case entity of + Entity.TC {} -> error "Unexpected term component" + Entity.DC {} -> error "Unexpected decl component" + Entity.P {} -> do + patchId <- HQ.expectPatchIdsOf id (fromHash32 @PatchHash hash) + PQ.saveSerializedPatch patchId serialised + Entity.C {} -> do + cId <- CQ.expectCausalIdByHash (fromHash32 @CausalHash hash) + CQ.saveSerializedCausal cId serialised + Entity.N {} -> do + bhId <- HQ.expectBranchHashId (fromHash32 @BranchHash hash) + CQ.saveSerializedNamespace bhId serialised From bcd8d6f9ed197f72644fc224d15eb1f767906ee4 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 10 Feb 2025 09:44:01 -0800 Subject: [PATCH 04/11] Start on migration worker code for depth migration --- src/Share/BackgroundJobs.hs | 1 + .../EntityDepthMigration/Queries.hs | 11 +- .../EntityDepthMigration/Worker.hs | 
124 +++++++----------- 3 files changed, 50 insertions(+), 86 deletions(-) diff --git a/src/Share/BackgroundJobs.hs b/src/Share/BackgroundJobs.hs index 6f050693..1bfd03bb 100644 --- a/src/Share/BackgroundJobs.hs +++ b/src/Share/BackgroundJobs.hs @@ -1,6 +1,7 @@ module Share.BackgroundJobs (startWorkers) where import Ki.Unlifted qualified as Ki +import Share.BackgroundJobs.EntityDepthMigration.Worker qualified as EntityDepthMigration import Share.BackgroundJobs.Monad (Background) import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs index 3142054e..0ead9936 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs @@ -1,14 +1,13 @@ module Share.BackgroundJobs.EntityDepthMigration.Queries - ( claimEntity, - claimComponent, + ( updateComponentDepths, + updatePatchDepths, + updateNamespaceDepths, + updateCausalDepths, ) where -import Share.IDs +import Data.Int (Int64) import Share.Postgres -import Share.Postgres (queryExpect1Col, queryVectorRows) -import Share.Postgres.IDs -import Unison.Hash32 updateComponentDepths :: Transaction e (Int64) updateComponentDepths = do diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs index 6a810494..68e29151 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs @@ -1,28 +1,13 @@ module Share.BackgroundJobs.EntityDepthMigration.Worker (worker) where -import Data.ByteString.Lazy qualified as BL import Ki.Unlifted qualified as Ki +import Share.BackgroundJobs.EntityDepthMigration.Queries qualified as Q import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.SerializedEntitiesMigration.Queries qualified as Q import Share.BackgroundJobs.Workers 
(newWorker) -import Share.Codebase qualified as Codebase -import Share.Codebase.Types (CodebaseEnv (..)) -import Share.Postgres import Share.Postgres qualified as PG -import Share.Postgres.Causal.Queries qualified as CQ -import Share.Postgres.Definitions.Queries qualified as DefnQ -import Share.Postgres.Hashes.Queries qualified as HQ -import Share.Postgres.IDs -import Share.Postgres.Patches.Queries qualified as PQ -import Share.Postgres.Sync.Queries qualified as SQ import Share.Prelude +import Share.Utils.Logging qualified as Logging import Share.Web.Authorization qualified as AuthZ -import U.Codebase.Sqlite.Entity qualified as Entity -import U.Codebase.Sqlite.TempEntity (TempEntity) -import Unison.Hash32 (Hash32) -import Unison.Hash32 qualified as Hash32 -import Unison.Sync.Common qualified as SyncCommon -import Unison.SyncV2.Types qualified as SyncV2 import UnliftIO.Concurrent qualified as UnliftIO pollingIntervalSeconds :: Int @@ -31,70 +16,49 @@ pollingIntervalSeconds = 10 worker :: Ki.Scope -> Background () worker scope = do authZReceipt <- AuthZ.backgroundJobAuthZ - newWorker scope "migration:serialised_components" $ forever do - gotResult <- processComponents authZReceipt - if gotResult - then pure () - else liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 - newWorker scope "migration:serialised_entities" $ forever do - gotResult <- processEntities authZReceipt - if gotResult - then pure () - else liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 - -processEntities :: AuthZ.AuthZReceipt -> Background Bool -processEntities !_authZReceipt = do - mayHash <- PG.runTransaction do - Q.claimEntity >>= \case - Nothing -> pure Nothing - Just (hash32, codebaseUserId) -> do - let codebaseEnv = CodebaseEnv codebaseUserId - Codebase.codebaseMToTransaction codebaseEnv $ do - entity <- SQ.expectEntity hash32 - let tempEntity = SyncCommon.entityToTempEntity id entity - saveUnsandboxedSerializedEntities hash32 tempEntity - pure (Just hash32) - 
case mayHash of - Just _hash -> do - pure True - Nothing -> pure False + newWorker scope "migration:entity_depth" $ forever do + -- Do the components first, they're the bottom of the dependency tree. + computeComponentDepths authZReceipt + -- Then do the patches, they depend on components. + computePatchDepths authZReceipt + -- Then do the namespaces and causals together + computeNamespaceAndCausalDepths authZReceipt + liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 -- | Components must be handled separately since they're sandboxed to specific users. -- NOTE: this process doesn't insert the row into serialized_components, you'll need to do that manually after the automated migration is finished. -processComponents :: AuthZ.AuthZReceipt -> Background Bool -processComponents !_authZReceipt = do - PG.runTransaction do - Q.claimComponent >>= \case - Nothing -> pure False - Just (componentHashId, userId) -> do - let codebaseEnv = CodebaseEnv userId - Codebase.codebaseMToTransaction codebaseEnv $ do - hash32 <- (Hash32.fromHash . unComponentHash) <$> HQ.expectComponentHashesOf id componentHashId - entity <- SQ.expectEntity hash32 - componentSummaryDigest <- HQ.expectComponentSummaryDigest componentHashId - let tempEntity = SyncCommon.entityToTempEntity id entity - let (SyncV2.CBORBytes bytes) = SyncV2.serialiseCBORBytes tempEntity - bytesId <- DefnQ.ensureBytesIdsOf id (BL.toStrict bytes) - execute_ - [sql| - INSERT INTO component_summary_digests_to_serialized_component_bytes_hash (component_hash_id, component_summary_digest, serialized_component_bytes_id) - VALUES (#{componentHashId}, #{componentSummaryDigest}, #{bytesId}) - ON CONFLICT DO NOTHING - |] - pure True +computeComponentDepths :: AuthZ.AuthZReceipt -> Background () +computeComponentDepths !_authZReceipt = do + PG.runTransaction Q.updateComponentDepths >>= \case + 0 -> do + Logging.logInfoText $ "Done processing component depth" + pure () + -- Recurse until there's nothing left to do. 
+ n -> do + Logging.logInfoText $ "Computed Depth for " <> tShow n <> " components" + computeComponentDepths _authZReceipt + +computePatchDepths :: AuthZ.AuthZReceipt -> Background () +computePatchDepths !_authZReceipt = do + PG.runTransaction Q.updatePatchDepths >>= \case + 0 -> do + Logging.logInfoText $ "Done processing patch depth" + pure () + -- Recurse until there's nothing left to do. + n -> do + Logging.logInfoText $ "Computed Depth for " <> tShow n <> " patches" + computePatchDepths _authZReceipt -saveUnsandboxedSerializedEntities :: Hash32 -> TempEntity -> Codebase.CodebaseM e () -saveUnsandboxedSerializedEntities hash entity = do - let serialised = SyncV2.serialiseCBORBytes entity - case entity of - Entity.TC {} -> error "Unexpected term component" - Entity.DC {} -> error "Unexpected decl component" - Entity.P {} -> do - patchId <- HQ.expectPatchIdsOf id (fromHash32 @PatchHash hash) - PQ.saveSerializedPatch patchId serialised - Entity.C {} -> do - cId <- CQ.expectCausalIdByHash (fromHash32 @CausalHash hash) - CQ.saveSerializedCausal cId serialised - Entity.N {} -> do - bhId <- HQ.expectBranchHashId (fromHash32 @BranchHash hash) - CQ.saveSerializedNamespace bhId serialised +computeNamespaceAndCausalDepths :: AuthZ.AuthZReceipt -> Background () +computeNamespaceAndCausalDepths !authZReceipt = do + PG.runTransaction Q.updateNamespaceDepths >>= \case + namespaceN -> do + PG.runTransaction Q.updateCausalDepths >>= \case + causalN -> do + case (namespaceN, causalN) of + (0, 0) -> do + Logging.logInfoText $ "Done processing namespace and causal depth" + pure () + (namespaceN, causalN) -> do + Logging.logInfoText $ "Computed Depth for " <> tShow namespaceN <> " namespaces and " <> tShow causalN <> " causals" + computeNamespaceAndCausalDepths authZReceipt From efc142a1900680b2f6905bbf71eef70ad87dbd4d Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 25 Feb 2025 10:48:43 -0800 Subject: [PATCH 05/11] Fix up migration queries --- 
.../EntityDepthMigration/Queries.hs | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs index 0ead9936..f1d34750 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs @@ -13,7 +13,7 @@ updateComponentDepths :: Transaction e (Int64) updateComponentDepths = do queryExpect1Col [sql| - WITH updatable_type_components(hash, user_id) AS ( + WITH updatable_components(component_hash_id) AS ( -- Find all component hashes which aren't missing depth info for any of their -- dependencies. SELECT ch.id @@ -23,16 +23,16 @@ updateComponentDepths = do WHERE cd.depth IS NULL -- Check that the component has a term or type AND EXISTS ( - SELECT 1 + SELECT FROM terms t WHERE t.component_hash_id = ch.id UNION - SELECT 1 + SELECT FROM types t WHERE t.component_hash_id = ch.id ) -- Find only the ones that have all their dependency depths already calculated AND NOT EXISTS ( - SELECT 1 + SELECT FROM terms t JOIN term_local_component_references cr_sub ON cr_sub.term_id = t.id LEFT JOIN component_depth cd ON cd.component_hash_id = cr_sub.component_hash_id @@ -40,7 +40,7 @@ updateComponentDepths = do t.component_hash_id = ch.id AND cr_sub.term_id = t.id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM types t JOIN type_local_component_references cr_sub ON cr_sub.type_id = t.id LEFT JOIN component_depth cd ON cd.component_hash_id = cr_sub.component_hash_id @@ -49,8 +49,8 @@ updateComponentDepths = do AND cr_sub.type_id = t.id AND cd.depth IS NULL ) ), updated AS ( - SELECT update_component_depth(ch.id) - FROM updatable_type_components ch + SELECT update_component_depth(ch.component_hash_id) + FROM updatable_components ch ) SELECT COUNT(*) FROM updated |] @@ -58,7 +58,7 @@ updatePatchDepths :: Transaction e Int64 updatePatchDepths = do queryExpect1Col [sql| - WITH 
updatable_patches AS ( + WITH updatable_patches(patch_id) AS ( -- Find all patches which aren't missing depth info for any of their -- dependencies. SELECT p.id @@ -67,7 +67,7 @@ updatePatchDepths = do -- Only recalculate ones which haven't been calculated yet. WHERE pd.patch_id IS NULL AND NOT EXISTS ( - SELECT 1 + SELECT FROM patch_term_mappings ptm JOIN terms t ON ptm.to_term_id = t.id @@ -76,7 +76,7 @@ updatePatchDepths = do WHERE ptm.patch_id = p.id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM patch_constructor_mappings pcm JOIN constructors c ON pcm.to_constructor_id = c.id @@ -87,7 +87,7 @@ updatePatchDepths = do WHERE pcm.patch_id = p.id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM patch_type_mappings ptm JOIN types t ON ptm.to_type_id = t.id @@ -97,8 +97,8 @@ updatePatchDepths = do AND cd.depth IS NULL ) ), updated AS ( - SELECT update_patch_depth(p.id) - FROM updatable_patches p + SELECT update_patch_depth(up.patch_id) + FROM updatable_patches up ) SELECT COUNT(*) FROM updated |] @@ -106,37 +106,39 @@ updateNamespaceDepths :: Transaction e Int64 updateNamespaceDepths = do queryExpect1Col [sql| - WITH updatable_namespaces AS ( + WITH updatable_namespaces(namespace_hash_id) AS ( -- Find all namespaces which aren't missing depth info for any of their -- dependencies. - SELECT n.id + SELECT n.namespace_hash_id FROM namespaces n - LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.id + LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.namespace_hash_id -- Only recalculate ones which haven't been calculated yet. 
WHERE nd.depth IS NULL AND NOT EXISTS ( - SELECT 1 + SELECT FROM namespace_children nc - LEFT JOIN namespace_depth nd ON nc.child_causal_id = nd.causal_id - WHERE nc.parent_namespace_hash_id = n.id + JOIN causals c + ON nc.child_causal_id = c.id + LEFT JOIN namespace_depth nd ON c.namespace_hash_id = nd.namespace_hash_id + WHERE nc.parent_namespace_hash_id = n.namespace_hash_id AND nd.depth IS NULL UNION - SELECT 1 + SELECT FROM namespace_patches np LEFT JOIN patch_depth pd ON np.patch_id = pd.patch_id - WHERE np.namespace_hash_id = n.id + WHERE np.namespace_hash_id = n.namespace_hash_id AND pd.depth IS NULL UNION - SELECT 1 + SELECT FROM namespace_terms nt JOIN terms t ON nt.term_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id + WHERE nt.namespace_hash_id = n.namespace_hash_id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM namespace_terms nt JOIN namespace_term_metadata ntm ON ntm.named_term = nt.id @@ -144,10 +146,10 @@ updateNamespaceDepths = do ON ntm.metadata_term_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id + WHERE nt.namespace_hash_id = n.namespace_hash_id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM namespace_terms nt JOIN constructors c ON c.id = nt.constructor_id @@ -155,19 +157,19 @@ updateNamespaceDepths = do ON c.type_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id + WHERE nt.namespace_hash_id = n.namespace_hash_id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM namespace_types nt JOIN types t ON nt.type_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id + WHERE nt.namespace_hash_id = n.namespace_hash_id AND cd.depth IS NULL UNION - SELECT 1 + SELECT FROM namespace_types nt JOIN namespace_type_metadata ntm ON ntm.named_type = nt.id @@ -175,12 +177,12 
@@ updateNamespaceDepths = do ON ntm.metadata_term_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id + WHERE nt.namespace_hash_id = n.namespace_hash_id AND cd.depth IS NULL ) ), updated AS ( - SELECT update_namespace_depth(n.id) - FROM updatable_namespaces n + SELECT update_namespace_depth(un.namespace_hash_id) + FROM updatable_namespaces un ) SELECT COUNT(*) FROM updated |] @@ -197,12 +199,12 @@ updateCausalDepths = do -- Only recalculate ones which haven't been calculated yet. WHERE cd.depth IS NULL AND EXISTS ( - SELECT 1 + SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = c.namespace_hash_id AND nd.depth IS NULL ) AND NOT EXISTS ( - SELECT 1 + SELECT FROM causal_ancestors ca LEFT JOIN causal_depth cd ON ca.ancestor_id = cd.causal_id From 932b2d14bfd257b626aa19fc405c210501f3416b Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 25 Feb 2025 14:55:24 -0800 Subject: [PATCH 06/11] Fixups for entity depth migration --- sql/2025-02-25_causal_depth_migration.sql | 45 +++++++++ .../EntityDepthMigration/Queries.hs | 99 ++++++++++--------- .../EntityDepthMigration/Worker.hs | 5 +- 3 files changed, 104 insertions(+), 45 deletions(-) create mode 100644 sql/2025-02-25_causal_depth_migration.sql diff --git a/sql/2025-02-25_causal_depth_migration.sql b/sql/2025-02-25_causal_depth_migration.sql new file mode 100644 index 00000000..0d9bae52 --- /dev/null +++ b/sql/2025-02-25_causal_depth_migration.sql @@ -0,0 +1,45 @@ + +CREATE TABLE unfinished_causal_depths ( + id INTEGER PRIMARY KEY REFERENCES causals (id) ON DELETE CASCADE +); + +CREATE TABLE unfinished_namespace_depths ( + id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE +); + +CREATE TABLE unfinished_patch_depths ( + id INTEGER PRIMARY KEY REFERENCES patches (id) ON DELETE CASCADE +); + +CREATE TABLE unfinished_component_depths ( + id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE +); + +INSERT 
INTO unfinished_causal_depths (id) + SELECT c.id + FROM causals c + WHERE NOT EXISTS ( + SELECT FROM causal_depth cd WHERE cd.causal_id = c.id + ); + +INSERT INTO unfinished_namespace_depths (id) + SELECT n.namespace_hash_id + FROM namespaces n + WHERE NOT EXISTS ( + SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = n.namespace_hash_id + ); + +INSERT INTO unfinished_patch_depths (id) + SELECT p.id + FROM patches p + WHERE NOT EXISTS ( + SELECT FROM patch_depth pd WHERE pd.patch_id = p.id + ); + +INSERT INTO unfinished_component_depths (id) + SELECT ch.id + FROM component_hashes ch + WHERE NOT EXISTS ( + SELECT FROM component_depth cd WHERE cd.component_hash_id = ch.id + ); + diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs index f1d34750..f0c7d1b6 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs @@ -17,20 +17,10 @@ updateComponentDepths = do -- Find all component hashes which aren't missing depth info for any of their -- dependencies. SELECT ch.id - FROM component_hashes ch + FROM unfinished_component_depths ch LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id -- Only recalculate ones which haven't been calculated yet. 
WHERE cd.depth IS NULL - -- Check that the component has a term or type - AND EXISTS ( - SELECT - FROM terms t - WHERE t.component_hash_id = ch.id - UNION - SELECT - FROM types t - WHERE t.component_hash_id = ch.id - ) -- Find only the ones that have all their dependency depths already calculated AND NOT EXISTS ( SELECT FROM terms t @@ -48,9 +38,12 @@ updateComponentDepths = do t.component_hash_id = ch.id AND cr_sub.type_id = t.id AND cd.depth IS NULL ) - ), updated AS ( - SELECT update_component_depth(ch.component_hash_id) + ), updated(component_hash_id, x) AS ( + SELECT ch.component_hash_id, update_component_depth(ch.component_hash_id) FROM updatable_components ch + ), mark_finished AS ( + DELETE FROM unfinished_component_depths ufd + WHERE ufd.id IN (SELECT u.component_hash_id FROM updated u) ) SELECT COUNT(*) FROM updated |] @@ -62,7 +55,7 @@ updatePatchDepths = do -- Find all patches which aren't missing depth info for any of their -- dependencies. SELECT p.id - FROM patches p + FROM unfinished_patch_depths p LEFT JOIN patch_depth pd ON pd.patch_id = p.id -- Only recalculate ones which haven't been calculated yet. WHERE pd.patch_id IS NULL @@ -96,9 +89,12 @@ updatePatchDepths = do WHERE ptm.patch_id = p.id AND cd.depth IS NULL ) - ), updated AS ( - SELECT update_patch_depth(up.patch_id) + ), updated(patch_id, x) AS ( + SELECT up.patch_id, update_patch_depth(up.patch_id) FROM updatable_patches up + ), mark_finished AS ( + DELETE FROM unfinished_patch_depths ufd + WHERE ufd.id IN (SELECT u.patch_id FROM updated u) ) SELECT COUNT(*) FROM updated |] @@ -109,9 +105,9 @@ updateNamespaceDepths = do WITH updatable_namespaces(namespace_hash_id) AS ( -- Find all namespaces which aren't missing depth info for any of their -- dependencies. 
- SELECT n.namespace_hash_id - FROM namespaces n - LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.namespace_hash_id + SELECT n.id + FROM unfinished_namespace_depths n + LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.id -- Only recalculate ones which haven't been calculated yet. WHERE nd.depth IS NULL AND NOT EXISTS ( @@ -119,14 +115,14 @@ updateNamespaceDepths = do FROM namespace_children nc JOIN causals c ON nc.child_causal_id = c.id - LEFT JOIN namespace_depth nd ON c.namespace_hash_id = nd.namespace_hash_id - WHERE nc.parent_namespace_hash_id = n.namespace_hash_id - AND nd.depth IS NULL + LEFT JOIN causal_depth cd ON nc.child_causal_id = cd.causal_id + WHERE nc.parent_namespace_hash_id = n.id + AND cd.depth IS NULL UNION SELECT FROM namespace_patches np LEFT JOIN patch_depth pd ON np.patch_id = pd.patch_id - WHERE np.namespace_hash_id = n.namespace_hash_id + WHERE np.namespace_hash_id = n.id AND pd.depth IS NULL UNION SELECT @@ -135,7 +131,7 @@ updateNamespaceDepths = do ON nt.term_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.namespace_hash_id + WHERE nt.namespace_hash_id = n.id AND cd.depth IS NULL UNION SELECT @@ -146,7 +142,7 @@ updateNamespaceDepths = do ON ntm.metadata_term_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.namespace_hash_id + WHERE nt.namespace_hash_id = n.id AND cd.depth IS NULL UNION SELECT @@ -157,7 +153,7 @@ updateNamespaceDepths = do ON c.type_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.namespace_hash_id + WHERE nt.namespace_hash_id = n.id AND cd.depth IS NULL UNION SELECT @@ -166,7 +162,7 @@ updateNamespaceDepths = do ON nt.type_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.namespace_hash_id + WHERE nt.namespace_hash_id = n.id AND 
cd.depth IS NULL UNION SELECT @@ -177,12 +173,15 @@ updateNamespaceDepths = do ON ntm.metadata_term_id = t.id LEFT JOIN component_depth cd ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.namespace_hash_id + WHERE nt.namespace_hash_id = n.id AND cd.depth IS NULL ) - ), updated AS ( - SELECT update_namespace_depth(un.namespace_hash_id) + ), updated(namespace_hash_id, x) AS ( + SELECT un.namespace_hash_id, update_namespace_depth(un.namespace_hash_id) FROM updatable_namespaces un + ), mark_finished AS ( + DELETE FROM unfinished_namespace_depths ufd + WHERE ufd.id IN (SELECT u.namespace_hash_id FROM updated u) ) SELECT COUNT(*) FROM updated |] @@ -194,7 +193,8 @@ updateCausalDepths = do -- Find all causals which aren't missing depth info for any of their -- dependencies. SELECT c.id - FROM causals c + FROM unfinished_causal_depths ucd + JOIN causals c ON ucd.id = c.id LEFT JOIN causal_depth cd ON cd.causal_id = c.id -- Only recalculate ones which haven't been calculated yet. 
WHERE cd.depth IS NULL @@ -202,7 +202,6 @@ updateCausalDepths = do SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = c.namespace_hash_id - AND nd.depth IS NULL ) AND NOT EXISTS ( SELECT FROM causal_ancestors ca @@ -211,24 +210,36 @@ updateCausalDepths = do WHERE ca.causal_id = c.id AND cd.depth IS NULL ) - ), updated AS ( - SELECT update_causal_depth(c.id) + ), updated(causal_id) AS ( + SELECT c.id, update_causal_depth(c.id) FROM updatable_causals c + ), mark_finished AS ( + DELETE FROM unfinished_causal_depths ucd + WHERE ucd.id IN (SELECT u.causal_id FROM updated u) ) SELECT COUNT(*) FROM updated |] -- Sanity checks -- --- Should return no rows: --- --- SELECT ch.id +-- Each of the following counts should be 0 once the migration has finished: + +-- SELECT count(*) from causals +-- WHERE NOT EXISTS ( +-- SELECT FROM causal_depth cd +-- WHERE cd.causal_id = causals.id +-- ); + +-- SELECT count(ch.id) -- FROM component_hashes ch -- LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id --- WHERE cd.depth IS NULL --- --- Should match the number of components: --- +-- WHERE cd.depth IS NULL; + -- SELECT COUNT(*) --- FROM component_hashes ch --- LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id --- WHERE cd.depth IS NULL +-- FROM branch_hashes bh +-- LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = bh.id +-- WHERE nd.depth IS NULL; + +-- SELECT COUNT(*) +-- FROM patches p +-- LEFT JOIN patch_depth pd ON pd.patch_id = p.id +-- WHERE pd.depth IS NULL; diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs index 68e29151..8825ff55 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs @@ -23,7 +23,10 @@ worker scope = do computePatchDepths authZReceipt -- Then do the namespaces and causals together computeNamespaceAndCausalDepths authZReceipt - liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 + -- Once we know we're done, just wait until a
human comes and + -- deploys a new version without the migration. + forever do + liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 -- | Components must be handled separately since they're sandboxed to specific users. -- NOTE: this process doesn't insert the row into serialized_components, you'll need to do that manually after the automated migration is finished. From 3ff65468c5b7b425df52781f511029f1f431df8d Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 26 Feb 2025 12:42:40 -0800 Subject: [PATCH 07/11] Disable saving of depth on new entities until migration is done --- src/Share/Postgres/Causal/Queries.hs | 9 +++++++-- src/Share/Postgres/Definitions/Queries.hs | 20 ++++++++++++-------- src/Share/Postgres/Patches/Queries.hs | 4 +++- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/Share/Postgres/Causal/Queries.hs b/src/Share/Postgres/Causal/Queries.hs index 6157ebbd..385165a2 100644 --- a/src/Share/Postgres/Causal/Queries.hs +++ b/src/Share/Postgres/Causal/Queries.hs @@ -642,7 +642,10 @@ savePgNamespace maySerialized mayBh b@(BranchFull.Branch {terms, types, patches, |] -- Note: this must be run AFTER inserting the namespace and all its children. 
execute_ [sql| SELECT save_namespace(#{bhId}) |] - execute_ [sql| SELECT update_namespace_depth(#{bhId}) |] + +-- Disabled while migration is running so we don't accidentally get incorrect depths +-- from missing depth dependencies +-- execute_ [sql| SELECT update_namespace_depth(#{bhId}) |] saveSerializedNamespace :: (QueryM m) => BranchHashId -> CBORBytes TempEntity -> m () saveSerializedNamespace bhId (CBORBytes bytes) = do @@ -786,7 +789,9 @@ saveCausal maySerializedCausal mayCh bhId ancestorIds = do SELECT #{cId}, a.ancestor_id FROM ancestors a |] - execute_ [sql| SELECT update_causal_depth(#{cId}) |] + -- Disabled while migration is running so we don't accidentally get incorrect depths + -- from missing depth dependencies + -- execute_ [sql| SELECT update_causal_depth(#{cId}) |] pure cId saveSerializedCausal :: (QueryM m) => CausalId -> CBORBytes TempEntity -> m () diff --git a/src/Share/Postgres/Definitions/Queries.hs b/src/Share/Postgres/Definitions/Queries.hs index ef3666c6..6309469f 100644 --- a/src/Share/Postgres/Definitions/Queries.hs +++ b/src/Share/Postgres/Definitions/Queries.hs @@ -863,10 +863,12 @@ saveEncodedTermComponent componentHash maySerialized elements = do SELECT defn_mappings.term_id, defn_mappings.local_index, defn_mappings.component_hash_id FROM defn_mappings |] - execute_ - [sql| - SELECT update_component_depth(#{componentHashId}) - |] + -- Disabled while migration is running so we don't accidentally get incorrect depths + -- from missing depth dependencies + -- execute_ + -- [sql| + -- SELECT update_component_depth(#{componentHashId}) + -- |] pure termIds saveTypeComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e () @@ -1017,10 +1019,12 @@ saveTypeComponent componentHash maySerialized elements = do FROM defn_mappings |] saveConstructors (zip (toList typeIds) elements) - execute_ - [sql| - SELECT update_component_depth(#{componentHashId}) - |] + -- Disabled while migration is running 
so we don't accidentally get incorrect depths + -- from missing depth dependencies + -- execute_ + -- [sql| + -- SELECT update_component_depth(#{componentHashId}) + -- |] pure typeIds -- | Efficiently resolve all pg Ids across selected Local Ids. diff --git a/src/Share/Postgres/Patches/Queries.hs b/src/Share/Postgres/Patches/Queries.hs index 8c454eae..55a989e4 100644 --- a/src/Share/Postgres/Patches/Queries.hs +++ b/src/Share/Postgres/Patches/Queries.hs @@ -233,7 +233,9 @@ savePatch maySerialized patchHash PatchFull.Patch {termEdits, typeEdits} = do LEFT JOIN types to_type ON to_type.component_hash_id = to_type_component_hash_id AND to_type.component_index = to_type_component_index |] - execute_ [sql| SELECT update_patch_depth(#{patchId}) |] + -- Disabled while migration is running so we don't accidentally get incorrect depths + -- from missing depth dependencies + -- execute_ [sql| SELECT update_patch_depth(#{patchId}) |] pure patchId termsTable :: [(Maybe ComponentHashId, Maybe Int64 {- from comp index -}, Maybe TextId, Maybe ComponentHashId, Maybe Int64 {- to comp index -}, Maybe TextId, Maybe PatchFullTermEdit.Typing, Bool)] constructorsTable :: [(ComponentHashId, Int64 {- from comp index -}, Int64 {- from constr index -}, Maybe ComponentHashId, Maybe Int64 {- to comp index-}, Maybe Int64 {- to constr index -}, Maybe PatchFullTermEdit.Typing, Bool)] From c76044a0dacf56ca129f5fe213a2ff8cbc2d7a13 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 26 Feb 2025 11:08:38 -0800 Subject: [PATCH 08/11] Prepare for pre-migration --- sql/2024-12-16-00-00_entity_depths.sql | 1 - .../2025-02-25_causal_depth_migration.sql | 5 ++++ .../EntityDepthMigration/Queries.hs | 28 ++++++++----------- .../EntityDepthMigration/Worker.hs | 3 ++ src/Share/Web/UCM/SyncV2/Impl.hs | 2 +- src/Share/Web/UCM/SyncV2/Queries.hs | 26 ++++++++++++----- 6 files changed, 40 insertions(+), 25 deletions(-) rename sql/{ => migration-helpers}/2025-02-25_causal_depth_migration.sql (87%) diff --git 
a/sql/2024-12-16-00-00_entity_depths.sql b/sql/2024-12-16-00-00_entity_depths.sql index a7a539cb..fb56de97 100644 --- a/sql/2024-12-16-00-00_entity_depths.sql +++ b/sql/2024-12-16-00-00_entity_depths.sql @@ -70,7 +70,6 @@ CREATE OR REPLACE FUNCTION update_component_depth(the_component_hash_id integer) DECLARE max_referenced_component_depth INTEGER; BEGIN - RAISE NOTICE 'Updating component depth for %', the_component_hash_id; -- If there's already a depth entry for this component, we're done. IF EXISTS (SELECT FROM component_depth cd WHERE cd.component_hash_id = the_component_hash_id) THEN RETURN; diff --git a/sql/2025-02-25_causal_depth_migration.sql b/sql/migration-helpers/2025-02-25_causal_depth_migration.sql similarity index 87% rename from sql/2025-02-25_causal_depth_migration.sql rename to sql/migration-helpers/2025-02-25_causal_depth_migration.sql index 0d9bae52..639c98fa 100644 --- a/sql/2025-02-25_causal_depth_migration.sql +++ b/sql/migration-helpers/2025-02-25_causal_depth_migration.sql @@ -43,3 +43,8 @@ INSERT INTO unfinished_component_depths (id) SELECT FROM component_depth cd WHERE cd.component_hash_id = ch.id ); +-- Afterwards: only once the entity depth migration has finished, drop the helper tables. +DROP TABLE unfinished_causal_depths; +DROP TABLE unfinished_namespace_depths; +DROP TABLE unfinished_patch_depths; +DROP TABLE unfinished_component_depths; diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs index f0c7d1b6..3aaa9e52 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs @@ -18,10 +18,7 @@ updateComponentDepths = do -- dependencies. SELECT ch.id FROM unfinished_component_depths ch - LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id - -- Only recalculate ones which haven't been calculated yet. 
- WHERE cd.depth IS NULL - AND NOT EXISTS ( + WHERE NOT EXISTS ( SELECT FROM terms t JOIN term_local_component_references cr_sub ON cr_sub.term_id = t.id @@ -38,6 +35,8 @@ updateComponentDepths = do t.component_hash_id = ch.id AND cr_sub.type_id = t.id AND cd.depth IS NULL ) + LIMIT 1 + FOR UPDATE SKIP LOCKED ), updated(component_hash_id, x) AS ( SELECT ch.component_hash_id, update_component_depth(ch.component_hash_id) FROM updatable_components ch @@ -56,10 +55,7 @@ updatePatchDepths = do -- dependencies. SELECT p.id FROM unfinished_patch_depths p - LEFT JOIN patch_depth pd ON pd.patch_id = p.id - -- Only recalculate ones which haven't been calculated yet. - WHERE pd.patch_id IS NULL - AND NOT EXISTS ( + WHERE NOT EXISTS ( SELECT FROM patch_term_mappings ptm JOIN terms t @@ -89,6 +85,8 @@ updatePatchDepths = do WHERE ptm.patch_id = p.id AND cd.depth IS NULL ) + LIMIT 1 + FOR UPDATE SKIP LOCKED ), updated(patch_id, x) AS ( SELECT up.patch_id, update_patch_depth(up.patch_id) FROM updatable_patches up @@ -107,10 +105,7 @@ updateNamespaceDepths = do -- dependencies. SELECT n.id FROM unfinished_namespace_depths n - LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.id - -- Only recalculate ones which haven't been calculated yet. - WHERE nd.depth IS NULL - AND NOT EXISTS ( + WHERE NOT EXISTS ( SELECT FROM namespace_children nc JOIN causals c @@ -176,6 +171,8 @@ updateNamespaceDepths = do WHERE nt.namespace_hash_id = n.id AND cd.depth IS NULL ) + LIMIT 1 + FOR UPDATE SKIP LOCKED ), updated(namespace_hash_id, x) AS ( SELECT un.namespace_hash_id, update_namespace_depth(un.namespace_hash_id) FROM updatable_namespaces un @@ -195,10 +192,7 @@ updateCausalDepths = do SELECT c.id FROM unfinished_causal_depths ucd JOIN causals c ON ucd.id = c.id - LEFT JOIN causal_depth cd ON cd.causal_id = c.id - -- Only recalculate ones which haven't been calculated yet. 
- WHERE cd.depth IS NULL - AND EXISTS ( + WHERE EXISTS ( SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = c.namespace_hash_id @@ -210,6 +204,8 @@ updateCausalDepths = do WHERE ca.causal_id = c.id AND cd.depth IS NULL ) + LIMIT 1 + FOR UPDATE SKIP LOCKED ), updated(causal_id) AS ( SELECT c.id, update_causal_depth(c.id) FROM updatable_causals c diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs index 8825ff55..0a13770c 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs @@ -32,6 +32,7 @@ worker scope = do -- NOTE: this process doesn't insert the row into serialized_components, you'll need to do that manually after the automated migration is finished. computeComponentDepths :: AuthZ.AuthZReceipt -> Background () computeComponentDepths !_authZReceipt = do + Logging.logInfoText $ "Computing component depth for batch" PG.runTransaction Q.updateComponentDepths >>= \case 0 -> do Logging.logInfoText $ "Done processing component depth" @@ -43,6 +44,7 @@ computeComponentDepths !_authZReceipt = do computePatchDepths :: AuthZ.AuthZReceipt -> Background () computePatchDepths !_authZReceipt = do + Logging.logInfoText $ "Computing patch depth for batch" PG.runTransaction Q.updatePatchDepths >>= \case 0 -> do Logging.logInfoText $ "Done processing patch depth" @@ -54,6 +56,7 @@ computePatchDepths !_authZReceipt = do computeNamespaceAndCausalDepths :: AuthZ.AuthZReceipt -> Background () computeNamespaceAndCausalDepths !authZReceipt = do + Logging.logInfoText $ "Computing namespace and causal depth for batch" PG.runTransaction Q.updateNamespaceDepths >>= \case namespaceN -> do PG.runTransaction Q.updateCausalDepths >>= \case diff --git a/src/Share/Web/UCM/SyncV2/Impl.hs b/src/Share/Web/UCM/SyncV2/Impl.hs index f1a025e7..71ddaccf 100644 --- a/src/Share/Web/UCM/SyncV2/Impl.hs +++ b/src/Share/Web/UCM/SyncV2/Impl.hs @@ -48,7 
+48,7 @@ batchSize :: Int32 batchSize = 1000 streamSettings :: Hash32 -> Maybe SyncV2.BranchRef -> StreamInitInfo -streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.DependenciesFirst, numEntities = Nothing, rootCausalHash, rootBranchRef} +streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.Unsorted, numEntities = Nothing, rootCausalHash, rootBranchRef} server :: Maybe UserId -> SyncV2.Routes WebAppServer server mayUserId = diff --git a/src/Share/Web/UCM/SyncV2/Queries.hs b/src/Share/Web/UCM/SyncV2/Queries.hs index aa92a777..a9b0271f 100644 --- a/src/Share/Web/UCM/SyncV2/Queries.hs +++ b/src/Share/Web/UCM/SyncV2/Queries.hs @@ -50,7 +50,7 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do JOIN component_hashes ch ON dh.hash = ch.base32 |] cursor <- - PGCursor.newRowCursor + PGCursor.newRowCursor @(CBORBytes TempEntity, Hash32, Maybe Int32) "serialized_entities" [sql| WITH RECURSIVE transitive_causals(causal_id, causal_hash, causal_namespace_hash_id) AS ( @@ -178,32 +178,44 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do JOIN serialized_components sc ON sc.user_id = #{ownerUserId} AND tc.component_hash_id = sc.component_hash_id JOIN bytes ON sc.bytes_id = bytes.id JOIN component_hashes ch ON tc.component_hash_id = ch.id - JOIN component_depth cd ON ch.id = cd.component_hash_id + LEFT JOIN component_depth cd ON ch.id = cd.component_hash_id ) UNION ALL (SELECT bytes.bytes, ap.patch_hash, pd.depth FROM all_patches ap JOIN serialized_patches sp ON ap.patch_id = sp.patch_id JOIN bytes ON sp.bytes_id = bytes.id - JOIN patch_depth pd ON ap.patch_id = pd.patch_id + LEFT JOIN patch_depth pd ON ap.patch_id = pd.patch_id ) UNION ALL (SELECT bytes.bytes, an.namespace_hash, nd.depth FROM all_namespaces an JOIN serialized_namespaces sn ON an.namespace_hash_id = sn.namespace_hash_id JOIN bytes ON sn.bytes_id = bytes.id 
- JOIN namespace_depth nd ON an.namespace_hash_id = nd.namespace_hash_id + LEFT JOIN namespace_depth nd ON an.namespace_hash_id = nd.namespace_hash_id ) UNION ALL (SELECT bytes.bytes, tc.causal_hash, cd.depth FROM transitive_causals tc JOIN serialized_causals sc ON tc.causal_id = sc.causal_id JOIN bytes ON sc.bytes_id = bytes.id - JOIN causal_depth cd ON tc.causal_id = cd.causal_id + LEFT JOIN causal_depth cd ON tc.causal_id = cd.causal_id ) - ORDER BY depth ASC + -- Re-add this once the migration is done. + -- Put them in dependency order, nulls come first because we want to bail and + -- report an error + -- if we somehow are missing a depth. + -- ORDER BY depth ASC NULLS FIRST |] - pure cursor + -- pure + -- ( cursor <&> \(bytes, hash, depth) -> case depth of + -- -- This should never happen, but is a sanity check in case we're missing a depth. + -- -- Better than silently omitting a required result. + -- Nothing -> error $ "allSerializedDependenciesOfCausalCursor: Missing depth for entity: " <> show hash + -- Just _ -> (bytes, hash) + -- ) + pure + (cursor <&> \(bytes, hash, _depth) -> (bytes, hash)) spineAndLibDependenciesOfCausalCursor :: CausalId -> CodebaseM e (PGCursor (Hash32, IsCausalSpine, IsLibRoot)) spineAndLibDependenciesOfCausalCursor cid = do From 01b117ec035534714cf1d3c4ca959feea3e9a15d Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 6 Mar 2025 12:12:56 -0800 Subject: [PATCH 09/11] MIGRATE HERE Use a working-set for namespaces to speed it up. 
--- .../2025-02-25_causal_depth_migration.sql | 5 ++ .../EntityDepthMigration/Queries.hs | 43 +++++++++++------ .../EntityDepthMigration/Worker.hs | 47 ++++++++++++++----- 3 files changed, 68 insertions(+), 27 deletions(-) diff --git a/sql/migration-helpers/2025-02-25_causal_depth_migration.sql b/sql/migration-helpers/2025-02-25_causal_depth_migration.sql index 639c98fa..847a6b6c 100644 --- a/sql/migration-helpers/2025-02-25_causal_depth_migration.sql +++ b/sql/migration-helpers/2025-02-25_causal_depth_migration.sql @@ -15,6 +15,10 @@ CREATE TABLE unfinished_component_depths ( id INTEGER PRIMARY KEY REFERENCES component_hashes (id) ON DELETE CASCADE ); +CREATE TABLE unfinished_namespaces_working_set ( + id INTEGER PRIMARY KEY REFERENCES branch_hashes (id) ON DELETE CASCADE +); + INSERT INTO unfinished_causal_depths (id) SELECT c.id FROM causals c @@ -48,3 +52,4 @@ DROP TABLE unfinished_causal_depths; DROP TABLE unfinished_namespace_depths; DROP TABLE unfinished_patch_depths; DROP TABLE unfinished_component_depths; +DROP TABLE unfinished_namespaces_working_set; diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs index 3aaa9e52..c8f6f3fb 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs @@ -2,6 +2,7 @@ module Share.BackgroundJobs.EntityDepthMigration.Queries ( updateComponentDepths, updatePatchDepths, updateNamespaceDepths, + updateNamespaceWorkingSet, updateCausalDepths, ) where @@ -35,7 +36,7 @@ updateComponentDepths = do t.component_hash_id = ch.id AND cr_sub.type_id = t.id AND cd.depth IS NULL ) - LIMIT 1 + LIMIT 1000 FOR UPDATE SKIP LOCKED ), updated(component_hash_id, x) AS ( SELECT ch.component_hash_id, update_component_depth(ch.component_hash_id) @@ -85,7 +86,7 @@ updatePatchDepths = do WHERE ptm.patch_id = p.id AND cd.depth IS NULL ) - LIMIT 1 + LIMIT 1000 FOR UPDATE SKIP LOCKED ), updated(patch_id, 
x) AS ( SELECT up.patch_id, update_patch_depth(up.patch_id) @@ -103,6 +104,27 @@ updateNamespaceDepths = do WITH updatable_namespaces(namespace_hash_id) AS ( -- Find all namespaces which aren't missing depth info for any of their -- dependencies. + SELECT n.id + FROM unfinished_namespaces_working_set n + LIMIT 1000 + FOR UPDATE SKIP LOCKED + ), updated(namespace_hash_id, x) AS ( + SELECT un.namespace_hash_id, update_namespace_depth(un.namespace_hash_id) + FROM updatable_namespaces un + ), mark_finished_1 AS ( + DELETE FROM unfinished_namespace_depths ufd + WHERE ufd.id IN (SELECT u.namespace_hash_id FROM updated u) + ), mark_finished_2 AS ( + DELETE FROM unfinished_namespaces_working_set ws + WHERE ws.id IN (SELECT u.namespace_hash_id FROM updated u) + ) SELECT COUNT(*) FROM updated + |] + +updateNamespaceWorkingSet :: Transaction e Int64 +updateNamespaceWorkingSet = do + execute_ + [sql| + INSERT INTO unfinished_namespaces_working_set (id) SELECT n.id FROM unfinished_namespace_depths n WHERE NOT EXISTS ( @@ -171,16 +193,9 @@ updateNamespaceDepths = do WHERE nt.namespace_hash_id = n.id AND cd.depth IS NULL ) - LIMIT 1 - FOR UPDATE SKIP LOCKED - ), updated(namespace_hash_id, x) AS ( - SELECT un.namespace_hash_id, update_namespace_depth(un.namespace_hash_id) - FROM updatable_namespaces un - ), mark_finished AS ( - DELETE FROM unfinished_namespace_depths ufd - WHERE ufd.id IN (SELECT u.namespace_hash_id FROM updated u) - ) SELECT COUNT(*) FROM updated + ON CONFLICT DO NOTHING |] + queryExpect1Col [sql| SELECT COUNT(*) FROM unfinished_namespaces_working_set |] updateCausalDepths :: Transaction e Int64 updateCausalDepths = do @@ -204,7 +219,7 @@ updateCausalDepths = do WHERE ca.causal_id = c.id AND cd.depth IS NULL ) - LIMIT 1 + LIMIT 1000 FOR UPDATE SKIP LOCKED ), updated(causal_id) AS ( SELECT c.id, update_causal_depth(c.id) @@ -231,8 +246,8 @@ updateCausalDepths = do -- WHERE cd.depth IS NULL; -- SELECT COUNT(*) --- FROM branch_hashes bh --- LEFT JOIN namespace_depth 
nd ON nd.namespace_hash_id = bh.id +-- FROM namespaces n +-- LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.namespace_hash_id -- WHERE nd.depth IS NULL; -- SELECT COUNT(*) diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs index 0a13770c..20515ef6 100644 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs +++ b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs @@ -55,16 +55,37 @@ computePatchDepths !_authZReceipt = do computePatchDepths _authZReceipt computeNamespaceAndCausalDepths :: AuthZ.AuthZReceipt -> Background () -computeNamespaceAndCausalDepths !authZReceipt = do - Logging.logInfoText $ "Computing namespace and causal depth for batch" - PG.runTransaction Q.updateNamespaceDepths >>= \case - namespaceN -> do - PG.runTransaction Q.updateCausalDepths >>= \case - causalN -> do - case (namespaceN, causalN) of - (0, 0) -> do - Logging.logInfoText $ "Done processing namespace and causal depth" - pure () - (namespaceN, causalN) -> do - Logging.logInfoText $ "Computed Depth for " <> tShow namespaceN <> " namespaces and " <> tShow causalN <> " causals" - computeNamespaceAndCausalDepths authZReceipt +computeNamespaceAndCausalDepths !_authZReceipt = do + doNamespaces + where + doNamespaces :: Background () + doNamespaces = do + Logging.logInfoText $ "Computing namespace depth for batch" + PG.runTransaction Q.updateNamespaceDepths >>= \case + 0 -> do + Logging.logInfoText $ "Recomputing namespace working set." + PG.runTransaction Q.updateNamespaceWorkingSet >>= \case + 0 -> do + -- No more namespaces to compute, we may be done if there are no more causals + doCausals True + n -> do + Logging.logInfoText $ "Added " <> tShow n <> " namespaces to the working set" + -- Keep doing namespaces as long as we can. 
+ doNamespaces + n -> do + Logging.logInfoText $ "Computed Depth for " <> tShow n <> " namespaces" + doNamespaces + doCausals :: Bool -> Background () + doCausals finish = do + causalN <- PG.runTransaction Q.updateCausalDepths + case (causalN, finish) of + (0, True) -> do + Logging.logInfoText $ "Done processing namespace and causal depth" + pure () + (0, False) -> do + -- We're done causals for now, might be finished, but there may be more namespaces. + doNamespaces + (n, _) -> do + Logging.logInfoText $ "Computed Depth for " <> tShow n <> " causals" + -- Keep doing causals. + doCausals False From a3bb67f4109b15002c31d5ded79a4e4132af05fc Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 26 Feb 2025 12:43:39 -0800 Subject: [PATCH 10/11] Back out "Disable saving of depth on new entities until migration is done" This backs out commit 3ff65468c5b7b425df52781f511029f1f431df8d. --- src/Share/Postgres/Causal/Queries.hs | 9 ++------- src/Share/Postgres/Definitions/Queries.hs | 20 ++++++++------------ src/Share/Postgres/Patches/Queries.hs | 4 +--- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/Share/Postgres/Causal/Queries.hs b/src/Share/Postgres/Causal/Queries.hs index 385165a2..6157ebbd 100644 --- a/src/Share/Postgres/Causal/Queries.hs +++ b/src/Share/Postgres/Causal/Queries.hs @@ -642,10 +642,7 @@ savePgNamespace maySerialized mayBh b@(BranchFull.Branch {terms, types, patches, |] -- Note: this must be run AFTER inserting the namespace and all its children. 
execute_ [sql| SELECT save_namespace(#{bhId}) |] - --- Disabled while migration is running so we don't accidentally get incorrect depths --- from missing depth dependencies --- execute_ [sql| SELECT update_namespace_depth(#{bhId}) |] + execute_ [sql| SELECT update_namespace_depth(#{bhId}) |] saveSerializedNamespace :: (QueryM m) => BranchHashId -> CBORBytes TempEntity -> m () saveSerializedNamespace bhId (CBORBytes bytes) = do @@ -789,9 +786,7 @@ saveCausal maySerializedCausal mayCh bhId ancestorIds = do SELECT #{cId}, a.ancestor_id FROM ancestors a |] - -- Disabled while migration is running so we don't accidentally get incorrect depths - -- from missing depth dependencies - -- execute_ [sql| SELECT update_causal_depth(#{cId}) |] + execute_ [sql| SELECT update_causal_depth(#{cId}) |] pure cId saveSerializedCausal :: (QueryM m) => CausalId -> CBORBytes TempEntity -> m () diff --git a/src/Share/Postgres/Definitions/Queries.hs b/src/Share/Postgres/Definitions/Queries.hs index 6309469f..ef3666c6 100644 --- a/src/Share/Postgres/Definitions/Queries.hs +++ b/src/Share/Postgres/Definitions/Queries.hs @@ -863,12 +863,10 @@ saveEncodedTermComponent componentHash maySerialized elements = do SELECT defn_mappings.term_id, defn_mappings.local_index, defn_mappings.component_hash_id FROM defn_mappings |] - -- Disabled while migration is running so we don't accidentally get incorrect depths - -- from missing depth dependencies - -- execute_ - -- [sql| - -- SELECT update_component_depth(#{componentHashId}) - -- |] + execute_ + [sql| + SELECT update_component_depth(#{componentHashId}) + |] pure termIds saveTypeComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e () @@ -1019,12 +1017,10 @@ saveTypeComponent componentHash maySerialized elements = do FROM defn_mappings |] saveConstructors (zip (toList typeIds) elements) - -- Disabled while migration is running so we don't accidentally get incorrect depths - -- from missing depth 
dependencies - -- execute_ - -- [sql| - -- SELECT update_component_depth(#{componentHashId}) - -- |] + execute_ + [sql| + SELECT update_component_depth(#{componentHashId}) + |] pure typeIds -- | Efficiently resolve all pg Ids across selected Local Ids. diff --git a/src/Share/Postgres/Patches/Queries.hs b/src/Share/Postgres/Patches/Queries.hs index 55a989e4..8c454eae 100644 --- a/src/Share/Postgres/Patches/Queries.hs +++ b/src/Share/Postgres/Patches/Queries.hs @@ -233,9 +233,7 @@ savePatch maySerialized patchHash PatchFull.Patch {termEdits, typeEdits} = do LEFT JOIN types to_type ON to_type.component_hash_id = to_type_component_hash_id AND to_type.component_index = to_type_component_index |] - -- Disabled while migration is running so we don't accidentally get incorrect depths - -- from missing depth dependencies - -- execute_ [sql| SELECT update_patch_depth(#{patchId}) |] + execute_ [sql| SELECT update_patch_depth(#{patchId}) |] pure patchId termsTable :: [(Maybe ComponentHashId, Maybe Int64 {- from comp index -}, Maybe TextId, Maybe ComponentHashId, Maybe Int64 {- to comp index -}, Maybe TextId, Maybe PatchFullTermEdit.Typing, Bool)] constructorsTable :: [(ComponentHashId, Int64 {- from comp index -}, Int64 {- from constr index -}, Maybe ComponentHashId, Maybe Int64 {- to comp index-}, Maybe Int64 {- to constr index -}, Maybe PatchFullTermEdit.Typing, Bool)] From d0bdfbdca4b7fb2411f1b9ff9dba6c99971a3947 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 26 Feb 2025 11:11:49 -0800 Subject: [PATCH 11/11] Post-migration deployment --- share-api.cabal | 2 - .../2025-02-25_causal_depth_migration.sql | 8 +- src/Share/BackgroundJobs.hs | 9 +- .../EntityDepthMigration/Queries.hs | 256 ------------------ .../EntityDepthMigration/Worker.hs | 91 ------- src/Share/Web/UCM/SyncV2/Impl.hs | 2 +- src/Share/Web/UCM/SyncV2/Queries.hs | 20 +- 7 files changed, 17 insertions(+), 371 deletions(-) delete mode 100644 src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs delete 
mode 100644 src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs diff --git a/share-api.cabal b/share-api.cabal index fce475a4..6356c4f4 100644 --- a/share-api.cabal +++ b/share-api.cabal @@ -30,8 +30,6 @@ library Share.BackgroundJobs Share.BackgroundJobs.Diffs.ContributionDiffs Share.BackgroundJobs.Diffs.Queries - Share.BackgroundJobs.EntityDepthMigration.Queries - Share.BackgroundJobs.EntityDepthMigration.Worker Share.BackgroundJobs.Errors Share.BackgroundJobs.Monad Share.BackgroundJobs.Search.DefinitionSync diff --git a/sql/migration-helpers/2025-02-25_causal_depth_migration.sql b/sql/migration-helpers/2025-02-25_causal_depth_migration.sql index 847a6b6c..810e858e 100644 --- a/sql/migration-helpers/2025-02-25_causal_depth_migration.sql +++ b/sql/migration-helpers/2025-02-25_causal_depth_migration.sql @@ -24,28 +24,28 @@ INSERT INTO unfinished_causal_depths (id) FROM causals c WHERE NOT EXISTS ( SELECT FROM causal_depth cd WHERE cd.causal_id = c.id - ); + ) ON CONFLICT DO NOTHING; INSERT INTO unfinished_namespace_depths (id) SELECT n.namespace_hash_id FROM namespaces n WHERE NOT EXISTS ( SELECT FROM namespace_depth nd WHERE nd.namespace_hash_id = n.namespace_hash_id - ); + ) ON CONFLICT DO NOTHING; INSERT INTO unfinished_patch_depths (id) SELECT p.id FROM patches p WHERE NOT EXISTS ( SELECT FROM patch_depth pd WHERE pd.patch_id = p.id - ); + ) ON CONFLICT DO NOTHING; INSERT INTO unfinished_component_depths (id) SELECT ch.id FROM component_hashes ch WHERE NOT EXISTS ( SELECT FROM component_depth cd WHERE cd.component_hash_id = ch.id - ); + ) ON CONFLICT DO NOTHING; -- Afterwards DROP TABLE unfinished_causal_depths; diff --git a/src/Share/BackgroundJobs.hs b/src/Share/BackgroundJobs.hs index 1bfd03bb..dec54bb0 100644 --- a/src/Share/BackgroundJobs.hs +++ b/src/Share/BackgroundJobs.hs @@ -1,7 +1,6 @@ module Share.BackgroundJobs (startWorkers) where import Ki.Unlifted qualified as Ki -import Share.BackgroundJobs.EntityDepthMigration.Worker qualified as 
EntityDepthMigration import Share.BackgroundJobs.Monad (Background) import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch @@ -9,7 +8,7 @@ import Share.BackgroundJobs.Search.DefinitionSync qualified as DefnSearch startWorkers :: Ki.Scope -> Background () startWorkers scope = do DefnSearch.worker scope - -- Temporary disable background diff jobs until the new diffing logic is done. - -- ContributionDiffs.worker scope - -- SerializedEntitiesMigration.worker scope - EntityDepthMigration.worker scope + +-- Temporary disable background diff jobs until the new diffing logic is done. +-- ContributionDiffs.worker scope +-- SerializedEntitiesMigration.worker scope diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs deleted file mode 100644 index c8f6f3fb..00000000 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Queries.hs +++ /dev/null @@ -1,256 +0,0 @@ -module Share.BackgroundJobs.EntityDepthMigration.Queries - ( updateComponentDepths, - updatePatchDepths, - updateNamespaceDepths, - updateNamespaceWorkingSet, - updateCausalDepths, - ) -where - -import Data.Int (Int64) -import Share.Postgres - -updateComponentDepths :: Transaction e (Int64) -updateComponentDepths = do - queryExpect1Col - [sql| - WITH updatable_components(component_hash_id) AS ( - -- Find all component hashes which aren't missing depth info for any of their - -- dependencies. 
- SELECT ch.id - FROM unfinished_component_depths ch - WHERE NOT EXISTS ( - SELECT - FROM terms t - JOIN term_local_component_references cr_sub ON cr_sub.term_id = t.id - LEFT JOIN component_depth cd ON cd.component_hash_id = cr_sub.component_hash_id - WHERE - t.component_hash_id = ch.id - AND cr_sub.term_id = t.id AND cd.depth IS NULL - UNION - SELECT - FROM types t - JOIN type_local_component_references cr_sub ON cr_sub.type_id = t.id - LEFT JOIN component_depth cd ON cd.component_hash_id = cr_sub.component_hash_id - WHERE - t.component_hash_id = ch.id - AND cr_sub.type_id = t.id AND cd.depth IS NULL - ) - LIMIT 1000 - FOR UPDATE SKIP LOCKED - ), updated(component_hash_id, x) AS ( - SELECT ch.component_hash_id, update_component_depth(ch.component_hash_id) - FROM updatable_components ch - ), mark_finished AS ( - DELETE FROM unfinished_component_depths ufd - WHERE ufd.id IN (SELECT u.component_hash_id FROM updated u) - ) SELECT COUNT(*) FROM updated - |] - -updatePatchDepths :: Transaction e Int64 -updatePatchDepths = do - queryExpect1Col - [sql| - WITH updatable_patches(patch_id) AS ( - -- Find all patches which aren't missing depth info for any of their - -- dependencies. 
- SELECT p.id - FROM unfinished_patch_depths p - WHERE NOT EXISTS ( - SELECT - FROM patch_term_mappings ptm - JOIN terms t - ON ptm.to_term_id = t.id - LEFT JOIN component_depth cd - ON cd.component_hash_id = t.component_hash_id - WHERE ptm.patch_id = p.id - AND cd.depth IS NULL - UNION - SELECT - FROM patch_constructor_mappings pcm - JOIN constructors c - ON pcm.to_constructor_id = c.id - JOIN types t - ON c.type_id = t.id - LEFT JOIN component_depth cd - ON cd.component_hash_id = t.component_hash_id - WHERE pcm.patch_id = p.id - AND cd.depth IS NULL - UNION - SELECT - FROM patch_type_mappings ptm - JOIN types t - ON ptm.to_type_id = t.id - LEFT JOIN component_depth cd - ON cd.component_hash_id = t.component_hash_id - WHERE ptm.patch_id = p.id - AND cd.depth IS NULL - ) - LIMIT 1000 - FOR UPDATE SKIP LOCKED - ), updated(patch_id, x) AS ( - SELECT up.patch_id, update_patch_depth(up.patch_id) - FROM updatable_patches up - ), mark_finished AS ( - DELETE FROM unfinished_patch_depths ufd - WHERE ufd.id IN (SELECT u.patch_id FROM updated u) - ) SELECT COUNT(*) FROM updated - |] - -updateNamespaceDepths :: Transaction e Int64 -updateNamespaceDepths = do - queryExpect1Col - [sql| - WITH updatable_namespaces(namespace_hash_id) AS ( - -- Find all namespaces which aren't missing depth info for any of their - -- dependencies. 
- SELECT n.id - FROM unfinished_namespaces_working_set n - LIMIT 1000 - FOR UPDATE SKIP LOCKED - ), updated(namespace_hash_id, x) AS ( - SELECT un.namespace_hash_id, update_namespace_depth(un.namespace_hash_id) - FROM updatable_namespaces un - ), mark_finished_1 AS ( - DELETE FROM unfinished_namespace_depths ufd - WHERE ufd.id IN (SELECT u.namespace_hash_id FROM updated u) - ), mark_finished_2 AS ( - DELETE FROM unfinished_namespaces_working_set ws - WHERE ws.id IN (SELECT u.namespace_hash_id FROM updated u) - ) SELECT COUNT(*) FROM updated - |] - -updateNamespaceWorkingSet :: Transaction e Int64 -updateNamespaceWorkingSet = do - execute_ - [sql| - INSERT INTO unfinished_namespaces_working_set (id) - SELECT n.id - FROM unfinished_namespace_depths n - WHERE NOT EXISTS ( - SELECT - FROM namespace_children nc - JOIN causals c - ON nc.child_causal_id = c.id - LEFT JOIN causal_depth cd ON nc.child_causal_id = cd.causal_id - WHERE nc.parent_namespace_hash_id = n.id - AND cd.depth IS NULL - UNION - SELECT - FROM namespace_patches np - LEFT JOIN patch_depth pd ON np.patch_id = pd.patch_id - WHERE np.namespace_hash_id = n.id - AND pd.depth IS NULL - UNION - SELECT - FROM namespace_terms nt - JOIN terms t - ON nt.term_id = t.id - LEFT JOIN component_depth cd - ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id - AND cd.depth IS NULL - UNION - SELECT - FROM namespace_terms nt - JOIN namespace_term_metadata ntm - ON ntm.named_term = nt.id - JOIN terms t - ON ntm.metadata_term_id = t.id - LEFT JOIN component_depth cd - ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id - AND cd.depth IS NULL - UNION - SELECT - FROM namespace_terms nt - JOIN constructors c - ON c.id = nt.constructor_id - JOIN types t - ON c.type_id = t.id - LEFT JOIN component_depth cd - ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id - AND cd.depth IS NULL - UNION - SELECT - FROM namespace_types nt - JOIN types t - 
ON nt.type_id = t.id - LEFT JOIN component_depth cd - ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id - AND cd.depth IS NULL - UNION - SELECT - FROM namespace_types nt - JOIN namespace_type_metadata ntm - ON ntm.named_type = nt.id - JOIN terms t - ON ntm.metadata_term_id = t.id - LEFT JOIN component_depth cd - ON t.component_hash_id = cd.component_hash_id - WHERE nt.namespace_hash_id = n.id - AND cd.depth IS NULL - ) - ON CONFLICT DO NOTHING - |] - queryExpect1Col [sql| SELECT COUNT(*) FROM unfinished_namespaces_working_set |] - -updateCausalDepths :: Transaction e Int64 -updateCausalDepths = do - queryExpect1Col - [sql| - WITH updatable_causals AS ( - -- Find all causals which aren't missing depth info for any of their - -- dependencies. - SELECT c.id - FROM unfinished_causal_depths ucd - JOIN causals c ON ucd.id = c.id - WHERE EXISTS ( - SELECT - FROM namespace_depth nd - WHERE nd.namespace_hash_id = c.namespace_hash_id - ) AND NOT EXISTS ( - SELECT - FROM causal_ancestors ca - LEFT JOIN causal_depth cd - ON ca.ancestor_id = cd.causal_id - WHERE ca.causal_id = c.id - AND cd.depth IS NULL - ) - LIMIT 1000 - FOR UPDATE SKIP LOCKED - ), updated(causal_id) AS ( - SELECT c.id, update_causal_depth(c.id) - FROM updatable_causals c - ), mark_finished AS ( - DELETE FROM unfinished_causal_depths ucd - WHERE ucd.id IN (SELECT u.causal_id FROM updated u) - ) SELECT COUNT(*) FROM updated - |] - --- Sanity checks --- --- Should be 0 - --- SELECT count(*) from causals --- WHERE NOT EXISTS ( --- SELECT FROM causal_depth cd --- WHERE cd.causal_id = causals.id --- ); - --- SELECT count(ch.id) --- FROM component_hashes ch --- LEFT JOIN component_depth cd ON cd.component_hash_id = ch.id --- WHERE cd.depth IS NULL; - --- SELECT COUNT(*) --- FROM namespaces n --- LEFT JOIN namespace_depth nd ON nd.namespace_hash_id = n.namespace_hash_id --- WHERE nd.depth IS NULL; - --- SELECT COUNT(*) --- FROM patches p --- LEFT JOIN patch_depth pd ON pd.patch_id = 
p.id --- WHERE pd.depth IS NULL; diff --git a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs b/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs deleted file mode 100644 index 20515ef6..00000000 --- a/src/Share/BackgroundJobs/EntityDepthMigration/Worker.hs +++ /dev/null @@ -1,91 +0,0 @@ -module Share.BackgroundJobs.EntityDepthMigration.Worker (worker) where - -import Ki.Unlifted qualified as Ki -import Share.BackgroundJobs.EntityDepthMigration.Queries qualified as Q -import Share.BackgroundJobs.Monad (Background) -import Share.BackgroundJobs.Workers (newWorker) -import Share.Postgres qualified as PG -import Share.Prelude -import Share.Utils.Logging qualified as Logging -import Share.Web.Authorization qualified as AuthZ -import UnliftIO.Concurrent qualified as UnliftIO - -pollingIntervalSeconds :: Int -pollingIntervalSeconds = 10 - -worker :: Ki.Scope -> Background () -worker scope = do - authZReceipt <- AuthZ.backgroundJobAuthZ - newWorker scope "migration:entity_depth" $ forever do - -- Do the components first, they're the bottom of the dependency tree. - computeComponentDepths authZReceipt - -- Then do the patches, they depend on components. - computePatchDepths authZReceipt - -- Then do the namespaces and causals together - computeNamespaceAndCausalDepths authZReceipt - -- Once we know we're done, just wait until a human comes and - -- deploys a new version without the migration. - forever do - liftIO $ UnliftIO.threadDelay $ pollingIntervalSeconds * 1000000 - --- | Components must be handled separately since they're sandboxed to specific users. --- NOTE: this process doesn't insert the row into serialized_components, you'll need to do that manually after the automated migration is finished. 
-computeComponentDepths :: AuthZ.AuthZReceipt -> Background () -computeComponentDepths !_authZReceipt = do - Logging.logInfoText $ "Computing component depth for batch" - PG.runTransaction Q.updateComponentDepths >>= \case - 0 -> do - Logging.logInfoText $ "Done processing component depth" - pure () - -- Recurse until there's nothing left to do. - n -> do - Logging.logInfoText $ "Computed Depth for " <> tShow n <> " components" - computeComponentDepths _authZReceipt - -computePatchDepths :: AuthZ.AuthZReceipt -> Background () -computePatchDepths !_authZReceipt = do - Logging.logInfoText $ "Computing patch depth for batch" - PG.runTransaction Q.updatePatchDepths >>= \case - 0 -> do - Logging.logInfoText $ "Done processing patch depth" - pure () - -- Recurse until there's nothing left to do. - n -> do - Logging.logInfoText $ "Computed Depth for " <> tShow n <> " patches" - computePatchDepths _authZReceipt - -computeNamespaceAndCausalDepths :: AuthZ.AuthZReceipt -> Background () -computeNamespaceAndCausalDepths !_authZReceipt = do - doNamespaces - where - doNamespaces :: Background () - doNamespaces = do - Logging.logInfoText $ "Computing namespace depth for batch" - PG.runTransaction Q.updateNamespaceDepths >>= \case - 0 -> do - Logging.logInfoText $ "Recomputing namespace working set." - PG.runTransaction Q.updateNamespaceWorkingSet >>= \case - 0 -> do - -- No more namespaces to compute, we may be done if there are no more causals - doCausals True - n -> do - Logging.logInfoText $ "Added " <> tShow n <> " namespaces to the working set" - -- Keep doing namespaces as long as we can. 
- doNamespaces - n -> do - Logging.logInfoText $ "Computed Depth for " <> tShow n <> " namespaces" - doNamespaces - doCausals :: Bool -> Background () - doCausals finish = do - causalN <- PG.runTransaction Q.updateCausalDepths - case (causalN, finish) of - (0, True) -> do - Logging.logInfoText $ "Done processing namespace and causal depth" - pure () - (0, False) -> do - -- We're done causals for now, might be finished, but there may be more namespaces. - doNamespaces - (n, _) -> do - Logging.logInfoText $ "Computed Depth for " <> tShow n <> " causals" - -- Keep doing causals. - doCausals False diff --git a/src/Share/Web/UCM/SyncV2/Impl.hs b/src/Share/Web/UCM/SyncV2/Impl.hs index 71ddaccf..f1a025e7 100644 --- a/src/Share/Web/UCM/SyncV2/Impl.hs +++ b/src/Share/Web/UCM/SyncV2/Impl.hs @@ -48,7 +48,7 @@ batchSize :: Int32 batchSize = 1000 streamSettings :: Hash32 -> Maybe SyncV2.BranchRef -> StreamInitInfo -streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.Unsorted, numEntities = Nothing, rootCausalHash, rootBranchRef} +streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.Version 1, entitySorting = SyncV2.DependenciesFirst, numEntities = Nothing, rootCausalHash, rootBranchRef} server :: Maybe UserId -> SyncV2.Routes WebAppServer server mayUserId = diff --git a/src/Share/Web/UCM/SyncV2/Queries.hs b/src/Share/Web/UCM/SyncV2/Queries.hs index a9b0271f..61d5d034 100644 --- a/src/Share/Web/UCM/SyncV2/Queries.hs +++ b/src/Share/Web/UCM/SyncV2/Queries.hs @@ -201,21 +201,17 @@ allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do JOIN bytes ON sc.bytes_id = bytes.id LEFT JOIN causal_depth cd ON tc.causal_id = cd.causal_id ) - -- Re-add this once the migration is done. -- Put them in dependency order, nulls come first because we want to bail and - -- report an error - -- if we somehow are missing a depth. 
- -- ORDER BY depth ASC NULLS FIRST + -- report an error if we are somehow missing a depth. + ORDER BY depth ASC NULLS FIRST |] - -- pure - -- ( cursor <&> \(bytes, hash, depth) -> case depth of - -- -- This should never happen, but is a sanity check in case we're missing a depth. - -- -- Better than silently omitting a required result. - -- Nothing -> error $ "allSerializedDependenciesOfCausalCursor: Missing depth for entity: " <> show hash - -- Just _ -> (bytes, hash) - -- ) pure - (cursor <&> \(bytes, hash, _depth) -> (bytes, hash)) + ( cursor <&> \(bytes, hash, depth) -> case depth of + -- This should never happen, but is a sanity check in case we're missing a depth. + -- Better than silently omitting a required result. + Nothing -> error $ "allSerializedDependenciesOfCausalCursor: Missing depth for entity: " <> show hash + Just _ -> (bytes, hash) + ) spineAndLibDependenciesOfCausalCursor :: CausalId -> CodebaseM e (PGCursor (Hash32, IsCausalSpine, IsLibRoot)) spineAndLibDependenciesOfCausalCursor cid = do