diff --git a/share-api.cabal b/share-api.cabal
index 53e2be52..778d1a37 100644
--- a/share-api.cabal
+++ b/share-api.cabal
@@ -158,6 +158,7 @@ library
       Share.Web.UCM.SyncV2.API
       Share.Web.UCM.SyncV2.Impl
       Share.Web.UCM.SyncV2.Queries
+      Share.Web.UCM.SyncV2.Types
       Unison.PrettyPrintEnvDecl.Postgres
       Unison.Server.NameSearch.Postgres
       Unison.Server.Share.Definitions
diff --git a/sql/2025-01-31_dependencies-of-causal.sql b/sql/2025-01-31_dependencies-of-causal.sql
new file mode 100644
index 00000000..a362627b
--- /dev/null
+++ b/sql/2025-01-31_dependencies-of-causal.sql
@@ -0,0 +1,136 @@
+-- Takes an array of causal ids and returns a table of ALL hashes which are transitive
+-- dependencies of those causals: component, patch, namespace and causal hashes
+-- (including the hashes of the given causals themselves).
+-- A NULL or empty array yields no rows, because UNNEST then produces nothing to start from.
+--
+-- The function only reads data, so it is declared STABLE instead of the default VOLATILE.
+CREATE OR REPLACE FUNCTION dependencies_of_causals(the_causal_ids INTEGER[]) RETURNS TABLE (hash TEXT) AS $$
+  WITH RECURSIVE all_causals(causal_id, causal_hash, causal_namespace_hash_id) AS (
+      -- Base causals
+      SELECT DISTINCT causal.id, causal.hash, causal.namespace_hash_id
+      FROM UNNEST(the_causal_ids) AS roots(causal_id)
+          JOIN causals causal ON causal.id = roots.causal_id
+      UNION
+      -- This nested CTE is required because RECURSIVE CTEs can't refer
+      -- to the recursive table more than once.
+      -- Reading the recursive table once, inside `rec`, lets both branches below reuse its rows.
+      ( WITH rec AS (
+          SELECT tc.causal_id, tc.causal_namespace_hash_id
+          FROM all_causals tc
+        )
+        SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id
+        FROM causal_ancestors ca
+            JOIN rec tc ON ca.causal_id = tc.causal_id
+            JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
+        UNION
+        SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
+        FROM rec tc
+            JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
+            JOIN causals child_causal ON nc.child_causal_id = child_causal.id
+      )
+  ), all_namespaces(namespace_hash_id, namespace_hash) AS (
+      SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash
+      FROM all_causals tc
+          JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
+  ), all_patches(patch_id, patch_hash) AS (
+      SELECT DISTINCT patch.id, patch.hash
+      FROM all_namespaces an
+          JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id
+          JOIN patches patch ON np.patch_id = patch.id
+  ),
+  -- term components to start transitively joining dependencies to
+  base_term_components(component_hash_id) AS (
+      SELECT DISTINCT term.component_hash_id
+      FROM all_namespaces an
+          JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
+          JOIN terms term ON nt.term_id = term.id
+      UNION
+      SELECT DISTINCT term.component_hash_id
+      FROM all_patches ap
+          JOIN patch_term_mappings ptm ON ap.patch_id = ptm.patch_id
+          JOIN terms term ON ptm.to_term_id = term.id
+      UNION
+      -- term metadata
+      SELECT DISTINCT term.component_hash_id
+      FROM all_namespaces an
+          JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
+          JOIN namespace_term_metadata meta ON nt.id = meta.named_term
+          JOIN terms term ON meta.metadata_term_id = term.id
+      UNION
+      -- type metadata
+      SELECT DISTINCT term.component_hash_id
+      FROM all_namespaces an
+          JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
+          JOIN namespace_type_metadata meta ON nt.id = meta.named_type
+          JOIN terms term ON meta.metadata_term_id = term.id
+  ),
+  -- type components to start transitively joining dependencies to
+  base_type_components(component_hash_id) AS (
+      SELECT DISTINCT typ.component_hash_id
+      FROM all_namespaces an
+          JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
+          JOIN types typ ON nt.type_id = typ.id
+      UNION
+      SELECT DISTINCT typ.component_hash_id
+      FROM all_namespaces an
+          JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id
+          JOIN constructors con ON nt.constructor_id = con.id
+          JOIN types typ ON con.type_id = typ.id
+      UNION
+      SELECT DISTINCT typ.component_hash_id
+      FROM all_patches ap
+          JOIN patch_type_mappings ptm ON ap.patch_id = ptm.patch_id
+          JOIN types typ ON ptm.to_type_id = typ.id
+      UNION
+      SELECT DISTINCT typ.component_hash_id
+      FROM all_patches ap
+          JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id
+          JOIN constructors con ON pcm.to_constructor_id = con.id
+          JOIN types typ ON con.type_id = typ.id
+  ),
+  -- All the dependencies we join in transitively from the known term & type components we depend on.
+  all_components(component_hash_id) AS (
+      SELECT DISTINCT btc.component_hash_id
+      FROM base_term_components btc
+      UNION
+      SELECT DISTINCT btc.component_hash_id
+      FROM base_type_components btc
+      UNION
+      ( WITH rec AS (
+          SELECT DISTINCT ac.component_hash_id
+          FROM all_components ac
+        )
+        -- recursively union in term dependencies
+        SELECT DISTINCT ref.component_hash_id
+        FROM rec atc
+            -- This joins in ALL the terms from the component, not just the one that caused the dependency on the
+            -- component
+            JOIN terms term ON atc.component_hash_id = term.component_hash_id
+            JOIN term_local_component_references ref ON term.id = ref.term_id
+        UNION
+        -- recursively union in type dependencies
+        SELECT DISTINCT ref.component_hash_id
+        FROM rec atc
+            -- This joins in ALL the types from the component, not just the one that caused the dependency on the
+            -- component
+            JOIN types typ ON atc.component_hash_id = typ.component_hash_id
+            JOIN type_local_component_references ref ON typ.id = ref.type_id
+      )
+  )
+  (SELECT ch.base32 AS hash
+    FROM all_components ac
+      JOIN component_hashes ch ON ac.component_hash_id = ch.id
+  )
+  UNION ALL
+  (SELECT ap.patch_hash AS hash
+    FROM all_patches ap
+  )
+  UNION ALL
+  (SELECT an.namespace_hash AS hash
+    FROM all_namespaces an
+  )
+  UNION ALL
+  (SELECT ac.causal_hash AS hash
+    FROM all_causals ac
+  )
+$$ LANGUAGE SQL STABLE;
diff --git a/src/Share/Web/UCM/SyncV2/Impl.hs b/src/Share/Web/UCM/SyncV2/Impl.hs
index 106eee08..87d7e2c8 100644
--- a/src/Share/Web/UCM/SyncV2/Impl.hs
+++ b/src/Share/Web/UCM/SyncV2/Impl.hs
@@ -7,11 +7,13 @@ import Codec.Serialise qualified as CBOR
 import Conduit qualified as C
 import Control.Concurrent.STM qualified as STM
 import Control.Concurrent.STM.TBMQueue qualified as STM
-import Control.Monad.Except (ExceptT (ExceptT))
+import Control.Monad.Except (ExceptT (ExceptT), withExceptT)
 import Control.Monad.Trans.Except (runExceptT)
 import Data.Binary.Builder qualified as Builder
-import Data.Vector (Vector)
+import Data.Set
qualified as Set
+import Data.Text.Encoding qualified as Text
 import Data.Vector qualified as Vector
+import Ki.Unlifted qualified as Ki
 import Servant
 import Servant.Conduit (ConduitToSourceIO (..))
 import Servant.Types.SourceT (SourceT (..))
@@ -33,14 +35,15 @@ import Share.Web.Authorization qualified as AuthZ
 import Share.Web.Errors
 import Share.Web.UCM.Sync.HashJWT qualified as HashJWT
 import Share.Web.UCM.SyncV2.Queries qualified as SSQ
+import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
 import U.Codebase.Sqlite.Orphans ()
+import Unison.Debug qualified as Debug
 import Unison.Hash32 (Hash32)
 import Unison.Share.API.Hash (HashJWTClaims (..))
 import Unison.SyncV2.API qualified as SyncV2
-import Unison.SyncV2.Types (DownloadEntitiesChunk (..), EntityChunk (..), ErrorChunk (..), StreamInitInfo (..))
+import Unison.SyncV2.Types (CausalDependenciesChunk (..), DependencyType (..), DownloadEntitiesChunk (..), EntityChunk (..), ErrorChunk (..), StreamInitInfo (..))
 import Unison.SyncV2.Types qualified as SyncV2
 import UnliftIO qualified
-import UnliftIO.Async qualified as Async
 
 batchSize :: Int32
 batchSize = 1000
@@ -51,7 +54,8 @@ streamSettings rootCausalHash rootBranchRef = StreamInitInfo {version = SyncV2.V
 server :: Maybe UserId -> SyncV2.Routes WebAppServer
 server mayUserId =
   SyncV2.Routes
-    { downloadEntitiesStream = downloadEntitiesStreamImpl mayUserId
+    { downloadEntitiesStream = downloadEntitiesStreamImpl mayUserId,
+      causalDependenciesStream = causalDependenciesStreamImpl mayUserId
     }
 
 parseBranchRef :: SyncV2.BranchRef -> Either Text (Either ProjectReleaseShortHand ProjectBranchShortHand)
@@ -66,30 +70,16 @@ parseBranchRef (SyncV2.BranchRef branchRef) =
     parseRelease = fmap Left . eitherToMaybe $ IDs.fromText @ProjectReleaseShortHand branchRef
 
 downloadEntitiesStreamImpl :: Maybe UserId -> SyncV2.DownloadEntitiesRequest -> WebApp (SourceIO (SyncV2.CBORStream SyncV2.DownloadEntitiesChunk))
-downloadEntitiesStreamImpl mayCallerUserId (SyncV2.DownloadEntitiesRequest {causalHash = causalHashJWT, branchRef, knownHashes = _todo}) = do
+downloadEntitiesStreamImpl mayCallerUserId (SyncV2.DownloadEntitiesRequest {causalHash = causalHashJWT, branchRef, knownHashes}) = do
   either emitErr id <$> runExceptT do
     addRequestTag "branch-ref" (SyncV2.unBranchRef branchRef)
     HashJWTClaims {hash = causalHash} <- lift (HashJWT.verifyHashJWT mayCallerUserId causalHashJWT >>= either respondError pure)
     codebase <-
-      case parseBranchRef branchRef of
-        Left err -> throwError (SyncV2.DownloadEntitiesInvalidBranchRef err branchRef)
-        Right (Left (ProjectReleaseShortHand {userHandle, projectSlug})) -> do
-          let projectShortHand = ProjectShortHand {userHandle, projectSlug}
-          (Project {ownerUserId = projectOwnerUserId}, contributorId) <- ExceptT . PG.tryRunTransaction $ do
-            project <- PGQ.projectByShortHand projectShortHand `whenNothingM` throwError (SyncV2.DownloadEntitiesProjectNotFound $ IDs.toText @ProjectShortHand projectShortHand)
-            pure (project, Nothing)
-          authZToken <- lift AuthZ.checkDownloadFromProjectBranchCodebase `whenLeftM` \_err -> throwError (SyncV2.DownloadEntitiesNoReadPermission branchRef)
-          let codebaseLoc = Codebase.codebaseLocationForProjectBranchCodebase projectOwnerUserId contributorId
-          pure $ Codebase.codebaseEnv authZToken codebaseLoc
-        Right (Right (ProjectBranchShortHand {userHandle, projectSlug, contributorHandle})) -> do
-          let projectShortHand = ProjectShortHand {userHandle, projectSlug}
-          (Project {ownerUserId = projectOwnerUserId}, contributorId) <- ExceptT . PG.tryRunTransaction $ do
-            project <- (PGQ.projectByShortHand projectShortHand) `whenNothingM` throwError (SyncV2.DownloadEntitiesProjectNotFound $ IDs.toText @ProjectShortHand projectShortHand)
-            mayContributorUserId <- for contributorHandle \ch -> fmap user_id $ (PGQ.userByHandle ch) `whenNothingM` throwError (SyncV2.DownloadEntitiesUserNotFound $ IDs.toText @UserHandle ch)
-            pure (project, mayContributorUserId)
-          authZToken <- lift AuthZ.checkDownloadFromProjectBranchCodebase `whenLeftM` \_err -> throwError (SyncV2.DownloadEntitiesNoReadPermission branchRef)
-          let codebaseLoc = Codebase.codebaseLocationForProjectBranchCodebase projectOwnerUserId contributorId
-          pure $ Codebase.codebaseEnv authZToken codebaseLoc
+      flip withExceptT (codebaseForBranchRef branchRef) \case
+        CodebaseLoadingErrorProjectNotFound projectShortHand -> SyncV2.DownloadEntitiesProjectNotFound (IDs.toText projectShortHand)
+        CodebaseLoadingErrorUserNotFound userHandle -> SyncV2.DownloadEntitiesUserNotFound (IDs.toText userHandle)
+        CodebaseLoadingErrorNoReadPermission branchRef -> SyncV2.DownloadEntitiesNoReadPermission branchRef
+        CodebaseLoadingErrorInvalidBranchRef err branchRef -> SyncV2.DownloadEntitiesInvalidBranchRef err branchRef
     q <- UnliftIO.atomically $ do
       q <- STM.newTBMQueue 10
       STM.writeTBMQueue q (Vector.singleton $ InitialC $ streamSettings causalHash (Just branchRef))
@@ -98,34 +88,111 @@ downloadEntitiesStreamImpl mayCallerUserId (SyncV2.DownloadEntitiesRequest {caus
       Logging.logInfoText "Starting download entities stream"
       Codebase.runCodebaseTransaction codebase $ do
         (_bhId, causalId) <- CausalQ.expectCausalIdsOf id (hash32ToCausalHash causalHash)
-        cursor <- SSQ.allSerializedDependenciesOfCausalCursor causalId
+        let knownCausalHashes = Set.map hash32ToCausalHash knownHashes
+        cursor <- SSQ.allSerializedDependenciesOfCausalCursor causalId knownCausalHashes
         Cursor.foldBatched cursor batchSize \batch -> do
           let entityChunkBatch = batch <&> \(entityCBOR, hash) -> EntityC (EntityChunk {hash, entityCBOR})
           PG.transactionUnsafeIO $ STM.atomically $ STM.writeTBMQueue q entityChunkBatch
         PG.transactionUnsafeIO $ STM.atomically $ STM.closeTBMQueue q
     pure $ sourceIOWithAsync streamResults $ conduitToSourceIO do
-      stream q
+      queueToStream q
   where
-    stream :: STM.TBMQueue (Vector DownloadEntitiesChunk) -> C.ConduitT () (SyncV2.CBORStream DownloadEntitiesChunk) IO ()
-    stream q = do
-      let loop :: C.ConduitT () (SyncV2.CBORStream DownloadEntitiesChunk) IO ()
-          loop = do
-            liftIO (STM.atomically (STM.readTBMQueue q)) >>= \case
-              -- The queue is closed.
-              Nothing -> do
-                pure ()
-              Just batches -> do
-                batches
-                  & foldMap (CBOR.serialiseIncremental)
-                  & (SyncV2.CBORStream . Builder.toLazyByteString)
-                  & C.yield
-                loop
-
-      loop
-
     emitErr :: SyncV2.DownloadEntitiesError -> SourceIO (SyncV2.CBORStream SyncV2.DownloadEntitiesChunk)
     emitErr err = SourceT.source [SyncV2.CBORStream . CBOR.serialise $ ErrorC (ErrorChunk err)]
 
+-- | Stream the causal-spine and lib-root dependencies of the requested root causal.
+-- A background action folds the dependency cursor in batches into a bounded queue, which is
+-- drained into the CBOR response stream by 'queueToStream'.
+causalDependenciesStreamImpl :: Maybe UserId -> SyncV2.CausalDependenciesRequest -> WebApp (SourceIO (SyncV2.CBORStream SyncV2.CausalDependenciesChunk))
+causalDependenciesStreamImpl mayCallerUserId (SyncV2.CausalDependenciesRequest {rootCausal = causalHashJWT, branchRef}) = do
+  respondExceptT do
+    addRequestTag "branch-ref" (SyncV2.unBranchRef branchRef)
+    HashJWTClaims {hash = causalHash} <- lift (HashJWT.verifyHashJWT mayCallerUserId causalHashJWT >>= either respondError pure)
+    addRequestTag "root-causal" (tShow causalHash)
+    codebase <- codebaseForBranchRef branchRef
+    q <- UnliftIO.atomically $ STM.newTBMQueue 10
+    streamResults <- lift $ UnliftIO.toIO do
+      Logging.logInfoText "Starting causal dependencies stream"
+      Codebase.runCodebaseTransaction codebase $ do
+        (_bhId, causalId) <- CausalQ.expectCausalIdsOf id (hash32ToCausalHash causalHash)
+        -- NOTE(review): the Debug.Temp logging in this function looks like leftover debugging; confirm it should ship.
+        Debug.debugLogM Debug.Temp "Getting cursor"
+        cursor <- SSQ.spineAndLibDependenciesOfCausalCursor causalId
+        Debug.debugLogM Debug.Temp "Folding cursor"
+        Cursor.foldBatched cursor batchSize \batch -> do
+          Debug.debugLogM Debug.Temp "Got batch"
+          let depBatch =
+                batch <&> \(causalHash, isCausalSpine, isLibRoot) ->
+                  let dependencyType = case (isCausalSpine, isLibRoot) of
+                        (IsCausalSpine, _) -> CausalSpineDependency
+                        (_, IsLibRoot) -> LibDependency
+                        -- NOTE(review): presumably unreachable, since the query behind this cursor flags every row as spine or lib; confirm.
+                        _ -> error $ "Causal dependency which is neither spine nor lib root: " <> show causalHash
+                   in CausalHashDepC {causalHash, dependencyType}
+          PG.transactionUnsafeIO $ STM.atomically $ STM.writeTBMQueue q depBatch
+        PG.transactionUnsafeIO $ STM.atomically $ STM.closeTBMQueue q
+    pure $ sourceIOWithAsync streamResults $ conduitToSourceIO do
+      queueToStream q
+
+-- | Drain batches from the queue until it is closed, serialising each batch into a
+-- single 'SyncV2.CBORStream' chunk.
+queueToStream :: forall a f. (CBOR.Serialise a, Foldable f) => STM.TBMQueue (f a) -> C.ConduitT () (SyncV2.CBORStream a) IO ()
+queueToStream q = do
+  let loop :: C.ConduitT () (SyncV2.CBORStream a) IO ()
+      loop = do
+        liftIO (STM.atomically (STM.readTBMQueue q)) >>= \case
+          -- The queue is closed.
+          Nothing -> do
+            pure ()
+          Just batches -> do
+            batches
+              & foldMap (CBOR.serialiseIncremental)
+              & (SyncV2.CBORStream . Builder.toLazyByteString)
+              & C.yield
+            loop
+  loop
+
+-- | Ways in which 'codebaseForBranchRef' can fail.
+data CodebaseLoadingError
+  = CodebaseLoadingErrorProjectNotFound ProjectShortHand
+  | CodebaseLoadingErrorUserNotFound UserHandle
+  | CodebaseLoadingErrorNoReadPermission SyncV2.BranchRef
+  | CodebaseLoadingErrorInvalidBranchRef Text SyncV2.BranchRef
+  deriving stock (Show)
+  deriving (Logging.Loggable) via Logging.ShowLoggable Logging.UserFault CodebaseLoadingError
+
+instance ToServerError CodebaseLoadingError where
+  toServerError = \case
+    CodebaseLoadingErrorProjectNotFound projectShortHand -> (ErrorID "codebase-loading:project-not-found", Servant.err404 {errBody = from . Text.encodeUtf8 $ "Project not found: " <> (IDs.toText projectShortHand)})
+    CodebaseLoadingErrorUserNotFound userHandle -> (ErrorID "codebase-loading:user-not-found", Servant.err404 {errBody = from . Text.encodeUtf8 $ "User not found: " <> (IDs.toText userHandle)})
+    CodebaseLoadingErrorNoReadPermission branchRef -> (ErrorID "codebase-loading:no-read-permission", Servant.err403 {errBody = from . Text.encodeUtf8 $ "No read permission for branch ref: " <> (SyncV2.unBranchRef branchRef)})
+    CodebaseLoadingErrorInvalidBranchRef err branchRef -> (ErrorID "codebase-loading:invalid-branch-ref", Servant.err400 {errBody = from . Text.encodeUtf8 $ "Invalid branch ref: " <> err <> " " <> (SyncV2.unBranchRef branchRef)})
+
+-- | Resolve a branch ref (either a release or a project branch) to the codebase it is read
+-- from: parses the ref, looks up the project (and the contributor, when the branch names one),
+-- and checks download permission.
+codebaseForBranchRef :: SyncV2.BranchRef -> (ExceptT CodebaseLoadingError WebApp Codebase.CodebaseEnv)
+codebaseForBranchRef branchRef = do
+  case parseBranchRef branchRef of
+    Left err -> throwError (CodebaseLoadingErrorInvalidBranchRef err branchRef)
+    Right (Left (ProjectReleaseShortHand {userHandle, projectSlug})) -> do
+      let projectShortHand = ProjectShortHand {userHandle, projectSlug}
+      (Project {ownerUserId = projectOwnerUserId}, contributorId) <- ExceptT . PG.tryRunTransaction $ do
+        project <- PGQ.projectByShortHand projectShortHand `whenNothingM` throwError (CodebaseLoadingErrorProjectNotFound $ projectShortHand)
+        pure (project, Nothing)
+      authZToken <- lift AuthZ.checkDownloadFromProjectBranchCodebase `whenLeftM` \_err -> throwError (CodebaseLoadingErrorNoReadPermission branchRef)
+      let codebaseLoc = Codebase.codebaseLocationForProjectBranchCodebase projectOwnerUserId contributorId
+      pure $ Codebase.codebaseEnv authZToken codebaseLoc
+    Right (Right (ProjectBranchShortHand {userHandle, projectSlug, contributorHandle})) -> do
+      let projectShortHand = ProjectShortHand {userHandle, projectSlug}
+      (Project {ownerUserId = projectOwnerUserId}, contributorId) <- ExceptT . PG.tryRunTransaction $ do
+        project <- (PGQ.projectByShortHand projectShortHand) `whenNothingM` throwError (CodebaseLoadingErrorProjectNotFound projectShortHand)
+        mayContributorUserId <- for contributorHandle \ch -> fmap user_id $ (PGQ.userByHandle ch) `whenNothingM` throwError (CodebaseLoadingErrorUserNotFound ch)
+        pure (project, mayContributorUserId)
+      authZToken <- lift AuthZ.checkDownloadFromProjectBranchCodebase `whenLeftM` \_err -> throwError (CodebaseLoadingErrorNoReadPermission branchRef)
+      let codebaseLoc = Codebase.codebaseLocationForProjectBranchCodebase projectOwnerUserId contributorId
+      pure $ Codebase.codebaseEnv authZToken codebaseLoc
+
 -- | Run an IO action in the background while streaming the results.
 --
 -- Servant doesn't provide any easier way to do bracketing like this, all the IO must be
@@ -133,4 +200,7 @@ downloadEntitiesStreamImpl mayCallerUserId (SyncV2.DownloadEntitiesRequest {caus
 sourceIOWithAsync :: IO a -> SourceIO r -> SourceIO r
 sourceIOWithAsync action (SourceT k) =
   SourceT \k' ->
-    Async.withAsync action \_ -> k k'
+    -- The action is forked inside a Ki scope; closing the scope (when the continuation finishes) terminates it if it is still running.
+    Ki.scoped \scope -> do
+      _ <- Ki.fork scope action
+      k k'
diff --git a/src/Share/Web/UCM/SyncV2/Queries.hs b/src/Share/Web/UCM/SyncV2/Queries.hs
index 261e02a2..02264982 100644
--- a/src/Share/Web/UCM/SyncV2/Queries.hs
+++ b/src/Share/Web/UCM/SyncV2/Queries.hs
@@ -1,5 +1,6 @@
 module Share.Web.UCM.SyncV2.Queries
   ( allSerializedDependenciesOfCausalCursor,
+    spineAndLibDependenciesOfCausalCursor,
   )
 where
 
@@ -9,209 +10,79 @@ import Share.Postgres
 import Share.Postgres.Cursors (PGCursor)
 import Share.Postgres.Cursors qualified as PGCursor
 import Share.Postgres.IDs
+import Share.Prelude
+import Share.Web.UCM.SyncV2.Types (IsCausalSpine (..), IsLibRoot (..))
 import U.Codebase.Sqlite.TempEntity (TempEntity)
 import Unison.Hash32 (Hash32)
 import Unison.SyncV2.Types (CBORBytes)
 
--- Useful, but needs to be double-checked before use.
--- allHashDependenciesOfCausalCursor :: CausalId -> CodebaseM e (PGCursor Text) --- allHashDependenciesOfCausalCursor cid = do --- ownerUserId <- asks codebaseOwner --- PGCursor.newColCursor --- "causal_dependencies" --- [sql| --- WITH RECURSIVE transitive_causals(causal_id, causal_namespace_hash_id) AS ( --- SELECT causal.id, causal.namespace_hash_id --- FROM causals causal --- WHERE causal.id = #{cid} --- AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id) --- UNION --- -- This nested CTE is required because RECURSIVE CTEs can't refer --- -- to the recursive table more than once. --- ( WITH rec AS ( --- SELECT causal_id, causal_namespace_hash_id --- FROM transitive_causals tc --- ) --- SELECT ancestor_causal.id, ancestor_causal.namespace_hash_id --- FROM causal_ancestors ca --- JOIN rec tc ON ca.causal_id = tc.causal_id --- JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id --- -- WHERE NOT EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = to_codebase_user_id AND co.causal_id = ancestor_causal.id) --- UNION --- SELECT child_causal.id, child_causal.namespace_hash_id --- FROM rec tc --- JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id --- JOIN causals child_causal ON nc.child_causal_id = child_causal.id --- -- WHERE NOT EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = to_codebase_user_id AND co.causal_id = child_causal.id) --- ) --- ), all_namespaces(namespace_hash_id) AS ( --- SELECT DISTINCT causal_namespace_hash_id AS namespace_hash_id --- FROM transitive_causals --- -- WHERE NOT EXISTS (SELECT FROM namespace_ownership no WHERE no.user_id = to_codebase_user_id AND no.namespace_hash_id = causal_namespace_hash_id) --- ), all_patches(patch_id) AS ( --- SELECT DISTINCT patch.id --- FROM all_namespaces an --- JOIN namespace_patches np ON an.namespace_hash_id = np.namespace_hash_id --- JOIN patches patch ON np.patch_id = patch.id --- -- 
WHERE NOT EXISTS (SELECT FROM patch_ownership po WHERE po.user_id = to_codebase_user_id AND po.patch_id = patch.id) --- ), --- -- term components to start transitively joining dependencies to --- base_term_components(component_hash_id) AS ( --- SELECT DISTINCT term.component_hash_id --- FROM all_namespaces an --- JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id --- JOIN terms term ON nt.term_id = term.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_terms st WHERE st.user_id = to_codebase_user_id AND st.term_id = term.id) --- UNION --- SELECT DISTINCT term.component_hash_id --- FROM all_patches ap --- JOIN patch_term_mappings ptm ON ap.patch_id = ptm.patch_id --- JOIN terms term ON ptm.to_term_id = term.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_terms st WHERE st.user_id = to_codebase_user_id AND st.term_id = term.id) --- UNION --- -- term metadata --- SELECT DISTINCT term.component_hash_id --- FROM all_namespaces an --- JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id --- JOIN namespace_term_metadata meta ON nt.id = meta.named_term --- JOIN terms term ON meta.metadata_term_id = term.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_terms st WHERE st.user_id = to_codebase_user_id AND st.term_id = term.id) --- UNION --- -- type metadata --- SELECT DISTINCT term.component_hash_id --- FROM all_namespaces an --- JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id --- JOIN namespace_type_metadata meta ON nt.id = meta.named_type --- JOIN terms term ON meta.metadata_term_id = term.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_terms st WHERE st.user_id = to_codebase_user_id AND st.term_id = term.id) --- ), --- -- type components to start transitively joining dependencies to --- base_type_components(component_hash_id) AS ( --- SELECT DISTINCT typ.component_hash_id --- FROM all_namespaces an --- JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id --- JOIN types typ ON nt.type_id = 
typ.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_types st WHERE st.user_id = to_codebase_user_id AND st.type_id = typ.id) --- UNION --- SELECT DISTINCT typ.component_hash_id --- FROM all_namespaces an --- JOIN namespace_terms nt ON an.namespace_hash_id = nt.namespace_hash_id --- JOIN constructors con ON nt.constructor_id = con.id --- JOIN types typ ON con.type_id = typ.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_types st WHERE st.user_id = to_codebase_user_id AND st.type_id = typ.id) --- UNION --- SELECT DISTINCT typ.component_hash_id --- FROM all_patches ap --- JOIN patch_type_mappings ptm ON ap.patch_id = ptm.patch_id --- JOIN types typ ON ptm.to_type_id = typ.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_types st WHERE st.user_id = to_codebase_user_id AND st.type_id = typ.id) --- UNION --- SELECT DISTINCT typ.component_hash_id --- FROM all_patches ap --- JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id --- JOIN constructors con ON pcm.to_constructor_id = con.id --- JOIN types typ ON con.type_id = typ.id --- -- WHERE NOT EXISTS (SELECT FROM sandboxed_types st WHERE st.user_id = to_codebase_user_id AND st.type_id = typ.id) --- ), --- -- All the dependencies we join in transitively from the known term & type components we depend on. 
--- -- Unfortunately it's not possible to know which hashes are terms vs types :'( --- transitive_components(component_hash_id) AS ( --- SELECT DISTINCT btc.component_hash_id --- FROM base_term_components btc --- UNION --- SELECT DISTINCT btc.component_hash_id --- FROM base_type_components btc --- UNION --- ( WITH rec AS ( --- SELECT component_hash_id --- FROM transitive_components tc --- ) --- -- recursively union in term dependencies --- SELECT DISTINCT ref.component_hash_id --- FROM rec atc --- -- This joins in ALL the terms from the component, not just the one that caused the dependency on the --- -- component --- JOIN terms term ON atc.component_hash_id = term.component_hash_id --- JOIN term_local_component_references ref ON term.id = ref.term_id --- UNION --- -- recursively union in type dependencies --- SELECT DISTINCT ref.component_hash_id --- FROM rec atc --- -- This joins in ALL the types from the component, not just the one that caused the dependency on the --- -- component --- JOIN types typ ON atc.component_hash_id = typ.component_hash_id --- JOIN type_local_component_references ref ON typ.id = ref.type_id --- ) --- ), copied_causals(causal_id) AS ( --- SELECT DISTINCT tc.causal_id --- FROM transitive_causals tc --- ), copied_namespaces(namespace_hash_id) AS ( --- SELECT DISTINCT an.namespace_hash_id --- FROM all_namespaces an --- ), copied_patches(patch_id) AS ( --- SELECT DISTINCT ap.patch_id --- FROM all_patches ap --- ), copied_term_components AS ( --- SELECT DISTINCT term.id, copy.bytes_id --- FROM transitive_components tc --- JOIN terms term ON tc.component_hash_id = term.component_hash_id --- JOIN sandboxed_terms copy ON term.id = copy.term_id --- WHERE copy.user_id = #{ownerUserId} --- ), copied_type_components AS ( --- SELECT DISTINCT typ.id, copy.bytes_id --- FROM transitive_components tc --- JOIN types typ ON tc.component_hash_id = typ.component_hash_id --- JOIN sandboxed_types copy ON typ.id = copy.type_id --- WHERE copy.user_id = 
#{ownerUserId}
---         ) SELECT causal.hash
---              FROM copied_causals cc
---                JOIN causals causal ON cc.causal_id = causal.id
---           UNION ALL
---           SELECT branch_hashes.base32
---              FROM copied_namespaces cn
---                JOIN branch_hashes ON cn.namespace_hash_id = branch_hashes.id
---           UNION ALL
---           SELECT patch.hash
---              FROM copied_patches cp
---                JOIN patches patch ON cp.patch_id = patch.id
---           UNION ALL
---           SELECT component_hashes.base32
---              FROM transitive_components tc
---                JOIN component_hashes ON tc.component_hash_id = component_hashes.id
---     |]
-
-allSerializedDependenciesOfCausalCursor :: CausalId -> CodebaseM e (PGCursor (CBORBytes TempEntity, Hash32))
-allSerializedDependenciesOfCausalCursor cid = do
+allSerializedDependenciesOfCausalCursor :: CausalId -> Set CausalHash -> CodebaseM e (PGCursor (CBORBytes TempEntity, Hash32))
+allSerializedDependenciesOfCausalCursor cid exceptCausalHashes = do
   ownerUserId <- asks codebaseOwner
-  PGCursor.newRowCursor
-    "causal_dependencies"
+  -- Create temp tables (dropped on commit) for the causals, components and namespaces we know the calling client already has.
+  execute_ [sql| CREATE TEMP TABLE except_causals (causal_id INTEGER NULL ) ON COMMIT DROP |]
+  execute_ [sql| CREATE TEMP TABLE except_components ( component_hash_id INTEGER NULL ) ON COMMIT DROP |]
+  execute_ [sql| CREATE TEMP TABLE except_namespaces ( branch_hash_ids INTEGER NULL ) ON COMMIT DROP |]
+  execute_
     [sql|
+    WITH the_causal_hashes(hash) AS (
+      SELECT * FROM ^{singleColumnTable (toList exceptCausalHashes)}
+    ), known_causal_ids(causal_id) AS (
+      SELECT c.id
+      FROM the_causal_hashes tch
+        JOIN causals c ON tch.hash = c.hash
+    ), dependency_hashes(hash) AS (
+      SELECT DISTINCT deps.hash
+      FROM dependencies_of_causals((SELECT ARRAY_AGG(kci.causal_id) FROM known_causal_ids kci)) AS deps
+    ), do_causals AS (
+      INSERT INTO except_causals(causal_id)
+      SELECT causal.id
+      FROM the_causal_hashes tch
+        JOIN causals causal ON tch.hash = causal.hash
+    ), do_namespaces AS (
+      INSERT INTO except_namespaces(branch_hash_ids)
+      SELECT bh.id
+      FROM dependency_hashes dh
+        JOIN branch_hashes bh ON dh.hash = bh.base32
+    ) INSERT INTO except_components(component_hash_id)
+      SELECT ch.id
+      FROM dependency_hashes dh
+        JOIN component_hashes ch ON dh.hash = ch.base32
+  |]
+  cursor <-
+    PGCursor.newRowCursor
+      "serialized_entities"
+      [sql|
         WITH RECURSIVE transitive_causals(causal_id, causal_hash, causal_namespace_hash_id) AS (
             SELECT causal.id, causal.hash, causal.namespace_hash_id
             FROM causals causal
                 WHERE causal.id = #{cid}
                   AND EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = causal.id)
+                  AND NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = causal.id)
             UNION
             -- This nested CTE is required because RECURSIVE CTEs can't refer
             -- to the recursive table more than once.
             ( WITH rec AS (
-                SELECT causal_id, causal_namespace_hash_id
+                SELECT tc.causal_id, tc.causal_namespace_hash_id
                 FROM transitive_causals tc
             )
                 SELECT ancestor_causal.id, ancestor_causal.hash, ancestor_causal.namespace_hash_id
                 FROM causal_ancestors ca
                     JOIN rec tc ON ca.causal_id = tc.causal_id
                     JOIN causals ancestor_causal ON ca.ancestor_id = ancestor_causal.id
+                    WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = ancestor_causal.id)
                 UNION
                 SELECT child_causal.id, child_causal.hash, child_causal.namespace_hash_id
                 FROM rec tc
                     JOIN namespace_children nc ON tc.causal_namespace_hash_id = nc.parent_namespace_hash_id
                     JOIN causals child_causal ON nc.child_causal_id = child_causal.id
+                    WHERE NOT EXISTS (SELECT FROM except_causals ec WHERE ec.causal_id = child_causal.id)
             )
         ), all_namespaces(namespace_hash_id, namespace_hash) AS (
             SELECT DISTINCT tc.causal_namespace_hash_id AS namespace_hash_id, bh.base32 as namespace_hash
             FROM transitive_causals tc
             JOIN branch_hashes bh ON tc.causal_namespace_hash_id = bh.id
+            WHERE NOT EXISTS (SELECT FROM except_namespaces en WHERE en.branch_hash_ids = tc.causal_namespace_hash_id)
         ), all_patches(patch_id, patch_hash) AS (
             SELECT DISTINCT patch.id, patch.hash
             FROM all_namespaces an
@@ -243,6 +114,7 @@ allSerializedDependenciesOfCausalCursor cid = do
                 JOIN namespace_types nt ON an.namespace_hash_id = nt.namespace_hash_id
                 JOIN namespace_type_metadata meta ON nt.id = meta.named_type
                 JOIN terms term ON meta.metadata_term_id = term.id
+                WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = term.component_hash_id)
         ),
         -- type components to start transitively joining dependencies to
         base_type_components(component_hash_id) AS (
@@ -267,6 +139,7 @@ allSerializedDependenciesOfCausalCursor cid = do
                 JOIN patch_constructor_mappings pcm ON ap.patch_id = pcm.patch_id
                 JOIN constructors con ON pcm.to_constructor_id = con.id
                 JOIN types typ ON con.type_id = typ.id
+                WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = typ.component_hash_id)
         ),
         -- All the dependencies we join in transitively from the known term & type components we depend on.
         -- Unfortunately it's not possible to know which hashes are terms vs types :'(
@@ -288,6 +161,7 @@ allSerializedDependenciesOfCausalCursor cid = do
                 -- component
                 JOIN terms term ON atc.component_hash_id = term.component_hash_id
                 JOIN term_local_component_references ref ON term.id = ref.term_id
+                WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = ref.component_hash_id)
                 UNION
                 -- recursively union in type dependencies
                 SELECT DISTINCT ref.component_hash_id
@@ -296,6 +170,7 @@ allSerializedDependenciesOfCausalCursor cid = do
                 -- component
                 JOIN types typ ON atc.component_hash_id = typ.component_hash_id
                 JOIN type_local_component_references ref ON typ.id = ref.type_id
+                WHERE NOT EXISTS (SELECT FROM except_components ec WHERE ec.component_hash_id = ref.component_hash_id)
             )
         )
            (SELECT bytes.bytes, ch.base32
@@ -323,3 +198,47 @@ allSerializedDependenciesOfCausalCursor cid = do
                JOIN bytes ON sc.bytes_id = bytes.id
            )
   |]
+  pure cursor
+
+-- | Cursor over the spine and library-root dependencies of a causal: the causal's history
+-- (its "spine", via @causal_history@) and, for each spine causal, the children of its @lib@
+-- namespace. Rows are ordered by position in the spine, spine rows before lib rows.
+-- Yields no rows unless the codebase owner owns the given causal.
+--
+-- NOTE(review): the query selects four columns (including @ord@) but rows are decoded as
+-- triples; confirm the row decoder tolerates the extra column.
+spineAndLibDependenciesOfCausalCursor :: CausalId -> CodebaseM e (PGCursor (Hash32, IsCausalSpine, IsLibRoot))
+spineAndLibDependenciesOfCausalCursor cid = do
+  ownerUserId <- asks codebaseOwner
+  libSegmentTextId <- queryExpect1Col @Int64 [sql| SELECT text.id FROM text WHERE content_hash = text_hash('lib') |]
+  PGCursor.newRowCursor
+    "causal_dependencies"
+    [sql|
+        WITH causal_spine(causal_id, ord) AS (
+          -- Empty OVER clause is valid and just numbers the rows in the order they come back,
+          -- which is what we want in this case.
+          -- Perhaps we can use a proper order-by on causal depth once that's available.
+          SELECT ch.causal_id, ROW_NUMBER() OVER () FROM causal_history(#{cid}) AS ch
+            WHERE EXISTS (SELECT FROM causal_ownership co WHERE co.user_id = #{ownerUserId} AND co.causal_id = #{cid})
+        ), lib_deps(causal_id, ord) AS (
+          SELECT DISTINCT ON (lib_dep.child_causal_id) lib_dep.child_causal_id, cs.ord
+            FROM causal_spine cs
+              -- Spinal causal
+              -- Root where all library roots are attached
+              JOIN causals spine_causal ON spine_causal.id = cs.causal_id
+              -- The actual library dependency children
+              JOIN namespace_children lib_root_ns ON spine_causal.namespace_hash_id = lib_root_ns.parent_namespace_hash_id
+              JOIN causals lib_root_causal ON lib_root_ns.child_causal_id = lib_root_causal.id
+              JOIN namespace_children lib_dep ON lib_root_causal.namespace_hash_id = lib_dep.parent_namespace_hash_id
+            WHERE lib_root_ns.name_segment_id = #{libSegmentTextId}
+            ORDER BY lib_dep.child_causal_id, cs.ord ASC
+        ) SELECT c.hash AS hash, true AS is_spine, false AS is_lib, cs.ord AS ord
+            FROM causal_spine cs
+              JOIN causals c ON cs.causal_id = c.id
+          UNION
+          SELECT c.hash AS hash, false AS is_spine, true AS is_lib, ld.ord AS ord
+            FROM lib_deps ld
+              JOIN causals c ON ld.causal_id = c.id
+          ORDER BY ord ASC, is_lib ASC, is_spine ASC
+    |]
+    <&> fmap (\(hash, isSpine, isLibRoot) -> (hash, if isSpine then IsCausalSpine else NotCausalSpine, if isLibRoot then IsLibRoot else NotLibRoot))
diff --git a/src/Share/Web/UCM/SyncV2/Types.hs b/src/Share/Web/UCM/SyncV2/Types.hs
new file mode 100644
index 00000000..3526ae7f
--- /dev/null
+++ b/src/Share/Web/UCM/SyncV2/Types.hs
@@ -0,0 +1,15 @@
+module Share.Web.UCM.SyncV2.Types
+  ( IsLibRoot (..),
+    IsCausalSpine (..),
+  )
+where
+
+-- | Whether a causal dependency is the root of a library (a child of a @lib@ namespace).
+data IsLibRoot
+  = IsLibRoot
+  | NotLibRoot
+
+-- | Whether a causal dependency is part of the root causal's history (its "spine").
+data IsCausalSpine
+  = IsCausalSpine
+  | NotCausalSpine
diff --git a/unison b/unison
index 855bb255..95450654 160000
--- a/unison
+++ b/unison
@@ -1 +1 @@
-Subproject commit 855bb2557d09c71d737f0b9331db3c21712060a9
+Subproject commit 9545065466f2c8117d3c005940c2f6796ed787c7