Skip to content

Commit 9399c85

Browse files
authored
Merge pull request #33 from unisoncomputing/syncv2/ensure-serialized
Ensure Serialized Entities
2 parents fd3160d + eddd5a6 commit 9399c85

File tree

8 files changed

+279
-174
lines changed

8 files changed

+279
-174
lines changed

src/Share/BackgroundJobs/SerializedEntitiesMigration/Worker.hs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ import Share.Codebase qualified as Codebase
99
import Share.Codebase.Types (CodebaseEnv (..))
1010
import Share.Postgres
1111
import Share.Postgres qualified as PG
12+
import Share.Postgres.Causal.Queries qualified as CQ
1213
import Share.Postgres.Definitions.Queries qualified as DefnQ
1314
import Share.Postgres.Hashes.Queries qualified as HQ
1415
import Share.Postgres.IDs
16+
import Share.Postgres.Patches.Queries qualified as PQ
1517
import Share.Postgres.Sync.Queries qualified as SQ
1618
import Share.Prelude
1719
import Share.Web.Authorization qualified as AuthZ
@@ -81,12 +83,18 @@ processComponents !_authZReceipt = do
8183
|]
8284
pure True
8385

-- | Serialise a sync entity with 'SyncV2.serialiseCBORBytes' and store the resulting bytes.
--
-- Only patches, causals and namespaces are handled here; a term component or decl
-- component calls 'error', so callers must never pass one in.
--
-- This diff changes two things: the result type moves from a generic @QueryM m@ to
-- @Codebase.CodebaseM e@, and each branch now looks up an id from the hash
-- (patch id / causal id / branch hash id) and hands that id, not the raw 'Hash32',
-- to the matching @saveSerialized*@ query.
--
-- NOTE(review): the @expect*@ lookups presumably fail when the hash is unknown;
-- their definitions are not visible here, so confirm before relying on it.
84-
saveUnsandboxedSerializedEntities :: (QueryM m) => Hash32 -> TempEntity -> m ()
86+
saveUnsandboxedSerializedEntities :: Hash32 -> TempEntity -> Codebase.CodebaseM e ()
8587
saveUnsandboxedSerializedEntities hash entity = do
8688
let serialised = SyncV2.serialiseCBORBytes entity
8789
case entity of
8890
Entity.TC {} -> error "Unexpected term component"
8991
Entity.DC {} -> error "Unexpected decl component"
90-
Entity.P {} -> SQ.saveSerializedPatch hash serialised
91-
Entity.C {} -> SQ.saveSerializedCausal hash serialised
92-
Entity.N {} -> SQ.saveSerializedNamespace hash serialised
92+
Entity.P {} -> do
93+
patchId <- HQ.expectPatchIdsOf id (fromHash32 @PatchHash hash)
94+
PQ.saveSerializedPatch patchId serialised
95+
Entity.C {} -> do
96+
cId <- CQ.expectCausalIdByHash (fromHash32 @CausalHash hash)
97+
CQ.saveSerializedCausal cId serialised
98+
Entity.N {} -> do
99+
bhId <- HQ.expectBranchHashId (fromHash32 @BranchHash hash)
100+
CQ.saveSerializedNamespace bhId serialised

src/Share/Codebase.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ squashCausal Causal.Causal {valueHash = unsquashedBranchHash, value} = do
468468
let squashedBranchHead = branch {V2.children = snd <$> squashedChildren}
469469
(squashedBranchHashId, squashedBranchHash) <- CausalQ.saveV2BranchShallow squashedBranchHead
470470
let ancestors = mempty
471-
(squashedCausalId, squashedCausalHash) <- CausalQ.saveCausal Nothing squashedBranchHashId ancestors
471+
(squashedCausalId, squashedCausalHash) <- CausalQ.saveCausal Nothing Nothing squashedBranchHashId ancestors
472472
let squashedCausalBranch =
473473
Causal.Causal
474474
{ causalHash = squashedCausalHash,

src/Share/Postgres/Causal/Queries.hs

Lines changed: 106 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,38 +25,61 @@ module Share.Postgres.Causal.Queries
2525
hashCausal,
2626
bestCommonAncestor,
2727
isFastForward,
28+
29+
-- * Sync
30+
expectCausalEntity,
31+
expectNamespaceEntity,
32+
33+
-- * For migrations, can probably remove this export later.
34+
saveSerializedCausal,
35+
saveSerializedNamespace,
2836
)
2937
where
3038

3139
import Control.Lens
40+
import Data.ByteString.Lazy.Char8 qualified as BL
3241
import Data.Map qualified as Map
3342
import Data.Set qualified as Set
43+
import Data.Vector qualified as Vector
3444
import Share.Codebase.Types (CodebaseM)
3545
import Share.Codebase.Types qualified as Codebase
3646
import Share.IDs (UserId)
3747
import Share.Postgres
3848
import Share.Postgres.Causal.Types
3949
import Share.Postgres.Definitions.Queries qualified as Defn
50+
import Share.Postgres.Definitions.Queries qualified as DefnQ
4051
import Share.Postgres.Definitions.Types
4152
import Share.Postgres.Hashes.Queries qualified as HashQ
4253
import Share.Postgres.IDs
4354
import Share.Postgres.Patches.Queries qualified as PatchQ
55+
import Share.Postgres.Serialization qualified as S
56+
import Share.Postgres.Sync.Conversions qualified as Cv
4457
import Share.Prelude
4558
import Share.Utils.Postgres (OrdBy, ordered)
4659
import Share.Web.Errors (MissingExpectedEntity (MissingExpectedEntity))
4760
import U.Codebase.Branch hiding (NamespaceStats, nonEmptyChildren)
4861
import U.Codebase.Branch qualified as V2 hiding (NamespaceStats)
4962
import U.Codebase.Causal qualified as Causal
63+
import U.Codebase.Causal qualified as U
5064
import U.Codebase.Reference
5165
import U.Codebase.Referent
5266
import U.Codebase.Referent qualified as Referent
67+
import U.Codebase.Sqlite.Branch.Format (LocalBranchBytes (..))
68+
import U.Codebase.Sqlite.Branch.Format qualified as BranchFormat
5369
import U.Codebase.Sqlite.Branch.Full qualified as BranchFull
70+
import U.Codebase.Sqlite.LocalizeObject qualified as Localize
71+
import U.Codebase.Sqlite.TempEntity (TempEntity)
5472
import Unison.Codebase.Path qualified as Path
5573
import Unison.Hash (Hash)
5674
import Unison.Hash32 (Hash32)
75+
import Unison.Hash32 qualified as Hash32
5776
import Unison.Hashing.V2 qualified as H
5877
import Unison.NameSegment.Internal as NameSegment
5978
import Unison.Reference qualified as Reference
79+
import Unison.Sync.Common qualified as SyncCommon
80+
import Unison.Sync.Types qualified as Sync
81+
import Unison.SyncV2.Types (CBORBytes (..))
82+
import Unison.SyncV2.Types qualified as SyncV2
6083
import Unison.Util.Map qualified as Map
6184

6285
expectCausalNamespace :: (HasCallStack, QueryM m) => CausalId -> m (CausalNamespace m)
@@ -407,18 +430,22 @@ loadCausalNamespaceAtPath causalId path = do
407430
-- | Given a namespace whose dependencies have all been pre-saved, save it to the database under the given hash.
408431
savePgNamespace ::
409432
(HasCallStack) =>
433+
-- | The pre-serialized namespace, if available. If Nothing it will be re-generated, which is slower.
434+
Maybe TempEntity ->
410435
-- Normally we'd prefer to always hash it ourselves, but there are some bad hashes in the wild
411436
-- that we need to support saving, if we're passed a hash to save a branch at we will save
412437
-- it at that hash regardless of what the _actual_ hash is.
413438
Maybe BranchHash ->
414439
PgNamespace ->
415440
CodebaseM e (BranchHashId, BranchHash)
416-
savePgNamespace mayBh b@(BranchFull.Branch {terms, types, patches, children}) = do
441+
savePgNamespace maySerialized mayBh b@(BranchFull.Branch {terms, types, patches, children}) = do
417442
codebaseOwnerUserId <- asks Codebase.codebaseOwner
418443
bh <- whenNothing mayBh $ hashPgNamespace b
419444
bhId <- HashQ.ensureBranchHashId bh
420445
queryExpect1Col [sql| SELECT EXISTS (SELECT FROM namespaces WHERE namespace_hash_id = #{bhId}) |] >>= \case
421-
False -> doSave bhId
446+
False -> do
447+
doSave bhId
448+
doSaveSerialized bhId
422449
True -> pure ()
423450
execute_
424451
[sql| INSERT INTO namespace_ownership (namespace_hash_id, user_id)
@@ -427,6 +454,14 @@ savePgNamespace mayBh b@(BranchFull.Branch {terms, types, patches, children}) =
427454
|]
428455
pure (bhId, bh)
429456
where
457+
doSaveSerialized :: BranchHashId -> CodebaseM e ()
458+
doSaveSerialized bhId = do
459+
nsEntity <- case maySerialized of
460+
Just serialized -> pure serialized
461+
Nothing -> SyncCommon.entityToTempEntity id . Sync.N <$> expectNamespaceEntity bhId
462+
let serializedNamespace = SyncV2.serialiseCBORBytes nsEntity
463+
saveSerializedNamespace bhId serializedNamespace
464+
430465
doSave :: BranchHashId -> CodebaseM e ()
431466
doSave bhId = do
432467
-- Expand all term mappings into a list
@@ -608,6 +643,34 @@ savePgNamespace mayBh b@(BranchFull.Branch {terms, types, patches, children}) =
608643
-- Note: this must be run AFTER inserting the namespace and all its children.
609644
execute_ [sql| SELECT save_namespace(#{bhId}) |]
610645

-- | Record the CBOR-serialized form of a namespace against its branch hash id.
--
-- The lazy bytes are converted to a strict 'ByteString', passed through
-- 'DefnQ.ensureBytesIdsOf' to obtain a bytes id, and a
-- @(namespace_hash_id, bytes_id)@ row is inserted into @serialized_namespaces@.
--
-- The insert uses @ON CONFLICT DO NOTHING@, so a conflicting existing row is left
-- untouched rather than raising an error.
-- NOTE(review): which constraint triggers the conflict is defined in the schema,
-- not in this file; presumably it is keyed on @namespace_hash_id@. Confirm.
646+
saveSerializedNamespace :: (QueryM m) => BranchHashId -> CBORBytes TempEntity -> m ()
647+
saveSerializedNamespace bhId (CBORBytes bytes) = do
648+
bytesId <- DefnQ.ensureBytesIdsOf id (BL.toStrict bytes)
649+
execute_
650+
[sql|
651+
INSERT INTO serialized_namespaces (namespace_hash_id, bytes_id)
652+
VALUES (#{bhId}, #{bytesId})
653+
ON CONFLICT DO NOTHING
654+
|]
655+
-- | Build the sync-protocol 'Sync.Namespace' entity for the namespace with the given
-- branch hash id.
--
-- Steps, as written below:
--
--   1. load the namespace with 'expectNamespace';
--   2. convert it to the full branch format with 'Cv.branchV2ToBF';
--   3. split it into lookup tables plus a local branch with 'Localize.localizeBranchG';
--   4. encode the local branch with 'S.encodeNamespace' and wrap it in 'LocalBranchBytes';
--   5. turn each lookup 'Vector.Vector' into a list for the 'Sync.Namespace' record.
--
-- The final @second Hash32.fromHash@ maps the entity's hash type parameter to 'Hash32'.
--
-- NOTE(review): the @expect@ prefix suggests this fails if the namespace is missing;
-- 'expectNamespace' is not visible here, so confirm.
656+
expectNamespaceEntity :: BranchHashId -> CodebaseM e (Sync.Namespace Text Hash32)
657+
expectNamespaceEntity bhId = do
658+
v2Branch <- expectNamespace bhId
659+
second Hash32.fromHash <$> branchToEntity v2Branch
660+
where
661+
branchToEntity branch = do
662+
branchFull <- Cv.branchV2ToBF branch
663+
let (BranchFormat.LocalIds {branchTextLookup, branchDefnLookup, branchPatchLookup, branchChildLookup}, localBranch) = Localize.localizeBranchG branchFull
664+
let bytes = LocalBranchBytes $ S.encodeNamespace localBranch
665+
pure $
666+
Sync.Namespace
667+
{ textLookup = Vector.toList branchTextLookup,
668+
defnLookup = Vector.toList branchDefnLookup,
669+
patchLookup = Vector.toList branchPatchLookup,
670+
childLookup = Vector.toList branchChildLookup,
671+
bytes = bytes
672+
}
673+
611674
-- | Hash a namespace into a BranchHash
612675
hashPgNamespace :: forall m. (QueryM m) => PgNamespace -> m BranchHash
613676
hashPgNamespace b = do
@@ -671,14 +734,23 @@ hashCausal branchHashId ancestorIds = do
671734
let hCausal = H.Causal {branchHash = unBranchHash branchHash, parents = ancestors}
672735
pure . CausalHash . H.contentHash $ hCausal
673736

674-
saveCausal :: Maybe CausalHash -> BranchHashId -> Set CausalId -> CodebaseM e (CausalId, CausalHash)
675-
saveCausal mayCh bhId ancestorIds = do
737+
saveCausal ::
738+
-- | The pre-serialized causal, if available. If Nothing it will be re-generated, which is slower.
739+
Maybe TempEntity ->
740+
Maybe CausalHash ->
741+
BranchHashId ->
742+
Set CausalId ->
743+
CodebaseM e (CausalId, CausalHash)
744+
saveCausal maySerializedCausal mayCh bhId ancestorIds = do
676745
ch <- maybe (hashCausal bhId ancestorIds) pure mayCh
677746
codebaseOwnerUserId <- asks Codebase.codebaseOwner
678747
cId <-
679748
query1Col [sql| SELECT id FROM causals WHERE hash = #{ch} |] >>= \case
680749
Just cId -> pure cId
681-
Nothing -> doSave ch
750+
Nothing -> do
751+
cId <- doSave ch
752+
doSaveSerialized cId
753+
pure cId
682754
execute_
683755
[sql|
684756
INSERT INTO causal_ownership (user_id, causal_id)
@@ -687,6 +759,14 @@ saveCausal mayCh bhId ancestorIds = do
687759
|]
688760
pure (cId, ch)
689761
where
762+
doSaveSerialized cId = do
763+
causalEntity <- case maySerializedCausal of
764+
Just serializedCausal -> pure serializedCausal
765+
Nothing -> do
766+
SyncCommon.entityToTempEntity id . Sync.C <$> expectCausalEntity cId
767+
let serializedCausal = SyncV2.serialiseCBORBytes causalEntity
768+
saveSerializedCausal cId serializedCausal
769+
690770
doSave ch = do
691771
cId <-
692772
queryExpect1Col
@@ -707,6 +787,26 @@ saveCausal mayCh bhId ancestorIds = do
707787
|]
708788
pure cId
709789

-- | Record the CBOR-serialized form of a causal against its causal id.
--
-- The lazy bytes are converted to a strict 'ByteString', passed through
-- 'DefnQ.ensureBytesIdsOf' to obtain a bytes id, and a @(causal_id, bytes_id)@ row
-- is inserted into @serialized_causals@.
--
-- The insert uses @ON CONFLICT DO NOTHING@, so a conflicting existing row is left
-- untouched rather than raising an error.
-- NOTE(review): the conflict target is defined in the schema, not in this file;
-- presumably it is keyed on @causal_id@. Confirm.
790+
saveSerializedCausal :: (QueryM m) => CausalId -> CBORBytes TempEntity -> m ()
791+
saveSerializedCausal causalId (CBORBytes bytes) = do
792+
bytesId <- DefnQ.ensureBytesIdsOf id (BL.toStrict bytes)
793+
execute_
794+
[sql|
795+
INSERT INTO serialized_causals (causal_id, bytes_id)
796+
VALUES (#{causalId}, #{bytesId})
797+
ON CONFLICT DO NOTHING
798+
|]
799+
-- | Build the sync-protocol 'Sync.Causal' entity for the given causal id.
--
-- Loads the causal with 'expectCausalNamespace' and keeps only two things from it:
-- the value (namespace) hash, and the keys of the @parents@ map. Both are converted
-- to 'Hash32'. The parent values themselves are not used.
--
-- NOTE(review): the @expect@ prefix suggests this fails if the causal is missing;
-- confirm against 'expectCausalNamespace'.
800+
expectCausalEntity :: (HasCallStack) => CausalId -> CodebaseM e (Sync.Causal Hash32)
801+
expectCausalEntity causalId = do
802+
U.Causal {valueHash, parents} <- expectCausalNamespace causalId
803+
pure $
804+
( Sync.Causal
805+
{ namespaceHash = Hash32.fromHash $ unBranchHash valueHash,
806+
parents = Set.map (Hash32.fromHash . unCausalHash) . Map.keysSet $ parents
807+
}
808+
)
809+
710810
-- | Get the ref to the result of squashing if we've squashed that ref in the past.
711811
-- Also adds the squash result to current codebase if we find it.
712812
tryGetCachedSquashResult :: BranchHash -> CodebaseM e (Maybe CausalId)
@@ -744,7 +844,7 @@ saveSquashResult unsquashedBranchHash squashedCausalHashId = do
744844
saveV2BranchShallow :: V2.Branch (CodebaseM e) -> CodebaseM e (BranchHashId, BranchHash)
745845
saveV2BranchShallow v2Branch = do
746846
pgNamespace <- expectV2BranchDependencies v2Branch
747-
savePgNamespace Nothing pgNamespace
847+
savePgNamespace Nothing Nothing pgNamespace
748848
where
749849
expectV2BranchDependencies :: V2.Branch (CodebaseM e) -> CodebaseM e PgNamespace
750850
expectV2BranchDependencies V2.Branch {terms, types, patches, children} = do

src/Share/Postgres/Definitions/Queries.hs

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,15 @@ module Share.Postgres.Definitions.Queries
2828
ensureBytesIdsOf,
2929
expectTextsOf,
3030
saveTypeComponent,
31+
32+
-- * For Migrations
33+
saveSerializedComponent,
3134
)
3235
where
3336

3437
import Control.Lens
3538
import Data.ByteString qualified as BS
39+
import Data.ByteString.Lazy.Char8 qualified as BL
3640
import Data.List.NonEmpty qualified as NE
3741
import Data.List.NonEmpty qualified as NonEmpty
3842
import Data.Set qualified as Set
@@ -41,6 +45,7 @@ import Data.Vector (Vector)
4145
import Data.Vector qualified as Vector
4246
import Servant (err500)
4347
import Share.Codebase.Types (CodebaseEnv (..), CodebaseM)
48+
import Share.Codebase.Types qualified as Codebase
4449
import Share.IDs
4550
import Share.Postgres
4651
import Share.Postgres qualified as PG
@@ -64,6 +69,7 @@ import U.Codebase.Sqlite.HashHandle (HashHandle (..))
6469
import U.Codebase.Sqlite.LocalIds qualified as LocalIds
6570
import U.Codebase.Sqlite.Queries qualified as Util
6671
import U.Codebase.Sqlite.Symbol (Symbol)
72+
import U.Codebase.Sqlite.TempEntity (TempEntity)
6773
import U.Codebase.Sqlite.Term.Format qualified as TermFormat
6874
import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle)
6975
import U.Codebase.Term qualified as V2
@@ -81,7 +87,10 @@ import Unison.Reference qualified as V1Reference
8187
import Unison.Referent qualified as V1Referent
8288
import Unison.Runtime.IOSource qualified as Decls
8389
import Unison.Server.Types qualified as Tags
90+
import Unison.Sync.Common qualified as SyncCommon
8491
import Unison.Sync.Types qualified as Share
92+
import Unison.SyncV2.Types (CBORBytes (..))
93+
import Unison.SyncV2.Types qualified as SyncV2
8594

8695
type ResolvedLocalIds = LocalIds.LocalIds' Text ComponentHash
8796

@@ -731,12 +740,12 @@ saveTermComponent componentHash elements = do
731740
let encodedElements =
732741
elements <&> \(localIds, trm, typ) ->
733742
(localIds, TermComponentElementBytes $ termComponentElementToByteString (TermComponentElement trm typ), typ)
734-
saveEncodedTermComponent componentHash encodedElements
743+
saveEncodedTermComponent componentHash Nothing encodedElements
735744

736745
-- | Save an already-encoded term component to the database. This is more efficient than
737746
-- 'saveTermComponent' in cases where you've already got a serialized term (like during sync).
738-
saveEncodedTermComponent :: ComponentHash -> [(PgLocalIds, TermComponentElementBytes, TermFormat.Type)] -> CodebaseM e ()
739-
saveEncodedTermComponent componentHash elements = do
747+
saveEncodedTermComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, TermComponentElementBytes, TermFormat.Type)] -> CodebaseM e ()
748+
saveEncodedTermComponent componentHash maySerialized elements = do
740749
codebaseOwnerUserId <- asks codebaseOwner
741750
componentHashId <- HashQ.ensureComponentHashId componentHash
742751
let elementsTable = elements & imap \i _ -> pgComponentIndex $ fromIntegral @Int i
@@ -775,7 +784,16 @@ saveEncodedTermComponent componentHash elements = do
775784
SELECT #{codebaseOwnerUserId}, element.term_id, element.bytes_id
776785
FROM elements element
777786
|]
787+
doSaveSerialized componentHashId
778788
where
789+
doSaveSerialized chId = do
790+
componentEntity <- case maySerialized of
791+
Just serialized -> pure serialized
792+
Nothing -> do
793+
SyncCommon.entityToTempEntity id . Share.TC <$> expectShareTermComponent chId
794+
let serializedEntity = SyncV2.serialiseCBORBytes componentEntity
795+
saveSerializedComponent chId serializedEntity
796+
779797
saveSharedTermAndLocalMappings :: ComponentHashId -> CodebaseM e (NE.NonEmpty TermId)
780798
saveSharedTermAndLocalMappings componentHashId = do
781799
let HashHandle {toReference = hashType} = v2HashHandle
@@ -847,8 +865,8 @@ saveEncodedTermComponent componentHash elements = do
847865
|]
848866
pure termIds
849867

850-
saveTypeComponent :: ComponentHash -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e ()
851-
saveTypeComponent componentHash elements = do
868+
saveTypeComponent :: ComponentHash -> Maybe TempEntity -> [(PgLocalIds, DeclFormat.Decl Symbol)] -> CodebaseM e ()
869+
saveTypeComponent componentHash maySerialized elements = do
852870
codebaseOwnerUserId <- asks codebaseOwner
853871
componentHashId <- HashQ.ensureComponentHashId componentHash
854872
let elementsTable = elements & imap \i _ -> fromIntegral @Int @Int32 i
@@ -886,7 +904,16 @@ saveTypeComponent componentHash elements = do
886904
SELECT #{codebaseOwnerUserId}, element.type_id, element.bytes_id
887905
FROM elements element
888906
|]
907+
doSaveSerialized componentHashId
889908
where
909+
doSaveSerialized chId = do
910+
componentEntity <- case maySerialized of
911+
Just serialized -> pure serialized
912+
Nothing -> do
913+
SyncCommon.entityToTempEntity id . Share.DC <$> expectShareTypeComponent chId
914+
let serializedEntity = SyncV2.serialiseCBORBytes componentEntity
915+
saveSerializedComponent chId serializedEntity
916+
890917
saveConstructors :: [(TypeId, (PgLocalIds, DeclFormat.Decl Symbol))] -> CodebaseM e ()
891918
saveConstructors typeElements = do
892919
typeElementsWithResolvedLocals <- lift $ resolveLocalIdsOf (traversed . _2 . _1) typeElements
@@ -1121,3 +1148,14 @@ typeTagsByReferencesOf trav s = do
11211148
tagFromDeclKind = \case
11221149
DefnTypes.Ability -> Tags.Ability
11231150
DefnTypes.Data -> Tags.Data
1151+
-- | Record the CBOR-serialized form of a term or type component for the current
-- codebase owner.
--
-- Unlike the namespace and causal variants, the row is scoped by user: the owner id
-- is read from the 'CodebaseM' environment ('Codebase.codebaseOwner') and stored with
-- the component hash id and the bytes id returned by 'ensureBytesIdsOf'.
--
-- The insert uses @ON CONFLICT DO NOTHING@, so a conflicting existing row is left
-- untouched rather than raising an error.
-- NOTE(review): the conflict target is defined in the schema, not in this file;
-- presumably it is keyed on @(user_id, component_hash_id)@. Confirm.
1152+
saveSerializedComponent :: ComponentHashId -> CBORBytes TempEntity -> CodebaseM e ()
1153+
saveSerializedComponent chId (CBORBytes bytes) = do
1154+
codebaseOwnerUserId <- asks Codebase.codebaseOwner
1155+
bytesId <- ensureBytesIdsOf id (BL.toStrict bytes)
1156+
execute_
1157+
[sql|
1158+
INSERT INTO serialized_components (user_id, component_hash_id, bytes_id)
1159+
VALUES (#{codebaseOwnerUserId}, #{chId}, #{bytesId})
1160+
ON CONFLICT DO NOTHING
1161+
|]

0 commit comments

Comments
 (0)