55-- | An incremental merge of multiple runs.
66module Database.LSMTree.Internal.MergingRun (
77 MergingRun (.. )
8- , unsafeNew
8+ , new
9+ , newCompleted
910 , duplicateRuns
1011 , supplyCredits
1112 , expectCompleted
@@ -32,14 +33,21 @@ import Control.Monad.Class.MonadThrow (MonadCatch (bracketOnError),
3233import Control.Monad.Primitive
3334import Control.RefCount
3435import Control.TempRegistry
36+ import Data.Maybe (fromMaybe )
3537import Data.Primitive.MutVar
3638import Data.Primitive.PrimVar
3739import qualified Data.Vector as V
3840import Database.LSMTree.Internal.Assertions (assert )
3941import Database.LSMTree.Internal.Entry (NumEntries (.. ), unNumEntries )
42+ import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue )
4043import Database.LSMTree.Internal.Merge (Merge , StepResult (.. ))
4144import qualified Database.LSMTree.Internal.Merge as Merge
45+ import Database.LSMTree.Internal.Paths (RunFsPaths (.. ))
4246import Database.LSMTree.Internal.Run (Run )
47+ import qualified Database.LSMTree.Internal.Run as Run
48+ import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc )
49+ import System.FS.API (HasFS )
50+ import System.FS.BlockIO.API (HasBlockIO )
4351
4452data MergingRun m h = MergingRun {
4553 mergePolicy :: ! MergePolicyForLevel
@@ -103,16 +111,65 @@ instance NFData MergeKnownCompleted where
103111 rnf MergeKnownCompleted = ()
104112 rnf MergeMaybeCompleted = ()
105113
106- {-# SPECIALISE unsafeNew ::
114+ {-# SPECIALISE new ::
115+ HasFS IO h
116+ -> HasBlockIO IO h
117+ -> ResolveSerialisedValue
118+ -> Run.RunDataCaching
119+ -> RunBloomFilterAlloc
120+ -> Merge.Level
121+ -> MergePolicyForLevel
122+ -> RunFsPaths
123+ -> V.Vector (Ref (Run IO h))
124+ -> IO (Ref (MergingRun IO h)) #-}
125+ -- | Create a new merging run, returning a reference to it that must ultimately
126+ -- be released via 'releaseRef'.
127+ --
128+ -- Takes over ownership of the references to the runs passed.
129+ --
130+ -- This function should be run with asynchronous exceptions masked to prevent
131+ -- failing after internal resources have already been created.
132+ new ::
133+ (MonadMVar m , MonadMask m , MonadSTM m , MonadST m )
134+ => HasFS m h
135+ -> HasBlockIO m h
136+ -> ResolveSerialisedValue
137+ -> Run. RunDataCaching
138+ -> RunBloomFilterAlloc
139+ -> Merge. Level
140+ -> MergePolicyForLevel
141+ -> RunFsPaths
142+ -> V. Vector (Ref (Run m h ))
143+ -> m (Ref (MergingRun m h ))
144+ new hfs hbio resolve caching alloc mergeLevel mergePolicy runPaths runs = do
145+ merge <- fromMaybe (error " newMerge: merges can not be empty" )
146+ <$> Merge. new hfs hbio caching alloc mergeLevel resolve runPaths runs
147+ let numInputRuns = NumRuns $ V. length runs
148+ let numInputEntries = V. foldMap' Run. size runs
149+ spentCreditsVar <- SpentCreditsVar <$> newPrimVar 0
150+ unsafeNew mergePolicy numInputRuns numInputEntries MergeMaybeCompleted $
151+ OngoingMerge runs spentCreditsVar merge
152+
153+ {-# SPECIALISE newCompleted ::
107154 MergePolicyForLevel
108155 -> NumRuns
109156 -> NumEntries
110- -> MergeKnownCompleted
111- -> MergingRunState IO h
157+ -> Ref (Run IO h)
112158 -> IO (Ref (MergingRun IO h)) #-}
113- -- | This allows constructing ill-formed MergingRuns, but the flexibility is
114- -- needed for creating a merging run that is already Completed, as well as
115- -- opening a merging run from a snapshot.
159+ -- | Create a merging run that is already in the completed state, returning a
160+ -- reference that must ultimately be released via 'releaseRef'.
161+ newCompleted ::
162+ (MonadMVar m , MonadMask m , MonadSTM m , MonadST m )
163+ => MergePolicyForLevel
164+ -> NumRuns
165+ -> NumEntries
166+ -> Ref (Run m h )
167+ -> m (Ref (MergingRun m h ))
168+ newCompleted mergePolicy numInputRuns numInputEntries run = do
169+ unsafeNew mergePolicy numInputRuns numInputEntries MergeKnownCompleted $
170+ CompletedMerge run
171+
172+ {-# INLINE unsafeNew #-}
116173unsafeNew ::
117174 (MonadMVar m , MonadMask m , MonadSTM m , MonadST m )
118175 => MergePolicyForLevel
@@ -150,20 +207,18 @@ unsafeNew mergePolicy mergeNumRuns mergeNumEntries knownCompleted state = do
150207
151208-- | Create references to the runs that should be queried for lookups.
152209-- In particular, if the merge is not complete, these are the input runs.
153- {-# SPECIALISE duplicateRuns :: TempRegistry IO -> Ref (MergingRun IO h) -> IO (V.Vector (Ref (Run IO h))) #-}
210+ {-# SPECIALISE duplicateRuns :: Ref (MergingRun IO h) -> IO (V.Vector (Ref (Run IO h))) #-}
154211duplicateRuns ::
155212 (PrimMonad m , MonadMVar m , MonadMask m )
156- => TempRegistry m
157- -> Ref (MergingRun m h )
213+ => Ref (MergingRun m h )
158214 -> m (V. Vector (Ref (Run m h )))
159- duplicateRuns reg (DeRef mr) =
215+ duplicateRuns (DeRef mr) =
160216 -- We take the references while holding the MVar to make sure the MergingRun
161217 -- does not get completed concurrently before we are done.
162218 withMVar (mergeState mr) $ \ case
163- CompletedMerge r -> V. singleton <$> dupRun r
164- OngoingMerge rs _ _ -> V. mapM dupRun rs
165- where
166- dupRun r = allocateTemp reg (dupRef r) releaseRef
219+ CompletedMerge r -> V. singleton <$> dupRef r
220+ OngoingMerge rs _ _ -> withTempRegistry $ \ reg ->
221+ V. mapM (\ r -> allocateTemp reg (dupRef r) releaseRef) rs
167222
168223{- ------------------------------------------------------------------------------
169224 Credits
@@ -331,19 +386,19 @@ tryTakeUnspentCredits ::
331386tryTakeUnspentCredits
332387 unspentCreditsVar@ (UnspentCreditsVar ! var)
333388 thresh@ (CreditThreshold ! creditsThresh)
334- (Credits ! prev )
335- | prev < creditsThresh = pure Nothing
389+ (Credits ! before )
390+ | before < creditsThresh = pure Nothing
336391 | otherwise = do
337392 -- numThresholds is guaranteed to be >= 1
338- let ! numThresholds = prev `div` creditsThresh
393+ let ! numThresholds = before `div` creditsThresh
339394 ! creditsToTake = numThresholds * creditsThresh
340- ! new = prev - creditsToTake
341- assert (new < creditsThresh) $ pure ()
342- prev ' <- casInt var prev new
343- if prev ' == prev then
395+ ! after = before - creditsToTake
396+ assert (after < creditsThresh) $ pure ()
397+ before ' <- casInt var before after
398+ if before ' == before then
344399 pure (Just (Credits creditsToTake))
345400 else
346- tryTakeUnspentCredits unspentCreditsVar thresh (Credits prev ')
401+ tryTakeUnspentCredits unspentCreditsVar thresh (Credits before ')
347402
348403{-# SPECIALISE putBackUnspentCredits ::
349404 UnspentCreditsVar RealWorld
@@ -446,13 +501,14 @@ completeMerge mergeVar mergeKnownCompletedVar = do
446501 pure $! CompletedMerge r
447502
448503{-# SPECIALISE expectCompleted ::
449- TempRegistry IO
450- -> Ref (MergingRun IO h)
504+ Ref (MergingRun IO h)
451505 -> IO (Ref (Run IO h)) #-}
506+ -- | This does /not/ release the reference, but allocates a new reference for
507+ -- the returned run, which must be released at some point.
452508expectCompleted ::
453509 (MonadMVar m , MonadSTM m , MonadST m , MonadMask m )
454- => TempRegistry m -> Ref (MergingRun m h ) -> m (Ref (Run m h ))
455- expectCompleted reg mr @ (DeRef MergingRun {.. }) = do
510+ => Ref (MergingRun m h ) -> m (Ref (Run m h ))
511+ expectCompleted (DeRef MergingRun {.. }) = do
456512 knownCompleted <- readMutVar mergeKnownCompleted
457513 -- The merge is not guaranteed to be complete, so we do the remaining steps
458514 when (knownCompleted == MergeMaybeCompleted ) $ do
@@ -462,13 +518,9 @@ expectCompleted reg mr@(DeRef MergingRun {..}) = do
462518 when isMergeDone $ completeMerge mergeState mergeKnownCompleted
463519 -- TODO: can we think of a check to see if we did not do too much work
464520 -- here?
465- r <- withMVar mergeState $ \ case
466- CompletedMerge r -> pure r
521+ withMVar mergeState $ \ case
522+ CompletedMerge r -> dupRef r -- return a fresh reference to the run
467523 OngoingMerge {} -> do
468524 -- If the algorithm finds an ongoing merge here, then it is a bug in
469525 -- our merge sceduling algorithm. As such, we throw a pure error.
470526 error " expectCompleted: expected a completed merge, but found an ongoing merge"
471- -- return a fresh reference to the run
472- r' <- allocateTemp reg (dupRef r) releaseRef
473- freeTemp reg (releaseRef mr)
474- pure r'
0 commit comments