Skip to content

Commit ba1f16f

Browse files
committed
Move MergePolicyForLevel from MergingRun to IncomingRun
As part of the changes to the scheduled merges prototype to introduce table unions, this refactoring was done. This is because the merging run gets reused in tree merges, where the merging policy makes no sense. So we move it up one level into something that's specific to the levels (the incomming run). So this is the corresonding refactoring as in the prototype, and is preparation for introducing table unions.
1 parent 42711c5 commit ba1f16f

File tree

3 files changed

+47
-55
lines changed

3 files changed

+47
-55
lines changed

src/Database/LSMTree/Internal/MergeSchedule.hs

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,8 @@ mkLevelsCache reg lvls = do
202202
foldRunAndMergeM k1 k2 ls =
203203
fmap fold $ V.forM ls $ \(Level ir rs) -> do
204204
incoming <- case ir of
205-
Single r -> k1 r
206-
Merging mr -> k2 mr
205+
Single r -> k1 r
206+
Merging _ mr -> k2 mr
207207
(incoming <>) . fold <$> V.forM rs k1
208208

209209
{-# SPECIALISE rebuildCache ::
@@ -283,7 +283,7 @@ data Level m h = Level {
283283
-- | An incoming run is either a single run, or a merge.
284284
data IncomingRun m h =
285285
Single !(Ref (Run m h))
286-
| Merging !(Ref (MergingRun m h))
286+
| Merging !MergePolicyForLevel !(Ref (MergingRun m h))
287287

288288
mergePolicyForLevel :: MergePolicy -> LevelNo -> Levels m h -> MergePolicyForLevel
289289
mergePolicyForLevel MergePolicyLazyLevelling (LevelNo n) nextLevels
@@ -329,16 +329,16 @@ duplicateIncomingRun ::
329329
duplicateIncomingRun reg (Single r) =
330330
Single <$> allocateTemp reg (dupRef r) releaseRef
331331

332-
duplicateIncomingRun reg (Merging mr) =
333-
Merging <$> allocateTemp reg (dupRef mr) releaseRef
332+
duplicateIncomingRun reg (Merging mp mr) =
333+
Merging mp <$> allocateTemp reg (dupRef mr) releaseRef
334334

335335
{-# SPECIALISE releaseIncomingRun :: TempRegistry IO -> IncomingRun IO h -> IO () #-}
336336
releaseIncomingRun ::
337337
(PrimMonad m, MonadMask m, MonadMVar m)
338338
=> TempRegistry m
339339
-> IncomingRun m h -> m ()
340-
releaseIncomingRun reg (Single r) = freeTemp reg (releaseRef r)
341-
releaseIncomingRun reg (Merging mr) = freeTemp reg (releaseRef mr)
340+
releaseIncomingRun reg (Single r) = freeTemp reg (releaseRef r)
341+
releaseIncomingRun reg (Merging _ mr) = freeTemp reg (releaseRef mr)
342342

343343
{-# SPECIALISE iforLevelM_ :: Levels IO h -> (LevelNo -> Level IO h -> IO ()) -> IO () #-}
344344
iforLevelM_ :: Monad m => Levels m h -> (LevelNo -> Level m h -> m ()) -> m ()
@@ -618,8 +618,8 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels =
618618
expectCompletedMerge :: LevelNo -> IncomingRun m h -> m (Ref (Run m h))
619619
expectCompletedMerge ln ir = do
620620
r <- case ir of
621-
Single r -> pure r
622-
Merging mr -> do
621+
Single r -> pure r
622+
Merging _ mr -> do
623623
r <- allocateTemp reg (MR.expectCompleted mr) releaseRef
624624
freeTemp reg (releaseRef mr)
625625
pure r
@@ -656,7 +656,7 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels =
656656
-- The runs will end up inside the merging run, with fresh references.
657657
-- The original references can be released (but only on the happy path).
658658
mr <- allocateTemp reg
659-
(MR.new hfs hbio resolve caching alloc mergeLevel mergePolicy runPaths rs)
659+
(MR.new hfs hbio resolve caching alloc mergeLevel runPaths rs)
660660
releaseRef
661661
V.forM_ rs $ \r -> freeTemp reg (releaseRef r)
662662
case confMergeSchedule of
@@ -671,7 +671,7 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels =
671671
traceWith tr $ AtLevel ln $
672672
TraceCompletedMerge (Run.size r) (Run.runFsPathsNumber r)
673673

674-
return (Merging mr)
674+
return (Merging mergePolicy mr)
675675

676676
-- $setup
677677
-- >>> import Database.LSMTree.Internal.Entry
@@ -790,8 +790,8 @@ supplyCredits conf c levels =
790790
iforLevelM_ levels $ \ln (Level ir _rs) ->
791791
case ir of
792792
Single{} -> pure ()
793-
Merging mr -> do
794-
let !c' = scaleCreditsForMerge mr c
793+
Merging mp mr -> do
794+
let !c' = scaleCreditsForMerge mp mr c
795795
let !thresh = creditThresholdForLevel conf ln
796796
MR.supplyCredits c' thresh mr
797797

@@ -801,28 +801,27 @@ supplyCredits conf c levels =
801801
-- Initially, 1 update supplies 1 credit. However, since merging runs have
802802
-- different numbers of input runs/entries, we may have to a more or less
803803
-- merging work than 1 merge step for each credit.
804-
scaleCreditsForMerge :: Ref (MergingRun m h) -> Credits -> MR.Credits
804+
scaleCreditsForMerge :: MergePolicyForLevel -> Ref (MergingRun m h) -> Credits -> MR.Credits
805805
-- A single run is a trivially completed merge, so it requires no credits.
806-
scaleCreditsForMerge (DeRef mr) (Credits c) =
807-
case MR.mergePolicy mr of
808-
LevelTiering ->
809-
-- A tiering merge has 5 runs at most (one could be held back to merged
810-
-- again) and must be completed before the level is full (once 4 more
811-
-- runs come in).
812-
MR.Credits (c * (1 + 4))
813-
LevelLevelling ->
814-
-- A levelling merge has 1 input run and one resident run, which is (up
815-
-- to) 4x bigger than the others. It needs to be completed before
816-
-- another run comes in.
817-
-- TODO: this is currently assuming a naive worst case, where the
818-
-- resident run is as large as it can be for the current level. We
819-
-- probably have enough information available here to lower the
820-
-- worst-case upper bound by looking at the sizes of the input runs.
821-
-- As as result, merge work would/could be more evenly distributed over
822-
-- time when the resident run is smaller than the worst case.
823-
let NumRuns n = MR.mergeNumRuns mr
824-
-- same as division rounding up: ceiling (c * n / 4)
825-
in MR.Credits ((c * n + 3) `div` 4)
806+
scaleCreditsForMerge LevelTiering _ (Credits c) =
807+
-- A tiering merge has 5 runs at most (one could be held back to merged
808+
-- again) and must be completed before the level is full (once 4 more
809+
-- runs come in).
810+
MR.Credits (c * (1 + 4))
811+
812+
scaleCreditsForMerge LevelLevelling (DeRef mr) (Credits c) =
813+
-- A levelling merge has 1 input run and one resident run, which is (up
814+
-- to) 4x bigger than the others. It needs to be completed before
815+
-- another run comes in.
816+
-- TODO: this is currently assuming a naive worst case, where the
817+
-- resident run is as large as it can be for the current level. We
818+
-- probably have enough information available here to lower the
819+
-- worst-case upper bound by looking at the sizes of the input runs.
820+
-- As as result, merge work would/could be more evenly distributed over
821+
-- time when the resident run is smaller than the worst case.
822+
let NumRuns n = MR.mergeNumRuns mr
823+
-- same as division rounding up: ceiling (c * n / 4)
824+
in MR.Credits ((c * n + 3) `div` 4)
826825

827826
-- TODO: the thresholds for doing merge work should be different for each level,
828827
-- maybe co-prime?

src/Database/LSMTree/Internal/MergingRun.hs

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ import System.FS.API (HasFS)
5050
import System.FS.BlockIO.API (HasBlockIO)
5151

5252
data MergingRun m h = MergingRun {
53-
mergePolicy :: !MergePolicyForLevel
54-
, mergeNumRuns :: !NumRuns
53+
mergeNumRuns :: !NumRuns
5554
-- | Sum of number of entries in the input runs
5655
, mergeNumEntries :: !NumEntries
5756
-- | The number of currently /unspent/ credits
@@ -118,7 +117,6 @@ instance NFData MergeKnownCompleted where
118117
-> Run.RunDataCaching
119118
-> RunBloomFilterAlloc
120119
-> Merge.Level
121-
-> MergePolicyForLevel
122120
-> RunFsPaths
123121
-> V.Vector (Ref (Run IO h))
124122
-> IO (Ref (MergingRun IO h)) #-}
@@ -137,11 +135,10 @@ new ::
137135
-> Run.RunDataCaching
138136
-> RunBloomFilterAlloc
139137
-> Merge.Level
140-
-> MergePolicyForLevel
141138
-> RunFsPaths
142139
-> V.Vector (Ref (Run m h))
143140
-> m (Ref (MergingRun m h))
144-
new hfs hbio resolve caching alloc mergeLevel mergePolicy runPaths inputRuns =
141+
new hfs hbio resolve caching alloc mergeLevel runPaths inputRuns =
145142
-- If creating the Merge fails, we must release the references again.
146143
withTempRegistry $ \reg -> do
147144
runs <- V.mapM (\r -> allocateTemp reg (dupRef r) releaseRef) inputRuns
@@ -150,12 +147,11 @@ new hfs hbio resolve caching alloc mergeLevel mergePolicy runPaths inputRuns =
150147
let numInputRuns = NumRuns $ V.length runs
151148
let numInputEntries = V.foldMap' Run.size runs
152149
spentCreditsVar <- SpentCreditsVar <$> newPrimVar 0
153-
unsafeNew mergePolicy numInputRuns numInputEntries MergeMaybeCompleted $
150+
unsafeNew numInputRuns numInputEntries MergeMaybeCompleted $
154151
OngoingMerge runs spentCreditsVar merge
155152

156153
{-# SPECIALISE newCompleted ::
157-
MergePolicyForLevel
158-
-> NumRuns
154+
NumRuns
159155
-> NumEntries
160156
-> Ref (Run IO h)
161157
-> IO (Ref (MergingRun IO h)) #-}
@@ -168,26 +164,24 @@ new hfs hbio resolve caching alloc mergeLevel mergePolicy runPaths inputRuns =
168164
-- failing after internal resources have already been created.
169165
newCompleted ::
170166
(MonadMVar m, MonadMask m, MonadSTM m, MonadST m)
171-
=> MergePolicyForLevel
172-
-> NumRuns
167+
=> NumRuns
173168
-> NumEntries
174169
-> Ref (Run m h)
175170
-> m (Ref (MergingRun m h))
176-
newCompleted mergePolicy numInputRuns numInputEntries inputRun = do
171+
newCompleted numInputRuns numInputEntries inputRun = do
177172
bracketOnError (dupRef inputRun) releaseRef $ \run ->
178-
unsafeNew mergePolicy numInputRuns numInputEntries MergeKnownCompleted $
173+
unsafeNew numInputRuns numInputEntries MergeKnownCompleted $
179174
CompletedMerge run
180175

181176
{-# INLINE unsafeNew #-}
182177
unsafeNew ::
183178
(MonadMVar m, MonadMask m, MonadSTM m, MonadST m)
184-
=> MergePolicyForLevel
185-
-> NumRuns
179+
=> NumRuns
186180
-> NumEntries
187181
-> MergeKnownCompleted
188182
-> MergingRunState m h
189183
-> m (Ref (MergingRun m h))
190-
unsafeNew mergePolicy mergeNumRuns mergeNumEntries knownCompleted state = do
184+
unsafeNew mergeNumRuns mergeNumEntries knownCompleted state = do
191185
mergeUnspentCredits <- UnspentCreditsVar <$> newPrimVar 0
192186
mergeStepsPerformed <- TotalStepsVar <$> newPrimVar 0
193187
case state of
@@ -197,8 +191,7 @@ unsafeNew mergePolicy mergeNumRuns mergeNumEntries knownCompleted state = do
197191
mergeState <- newMVar $! state
198192
newRef (finalise mergeState) $ \mergeRefCounter ->
199193
MergingRun {
200-
mergePolicy
201-
, mergeNumRuns
194+
mergeNumRuns
202195
, mergeNumEntries
203196
, mergeUnspentCredits
204197
, mergeStepsPerformed

src/Database/LSMTree/Internal/Snapshot.hs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ toSnapIncomingRun (Single r) = pure (SnapSingleRun r)
185185
-- We need to know how many credits were yet unspent so we can restore merge
186186
-- work on snapshot load. No need to snapshot the contents of totalStepsVar
187187
-- here, since we still start counting from 0 again when loading the snapshot.
188-
toSnapIncomingRun (Merging (DeRef MR.MergingRun {..})) = do
188+
toSnapIncomingRun (Merging mergePolicy (DeRef MR.MergingRun {..})) = do
189189
unspentCredits <- readPrimVar (MR.getUnspentCreditsVar mergeUnspentCredits)
190190
smrs <- withMVar mergeState $ \mrs -> toSnapMergingRunState mrs
191191
pure $
@@ -345,14 +345,14 @@ fromSnapLevels reg hfs hbio conf@TableConfig{..} uc resolve dir (SnapLevels leve
345345
fromSnapIncomingRun (SnapSingleRun run) = do
346346
Single <$> dupRun run
347347
fromSnapIncomingRun (SnapMergingRun mpfl nr ne unspentCredits smrs) = do
348-
Merging <$> case smrs of
348+
Merging mpfl <$> case smrs of
349349
SnapCompletedMerge run ->
350-
allocateTemp reg (MR.newCompleted mpfl nr ne run) releaseRef
350+
allocateTemp reg (MR.newCompleted nr ne run) releaseRef
351351

352352
SnapOngoingMerge runs spentCredits lvl -> do
353353
rn <- uniqueToRunNumber <$> incrUniqCounter uc
354354
mr <- allocateTemp reg
355-
(MR.new hfs hbio resolve caching alloc lvl mpfl (mkPath rn) runs)
355+
(MR.new hfs hbio resolve caching alloc lvl (mkPath rn) runs)
356356
releaseRef
357357
-- When a snapshot is created, merge progress is lost, so we
358358
-- have to redo merging work here. UnspentCredits and

0 commit comments

Comments
 (0)