1616-- Finally, it demonstrates a design for table unions, including a
1717-- representation for in-progress merging trees.
1818--
19- -- The merging policy that this prototype uses is power 4 \"lazy levelling\".
20- -- Power 4 means each level is 4 times bigger than the previous level.
19+ -- The merging policy that this prototype uses is \"lazy levelling\".
20+ -- Each level is T times bigger than the previous level.
2121-- Lazy levelling means we use tiering for every level except the last level
2222-- which uses levelling. Though note that the first level always uses tiering,
2323-- even if the first level is also the last level. This is to simplify flushing
@@ -123,6 +123,8 @@ data LSM s = LSMHandle !(STRef s Counter)
123123-- | Configuration options for individual LSM tables.
124124data LSMConfig = LSMConfig {
125125 configMaxWriteBufferSize :: ! Int
126+ -- | Also known as the parameter @T@
127+ , configSizeRatio :: ! Int
126128 }
127129 deriving stock (Show , Eq )
128130
@@ -316,7 +318,7 @@ mergeTypeForLevel _ _ = MergeMidLevel
316318-- the last level.
317319--
318320invariant :: forall s . LSMConfig -> LSMContent s -> ST s ()
319- invariant conf (LSMContent _ levels ul) = do
321+ invariant conf@ LSMConfig { .. } (LSMContent _ levels ul) = do
320322 levelsInvariant 1 levels
321323 case ul of
322324 NoUnion -> return ()
@@ -335,7 +337,7 @@ invariant conf (LSMContent _ levels ul) = do
335337 assertST $ mt == mergeTypeForLevel ls ul
336338 readSTRef ref
337339
338- assertST $ length rs <= 3
340+ assertST $ length rs <= configSizeRatio - 1
339341 expectedRunLengths ln rs ls
340342 expectedMergingRunLengths ln ir mrs ls
341343
@@ -353,12 +355,12 @@ invariant conf (LSMContent _ levels ul) = do
353355 -- 'IncomingRun', using 'Single'. Thus there are no other resident runs.
354356 MergePolicyLevelling -> assertST $ null rs
355357 -- Runs in tiering levels usually fit that size, but they can be one
356- -- larger, if a run has been held back (creating a 5 -way merge).
358+ -- larger, if a run has been held back (creating a (T+1) -way merge).
357359 MergePolicyTiering -> assertST $ all (\ r -> runToLevelNumber MergePolicyTiering conf r `elem` [ln, ln+ 1 ]) rs
358360 -- (This is actually still not really true, but will hold in practice.
359361 -- In the pathological case, all runs passed to the next level can be
360- -- factor (5/4 ) too large, and there the same holding back can lead to
361- -- factor (6/4 ) etc., until at level 12 a run is two levels too large.
362+ -- factor ((T+1)/T ) too large, and there the same holding back can lead to
363+ -- factor ((T+2)/T ) etc., until at level 12 (for T = 4) a run is two levels too large.
362364
363365 -- Incoming runs being merged also need to be of the right size, but the
364366 -- conditions are more complicated.
@@ -381,17 +383,17 @@ invariant conf (LSMContent _ levels ul) = do
381383 (_, CompletedMerge r) ->
382384 assertST $ runToLevelNumber MergePolicyLevelling conf r <= ln+ 1
383385
384- -- An ongoing merge for levelling should have 4 incoming runs of
386+ -- An ongoing merge for levelling should have T incoming runs of
385387 -- the right size for the level below (or slightly larger due to
386388 -- holding back underfull runs), and 1 run from this level,
387389 -- but the run from this level can be of almost any size for the
388390 -- same reasons as above. Although if this is the first merge for
389- -- a new level, it'll have only 4 runs.
391+ -- a new level, it'll have only T runs.
390392 (_, OngoingMerge _ rs _) -> do
391- assertST $ length rs `elem` [4 , 5 ]
393+ assertST $ length rs `elem` [configSizeRatio, configSizeRatio + 1 ]
392394 assertST $ all (\ r -> runSize r > 0 ) rs -- don't merge empty runs
393- let incoming = take 4 rs
394- let resident = drop 4 rs
395+ let incoming = take configSizeRatio rs
396+ let resident = drop configSizeRatio rs
395397 assertST $ all (\ r -> runToLevelNumber MergePolicyTiering conf r `elem` [ln- 1 , ln]) incoming
396398 assertST $ all (\ r -> runToLevelNumber MergePolicyLevelling conf r <= ln+ 1 ) resident
397399
@@ -419,12 +421,12 @@ invariant conf (LSMContent _ levels ul) = do
419421 (_, CompletedMerge r, MergeMidLevel ) ->
420422 assertST $ runToLevelNumber MergePolicyTiering conf r `elem` [ln- 1 , ln, ln+ 1 ]
421423
422- -- An ongoing merge for tiering should have 4 incoming runs of
424+ -- An ongoing merge for tiering should have T incoming runs of
423425 -- the right size for the level below, and at most 1 run held back
424426 -- due to being too small (which would thus also be of the size of
425427 -- the level below).
426428 (_, OngoingMerge _ rs _, _) -> do
427- assertST $ length rs == 4 || length rs == 5
429+ assertST $ length rs == configSizeRatio || length rs == configSizeRatio + 1
428430 assertST $ all (\ r -> runToLevelNumber MergePolicyTiering conf r == ln- 1 ) rs
429431
430432-- We don't make many assumptions apart from what the types already enforce.
@@ -533,16 +535,16 @@ assertST p = assert p $ return ()
533535-- The size of a tiering run at each level is allowed to be
534536-- @bufferSize*sizeRatio^(level-1) < size <= bufferSize*sizeRatio^level@.
535537--
536- -- >>> levelNumberToMaxRunSize MergePolicyTiering (LSMConfig 2) <$> [0, 1, 2, 3, 4]
538+ -- >>> levelNumberToMaxRunSize MergePolicyTiering (LSMConfig 2 4 ) <$> [0, 1, 2, 3, 4]
537539-- [0,2,8,32,128]
538540--
539541-- The @size@ of a levelling run at each level is allowed to be
540- -- @bufferSize*sizeRatio^( level-1) < size <= bufferSize*sizeRatio^(level+1)@. A
542+ -- @bufferSize*sizeRatio^level < size <= bufferSize*sizeRatio^(level+1)@. A
541543-- levelling run can take up a whole level, so the maximum size of a run is
542544-- @sizeRatio@ times larger than the maximum size of a tiering run on the same
543545-- level.
544546--
545- -- >>> levelNumberToMaxRunSize MergePolicyLevelling (LSMConfig 2) <$> [0, 1, 2, 3, 4]
547+ -- >>> levelNumberToMaxRunSize MergePolicyLevelling (LSMConfig 2 4 ) <$> [0, 1, 2, 3, 4]
546548-- [0,8,32,128,512]
547549levelNumberToMaxRunSize :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int
548550levelNumberToMaxRunSize = \ case
@@ -551,10 +553,12 @@ levelNumberToMaxRunSize = \case
551553
552554-- | See 'levelNumberToMaxRunSize'
553555levelNumberToMaxRunSizeTiering :: HasCallStack => LSMConfig -> LevelNo -> Int
554- levelNumberToMaxRunSizeTiering LSMConfig {configMaxWriteBufferSize = bufSize} ln
556+ levelNumberToMaxRunSizeTiering
557+ LSMConfig {configMaxWriteBufferSize = bufSize, configSizeRatio = sizeRatio}
558+ ln
555559 | ln < 0 = error " level number must be non-negative"
556560 | ln == 0 = 0
557- | otherwise = fromIntegerChecked (toInteger bufSize * 4 ^ pred (toInteger ln))
561+ | otherwise = fromIntegerChecked (toInteger bufSize * toInteger sizeRatio ^ pred (toInteger ln))
558562 -- Perform the computation with arbitrary precision using 'Integers', but
559563 -- throw an error if the result does not fit into an 'Int'.
560564
@@ -574,10 +578,10 @@ runToLevelNumber mpl conf run = runSizeToLevelNumber mpl conf (runSize run)
574578-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
575579-- sizes at each level.
576580--
577- -- >>> runSizeToLevelNumber MergePolicyTiering (LSMConfig 2) <$> [0,2,8,32,128]
581+ -- >>> runSizeToLevelNumber MergePolicyTiering (LSMConfig 2 4 ) <$> [0,2,8,32,128]
578582-- [0,1,2,3,4]
579583--
580- -- >>> runSizeToLevelNumber MergePolicyLevelling (LSMConfig 2) <$> [0,8,32,128,512]
584+ -- >>> runSizeToLevelNumber MergePolicyLevelling (LSMConfig 2 4 ) <$> [0,8,32,128,512]
581585-- [0,1,2,3,4]
582586runSizeToLevelNumber :: HasCallStack => MergePolicy -> LSMConfig -> Int -> LevelNo
583587runSizeToLevelNumber = \ case
@@ -590,7 +594,7 @@ runSizeToLevelNumberTiering conf n
590594 | n < 0 = error " run size must be positive"
591595 -- TODO: enumerating level numbers is potentially costly, but it does give a
592596 -- precise answer, where we'd otherwise have to deal with Double rounding
593- -- errors in computing @ln = logBase 4 (n / configMaxWriteBufferSize) + 1@
597+ -- errors in computing @ln = logBase configSizeRatio (n / configMaxWriteBufferSize) + 1@
594598 | otherwise = head $ -- the list is guaranteed to be non-empty
595599 [ ln
596600 | ln <- [0 .. ]
@@ -603,7 +607,7 @@ runSizeToLevelNumberLevelling conf n
603607 | n < 0 = error " run size must be positive"
604608 -- TODO: enumerating level numbers is potentially costly, but it does give a
605609 -- precise answer, where we'd otherwise have to deal with Double rounding
606- -- errors in computing @ln = logBase 4 (n / configMaxWriteBufferSize)@
610+ -- errors in computing @ln = logBase configSizeRatio (n / configMaxWriteBufferSize)@
607611 | otherwise = head $ -- the list is guaranteed to be non-empty
608612 [ ln
609613 | ln <- [0 .. ]
@@ -635,10 +639,10 @@ _runFitsInLevel mpl conf ln r = runSizeFitsInLevel mpl conf ln (runSize r)
635639-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
636640-- sizes at each level.
637641--
638- -- >>> runSizeFitsInLevel MergePolicyTiering (LSMConfig 2) 3 <$> [8,9,16,32,33]
642+ -- >>> runSizeFitsInLevel MergePolicyTiering (LSMConfig 2 4 ) 3 <$> [8,9,16,32,33]
639643-- [False,True,True,True,False]
640644--
641- -- >>> runSizeFitsInLevel MergePolicyLevelling (LSMConfig 2) 2 <$> [8,9,16,32,33]
645+ -- >>> runSizeFitsInLevel MergePolicyLevelling (LSMConfig 2 4 ) 2 <$> [8,9,16,32,33]
642646-- [False,True,True,True,False]
643647runSizeFitsInLevel :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int -> Bool
644648runSizeFitsInLevel mpl conf ln n
@@ -657,10 +661,10 @@ runTooSmallForLevel mpl conf ln r = runSizeTooSmallForLevel mpl conf ln (runSize
657661-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
658662-- sizes at each level.
659663--
660- -- >>> runSizeTooSmallForLevel MergePolicyTiering (LSMConfig 2) 3 <$> [8,9]
664+ -- >>> runSizeTooSmallForLevel MergePolicyTiering (LSMConfig 2 4 ) 3 <$> [8,9]
661665-- [True,False]
662666--
663- -- >>> runSizeTooSmallForLevel MergePolicyLevelling (LSMConfig 2) 2 <$> [8,9]
667+ -- >>> runSizeTooSmallForLevel MergePolicyLevelling (LSMConfig 2 4 ) 2 <$> [8,9]
664668-- [True,False]
665669runSizeTooSmallForLevel :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int -> Bool
666670runSizeTooSmallForLevel mpl conf ln n
@@ -681,10 +685,10 @@ runTooLargeForLevel mpl conf ln r = runSizeTooLargeForLevel mpl conf ln (runSize
681685-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
682686-- sizes at each level.
683687--
684- -- >>> runSizeTooLargeForLevel MergePolicyTiering (LSMConfig 2) 2 <$> [8,9]
688+ -- >>> runSizeTooLargeForLevel MergePolicyTiering (LSMConfig 2 4 ) 2 <$> [8,9]
685689-- [False,True]
686690--
687- -- >>> runSizeTooLargeForLevel MergePolicyLevelling (LSMConfig 2) 1 <$> [8,9]
691+ -- >>> runSizeTooLargeForLevel MergePolicyLevelling (LSMConfig 2 4 ) 1 <$> [8,9]
688692-- [False,True]
689693runSizeTooLargeForLevel :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int -> Bool
690694runSizeTooLargeForLevel mpl conf ln n
@@ -709,7 +713,8 @@ levelIsFull mpl conf ln incoming resident = case mpl of
709713
710714-- | Only based on run count, not their sizes.
711715levelIsFullTiering :: LSMConfig -> LevelNo -> [Run ] -> [Run ] -> Bool
712- levelIsFullTiering _conf _ln _incoming resident = length resident >= 4
716+ levelIsFullTiering LSMConfig {.. } _ln _incoming resident =
717+ length resident >= configSizeRatio
713718
714719-- | The level is only considered full once the resident run is /too large/
715720-- for the level.
@@ -936,16 +941,19 @@ suppliedCreditMergingRun (MergingRun _ d ref) =
936941new :: ST s (LSM s )
937942new = newWith conf
938943 where
944+ -- 4 was the default for both the max write buffer size and size ratio
945+ -- before they were made configurable
939946 conf = LSMConfig {
940- -- 4 was the default before the write buffer size was the hard-coded,
941- -- default max write buffer size before it was made to be configurable
942947 configMaxWriteBufferSize = 4
948+ , configSizeRatio = 4
943949 }
944950
945951newWith :: LSMConfig -> ST s (LSM s )
946952newWith conf
947953 | configMaxWriteBufferSize conf <= 0 =
948954 error " newWith: configMaxWriteBufferSize should be positive"
955+ | configSizeRatio conf <= 1 =
956+ error " newWith: configSizeRatio should be larger than 1"
949957 | otherwise = do
950958 c <- newSTRef 0
951959 lsm <- newSTRef (LSMContent Map. empty [] NoUnion )
@@ -1450,8 +1458,8 @@ newLevelMerge :: Tracer (ST s) EventDetail
14501458 -> Int -> MergePolicy -> LevelMergeType
14511459 -> [Run ] -> ST s (IncomingRun s )
14521460newLevelMerge _ _ _ _ _ [r] = return (Single r)
1453- newLevelMerge tr conf level mergePolicy mergeType rs = do
1454- assertST (length rs `elem` [4 , 5 ])
1461+ newLevelMerge tr conf@ LSMConfig { .. } level mergePolicy mergeType rs = do
1462+ assertST (length rs `elem` [configSizeRatio, configSizeRatio + 1 ])
14551463 mergingRun@ (MergingRun _ physicalDebt _) <- newMergingRun mergeType rs
14561464 assertST (totalDebt physicalDebt <= maxPhysicalDebt)
14571465 traceWith tr MergeStartedEvent {
@@ -1480,9 +1488,11 @@ newLevelMerge tr conf level mergePolicy mergeType rs = do
14801488 -- includes the single run in the current level.
14811489 maxPhysicalDebt =
14821490 case mergePolicy of
1483- MergePolicyLevelling -> 4 * levelNumberToMaxRunSize MergePolicyTiering conf (level- 1 )
1484- + levelNumberToMaxRunSize MergePolicyLevelling conf level
1485- MergePolicyTiering -> length rs * levelNumberToMaxRunSize MergePolicyTiering conf (level- 1 )
1491+ MergePolicyLevelling ->
1492+ configSizeRatio * levelNumberToMaxRunSize MergePolicyTiering conf (level- 1 )
1493+ + levelNumberToMaxRunSize MergePolicyLevelling conf level
1494+ MergePolicyTiering ->
1495+ length rs * levelNumberToMaxRunSize MergePolicyTiering conf (level- 1 )
14861496
14871497-------------------------------------------------------------------------------
14881498-- MergingTree abstraction
0 commit comments