Skip to content

Commit 1bf5f11

Browse files
authored
Merge pull request #717 from IntersectMBO/jdral/prototypes-configurable-sizeratio
`ScheduledMerges`: make the size ratio configurable
2 parents fc417d9 + 1f5611d commit 1bf5f11

File tree

3 files changed

+71
-47
lines changed

3 files changed

+71
-47
lines changed

src-prototypes/ScheduledMerges.hs

Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
-- Finally, it demonstrates a design for table unions, including a
1717
-- representation for in-progress merging trees.
1818
--
19-
-- The merging policy that this prototype uses is power 4 \"lazy levelling\".
20-
-- Power 4 means each level is 4 times bigger than the previous level.
19+
-- The merging policy that this prototype uses is \"lazy levelling\".
20+
-- Each level is T times bigger than the previous level.
2121
-- Lazy levelling means we use tiering for every level except the last level
2222
-- which uses levelling. Though note that the first level always uses tiering,
2323
-- even if the first level is also the last level. This is to simplify flushing
@@ -123,6 +123,8 @@ data LSM s = LSMHandle !(STRef s Counter)
123123
-- | Configuration options for individual LSM tables.
124124
data LSMConfig = LSMConfig {
125125
configMaxWriteBufferSize :: !Int
126+
-- | Also known as the parameter @T@
127+
, configSizeRatio :: !Int
126128
}
127129
deriving stock (Show, Eq)
128130

@@ -316,7 +318,7 @@ mergeTypeForLevel _ _ = MergeMidLevel
316318
-- the last level.
317319
--
318320
invariant :: forall s. LSMConfig -> LSMContent s -> ST s ()
319-
invariant conf (LSMContent _ levels ul) = do
321+
invariant conf@LSMConfig{..} (LSMContent _ levels ul) = do
320322
levelsInvariant 1 levels
321323
case ul of
322324
NoUnion -> return ()
@@ -335,7 +337,7 @@ invariant conf (LSMContent _ levels ul) = do
335337
assertST $ mt == mergeTypeForLevel ls ul
336338
readSTRef ref
337339

338-
assertST $ length rs <= 3
340+
assertST $ length rs <= configSizeRatio - 1
339341
expectedRunLengths ln rs ls
340342
expectedMergingRunLengths ln ir mrs ls
341343

@@ -353,12 +355,12 @@ invariant conf (LSMContent _ levels ul) = do
353355
-- 'IncomingRun', using 'Single'. Thus there are no other resident runs.
354356
MergePolicyLevelling -> assertST $ null rs
355357
-- Runs in tiering levels usually fit that size, but they can be one
356-
-- larger, if a run has been held back (creating a 5-way merge).
358+
-- larger, if a run has been held back (creating a (T+1)-way merge).
357359
MergePolicyTiering -> assertST $ all (\r -> runToLevelNumber MergePolicyTiering conf r `elem` [ln, ln+1]) rs
358360
-- (This is actually still not really true, but will hold in practice.
359361
-- In the pathological case, all runs passed to the next level can be
360-
-- factor (5/4) too large, and there the same holding back can lead to
361-
-- factor (6/4) etc., until at level 12 a run is two levels too large.
362+
-- factor ((T+1)/T) too large, and there the same holding back can lead to
363+
-- factor ((T+2)/T) etc., until (for T = 4) at level 12 a run is two levels too large.
362364

363365
-- Incoming runs being merged also need to be of the right size, but the
364366
-- conditions are more complicated.
@@ -381,17 +383,17 @@ invariant conf (LSMContent _ levels ul) = do
381383
(_, CompletedMerge r) ->
382384
assertST $ runToLevelNumber MergePolicyLevelling conf r <= ln+1
383385

384-
-- An ongoing merge for levelling should have 4 incoming runs of
386+
-- An ongoing merge for levelling should have T incoming runs of
385387
-- the right size for the level below (or slightly larger due to
386388
-- holding back underfull runs), and 1 run from this level,
387389
-- but the run from this level can be of almost any size for the
388390
-- same reasons as above. Although if this is the first merge for
389-
-- a new level, it'll have only 4 runs.
391+
-- a new level, it'll have only T runs.
390392
(_, OngoingMerge _ rs _) -> do
391-
assertST $ length rs `elem` [4, 5]
393+
assertST $ length rs `elem` [configSizeRatio, configSizeRatio + 1]
392394
assertST $ all (\r -> runSize r > 0) rs -- don't merge empty runs
393-
let incoming = take 4 rs
394-
let resident = drop 4 rs
395+
let incoming = take configSizeRatio rs
396+
let resident = drop configSizeRatio rs
395397
assertST $ all (\r -> runToLevelNumber MergePolicyTiering conf r `elem` [ln-1, ln]) incoming
396398
assertST $ all (\r -> runToLevelNumber MergePolicyLevelling conf r <= ln+1) resident
397399

@@ -419,12 +421,12 @@ invariant conf (LSMContent _ levels ul) = do
419421
(_, CompletedMerge r, MergeMidLevel) ->
420422
assertST $ runToLevelNumber MergePolicyTiering conf r `elem` [ln-1, ln, ln+1]
421423

422-
-- An ongoing merge for tiering should have 4 incoming runs of
424+
-- An ongoing merge for tiering should have T incoming runs of
423425
-- the right size for the level below, and at most 1 run held back
424426
-- due to being too small (which would thus also be of the size of
425427
-- the level below).
426428
(_, OngoingMerge _ rs _, _) -> do
427-
assertST $ length rs == 4 || length rs == 5
429+
assertST $ length rs == configSizeRatio || length rs == configSizeRatio + 1
428430
assertST $ all (\r -> runToLevelNumber MergePolicyTiering conf r == ln-1) rs
429431

430432
-- We don't make many assumptions apart from what the types already enforce.
@@ -533,16 +535,16 @@ assertST p = assert p $ return ()
533535
-- The size of a tiering run at each level is allowed to be
534536
-- @bufferSize*sizeRatio^(level-2) < size <= bufferSize*sizeRatio^(level-1)@.
535537
--
536-
-- >>> levelNumberToMaxRunSize MergePolicyTiering (LSMConfig 2) <$> [0, 1, 2, 3, 4]
538+
-- >>> levelNumberToMaxRunSize MergePolicyTiering (LSMConfig 2 4) <$> [0, 1, 2, 3, 4]
537539
-- [0,2,8,32,128]
538540
--
539541
-- The @size@ of a levelling run at each level is allowed to be
540-
-- @bufferSize*sizeRatio^(level-1) < size <= bufferSize*sizeRatio^(level+1)@. A
542+
-- @bufferSize*sizeRatio^(level-1) < size <= bufferSize*sizeRatio^level@. A
541543
-- levelling run can take up a whole level, so the maximum size of a run is
542544
-- @sizeRatio@ times larger than the maximum size of a tiering run on the same
543545
-- level.
544546
--
545-
-- >>> levelNumberToMaxRunSize MergePolicyLevelling (LSMConfig 2) <$> [0, 1, 2, 3, 4]
547+
-- >>> levelNumberToMaxRunSize MergePolicyLevelling (LSMConfig 2 4) <$> [0, 1, 2, 3, 4]
546548
-- [0,8,32,128,512]
547549
levelNumberToMaxRunSize :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int
548550
levelNumberToMaxRunSize = \case
@@ -551,10 +553,12 @@ levelNumberToMaxRunSize = \case
551553

552554
-- | See 'levelNumberToMaxRunSize'
553555
levelNumberToMaxRunSizeTiering :: HasCallStack => LSMConfig -> LevelNo -> Int
554-
levelNumberToMaxRunSizeTiering LSMConfig {configMaxWriteBufferSize = bufSize} ln
556+
levelNumberToMaxRunSizeTiering
557+
LSMConfig {configMaxWriteBufferSize = bufSize, configSizeRatio = sizeRatio}
558+
ln
555559
| ln < 0 = error "level number must be non-negative"
556560
| ln == 0 = 0
557-
| otherwise = fromIntegerChecked (toInteger bufSize * 4 ^ pred (toInteger ln))
561+
| otherwise = fromIntegerChecked (toInteger bufSize * toInteger sizeRatio ^ pred (toInteger ln))
558562
-- Perform the computation with arbitrary precision using 'Integers', but
559563
-- throw an error if the result does not fit into an 'Int'.
560564

@@ -574,10 +578,10 @@ runToLevelNumber mpl conf run = runSizeToLevelNumber mpl conf (runSize run)
574578
-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
575579
-- sizes at each level.
576580
--
577-
-- >>> runSizeToLevelNumber MergePolicyTiering (LSMConfig 2) <$> [0,2,8,32,128]
581+
-- >>> runSizeToLevelNumber MergePolicyTiering (LSMConfig 2 4) <$> [0,2,8,32,128]
578582
-- [0,1,2,3,4]
579583
--
580-
-- >>> runSizeToLevelNumber MergePolicyLevelling (LSMConfig 2) <$> [0,8,32,128,512]
584+
-- >>> runSizeToLevelNumber MergePolicyLevelling (LSMConfig 2 4) <$> [0,8,32,128,512]
581585
-- [0,1,2,3,4]
582586
runSizeToLevelNumber :: HasCallStack => MergePolicy -> LSMConfig -> Int -> LevelNo
583587
runSizeToLevelNumber = \case
@@ -590,7 +594,7 @@ runSizeToLevelNumberTiering conf n
590594
| n < 0 = error "run size must be positive"
591595
-- TODO: enumerating level numbers is potentially costly, but it does give a
592596
-- precise answer, where we'd otherwise have to deal with Double rounding
593-
-- errors in computing @ln = logBase 4 (n / configMaxWriteBufferSize) + 1@
597+
-- errors in computing @ln = logBase configSizeRatio (n / configMaxWriteBufferSize) + 1@
594598
| otherwise = head $ -- the list is guaranteed to be non-empty
595599
[ ln
596600
| ln <- [0..]
@@ -603,7 +607,7 @@ runSizeToLevelNumberLevelling conf n
603607
| n < 0 = error "run size must be positive"
604608
-- TODO: enumerating level numbers is potentially costly, but it does give a
605609
-- precise answer, where we'd otherwise have to deal with Double rounding
606-
-- errors in computing @ln = logBase 4 (n / configMaxWriteBufferSize)@
610+
-- errors in computing @ln = logBase configSizeRatio (n / configMaxWriteBufferSize)@
607611
| otherwise = head $ -- the list is guaranteed to be non-empty
608612
[ ln
609613
| ln <- [0..]
@@ -635,10 +639,10 @@ _runFitsInLevel mpl conf ln r = runSizeFitsInLevel mpl conf ln (runSize r)
635639
-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
636640
-- sizes at each level.
637641
--
638-
-- >>> runSizeFitsInLevel MergePolicyTiering (LSMConfig 2) 3 <$> [8,9,16,32,33]
642+
-- >>> runSizeFitsInLevel MergePolicyTiering (LSMConfig 2 4) 3 <$> [8,9,16,32,33]
639643
-- [False,True,True,True,False]
640644
--
641-
-- >>> runSizeFitsInLevel MergePolicyLevelling (LSMConfig 2) 2 <$> [8,9,16,32,33]
645+
-- >>> runSizeFitsInLevel MergePolicyLevelling (LSMConfig 2 4) 2 <$> [8,9,16,32,33]
642646
-- [False,True,True,True,False]
643647
runSizeFitsInLevel :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int -> Bool
644648
runSizeFitsInLevel mpl conf ln n
@@ -657,10 +661,10 @@ runTooSmallForLevel mpl conf ln r = runSizeTooSmallForLevel mpl conf ln (runSize
657661
-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
658662
-- sizes at each level.
659663
--
660-
-- >>> runSizeTooSmallForLevel MergePolicyTiering (LSMConfig 2) 3 <$> [8,9]
664+
-- >>> runSizeTooSmallForLevel MergePolicyTiering (LSMConfig 2 4) 3 <$> [8,9]
661665
-- [True,False]
662666
--
663-
-- >>> runSizeTooSmallForLevel MergePolicyLevelling (LSMConfig 2) 2 <$> [8,9]
667+
-- >>> runSizeTooSmallForLevel MergePolicyLevelling (LSMConfig 2 4) 2 <$> [8,9]
664668
-- [True,False]
665669
runSizeTooSmallForLevel :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int -> Bool
666670
runSizeTooSmallForLevel mpl conf ln n
@@ -681,10 +685,10 @@ runTooLargeForLevel mpl conf ln r = runSizeTooLargeForLevel mpl conf ln (runSize
681685
-- See 'levelNumberToMaxRunSize' for the bounds on (tiering or levelling) run
682686
-- sizes at each level.
683687
--
684-
-- >>> runSizeTooLargeForLevel MergePolicyTiering (LSMConfig 2) 2 <$> [8,9]
688+
-- >>> runSizeTooLargeForLevel MergePolicyTiering (LSMConfig 2 4) 2 <$> [8,9]
685689
-- [False,True]
686690
--
687-
-- >>> runSizeTooLargeForLevel MergePolicyLevelling (LSMConfig 2) 1 <$> [8,9]
691+
-- >>> runSizeTooLargeForLevel MergePolicyLevelling (LSMConfig 2 4) 1 <$> [8,9]
688692
-- [False,True]
689693
runSizeTooLargeForLevel :: HasCallStack => MergePolicy -> LSMConfig -> LevelNo -> Int -> Bool
690694
runSizeTooLargeForLevel mpl conf ln n
@@ -709,7 +713,8 @@ levelIsFull mpl conf ln incoming resident = case mpl of
709713

710714
-- | Only based on run count, not their sizes.
711715
levelIsFullTiering :: LSMConfig -> LevelNo -> [Run] -> [Run] -> Bool
712-
levelIsFullTiering _conf _ln _incoming resident = length resident >= 4
716+
levelIsFullTiering LSMConfig{..} _ln _incoming resident =
717+
length resident >= configSizeRatio
713718

714719
-- | The level is only considered full once the resident run is /too large/
715720
-- for the level.
@@ -936,16 +941,19 @@ suppliedCreditMergingRun (MergingRun _ d ref) =
936941
new :: ST s (LSM s)
937942
new = newWith conf
938943
where
944+
-- 4 was the default for both the max write buffer size and size ratio
945+
-- before they were made configurable
939946
conf = LSMConfig {
940-
-- 4 was the default before the write buffer size was the hard-coded,
941-
-- default max write buffer size before it was made to be configurable
942947
configMaxWriteBufferSize = 4
948+
, configSizeRatio = 4
943949
}
944950

945951
newWith :: LSMConfig -> ST s (LSM s)
946952
newWith conf
947953
| configMaxWriteBufferSize conf <= 0 =
948954
error "newWith: configMaxWriteBufferSize should be positive"
955+
| configSizeRatio conf <= 1 =
956+
error "newWith: configSizeRatio should be larger than 1"
949957
| otherwise = do
950958
c <- newSTRef 0
951959
lsm <- newSTRef (LSMContent Map.empty [] NoUnion)
@@ -1450,8 +1458,8 @@ newLevelMerge :: Tracer (ST s) EventDetail
14501458
-> Int -> MergePolicy -> LevelMergeType
14511459
-> [Run] -> ST s (IncomingRun s)
14521460
newLevelMerge _ _ _ _ _ [r] = return (Single r)
1453-
newLevelMerge tr conf level mergePolicy mergeType rs = do
1454-
assertST (length rs `elem` [4, 5])
1461+
newLevelMerge tr conf@LSMConfig{..} level mergePolicy mergeType rs = do
1462+
assertST (length rs `elem` [configSizeRatio, configSizeRatio + 1])
14551463
mergingRun@(MergingRun _ physicalDebt _) <- newMergingRun mergeType rs
14561464
assertST (totalDebt physicalDebt <= maxPhysicalDebt)
14571465
traceWith tr MergeStartedEvent {
@@ -1480,9 +1488,11 @@ newLevelMerge tr conf level mergePolicy mergeType rs = do
14801488
-- includes the single run in the current level.
14811489
maxPhysicalDebt =
14821490
case mergePolicy of
1483-
MergePolicyLevelling -> 4 * levelNumberToMaxRunSize MergePolicyTiering conf (level-1)
1484-
+ levelNumberToMaxRunSize MergePolicyLevelling conf level
1485-
MergePolicyTiering -> length rs * levelNumberToMaxRunSize MergePolicyTiering conf (level-1)
1491+
MergePolicyLevelling ->
1492+
configSizeRatio * levelNumberToMaxRunSize MergePolicyTiering conf (level-1)
1493+
+ levelNumberToMaxRunSize MergePolicyLevelling conf level
1494+
MergePolicyTiering ->
1495+
length rs * levelNumberToMaxRunSize MergePolicyTiering conf (level-1)
14861496

14871497
-------------------------------------------------------------------------------
14881498
-- MergingTree abstraction

test-prototypes/Test/ScheduledMerges/RunSizes.hs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,21 @@ newtype Config = Config LSMConfig
6969
instance Arbitrary Config where
7070
arbitrary = Config <$> do
7171
bufSize <- (getSmall <$> arbitrary) `suchThat` (>0)
72+
sizeRatio <- (getSmall <$> arbitrary) `suchThat` (>1)
7273
pure $ LSMConfig {
7374
configMaxWriteBufferSize = bufSize
75+
, configSizeRatio = sizeRatio
7476
}
75-
shrink (Config LSMConfig{..}) =
76-
[ Config LSMConfig{configMaxWriteBufferSize = bufSize'}
77+
shrink (Config conf@LSMConfig{..}) =
78+
[ Config conf{configMaxWriteBufferSize = bufSize'}
7779
| bufSize' <- shrink configMaxWriteBufferSize
7880
, bufSize' > 0
7981
]
82+
++ [ Config conf{configSizeRatio = sizeRatio'}
83+
| sizeRatio' <- shrink configSizeRatio
84+
, sizeRatio' > 1
85+
]
86+
8087

8188
newtype LevelNo = LevelNo Int
8289
deriving stock (Show, Eq, Ord)
@@ -93,16 +100,16 @@ newtype LevelNo = LevelNo Int
93100
levelNumberInvariant :: MergePolicyForLevel -> Config -> LevelNo -> Bool
94101
levelNumberInvariant
95102
(MergePolicyForLevel mpl)
96-
(Config LSMConfig{configMaxWriteBufferSize})
103+
(Config LSMConfig{configMaxWriteBufferSize, configSizeRatio})
97104
(LevelNo ln)
98105
| ln < 0 = False
99106
| ln == 0 = True
100107
| otherwise = case mpl of
101108
MergePolicyTiering ->
102-
toInteger configMaxWriteBufferSize * (4 ^ toInteger (pred ln))
109+
toInteger configMaxWriteBufferSize * (toInteger configSizeRatio ^ toInteger (pred ln))
103110
<= toInteger (maxBound :: Int)
104111
MergePolicyLevelling ->
105-
toInteger configMaxWriteBufferSize * (4 ^ toInteger ln)
112+
toInteger configMaxWriteBufferSize * (toInteger configSizeRatio ^ toInteger ln)
106113
<= toInteger (maxBound :: Int)
107114

108115
newtype RunSize = RunSize Int

test-prototypes/Test/ScheduledMergesQLS.hs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,13 @@ instance InLockstep Model where
213213
arbitraryWithVars ctx model =
214214
case findVars ctx (Proxy :: Proxy (LSM RealWorld)) of
215215
[] ->
216-
-- Generate a write buffer size in the range [3,5] most of the time,
217-
-- sometimes in the range [1,10] to hit edge cases. 4 was the hard-coded
218-
-- default before it was made configurable.
219-
fmap Some $ ANew <$> (LSMConfig <$> frequency [(10, choose (1,10)), (90, choose (3,5))])
216+
-- Generate a write buffer size and size ratio in the range [3,5] most
217+
-- of the time, sometimes in the range [1,10] ([2,10] for the size ratio) to hit edge cases. 4 was
218+
-- the hard-coded default for both before they were made configurable.
219+
fmap Some $ ANew <$> (
220+
LSMConfig <$> frequency [(10, choose (1,10)), (90, choose (3,5))]
221+
<*> frequency [(10, choose (2,10)), (90, choose (3,5))]
222+
)
220223
vars ->
221224
let kvars = findVars ctx (Proxy :: Proxy Key)
222225
existingKey = Left <$> elements kvars
@@ -300,8 +303,12 @@ instance InLockstep Model where
300303
| mwbs' <- shrink mwbs
301304
, mwbs' >= 1, mwbs' <= 10
302305
]
306+
++ [ Some $ ANew conf { configSizeRatio = sr' }
307+
| sr' <- shrink sr
308+
, sr' >= 2, sr' <= 10
309+
]
303310
where
304-
LSMConfig mwbs = conf
311+
LSMConfig mwbs sr = conf
305312

306313
shrinkWithVars _ctx _model (AInsert var (Right k) v b) =
307314
[ Some $ AInsert var (Right k') v' b' | (k', v', b') <- shrink (k, v, b) ]

0 commit comments

Comments
 (0)