@@ -26,12 +26,16 @@ module Database.LSMTree.Internal.Config (
2626 , diskCachePolicyForLevel
2727 -- * Merge schedule
2828 , MergeSchedule (.. )
29+ -- * Merge batch size
30+ , MergeBatchSize (.. )
31+ , creditThresholdForLevel
2932 ) where
3033
3134import Control.DeepSeq (NFData (.. ))
3235import Database.LSMTree.Internal.Index (IndexType )
3336import qualified Database.LSMTree.Internal.Index as Index
3437 (IndexType (Compact , Ordinary ))
38+ import qualified Database.LSMTree.Internal.MergingRun as MR
3539import qualified Database.LSMTree.Internal.RawBytes as RB
3640import Database.LSMTree.Internal.Run (RunDataCaching (.. ))
3741import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (.. ))
@@ -99,12 +103,14 @@ data TableConfig = TableConfig {
99103 , confBloomFilterAlloc :: ! BloomFilterAlloc
100104 , confFencePointerIndex :: ! FencePointerIndexType
101105 , confDiskCachePolicy :: ! DiskCachePolicy
106+ , confMergeBatchSize :: ! MergeBatchSize
102107 }
103108 deriving stock (Show , Eq )
104109
105110instance NFData TableConfig where
106- rnf (TableConfig a b c d e f g) =
107- rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` rnf e `seq` rnf f `seq` rnf g
111+ rnf (TableConfig a b c d e f g h) =
112+ rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq`
113+ rnf e `seq` rnf f `seq` rnf g `seq` rnf h
108114
109115-- | The 'defaultTableConfig' defines reasonable defaults for all 'TableConfig' parameters.
110116--
@@ -133,6 +139,7 @@ defaultTableConfig =
133139 , confBloomFilterAlloc = AllocRequestFPR 1.0e-3
134140 , confFencePointerIndex = OrdinaryIndex
135141 , confDiskCachePolicy = DiskCacheAll
142+ , confMergeBatchSize = MergeBatchSize 20_000 -- same as write buffer
136143 }
137144
138145data RunLevelNo = RegularLevel LevelNo | UnionLevel
@@ -238,6 +245,8 @@ data MergeSchedule =
238245 The 'Incremental' merge schedule spreads out the merging work over time.
239246 This is less efficient than the 'OneShot' merge schedule, but has a consistent workload.
240247 Using the 'Incremental' merge schedule, the worst-case disk I\/O complexity of the update operations is /logarithmic/ in the size of the table.
248+ This 'Incremental' merge schedule still uses batching to improve performance.
249+ The batch size can be controlled using the 'MergeBatchSize'.
241250 -}
242251 | Incremental
243252 deriving stock (Eq , Show )
@@ -385,3 +394,41 @@ diskCachePolicyForLevel policy levelNo =
385394 RegularLevel l | l <= LevelNo n -> CacheRunData
386395 | otherwise -> NoCacheRunData
387396 UnionLevel -> NoCacheRunData
397+
398+ {- ------------------------------------------------------------------------------
399+ Merge batch size
400+ -------------------------------------------------------------------------------}
401+
402+ {- |
403+ The /merge batch size/ is a micro-tuning parameter, and in most cases you do
404+ not need to think about it and can leave it at its default.
405+
406+ When using the 'Incremental' merge schedule, merging is done in batches. This
407+ is a trade-off: larger batches tend to mean better overall performance but the
408+ downside is that while most updates (inserts, deletes, upserts) are fast, some
409+ are slower (when a batch of merging work has to be done).
410+
411+ If you care most about the maximum latency of updates, then use a small batch
412+ size. If you don't care about latency of individual operations, just the
413+ latency of the overall sequence of operations then use a large batch size. The
414+ default is to use a large batch size, the same size as the write buffer itself.
415+ The minimum batch size is 1.
416+
417+ Note that the actual batch size is the minimum of this configuration
418+ parameter and the size of the batch of operations performed (e.g. 'inserts').
419+ So if you consistently use large batches, you can use a batch size of 1 and
420+ the merge batch size will always be determined by the operation batch size.
421+
422+ A further reason why it may be preferable to use minimal batch sizes is to get
423+ good parallel work balance, when using parallelism.
424+ -}
425+ newtype MergeBatchSize = MergeBatchSize Int
426+ deriving stock (Show , Eq , Ord )
427+ deriving newtype (NFData )
428+
429+ -- TODO: the thresholds for doing merge work should be different for each level,
430+ -- and ideally all-pairs co-prime.
431+ creditThresholdForLevel :: TableConfig -> LevelNo -> MR. CreditThreshold
432+ creditThresholdForLevel TableConfig { confMergeBatchSize = MergeBatchSize n }
433+ (LevelNo _i) =
434+ MR. CreditThreshold (MR. UnspentCredits (MR. MergeCredits (max 1 n)))
0 commit comments