@@ -26,12 +26,16 @@ module Database.LSMTree.Internal.Config (
2626 , diskCachePolicyForLevel
2727 -- * Merge schedule
2828 , MergeSchedule (.. )
29+ -- * Merge batch size
30+ , MergeBatchSize (.. )
31+ , creditThresholdForLevel
2932 ) where
3033
3134import Control.DeepSeq (NFData (.. ))
3235import Database.LSMTree.Internal.Index (IndexType )
3336import qualified Database.LSMTree.Internal.Index as Index
3437 (IndexType (Compact , Ordinary ))
38+ import qualified Database.LSMTree.Internal.MergingRun as MR
3539import qualified Database.LSMTree.Internal.RawBytes as RB
3640import Database.LSMTree.Internal.Run (RunDataCaching (.. ))
3741import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (.. ))
@@ -90,6 +94,12 @@ For a detailed discussion of fine-tuning the table configuration, see [Fine-tuni
9094[@confDiskCachePolicy :: t'DiskCachePolicy'@]
9195 The /disk cache policy/ supports caching lookup operations using the OS page cache.
9296 Caching may improve the performance of lookups and updates if database access follows certain patterns.
97+
98+ [@confMergeBatchSize :: t'MergeBatchSize'@]
99+ The merge batch size balances the maximum latency of individual update
100+ operations, versus the latency of a sequence of update operations. Bigger
 101+  batches improve overall performance, but some updates will take a lot
102+ longer than others. The default is to use a large batch size.
93103-}
94104data TableConfig = TableConfig {
95105 confMergePolicy :: ! MergePolicy
@@ -99,12 +109,14 @@ data TableConfig = TableConfig {
99109 , confBloomFilterAlloc :: ! BloomFilterAlloc
100110 , confFencePointerIndex :: ! FencePointerIndexType
101111 , confDiskCachePolicy :: ! DiskCachePolicy
112+ , confMergeBatchSize :: ! MergeBatchSize
102113 }
103114 deriving stock (Show , Eq )
104115
105116instance NFData TableConfig where
106- rnf (TableConfig a b c d e f g) =
107- rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` rnf e `seq` rnf f `seq` rnf g
117+ rnf (TableConfig a b c d e f g h) =
118+ rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq`
119+ rnf e `seq` rnf f `seq` rnf g `seq` rnf h
108120
109121-- | The 'defaultTableConfig' defines reasonable defaults for all 'TableConfig' parameters.
110122--
@@ -122,6 +134,8 @@ instance NFData TableConfig where
122134-- OrdinaryIndex
123135-- >>> confDiskCachePolicy defaultTableConfig
124136-- DiskCacheAll
137+ -- >>> confMergeBatchSize defaultTableConfig
138+ -- MergeBatchSize 20000
125139--
126140defaultTableConfig :: TableConfig
127141defaultTableConfig =
@@ -133,6 +147,7 @@ defaultTableConfig =
133147 , confBloomFilterAlloc = AllocRequestFPR 1.0e-3
134148 , confFencePointerIndex = OrdinaryIndex
135149 , confDiskCachePolicy = DiskCacheAll
150+ , confMergeBatchSize = MergeBatchSize 20_000 -- same as write buffer
136151 }
137152
138153data RunLevelNo = RegularLevel LevelNo | UnionLevel
@@ -238,6 +253,8 @@ data MergeSchedule =
238253 The 'Incremental' merge schedule spreads out the merging work over time.
239254 This is less efficient than the 'OneShot' merge schedule, but has a consistent workload.
240255 Using the 'Incremental' merge schedule, the worst-case disk I\/O complexity of the update operations is /logarithmic/ in the size of the table.
256+ This 'Incremental' merge schedule still uses batching to improve performance.
257+ The batch size can be controlled using the 'MergeBatchSize'.
241258 -}
242259 | Incremental
243260 deriving stock (Eq , Show )
@@ -385,3 +402,50 @@ diskCachePolicyForLevel policy levelNo =
385402 RegularLevel l | l <= LevelNo n -> CacheRunData
386403 | otherwise -> NoCacheRunData
387404 UnionLevel -> NoCacheRunData
405+
406+ {- ------------------------------------------------------------------------------
407+ Merge batch size
408+ -------------------------------------------------------------------------------}
409+
410+ {- |
 411+  The /merge batch size/ is a micro-tuning parameter, and in most cases you do
 412+  not need to think about it and can leave it at its default.
413+
414+ When using the 'Incremental' merge schedule, merging is done in batches. This
 415+  is a trade-off: larger batches tend to mean better overall performance but the
416+ downside is that while most updates (inserts, deletes, upserts) are fast, some
417+ are slower (when a batch of merging work has to be done).
418+
419+ If you care most about the maximum latency of updates, then use a small batch
420+ size. If you don't care about latency of individual operations, just the
 421+  latency of the overall sequence of operations, then use a large batch size. The
422+ default is to use a large batch size, the same size as the write buffer itself.
423+ The minimum batch size is 1. The maximum batch size is the size of the write
424+ buffer 'confWriteBufferAlloc'.
425+
426+ Note that the actual batch size is the minimum of this configuration
427+ parameter and the size of the batch of operations performed (e.g. 'inserts').
428+ So if you consistently use large batches, you can use a batch size of 1 and
429+ the merge batch size will always be determined by the operation batch size.
430+
431+ A further reason why it may be preferable to use minimal batch sizes is to get
432+ good parallel work balance, when using parallelism.
433+ -}
434+ newtype MergeBatchSize = MergeBatchSize Int
435+ deriving stock (Show , Eq , Ord )
436+ deriving newtype (NFData )
437+
438+ -- TODO: the thresholds for doing merge work should be different for each level,
439+ -- and ideally all-pairs co-prime.
440+ creditThresholdForLevel :: TableConfig -> LevelNo -> MR. CreditThreshold
441+ creditThresholdForLevel TableConfig {
442+ confMergeBatchSize = MergeBatchSize mergeBatchSz,
443+ confWriteBufferAlloc = AllocNumEntries writeBufferSz
444+ }
445+ (LevelNo _i) =
446+ MR. CreditThreshold
447+ . MR. UnspentCredits
448+ . MR. MergeCredits
449+ . max 1
450+ . min writeBufferSz
451+ $ mergeBatchSz
0 commit comments