Skip to content

Commit e83ef48

Browse files
committed
Make the MergeBatchSize an adjustable parameter in TableConfig
Previously it was hard coded to be the same as the write buffer size. Document what it means as a new tunable parameter. Setting this low (1) is important for getting good parallel work balance on the pipelined WP8 benchmark. It is a crucial change that makes the pipelined version actually improve performance. Previously it would only get about a 5 to 10% improvement.
1 parent 4d1876e commit e83ef48

File tree

11 files changed

+105
-40
lines changed

11 files changed

+105
-40
lines changed

src-extras/Database/LSMTree/Extras/NoThunks.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,9 @@ deriving anyclass instance NoThunks DiskCachePolicy
659659
deriving stock instance Generic MergeSchedule
660660
deriving anyclass instance NoThunks MergeSchedule
661661

662+
deriving stock instance Generic MergeBatchSize
663+
deriving anyclass instance NoThunks MergeBatchSize
664+
662665
{-------------------------------------------------------------------------------
663666
RWVar
664667
-------------------------------------------------------------------------------}

src/Database/LSMTree.hs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ module Database.LSMTree (
119119
BloomFilterAlloc (AllocFixed, AllocRequestFPR),
120120
FencePointerIndexType (OrdinaryIndex, CompactIndex),
121121
DiskCachePolicy (..),
122+
MergeBatchSize (..),
122123

123124
-- ** Table Configuration Overrides #table_configuration_overrides#
124125
OverrideDiskCachePolicy (..),
@@ -214,9 +215,10 @@ import qualified Database.LSMTree.Internal.BlobRef as Internal
214215
import Database.LSMTree.Internal.Config
215216
(BloomFilterAlloc (AllocFixed, AllocRequestFPR),
216217
DiskCachePolicy (..), FencePointerIndexType (..),
217-
LevelNo (..), MergePolicy (..), MergeSchedule (..),
218-
SizeRatio (..), TableConfig (..), WriteBufferAlloc (..),
219-
defaultTableConfig, serialiseKeyMinimalSize)
218+
LevelNo (..), MergeBatchSize (..), MergePolicy (..),
219+
MergeSchedule (..), SizeRatio (..), TableConfig (..),
220+
WriteBufferAlloc (..), defaultTableConfig,
221+
serialiseKeyMinimalSize)
220222
import Database.LSMTree.Internal.Config.Override
221223
(OverrideDiskCachePolicy (..))
222224
import Database.LSMTree.Internal.Entry (NumEntries (..))

src/Database/LSMTree/Internal/Config.hs

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,16 @@ module Database.LSMTree.Internal.Config (
2626
, diskCachePolicyForLevel
2727
-- * Merge schedule
2828
, MergeSchedule (..)
29+
-- * Merge batch size
30+
, MergeBatchSize (..)
31+
, creditThresholdForLevel
2932
) where
3033

3134
import Control.DeepSeq (NFData (..))
3235
import Database.LSMTree.Internal.Index (IndexType)
3336
import qualified Database.LSMTree.Internal.Index as Index
3437
(IndexType (Compact, Ordinary))
38+
import qualified Database.LSMTree.Internal.MergingRun as MR
3539
import qualified Database.LSMTree.Internal.RawBytes as RB
3640
import Database.LSMTree.Internal.Run (RunDataCaching (..))
3741
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
@@ -99,12 +103,14 @@ data TableConfig = TableConfig {
99103
, confBloomFilterAlloc :: !BloomFilterAlloc
100104
, confFencePointerIndex :: !FencePointerIndexType
101105
, confDiskCachePolicy :: !DiskCachePolicy
106+
, confMergeBatchSize :: !MergeBatchSize
102107
}
103108
deriving stock (Show, Eq)
104109

105110
instance NFData TableConfig where
106-
rnf (TableConfig a b c d e f g) =
107-
rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq` rnf e `seq` rnf f `seq` rnf g
111+
rnf (TableConfig a b c d e f g h) =
112+
rnf a `seq` rnf b `seq` rnf c `seq` rnf d `seq`
113+
rnf e `seq` rnf f `seq` rnf g `seq` rnf h
108114

109115
-- | The 'defaultTableConfig' defines reasonable defaults for all 'TableConfig' parameters.
110116
--
@@ -133,6 +139,7 @@ defaultTableConfig =
133139
, confBloomFilterAlloc = AllocRequestFPR 1.0e-3
134140
, confFencePointerIndex = OrdinaryIndex
135141
, confDiskCachePolicy = DiskCacheAll
142+
, confMergeBatchSize = MergeBatchSize 20_000 -- same as write buffer
136143
}
137144

138145
data RunLevelNo = RegularLevel LevelNo | UnionLevel
@@ -238,6 +245,8 @@ data MergeSchedule =
238245
The 'Incremental' merge schedule spreads out the merging work over time.
239246
This is less efficient than the 'OneShot' merge schedule, but has a consistent workload.
240247
Using the 'Incremental' merge schedule, the worst-case disk I\/O complexity of the update operations is /logarithmic/ in the size of the table.
248+
This 'Incremental' merge schedule still uses batching to improve performance.
249+
The batch size can be controlled using the 'MergeBatchSize'.
241250
-}
242251
| Incremental
243252
deriving stock (Eq, Show)
@@ -385,3 +394,41 @@ diskCachePolicyForLevel policy levelNo =
385394
RegularLevel l | l <= LevelNo n -> CacheRunData
386395
| otherwise -> NoCacheRunData
387396
UnionLevel -> NoCacheRunData
397+
398+
{-------------------------------------------------------------------------------
399+
Merge batch size
400+
-------------------------------------------------------------------------------}
401+
402+
{- |
403+
The /merge batch size/ is a micro-tuning parameter, and in most cases you do
404+
need to think about it and can leave it at its default.
405+
406+
When using the 'Incremental' merge schedule, merging is done in batches. This
407+
is a trade-off: larger batches tends to mean better overall performance but the
408+
downside is that while most updates (inserts, deletes, upserts) are fast, some
409+
are slower (when a batch of merging work has to be done).
410+
411+
If you care most about the maximum latency of updates, then use a small batch
412+
size. If you don't care about latency of individual operations, just the
413+
latency of the overall sequence of operations then use a large batch size. The
414+
default is to use a large batch size, the same size as the write buffer itself.
415+
The minimum batch size is 1.
416+
417+
Note that the actual batch size is the minimum of this configuration
418+
parameter and the size of the batch of operations performed (e.g. 'inserts').
419+
So if you consistently use large batches, you can use a batch size of 1 and
420+
the merge batch size will always be determined by the operation batch size.
421+
422+
A further reason why it may be preferable to use minimal batch sizes is to get
423+
good parallel work balance, when using parallelism.
424+
-}
425+
newtype MergeBatchSize = MergeBatchSize Int
426+
deriving stock (Show, Eq, Ord)
427+
deriving newtype (NFData)
428+
429+
-- TODO: the thresholds for doing merge work should be different for each level,
430+
-- and ideally all-pairs co-prime.
431+
creditThresholdForLevel :: TableConfig -> LevelNo -> MR.CreditThreshold
432+
creditThresholdForLevel TableConfig { confMergeBatchSize = MergeBatchSize n }
433+
(LevelNo _i) =
434+
MR.CreditThreshold (MR.UnspentCredits (MR.MergeCredits (max 1 n)))

src/Database/LSMTree/Internal/Config/Override.hs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,8 @@ instance Override DiskCachePolicy SnapshotMetaData where
9191
in fmap (override rdc) smt
9292

9393
instance Override DiskCachePolicy TableConfig where
94-
override confDiskCachePolicy' TableConfig {..}
95-
= TableConfig
96-
{ confMergePolicy,
97-
confMergeSchedule,
98-
confSizeRatio,
99-
confWriteBufferAlloc,
100-
confBloomFilterAlloc,
101-
confFencePointerIndex,
102-
confDiskCachePolicy = confDiskCachePolicy'
103-
}
94+
override confDiskCachePolicy' tc =
95+
tc { confDiskCachePolicy = confDiskCachePolicy' }
10496

10597
instance Override DiskCachePolicy (SnapLevels SnapshotRun) where
10698
override dcp (SnapLevels (vec :: V.Vector (SnapLevel SnapshotRun))) =

src/Database/LSMTree/Internal/IncomingRun.hs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,6 @@ supplyCreditsIncomingRun conf ln (Merging _ nominalDebt nominalCreditsVar mr)
218218
-- use atomic operations for its counters). We could potentially simplify
219219
-- MergingRun by dispensing with batching for the MergeCredits counters.
220220

221-
-- TODO: the thresholds for doing merge work should be different for each level,
222-
-- maybe co-prime?
223-
creditThresholdForLevel :: TableConfig -> LevelNo -> MR.CreditThreshold
224-
creditThresholdForLevel conf (LevelNo _i) =
225-
let AllocNumEntries x = confWriteBufferAlloc conf
226-
in MR.CreditThreshold (MR.UnspentCredits (MergeCredits x))
227-
228221
-- | Deposit nominal credits in the local credits var, ensuring the total
229222
-- credits does not exceed the total debt.
230223
--

src/Database/LSMTree/Internal/Snapshot/Codec.hs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import Codec.CBOR.Decoding
2222
import Codec.CBOR.Encoding
2323
import Codec.CBOR.Read
2424
import Codec.CBOR.Write
25+
import Control.Monad (unless)
2526
import Control.Monad.Class.MonadThrow (Exception (displayException),
2627
MonadThrow (..))
2728
import Data.Bifunctor (Bifunctor (..))
@@ -274,27 +275,34 @@ instance Encode TableConfig where
274275
, confBloomFilterAlloc = bloomFilterAlloc
275276
, confFencePointerIndex = fencePointerIndex
276277
, confDiskCachePolicy = diskCachePolicy
278+
, confMergeBatchSize = mergeBatchSize
277279
}
278280
) =
279-
encodeListLen 7
281+
encodeListLen 8
280282
<> encode mergePolicy
281283
<> encode mergeSchedule
282284
<> encode sizeRatio
283285
<> encode writeBufferAlloc
284286
<> encode bloomFilterAlloc
285287
<> encode fencePointerIndex
286288
<> encode diskCachePolicy
289+
<> encode mergeBatchSize
287290

288291
instance DecodeVersioned TableConfig where
289292
decodeVersioned v@V0 = do
290-
_ <- decodeListLenOf 7
293+
n <- decodeListLen
294+
unless (n >= 7 && n <= 8) $
295+
fail "TableConfig: expected record of length 7 or 8"
291296
confMergePolicy <- decodeVersioned v
292297
confMergeSchedule <- decodeVersioned v
293298
confSizeRatio <- decodeVersioned v
294299
confWriteBufferAlloc <- decodeVersioned v
295300
confBloomFilterAlloc <- decodeVersioned v
296301
confFencePointerIndex <- decodeVersioned v
297302
confDiskCachePolicy <- decodeVersioned v
303+
confMergeBatchSize <- if n == 8
304+
then decodeVersioned v
305+
else pure (confMergeBatchSize defaultTableConfig)
298306
pure TableConfig {..}
299307

300308
-- MergePolicy
@@ -492,6 +500,14 @@ instance DecodeVersioned MergeSchedule where
492500
1 -> pure Incremental
493501
_ -> fail ("[MergeSchedule] Unexpected tag: " <> show tag)
494502

503+
-- MergeBatchSize
504+
505+
instance Encode MergeBatchSize where
506+
encode (MergeBatchSize n) = encodeInt n
507+
508+
instance DecodeVersioned MergeBatchSize where
509+
decodeVersioned V0 = MergeBatchSize <$> decodeInt
510+
495511
{-------------------------------------------------------------------------------
496512
Encoding and decoding: SnapLevels
497513
-------------------------------------------------------------------------------}

src/Database/LSMTree/Simple.hs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ module Database.LSMTree.Simple (
111111
FencePointerIndexType (OrdinaryIndex, CompactIndex),
112112
DiskCachePolicy (..),
113113
MergeSchedule (..),
114+
MergeBatchSize (..),
114115

115116
-- ** Table Configuration Overrides #table_configuration_overrides#
116117
OverrideDiskCachePolicy (..),
@@ -165,9 +166,9 @@ import Data.Vector (Vector)
165166
import Data.Void (Void)
166167
import Database.LSMTree (BloomFilterAlloc, CursorClosedError (..),
167168
DiskCachePolicy, FencePointerIndexType,
168-
InvalidSnapshotNameError (..), MergePolicy, MergeSchedule,
169-
OverrideDiskCachePolicy (..), Range (..), RawBytes,
170-
ResolveAsFirst (..), SerialiseKey (..),
169+
InvalidSnapshotNameError (..), MergeBatchSize, MergePolicy,
170+
MergeSchedule, OverrideDiskCachePolicy (..), Range (..),
171+
RawBytes, ResolveAsFirst (..), SerialiseKey (..),
171172
SerialiseKeyOrderPreserving, SerialiseValue (..),
172173
SessionClosedError (..), SizeRatio,
173174
SnapshotCorruptedError (..),

test/Test/Database/LSMTree/Internal/Snapshot/Codec.hs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,11 +230,11 @@ instance Arbitrary SnapshotRun where
230230

231231
instance Arbitrary TableConfig where
232232
arbitrary =
233-
TableConfig <$> arbitrary <*> arbitrary <*> arbitrary
234-
<*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
235-
shrink (TableConfig a b c d e f g) =
236-
[ TableConfig a' b' c' d' e' f' g'
237-
| (a', b', c', d', e', f', g') <- shrink (a, b, c, d, e, f, g) ]
233+
TableConfig <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
234+
<*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
235+
shrink (TableConfig a b c d e f g h) =
236+
[ TableConfig a' b' c' d' e' f' g' h'
237+
| (a', b', c', d', e', f', g', h') <- shrink (a, b, c, d, e, f, g, h) ]
238238

239239
instance Arbitrary MergePolicy where
240240
arbitrary = pure LazyLevelling
@@ -273,6 +273,10 @@ instance Arbitrary MergeSchedule where
273273
arbitrary = elements [OneShot, Incremental]
274274
shrink _ = []
275275

276+
instance Arbitrary MergeBatchSize where
277+
arbitrary = MergeBatchSize <$> arbitrary
278+
shrink (MergeBatchSize n) = map MergeBatchSize (shrink n)
279+
276280
{-------------------------------------------------------------------------------
277281
Arbitrary: SnapLevels
278282
-------------------------------------------------------------------------------}

test/Test/Database/LSMTree/Internal/Snapshot/Codec/Golden.hs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ import Data.Typeable
1515
import qualified Data.Vector as V
1616
import Database.LSMTree.Internal.Config (BloomFilterAlloc (..),
1717
DiskCachePolicy (..), FencePointerIndexType (..),
18-
MergePolicy (..), MergeSchedule (..), SizeRatio (..),
19-
TableConfig (..), WriteBufferAlloc (..))
18+
MergeBatchSize (..), MergePolicy (..), MergeSchedule (..),
19+
SizeRatio (..), TableConfig (..), WriteBufferAlloc (..))
2020
import Database.LSMTree.Internal.MergeSchedule
2121
(MergePolicyForLevel (..), NominalCredits (..),
2222
NominalDebt (..))
@@ -276,7 +276,8 @@ instance EnumGolden SnapshotLabel where
276276
SnapshotLabel{} -> ()
277277

278278
instance EnumGolden TableConfig where
279-
singGolden = TableConfig singGolden singGolden singGolden singGolden singGolden singGolden singGolden
279+
singGolden = TableConfig singGolden singGolden singGolden singGolden
280+
singGolden singGolden singGolden singGolden
280281
where
281282
_coveredAllCases = \case
282283
TableConfig{} -> ()
@@ -329,6 +330,9 @@ instance EnumGolden MergeSchedule where
329330
OneShot{} -> ()
330331
Incremental{} -> ()
331332

333+
instance EnumGolden MergeBatchSize where
334+
enumGolden = map MergeBatchSize [ 1, 1000 ]
335+
332336
instance EnumGolden (SnapLevels SnapshotRun) where
333337
singGolden = SnapLevels singGolden
334338
where

test/Test/Database/LSMTree/StateMachine.hs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,7 @@ import qualified Database.LSMTree.Class as Class
9494
import Database.LSMTree.Extras (showPowersOf)
9595
import Database.LSMTree.Extras.Generators (KeyForIndexCompact)
9696
import Database.LSMTree.Extras.NoThunks (propNoThunks)
97-
import qualified Database.LSMTree.Internal.Config as R
98-
(TableConfig (TableConfig))
97+
import qualified Database.LSMTree.Internal.Config as R (TableConfig (..))
9998
import Database.LSMTree.Internal.Serialise (SerialisedBlob,
10099
SerialisedValue)
101100
import qualified Database.LSMTree.Internal.Types as R.Types
@@ -226,6 +225,8 @@ instance Arbitrary R.TableConfig where
226225
]
227226
confWriteBufferAlloc <- QC.arbitrary
228227
confFencePointerIndex <- QC.arbitrary
228+
confMergeBatchSize <- QC.sized $ \sz ->
229+
R.MergeBatchSize <$> QC.chooseInt (1, sz)
229230
pure $ R.TableConfig {
230231
R.confMergePolicy = R.LazyLevelling
231232
, R.confSizeRatio = R.Four
@@ -234,6 +235,7 @@ instance Arbitrary R.TableConfig where
234235
, confFencePointerIndex
235236
, R.confDiskCachePolicy = R.DiskCacheNone
236237
, confMergeSchedule
238+
, confMergeBatchSize
237239
}
238240

239241
shrink R.TableConfig{..} =

0 commit comments

Comments
 (0)