Skip to content

Commit 87a2aac

Browse files
committed
Introduce RunParams data type to bundle several run building params
These params are (almost) always used together, so this helps to reduce verbosity and the number of separate args that have to be passed around. We also slightly simplify when args are provided. Now all these params are provided to the RunBuilder, and none extra are needed when the RunBuilder is turned into the Run. Previously, two were passed to the RunBuilder and one more passed to the Run (though actually finalising the RunBuidler also needed the third param anyway). However, the real motivation for all this is to improve restoring snapshots of merging runs. At the moment, deserialising a merging run requires us to supply all three of these params from the surrounding context. This is ok in the context of levels, but becomes awkward for merging runs embedded in a merging tree. The obvious solution is to serialise this set of parameters with the merging run, and then they're directly available to use when reconstructing the merge during snapshot restore. Changes to serialisation are not in this patch, but the next.
1 parent 2b12878 commit 87a2aac

File tree

16 files changed

+158
-155
lines changed

16 files changed

+158
-155
lines changed

bench/macro/lsm-tree-bench-lookups.hs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import Database.LSMTree.Internal.Paths (RunFsPaths (RunFsPaths))
3232
import Database.LSMTree.Internal.Run (Run)
3333
import qualified Database.LSMTree.Internal.Run as Run
3434
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
35+
import Database.LSMTree.Internal.RunBuilder (RunParams (..))
3536
import qualified Database.LSMTree.Internal.RunBuilder as RunBuilder
3637
import Database.LSMTree.Internal.RunNumber
3738
import Database.LSMTree.Internal.Serialise (SerialisedKey,
@@ -349,10 +350,13 @@ lookupsEnv runSizes keyRng0 hfs hbio caching = do
349350
-- create the runs
350351
rbs <- sequence
351352
[ RunBuilder.new hfs hbio
353+
RunParams {
354+
runParamCaching = caching,
355+
runParamAlloc = RunAllocFixed benchmarkNumBitsPerEntry,
356+
runParamIndex = Index.Compact
357+
}
352358
(RunFsPaths (FS.mkFsPath []) (RunNumber i))
353359
(NumEntries numEntries)
354-
(RunAllocFixed benchmarkNumBitsPerEntry)
355-
Index.Compact
356360
| ((numEntries, _), i) <- zip runSizes [0..] ]
357361

358362
-- fill the runs
@@ -373,7 +377,7 @@ lookupsEnv runSizes keyRng0 hfs hbio caching = do
373377
putStr "DONE"
374378

375379
-- return runs
376-
runs <- V.fromList <$> mapM (Run.fromMutable caching) rbs
380+
runs <- V.fromList <$> mapM Run.fromMutable rbs
377381
let blooms = V.map (\(DeRef r) -> Run.runFilter r) runs
378382
indexes = V.map (\(DeRef r) -> Run.runIndex r) runs
379383
handles = V.map (\(DeRef r) -> Run.runKOpsFile r) runs

bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,15 @@ import qualified Data.Vector as V
1919
import Database.LSMTree.Extras.Orphans ()
2020
import Database.LSMTree.Extras.Random (frequency, randomByteStringR,
2121
sampleUniformWithReplacement, uniformWithoutReplacement)
22+
import Database.LSMTree.Extras.RunData (defaultRunParams)
2223
import Database.LSMTree.Extras.UTxO
2324
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
24-
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
2525
import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches,
2626
intraPageLookups, lookupsIO, prepLookups)
2727
import Database.LSMTree.Internal.Page (getNumPages)
2828
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
2929
import Database.LSMTree.Internal.Run (Run)
3030
import qualified Database.LSMTree.Internal.Run as Run
31-
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
3231
import Database.LSMTree.Internal.RunNumber
3332
import Database.LSMTree.Internal.Serialise
3433
import qualified Database.LSMTree.Internal.WriteBuffer as WB
@@ -192,7 +191,7 @@ lookupsInBatchesEnv Config {..} = do
192191
wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"])
193192
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys
194193
let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
195-
r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) Index.Compact fsps wb wbblobs
194+
r <- Run.fromWriteBuffer hasFS hasBlockIO defaultRunParams fsps wb wbblobs
196195
let NumEntries nentriesReal = Run.size r
197196
assertEqual nentriesReal nentries $ pure ()
198197
-- 42 to 43 entries per page

bench/micro/Bench/Database/LSMTree/Internal/Merge.hs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import qualified Database.LSMTree.Internal.Merge as Merge
2323
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
2424
import Database.LSMTree.Internal.Run (Run)
2525
import qualified Database.LSMTree.Internal.Run as Run
26-
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
2726
import Database.LSMTree.Internal.RunNumber
2827
import Database.LSMTree.Internal.Serialise
2928
import Database.LSMTree.Internal.UniqCounter
@@ -264,8 +263,7 @@ merge ::
264263
merge fs hbio Config {..} targetPaths runs = do
265264
let f = fromMaybe const mergeMappend
266265
m <- fromMaybe (error "empty inputs, no merge created") <$>
267-
Merge.new fs hbio Run.CacheRunData (RunAllocFixed 10) Index.Compact
268-
mergeType f targetPaths runs
266+
Merge.new fs hbio defaultRunParams mergeType f targetPaths runs
269267
Merge.stepsToCompletion m stepSize
270268

271269
fsPath :: FS.FsPath

src-extras/Database/LSMTree/Extras/MergingRunData.hs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,7 @@ import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue)
2828
import Database.LSMTree.Internal.MergingRun (MergingRun)
2929
import qualified Database.LSMTree.Internal.MergingRun as MR
3030
import Database.LSMTree.Internal.Paths
31-
import Database.LSMTree.Internal.Run (RunDataCaching (..))
3231
import qualified Database.LSMTree.Internal.Run as Run
33-
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
3432
import Database.LSMTree.Internal.RunNumber
3533
import Database.LSMTree.Internal.Serialise
3634
import Database.LSMTree.Internal.UniqCounter
@@ -88,8 +86,8 @@ unsafeCreateMergingRun hfs hbio resolve indexType path counter = \case
8886
$ \runs -> do
8987
n <- incrUniqCounter counter
9088
let fsPaths = RunFsPaths path (RunNumber (uniqueToInt n))
91-
MR.new hfs hbio resolve CacheRunData (RunAllocFixed 10) indexType
92-
mergeType fsPaths (V.fromList runs)
89+
MR.new hfs hbio resolve defaultRunParams mergeType
90+
fsPaths (V.fromList runs)
9391

9492
{-------------------------------------------------------------------------------
9593
MergingRunData

src-extras/Database/LSMTree/Extras/NoThunks.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,18 @@ deriving stock instance Generic (Run m h)
214214
deriving anyclass instance (Typeable m, Typeable (PrimState m), Typeable h)
215215
=> NoThunks (Run m h)
216216

217+
deriving stock instance Generic RunParams
218+
deriving anyclass instance NoThunks RunParams
219+
220+
deriving stock instance Generic RunBloomFilterAlloc
221+
deriving anyclass instance NoThunks RunBloomFilterAlloc
222+
217223
deriving stock instance Generic RunDataCaching
218224
deriving anyclass instance NoThunks RunDataCaching
219225

226+
deriving stock instance Generic IndexType
227+
deriving anyclass instance NoThunks IndexType
228+
220229
{-------------------------------------------------------------------------------
221230
Paths
222231
-------------------------------------------------------------------------------}

src-extras/Database/LSMTree/Extras/RunData.hs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
-- from them. Tests and benchmarks should preferably use these utilities instead
33
-- of (re-)defining their own.
44
module Database.LSMTree.Extras.RunData (
5+
-- * RunParams
6+
defaultRunParams
57
-- * Create runs
6-
withRun
8+
, withRun
79
, withRunAt
810
, withRuns
911
, unsafeCreateRun
@@ -48,12 +50,13 @@ import qualified Data.Vector as V
4850
import Database.LSMTree.Extras (showPowersOf10)
4951
import Database.LSMTree.Extras.Generators ()
5052
import Database.LSMTree.Internal.Entry
51-
import Database.LSMTree.Internal.Index (IndexType)
53+
import Database.LSMTree.Internal.Index (IndexType (..))
5254
import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue)
5355
import Database.LSMTree.Internal.MergeSchedule (addWriteBufferEntries)
5456
import Database.LSMTree.Internal.Paths
5557
import qualified Database.LSMTree.Internal.Paths as Paths
56-
import Database.LSMTree.Internal.Run (Run, RunDataCaching (..))
58+
import Database.LSMTree.Internal.Run (Run, RunDataCaching (..),
59+
RunParams (..))
5760
import qualified Database.LSMTree.Internal.Run as Run
5861
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..),
5962
entryWouldFitInPage)
@@ -69,6 +72,15 @@ import qualified System.FS.BlockIO.API as FS
6972
import System.FS.BlockIO.API (HasBlockIO)
7073
import Test.QuickCheck
7174

75+
76+
defaultRunParams :: RunParams
77+
defaultRunParams =
78+
RunParams {
79+
runParamCaching = CacheRunData,
80+
runParamAlloc = RunAllocFixed 10,
81+
runParamIndex = Compact
82+
}
83+
7284
{-------------------------------------------------------------------------------
7385
Create runs
7486
-------------------------------------------------------------------------------}
@@ -153,7 +165,8 @@ unsafeCreateRunAt fs hbio indexType fsPaths (RunData m) = do
153165
let blobpath = FS.addExtension (runBlobPath fsPaths) ".wb"
154166
bracket (WBB.new fs blobpath) releaseRef $ \wbblobs -> do
155167
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob fs wbblobs)) m
156-
Run.fromWriteBuffer fs hbio CacheRunData (RunAllocFixed 10) indexType
168+
Run.fromWriteBuffer fs hbio
169+
defaultRunParams { runParamIndex = indexType }
157170
fsPaths wb wbblobs
158171

159172
-- | Create a 'RunFsPaths' using an empty 'FsPath'. The empty path corresponds

src/Database/LSMTree/Internal.hs

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,28 +1534,19 @@ writeBufferToNewRun SessionEnv {
15341534
sessionHasBlockIO = hbio,
15351535
sessionUniqCounter = uc
15361536
}
1537-
conf@TableConfig {
1538-
confDiskCachePolicy,
1539-
confFencePointerIndex
1540-
}
1537+
conf
15411538
TableContent{
15421539
tableWriteBuffer,
15431540
tableWriteBufferBlobs
15441541
}
15451542
| WB.null tableWriteBuffer = pure Nothing
15461543
| otherwise = Just <$> do
1547-
!n <- incrUniqCounter uc
1548-
let !ln = LevelNo 1
1549-
!cache = diskCachePolicyForLevel confDiskCachePolicy ln
1550-
!alloc = bloomFilterAllocForLevel conf ln
1551-
!indexType = indexTypeForRun confFencePointerIndex
1552-
!path = Paths.runPath (Paths.activeDir root)
1553-
(uniqueToRunNumber n)
1554-
Run.fromWriteBuffer hfs hbio
1555-
cache
1556-
alloc
1557-
indexType
1558-
path
1544+
!uniq <- incrUniqCounter uc
1545+
let (!runParams, !runPaths) = mergingRunParamsForLevel
1546+
(Paths.activeDir root) conf uniq (LevelNo 1)
1547+
Run.fromWriteBuffer
1548+
hfs hbio
1549+
runParams runPaths
15591550
tableWriteBuffer
15601551
tableWriteBufferBlobs
15611552

src/Database/LSMTree/Internal/Index.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ import Database.LSMTree.Internal.Serialise (SerialisedKey)
7070

7171
-- | The type of supported index types.
7272
data IndexType = Compact | Ordinary
73+
deriving stock (Eq, Show)
7374

7475
-- * Indexes
7576

src/Database/LSMTree/Internal/Merge.hs

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ module Database.LSMTree.Internal.Merge (
99
, TreeMergeType (..)
1010
, Mappend
1111
, MergeState (..)
12+
, RunParams (..)
1213
, new
1314
, abort
1415
, complete
@@ -31,11 +32,9 @@ import Data.Traversable (for)
3132
import qualified Data.Vector as V
3233
import Database.LSMTree.Internal.BlobRef (RawBlobRef)
3334
import Database.LSMTree.Internal.Entry
34-
import Database.LSMTree.Internal.Index (IndexType)
35-
import Database.LSMTree.Internal.Run (Run, RunDataCaching)
35+
import Database.LSMTree.Internal.Run (Run)
3636
import qualified Database.LSMTree.Internal.Run as Run
37-
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
38-
import Database.LSMTree.Internal.RunBuilder (RunBuilder)
37+
import Database.LSMTree.Internal.RunBuilder (RunBuilder, RunParams)
3938
import qualified Database.LSMTree.Internal.RunBuilder as Builder
4039
import qualified Database.LSMTree.Internal.RunReader as Reader
4140
import Database.LSMTree.Internal.RunReaders (Readers)
@@ -60,8 +59,6 @@ data Merge t m h = Merge {
6059
, mergeMappend :: !Mappend
6160
, mergeReaders :: {-# UNPACK #-} !(Readers m h)
6261
, mergeBuilder :: !(RunBuilder m h)
63-
-- | The caching policy to use for the output Run.
64-
, mergeCaching :: !RunDataCaching
6562
-- | The result of the latest call to 'steps'. This is used to determine
6663
-- whether a merge can be 'complete'd.
6764
, mergeState :: !(MutVar (PrimState m) MergeState)
@@ -152,9 +149,7 @@ type Mappend = SerialisedValue -> SerialisedValue -> SerialisedValue
152149
IsMergeType t
153150
=> HasFS IO h
154151
-> HasBlockIO IO h
155-
-> RunDataCaching
156-
-> RunBloomFilterAlloc
157-
-> IndexType
152+
-> RunParams
158153
-> t
159154
-> Mappend
160155
-> Run.RunFsPaths
@@ -166,21 +161,19 @@ new ::
166161
(IsMergeType t, MonadMask m, MonadSTM m, MonadST m)
167162
=> HasFS m h
168163
-> HasBlockIO m h
169-
-> RunDataCaching
170-
-> RunBloomFilterAlloc
171-
-> IndexType
164+
-> RunParams
172165
-> t
173166
-> Mappend
174167
-> Run.RunFsPaths
175168
-> V.Vector (Ref (Run m h))
176169
-> m (Maybe (Merge t m h))
177-
new hfs hbio mergeCaching alloc indexType mergeType mergeMappend targetPaths runs = do
170+
new hfs hbio runParams mergeType mergeMappend targetPaths runs = do
178171
-- no offset, no write buffer
179172
mreaders <- Readers.new Readers.NoOffsetKey Nothing runs
180173
for mreaders $ \mergeReaders -> do
181174
-- calculate upper bounds based on input runs
182175
let numEntries = V.foldMap' Run.size runs
183-
mergeBuilder <- Builder.new hfs hbio targetPaths numEntries alloc indexType
176+
mergeBuilder <- Builder.new hfs hbio runParams targetPaths numEntries
184177
mergeState <- newMutVar $! Merging
185178
return Merge {
186179
mergeIsLastLevel = isLastLevel mergeType
@@ -239,7 +232,7 @@ complete Merge{..} = do
239232
Merging -> error "complete: Merge is not done"
240233
MergingDone -> do
241234
-- the readers are already drained, therefore closed
242-
r <- Run.fromMutable mergeCaching mergeBuilder
235+
r <- Run.fromMutable mergeBuilder
243236
writeMutVar mergeState $! Completed
244237
pure r
245238
Completed -> error "complete: Merge is already completed"

0 commit comments

Comments
 (0)