Skip to content

Commit 23c41e3

Browse files
committed
Change representation for snapshot of a run
Again, the goal is to make the representation more self-contained so that less information needs to be supplied from the context during snapshot restore. And again the intnded use case is merging trees, and runs within them. In the case of a run it is the disk caching policy, and the index type. So overall for a run we store the run number (to find the disk files), the index type (so we know how to restore the index) and the caching policy. Arguably the index type should not need to be known in advance and the index file format should simply say which kind of index it is.
1 parent c182515 commit 23c41e3

File tree

7 files changed

+140
-59
lines changed

7 files changed

+140
-59
lines changed

src/Database/LSMTree/Internal.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1308,7 +1308,7 @@ openSnapshot sesh label tableType override snap resolve = do
13081308
(tableWriteBuffer, tableWriteBufferBlobs) <- openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths
13091309

13101310
-- Hard link runs into the active directory,
1311-
snapLevels' <- openRuns reg hfs hbio conf (sessionUniqCounter seshEnv) snapDir activeDir snapLevels
1311+
snapLevels' <- openRuns reg hfs hbio (sessionUniqCounter seshEnv) snapDir activeDir snapLevels
13121312

13131313
-- Convert from the snapshot format, restoring merge progress in the process
13141314
tableLevels <- fromSnapLevels reg hfs hbio conf (sessionUniqCounter seshEnv) resolve activeDir snapLevels'

src/Database/LSMTree/Internal/Index.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ module Database.LSMTree.Internal.Index
2525
(
2626
-- * Index types
2727
IndexType (Compact, Ordinary),
28+
indexToIndexType,
2829

2930
-- * Indexes
3031
Index (CompactIndex, OrdinaryIndex),
@@ -88,6 +89,10 @@ instance NFData Index where
8889
rnf (CompactIndex index) = rnf index
8990
rnf (OrdinaryIndex index) = rnf index
9091

92+
indexToIndexType :: Index -> IndexType
93+
indexToIndexType CompactIndex{} = Compact
94+
indexToIndexType OrdinaryIndex{} = Ordinary
95+
9196
{-|
9297
Searches for a page span that contains a key–value pair with the given key.
9398
If there is indeed such a pair, the result is the corresponding page span;

src/Database/LSMTree/Internal/Run.hs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,18 @@ module Database.LSMTree.Internal.Run (
1414
, sizeInPages
1515
, runFsPaths
1616
, runFsPathsNumber
17+
, runDataCaching
18+
, runIndexType
1719
, mkRawBlobRef
1820
, mkWeakBlobRef
1921
-- ** Run creation
2022
, fromMutable
2123
, fromWriteBuffer
2224
, RunParams (..)
23-
, RunDataCaching (..)
2425
-- * Snapshot
2526
, openFromDisk
27+
, RunDataCaching (..)
28+
, IndexType (..)
2629
) where
2730

2831
import Control.DeepSeq (NFData (..), rwhnf)
@@ -41,8 +44,8 @@ import qualified Database.LSMTree.Internal.BlobRef as BlobRef
4144
import Database.LSMTree.Internal.BloomFilter (bloomFilterFromSBS)
4245
import qualified Database.LSMTree.Internal.CRC32C as CRC
4346
import Database.LSMTree.Internal.Entry (NumEntries (..))
44-
import Database.LSMTree.Internal.Index (Index, IndexType)
45-
import qualified Database.LSMTree.Internal.Index as Index (fromSBS, sizeInPages)
47+
import Database.LSMTree.Internal.Index (Index, IndexType (..))
48+
import qualified Database.LSMTree.Internal.Index as Index
4649
import Database.LSMTree.Internal.Page (NumPages)
4750
import Database.LSMTree.Internal.Paths as Paths
4851
import Database.LSMTree.Internal.RunBuilder (RunBuilder,
@@ -113,6 +116,15 @@ runFsPaths (DeRef r) = runRunFsPaths r
113116
runFsPathsNumber :: Ref (Run m h) -> RunNumber
114117
runFsPathsNumber = Paths.runNumber . runFsPaths
115118

119+
-- | See 'openFromDisk'
120+
runIndexType :: Ref (Run m h) -> IndexType
121+
runIndexType (DeRef r) = Index.indexToIndexType (runIndex r)
122+
123+
-- | See 'openFromDisk'
124+
runDataCaching :: Ref (Run m h) -> RunDataCaching
125+
runDataCaching (DeRef r) = runRunDataCaching r
126+
127+
116128
-- | Helper function to make a 'WeakBlobRef' that points into a 'Run'.
117129
mkRawBlobRef :: Run m h -> BlobSpan -> RawBlobRef m h
118130
mkRawBlobRef Run{runBlobFile} blobspan =
@@ -237,6 +249,15 @@ fromWriteBuffer fs hbio params fsPaths buffer blobs = do
237249
--
238250
-- Exceptions will be raised when any of the file's contents don't match their
239251
-- checksum ('ChecksumError') or can't be parsed ('FileFormatError').
252+
--
253+
-- The 'RunDataCaching' and 'IndexType' parameters need to be saved and
254+
-- restored separately because these are not stored in the on-disk
255+
-- representation. Use 'runDataCaching' and 'runIndexType' to obtain these
256+
-- parameters from the open run before persisting to disk.
257+
--
258+
-- TODO: it may make more sense to persist these parameters with the run's
259+
-- on-disk representation.
260+
--
240261
openFromDisk ::
241262
forall m h.
242263
(MonadSTM m, MonadMask m, PrimMonad m)

src/Database/LSMTree/Internal/Snapshot.hs

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ module Database.LSMTree.Internal.Snapshot (
1414
, snapshotWriteBuffer
1515
, openWriteBuffer
1616
-- * Runs
17+
, SnapshotRun (..)
1718
, snapshotRuns
1819
, openRuns
1920
, releaseRuns
@@ -110,7 +111,7 @@ data SnapshotMetaData = SnapshotMetaData {
110111
-- | The write buffer.
111112
, snapWriteBuffer :: !RunNumber
112113
-- | The shape of the levels of the LSM tree.
113-
, snapMetaLevels :: !(SnapLevels RunNumber)
114+
, snapMetaLevels :: !(SnapLevels SnapshotRun)
114115
}
115116
deriving stock Eq
116117

@@ -353,12 +354,28 @@ openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do
353354
Runs
354355
-------------------------------------------------------------------------------}
355356

357+
-- | Information needed to open a 'Run' from disk using 'snapshotRuns' and
358+
-- 'openRuns'.
359+
--
360+
-- TODO: one could imagine needing only the 'RunNumber' to identify the files
361+
-- on disk, and the other parameters being stored with the run itself, rather
362+
-- than needing to be supplied.
363+
data SnapshotRun = SnapshotRun {
364+
snapRunNumber :: !RunNumber,
365+
snapRunCaching :: !Run.RunDataCaching,
366+
snapRunIndex :: !Run.IndexType
367+
}
368+
deriving stock Eq
369+
370+
instance NFData SnapshotRun where
371+
rnf (SnapshotRun a b c) = rnf a `seq` rnf b `seq` rnf c
372+
356373
{-# SPECIALISE snapshotRuns ::
357374
ActionRegistry IO
358375
-> UniqCounter IO
359376
-> NamedSnapshotDir
360377
-> SnapLevels (Ref (Run IO h))
361-
-> IO (SnapLevels RunNumber) #-}
378+
-> IO (SnapLevels SnapshotRun) #-}
362379
-- | @'snapshotRuns' _ _ snapUc targetDir levels@ creates hard links for all run
363380
-- files associated with the runs in @levels@, and puts the new directory
364381
-- entries in the @targetDir@ directory. The entries are renamed using @snapUc@.
@@ -368,7 +385,7 @@ snapshotRuns ::
368385
-> UniqCounter m
369386
-> NamedSnapshotDir
370387
-> SnapLevels (Ref (Run m h))
371-
-> m (SnapLevels RunNumber)
388+
-> m (SnapLevels SnapshotRun)
372389
snapshotRuns reg snapUc (NamedSnapshotDir targetDir) levels = do
373390
for levels $ \run@(DeRef Run.Run {
374391
Run.runHasFS = hfs,
@@ -378,17 +395,20 @@ snapshotRuns reg snapUc (NamedSnapshotDir targetDir) levels = do
378395
let sourcePaths = Run.runFsPaths run
379396
let targetPaths = sourcePaths { runDir = targetDir , runNumber = rn}
380397
hardLinkRunFiles reg hfs hbio sourcePaths targetPaths
381-
pure (runNumber targetPaths)
398+
pure SnapshotRun {
399+
snapRunNumber = runNumber targetPaths,
400+
snapRunCaching = Run.runDataCaching run,
401+
snapRunIndex = Run.runIndexType run
402+
}
382403

383404
{-# SPECIALISE openRuns ::
384405
ActionRegistry IO
385406
-> HasFS IO h
386407
-> HasBlockIO IO h
387-
-> TableConfig
388408
-> UniqCounter IO
389409
-> NamedSnapshotDir
390410
-> ActiveDir
391-
-> SnapLevels RunNumber
411+
-> SnapLevels SnapshotRun
392412
-> IO (SnapLevels (Ref (Run IO h))) #-}
393413
-- | @'openRuns' _ _ _ _ uniqCounter sourceDir targetDir levels@ takes all run
394414
-- files that are referenced by @levels@, and hard links them from @sourceDir@
@@ -402,23 +422,19 @@ openRuns ::
402422
=> ActionRegistry m
403423
-> HasFS m h
404424
-> HasBlockIO m h
405-
-> TableConfig
406425
-> UniqCounter m
407426
-> NamedSnapshotDir
408427
-> ActiveDir
409-
-> SnapLevels RunNumber
428+
-> SnapLevels SnapshotRun
410429
-> m (SnapLevels (Ref (Run m h)))
411-
openRuns
412-
reg hfs hbio TableConfig{..} uc
413-
(NamedSnapshotDir sourceDir) (ActiveDir targetDir) (SnapLevels levels) = do
414-
levels' <-
415-
V.iforM levels $ \i level ->
416-
let ln = LevelNo (i+1) in
417-
let
418-
caching = diskCachePolicyForLevel confDiskCachePolicy ln
419-
indexType = indexTypeForRun confFencePointerIndex
420-
in
421-
for level $ \runNum -> do
430+
openRuns reg hfs hbio uc (NamedSnapshotDir sourceDir) (ActiveDir targetDir)
431+
levels =
432+
for levels $
433+
\SnapshotRun {
434+
snapRunNumber = runNum,
435+
snapRunCaching = caching,
436+
snapRunIndex = indexType
437+
} -> do
422438
let sourcePaths = RunFsPaths sourceDir runNum
423439
runNum' <- uniqueToRunNumber <$> incrUniqCounter uc
424440
let targetPaths = RunFsPaths targetDir runNum'
@@ -427,7 +443,6 @@ openRuns
427443
withRollback reg
428444
(Run.openFromDisk hfs hbio caching indexType targetPaths)
429445
releaseRef
430-
pure (SnapLevels levels')
431446

432447
{-# SPECIALISE releaseRuns ::
433448
ActionRegistry IO -> SnapLevels (Ref (Run IO h)) -> IO ()

src/Database/LSMTree/Internal/Snapshot/Codec.hs

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,25 @@ instance DecodeVersioned SnapshotTableType where
262262
2 -> pure SnapFullTable
263263
_ -> fail ("[SnapshotTableType] Unexpected tag: " <> show tag)
264264

265+
instance Encode SnapshotRun where
266+
encode SnapshotRun { snapRunNumber, snapRunCaching, snapRunIndex } =
267+
encodeListLen 4
268+
<> encodeWord 0
269+
<> encode snapRunNumber
270+
<> encode snapRunCaching
271+
<> encode snapRunIndex
272+
273+
instance DecodeVersioned SnapshotRun where
274+
decodeVersioned v@V0 = do
275+
n <- decodeListLen
276+
tag <- decodeWord
277+
case (n, tag) of
278+
(4, 0) -> do snapRunNumber <- decodeVersioned v
279+
snapRunCaching <- decodeVersioned v
280+
snapRunIndex <- decodeVersioned v
281+
pure SnapshotRun{..}
282+
_ -> fail ("[SnapshotRun] Unexpected combination of list length and tag: " <> show (n, tag))
283+
265284
{-------------------------------------------------------------------------------
266285
Encoding and decoding: TableConfig
267286
-------------------------------------------------------------------------------}
@@ -499,38 +518,38 @@ instance DecodeVersioned MergeSchedule where
499518

500519
-- SnapLevels
501520

502-
instance Encode (SnapLevels RunNumber) where
521+
instance Encode r => Encode (SnapLevels r) where
503522
encode (SnapLevels levels) =
504523
encodeListLen (fromIntegral (V.length levels))
505524
<> V.foldMap encode levels
506525

507-
instance DecodeVersioned (SnapLevels RunNumber) where
526+
instance DecodeVersioned r => DecodeVersioned (SnapLevels r) where
508527
decodeVersioned v@V0 = do
509528
n <- decodeListLen
510529
SnapLevels <$> V.replicateM n (decodeVersioned v)
511530

512531
-- SnapLevel
513532

514-
instance Encode (SnapLevel RunNumber) where
533+
instance Encode r => Encode (SnapLevel r) where
515534
encode (SnapLevel incomingRuns residentRuns) =
516535
encodeListLen 2
517536
<> encode incomingRuns
518537
<> encode residentRuns
519538

520539

521-
instance DecodeVersioned (SnapLevel RunNumber) where
540+
instance DecodeVersioned r => DecodeVersioned (SnapLevel r) where
522541
decodeVersioned v@V0 = do
523542
_ <- decodeListLenOf 2
524543
SnapLevel <$> decodeVersioned v <*> decodeVersioned v
525544

526-
-- Vector RunNumber
545+
-- Vector
527546

528-
instance Encode (V.Vector RunNumber) where
547+
instance Encode r => Encode (V.Vector r) where
529548
encode rns =
530549
encodeListLen (fromIntegral (V.length rns))
531550
<> V.foldMap encode rns
532551

533-
instance DecodeVersioned (V.Vector RunNumber) where
552+
instance DecodeVersioned r => DecodeVersioned (V.Vector r) where
534553
decodeVersioned v@V0 = do
535554
n <- decodeListLen
536555
V.replicateM n (decodeVersioned v)
@@ -545,7 +564,7 @@ instance DecodeVersioned RunNumber where
545564

546565
-- SnapIncomingRun
547566

548-
instance Encode (SnapIncomingRun RunNumber) where
567+
instance Encode r => Encode (SnapIncomingRun r) where
549568
encode (SnapMergingRun mpfl nd nc smrs) =
550569
encodeListLen 5
551570
<> encodeWord 0
@@ -558,7 +577,7 @@ instance Encode (SnapIncomingRun RunNumber) where
558577
<> encodeWord 1
559578
<> encode x
560579

561-
instance DecodeVersioned (SnapIncomingRun RunNumber) where
580+
instance DecodeVersioned r => DecodeVersioned (SnapIncomingRun r) where
562581
decodeVersioned v@V0 = do
563582
n <- decodeListLen
564583
tag <- decodeWord
@@ -592,22 +611,22 @@ instance DecodeVersioned MergePolicyForLevel where
592611

593612
-- SnapMergingRunState
594613

595-
instance Encode t => Encode (SnapMergingRunState t RunNumber) where
614+
instance (Encode t, Encode r) => Encode (SnapMergingRunState t r) where
596615
encode (SnapCompletedMerge nr md r) =
597616
encodeListLen 4
598617
<> encodeWord 0
599618
<> encode nr
600619
<> encode md
601620
<> encode r
602-
encode (SnapOngoingMerge ln mc rs mt) =
621+
encode (SnapOngoingMerge rp mc rs mt) =
603622
encodeListLen 5
604623
<> encodeWord 1
605-
<> encode ln
624+
<> encode rp
606625
<> encode mc
607626
<> encode rs
608627
<> encode mt
609628

610-
instance DecodeVersioned t => DecodeVersioned (SnapMergingRunState t RunNumber) where
629+
instance (DecodeVersioned t, DecodeVersioned r) => DecodeVersioned (SnapMergingRunState t r) where
611630
decodeVersioned v@V0 = do
612631
n <- decodeListLen
613632
tag <- decodeWord

0 commit comments

Comments
 (0)