Skip to content

Commit 2044d00

Browse files
authored
Merge pull request #456 from IntersectMBO/jdral/named-snapshot-directory
Create a proper directory for each snapshot
2 parents 63e5e06 + bbcde44 commit 2044d00

File tree

7 files changed

+147
-78
lines changed

7 files changed

+147
-78
lines changed

.hlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
- ignore: {name: "Use section"}
2626
- ignore: {name: "Redundant $!"}
2727
- ignore: {name: "Use shows"}
28+
- ignore: {name: "Use fmap"}
2829

2930
# Specify additional command line arguments
3031
#

CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Note: later rules override earlier rules.
22

33
# Default
4-
* @dcoutts @jorisdral @mheinzel @recursion-ninja
4+
* @dcoutts @jorisdral @mheinzel @recursion-ninja @wenkokke

src/Database/LSMTree/Internal.hs

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ import Control.Concurrent.Class.MonadSTM (MonadSTM (..))
7272
import Control.Concurrent.Class.MonadSTM.RWVar (RWVar)
7373
import qualified Control.Concurrent.Class.MonadSTM.RWVar as RW
7474
import Control.DeepSeq
75-
import Control.Monad (unless, void, when)
75+
import Control.Monad (unless)
7676
import Control.Monad.Class.MonadST (MonadST (..))
7777
import Control.Monad.Class.MonadThrow
7878
import Control.Monad.Fix (MonadFix)
@@ -100,7 +100,8 @@ import Database.LSMTree.Internal.Lookup (ByteCountDiscrepancy,
100100
ResolveSerialisedValue, lookupsIO)
101101
import Database.LSMTree.Internal.MergeSchedule
102102
import Database.LSMTree.Internal.Paths (SessionRoot (..),
103-
SnapshotName)
103+
SnapshotMetaDataChecksumFile (..),
104+
SnapshotMetaDataFile (..), SnapshotName)
104105
import qualified Database.LSMTree.Internal.Paths as Paths
105106
import Database.LSMTree.Internal.Range (Range (..))
106107
import Database.LSMTree.Internal.Run (Run)
@@ -115,7 +116,6 @@ import qualified Database.LSMTree.Internal.WriteBuffer as WB
115116
import qualified Database.LSMTree.Internal.WriteBufferBlobs as WBB
116117
import qualified System.FS.API as FS
117118
import System.FS.API (FsError, FsErrorPath (..), FsPath, HasFS)
118-
import qualified System.FS.API.Lazy as FS
119119
import qualified System.FS.BlockIO.API as FS
120120
import System.FS.BlockIO.API (HasBlockIO)
121121

@@ -1077,9 +1077,17 @@ createSnapshot resolve snap label tableType t = do
10771077
let conf = tableConfig t
10781078
withOpenTable t $ \thEnv -> do
10791079
let hfs = tableHasFS thEnv
1080-
let snapPath = Paths.snapshot (tableSessionRoot thEnv) snap
1081-
FS.doesFileExist (tableHasFS thEnv) snapPath >>= \b ->
1082-
when b $ throwIO (ErrSnapshotExists snap)
1080+
1081+
-- Guard that the snapshot does not exist already
1082+
let snapDir = Paths.namedSnapshotDir (tableSessionRoot thEnv) snap
1083+
doesSnapshotExist <-
1084+
FS.doesDirectoryExist (tableHasFS thEnv) (Paths.getNamedSnapshotDir snapDir)
1085+
if doesSnapshotExist then
1086+
throwIO (ErrSnapshotExists snap)
1087+
else
1088+
-- we assume the snapshots directory already exists, so we just have to
1089+
-- create the directory for this specific snapshot.
1090+
FS.createDirectory hfs (Paths.getNamedSnapshotDir snapDir)
10831091

10841092
-- For the temporary implementation it is okay to just flush the buffer
10851093
-- before taking the snapshot.
@@ -1110,13 +1118,10 @@ createSnapshot resolve snap label tableType t = do
11101118
-- consistent.
11111119

11121120
snappedLevels <- snapLevels (tableLevels content)
1113-
let snapContents = encodeSnapshotMetaData (SnapshotMetaData label tableType (tableConfig t) snappedLevels)
1114-
1115-
FS.withFile
1116-
(tableHasFS thEnv)
1117-
snapPath
1118-
(FS.WriteMode FS.MustBeNew) $ \h ->
1119-
void $ FS.hPutAll (tableHasFS thEnv) h snapContents
1121+
let snapMetaData = SnapshotMetaData label tableType (tableConfig t) snappedLevels
1122+
SnapshotMetaDataFile contentPath = Paths.snapshotMetaDataFile snapDir
1123+
SnapshotMetaDataChecksumFile checksumPath = Paths.snapshotMetaDataChecksumFile snapDir
1124+
writeFileSnapshotMetaData hfs contentPath checksumPath snapMetaData
11201125

11211126
pure $! numSnapRuns snappedLevels
11221127

@@ -1142,20 +1147,20 @@ openSnapshot sesh label tableType override snap resolve = do
11421147
traceWith (sessionTracer sesh) $ TraceOpenSnapshot snap override
11431148
withOpenSession sesh $ \seshEnv -> do
11441149
withTempRegistry $ \reg -> do
1145-
let hfs = sessionHasFS seshEnv
1146-
hbio = sessionHasBlockIO seshEnv
1147-
snapPath = Paths.snapshot (sessionRoot seshEnv) snap
1148-
FS.doesFileExist hfs snapPath >>= \b ->
1150+
let hfs = sessionHasFS seshEnv
1151+
hbio = sessionHasBlockIO seshEnv
1152+
1153+
-- Guard that the snapshot exists
1154+
let snapDir = Paths.namedSnapshotDir (sessionRoot seshEnv) snap
1155+
FS.doesDirectoryExist hfs (Paths.getNamedSnapshotDir snapDir) >>= \b ->
11491156
unless b $ throwIO (ErrSnapshotNotExists snap)
1150-
bs <- FS.withFile
1151-
hfs
1152-
snapPath
1153-
FS.ReadMode $ \h ->
1154-
FS.hGetAll (sessionHasFS seshEnv) h
11551157

1156-
snapMetaData <- case decodeSnapshotMetaData bs of
1158+
let SnapshotMetaDataFile contentPath = Paths.snapshotMetaDataFile snapDir
1159+
SnapshotMetaDataChecksumFile checksumPath = Paths.snapshotMetaDataChecksumFile snapDir
1160+
snapMetaData <- readFileSnapshotMetaData hfs contentPath checksumPath >>= \case
11571161
Left e -> throwIO (ErrSnapshotDeserialiseFailure e snap)
11581162
Right x -> pure x
1163+
11591164
let SnapshotMetaData label' tableType' conf snappedLevels = snapMetaData
11601165

11611166
unless (tableType == tableType') $
@@ -1195,10 +1200,12 @@ deleteSnapshot sesh snap = do
11951200
traceWith (sessionTracer sesh) $ TraceDeleteSnapshot snap
11961201
withOpenSession sesh $ \seshEnv -> do
11971202
let hfs = sessionHasFS seshEnv
1198-
snapPath = Paths.snapshot (sessionRoot seshEnv) snap
1199-
FS.doesFileExist hfs snapPath >>= \b ->
1200-
unless b $ throwIO (ErrSnapshotNotExists snap)
1201-
FS.removeFile hfs snapPath
1203+
1204+
let snapDir = Paths.namedSnapshotDir (sessionRoot seshEnv) snap
1205+
doesSnapshotExist <-
1206+
FS.doesDirectoryExist (sessionHasFS seshEnv) (Paths.getNamedSnapshotDir snapDir)
1207+
unless doesSnapshotExist $ throwIO (ErrSnapshotNotExists snap)
1208+
FS.removeDirectoryRecursive hfs (Paths.getNamedSnapshotDir snapDir)
12021209

12031210
{-# SPECIALISE listSnapshots :: Session IO h -> IO [SnapshotName] #-}
12041211
-- | See 'Database.LSMTree.Common.listSnapshots'.
@@ -1219,8 +1226,9 @@ listSnapshots sesh = do
12191226
case Paths.mkSnapshotName s of
12201227
Nothing -> pure Nothing
12211228
Just snap -> do
1222-
-- check that it is a file
1223-
b <- FS.doesFileExist hfs (Paths.snapshot root snap)
1229+
-- check that it is a directory
1230+
b <- FS.doesDirectoryExist hfs
1231+
(Paths.getNamedSnapshotDir $ Paths.namedSnapshotDir root snap)
12241232
if b then pure $ Just snap
12251233
else pure $ Nothing
12261234

src/Database/LSMTree/Internal/CRC32C.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ module Database.LSMTree.Internal.CRC32C (
4141
readChecksumsFile,
4242
writeChecksumsFile,
4343
writeChecksumsFile',
44+
45+
hexdigitsToInt
4446
) where
4547

4648
import Control.Monad
@@ -212,7 +214,7 @@ hGetAllCRC32C' ::
212214
-> m CRC32C
213215
hGetAllCRC32C' hfs h (ChunkSize !chunkSize) !crc0
214216
| chunkSize <= 0
215-
= error "hGetAllCRC32C_SBS: chunkSize must be >0"
217+
= error "hGetAllCRC32C': chunkSize must be >0"
216218
| otherwise
217219
= do
218220
buf@(MutableByteArray !mba#) <- newPinnedByteArray (fromIntegral chunkSize)

src/Database/LSMTree/Internal/Paths.hs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@ module Database.LSMTree.Internal.Paths (
44
, activeDir
55
, runPath
66
, snapshotsDir
7-
, snapshot
7+
, NamedSnapshotDir (..)
8+
, namedSnapshotDir
9+
, SnapshotMetaDataFile (..)
10+
, snapshotMetaDataFile
11+
, SnapshotMetaDataChecksumFile (..)
12+
, snapshotMetaDataChecksumFile
813
-- * Table paths
914
, tableBlobPath
1015
-- * Snapshot name
@@ -59,8 +64,27 @@ runPath root = RunFsPaths (activeDir root)
5964
snapshotsDir :: SessionRoot -> FsPath
6065
snapshotsDir (SessionRoot dir) = dir </> mkFsPath ["snapshots"]
6166

62-
snapshot :: SessionRoot -> SnapshotName -> FsPath
63-
snapshot root (MkSnapshotName name) = snapshotsDir root </> mkFsPath [name]
67+
-- | The directory for a specific, /named/ snapshot.
68+
--
69+
-- Not to be confused with the snapshot/s/ directory, which holds all named
70+
-- snapshot directories.
71+
newtype NamedSnapshotDir = NamedSnapshotDir { getNamedSnapshotDir :: FsPath }
72+
73+
namedSnapshotDir :: SessionRoot -> SnapshotName -> NamedSnapshotDir
74+
namedSnapshotDir root (MkSnapshotName name) =
75+
NamedSnapshotDir (snapshotsDir root </> mkFsPath [name])
76+
77+
newtype SnapshotMetaDataFile = SnapshotMetaDataFile FsPath
78+
79+
snapshotMetaDataFile :: NamedSnapshotDir -> SnapshotMetaDataFile
80+
snapshotMetaDataFile (NamedSnapshotDir dir) =
81+
SnapshotMetaDataFile (dir </> mkFsPath ["metadata"])
82+
83+
newtype SnapshotMetaDataChecksumFile = SnapshotMetaDataChecksumFile FsPath
84+
85+
snapshotMetaDataChecksumFile :: NamedSnapshotDir -> SnapshotMetaDataChecksumFile
86+
snapshotMetaDataChecksumFile (NamedSnapshotDir dir) =
87+
SnapshotMetaDataChecksumFile (dir </> mkFsPath ["metadata.checksum"])
6488

6589
{-------------------------------------------------------------------------------
6690
Snapshot name

src/Database/LSMTree/Internal/Run.hs

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,38 +5,7 @@
55
{-# LANGUAGE RecordWildCards #-}
66
{-# LANGUAGE RecursiveDo #-}
77

8-
-- | Functionality related to LSM-Tree runs (sequences of LSM-Tree data).
9-
--
10-
-- === TODO
11-
--
12-
-- This is temporary module header documentation. The module will be
13-
-- fleshed out more as we implement bits of it.
14-
--
15-
-- Related work packages: 5, 6
16-
--
17-
-- This module includes in-memory parts and I\/O parts for, amongst others,
18-
--
19-
-- * High-performance batch lookups
20-
--
21-
-- * Range lookups
22-
--
23-
-- * Incremental run construction
24-
--
25-
-- * Lookups in loaded disk pages, value resolution
26-
--
27-
-- * In-memory representation of a run
28-
--
29-
-- * Flushing a write buffer to a run
30-
--
31-
-- * Opening, deserialising, and verifying files for an LSM run.
32-
--
33-
-- * Closing runs (and removing files)
34-
--
35-
-- * high performance, incremental k-way merge
36-
--
37-
-- The above list is a sketch. Functionality may move around, and the list is
38-
-- not exhaustive.
39-
--
8+
-- | Runs of sorted key\/value data.
409
module Database.LSMTree.Internal.Run (
4110
-- * Run
4211
Run (..)
@@ -50,8 +19,10 @@ module Database.LSMTree.Internal.Run (
5019
-- ** Run creation
5120
, fromMutable
5221
, fromWriteBuffer
53-
, openFromDisk
5422
, RunDataCaching (..)
23+
-- * Snapshot
24+
, ChecksumError (..)
25+
, openFromDisk
5526
) where
5627

5728
import Control.DeepSeq (NFData (..), rwhnf)

src/Database/LSMTree/Internal/Snapshot.hs

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ module Database.LSMTree.Internal.Snapshot (
77
, SnapshotMetaData (..)
88
, SnapshotLabel (..)
99
, SnapshotTableType (..)
10+
, writeFileSnapshotMetaData
11+
, readFileSnapshotMetaData
12+
, encodeSnapshotMetaData
13+
, decodeSnapshotMetaData
1014
-- * Snapshot format
1115
, numSnapRuns
1216
, SnapLevels
@@ -23,8 +27,6 @@ module Database.LSMTree.Internal.Snapshot (
2327
, Encode (..)
2428
, Decode (..)
2529
, DecodeVersioned (..)
26-
, encodeSnapshotMetaData
27-
, decodeSnapshotMetaData
2830
, Versioned (..)
2931
) where
3032

@@ -34,30 +36,38 @@ import Codec.CBOR.Read
3436
import Codec.CBOR.Write
3537
import Control.Concurrent.Class.MonadMVar.Strict
3638
import Control.Concurrent.Class.MonadSTM (MonadSTM)
39+
import Control.Monad (void, when)
3740
import Control.Monad.Class.MonadST (MonadST)
38-
import Control.Monad.Class.MonadThrow (MonadMask)
41+
import Control.Monad.Class.MonadThrow (MonadMask, MonadThrow (..))
3942
import Control.Monad.Fix (MonadFix)
4043
import Control.Monad.Primitive (PrimMonad)
4144
import Control.TempRegistry
4245
import Data.Bifunctor
46+
import qualified Data.ByteString.Builder as BSB
47+
import qualified Data.ByteString.Char8 as BSC
4348
import Data.ByteString.Lazy (ByteString)
49+
import qualified Data.ByteString.Lazy as BSL
4450
import Data.Primitive (newMutVar, readMutVar)
4551
import Data.Primitive.PrimVar
4652
import Data.Text (Text)
4753
import qualified Data.Vector as V
4854
import Database.LSMTree.Internal.Config
55+
import Database.LSMTree.Internal.CRC32C
56+
import qualified Database.LSMTree.Internal.CRC32C as FS
4957
import Database.LSMTree.Internal.Entry
5058
import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue)
5159
import qualified Database.LSMTree.Internal.Merge as Merge
5260
import Database.LSMTree.Internal.MergeSchedule
5361
import Database.LSMTree.Internal.Paths (SessionRoot)
5462
import qualified Database.LSMTree.Internal.Paths as Paths
55-
import Database.LSMTree.Internal.Run (Run)
63+
import Database.LSMTree.Internal.Run (ChecksumError (..), Run)
5664
import qualified Database.LSMTree.Internal.Run as Run
5765
import Database.LSMTree.Internal.RunNumber
5866
import Database.LSMTree.Internal.UniqCounter (UniqCounter,
5967
incrUniqCounter, uniqueToRunNumber)
60-
import System.FS.API (HasFS)
68+
import qualified System.FS.API as FS
69+
import System.FS.API (FsPath, HasFS)
70+
import qualified System.FS.API.Lazy as FS
6171
import System.FS.BlockIO.API (HasBlockIO)
6272
import Text.Printf
6373

@@ -133,6 +143,65 @@ data SnapshotMetaData = SnapshotMetaData {
133143
}
134144
deriving stock (Show, Eq)
135145

146+
-- | Encode 'SnapshotMetaData' and write it to 'SnapshotMetaDataFile'.
147+
writeFileSnapshotMetaData ::
148+
MonadThrow m
149+
=> HasFS m h
150+
-> FsPath -- ^ Target file for snapshot metadata
151+
-> FsPath -- ^ Target file for checksum
152+
-> SnapshotMetaData
153+
-> m ()
154+
writeFileSnapshotMetaData hfs contentPath checksumPath snapMetaData = do
155+
(_, checksum) <-
156+
FS.withFile hfs contentPath (FS.WriteMode FS.MustBeNew) $ \h ->
157+
hPutAllChunksCRC32C hfs h (encodeSnapshotMetaData snapMetaData) initialCRC32C
158+
FS.withFile hfs checksumPath (FS.WriteMode FS.MustBeNew) $ \h ->
159+
void $ FS.hPutAll hfs h $ encodeChecksum checksum
160+
161+
-- | Read from 'SnapshotMetaDataFile' and attempt to decode it to
162+
-- 'SnapshotMetaData'.
163+
readFileSnapshotMetaData ::
164+
MonadThrow m
165+
=> HasFS m h
166+
-> FsPath -- ^ Source file for snapshot metadata
167+
-> FsPath -- ^ Source file for checksum
168+
-> m (Either DeserialiseFailure SnapshotMetaData)
169+
readFileSnapshotMetaData hfs contentPath checksumPath = do
170+
!bsc <-
171+
FS.withFile hfs checksumPath FS.ReadMode $ \h ->
172+
BSC.toStrict <$> FS.hGetAll hfs h
173+
174+
case decodeChecksum bsc of
175+
Left failure -> pure (Left failure)
176+
Right expectedChecksum -> do
177+
178+
(lbs, actualChecksum) <- FS.withFile hfs contentPath FS.ReadMode $ \h -> do
179+
n <- FS.hGetSize hfs h
180+
FS.hGetExactlyCRC32C hfs h n initialCRC32C
181+
182+
when (expectedChecksum /= actualChecksum) $
183+
throwIO $ ChecksumError contentPath expectedChecksum actualChecksum
184+
185+
pure $ decodeSnapshotMetaData lbs
186+
187+
encodeChecksum :: CRC32C -> BSL.ByteString
188+
encodeChecksum (CRC32C x) = BSB.toLazyByteString (BSB.word32HexFixed x)
189+
190+
decodeChecksum :: BSC.ByteString -> Either DeserialiseFailure CRC32C
191+
decodeChecksum bsc = do
192+
when (BSC.length bsc /= 8) $ do
193+
let msg = "decodeChecksum: expected 8 bytes, but found "
194+
<> (show (BSC.length bsc))
195+
Left $ DeserialiseFailure 0 msg
196+
let !x = fromIntegral (hexdigitsToInt bsc)
197+
pure $! CRC32C x
198+
199+
encodeSnapshotMetaData :: SnapshotMetaData -> ByteString
200+
encodeSnapshotMetaData = toLazyByteString . encode . Versioned
201+
202+
decodeSnapshotMetaData :: ByteString -> Either DeserialiseFailure SnapshotMetaData
203+
decodeSnapshotMetaData bs = second (getVersioned . snd) (deserialiseFromBytes decode bs)
204+
136205
{-------------------------------------------------------------------------------
137206
Levels snapshot format
138207
-------------------------------------------------------------------------------}
@@ -343,12 +412,6 @@ class Decode a where
343412
class DecodeVersioned a where
344413
decodeVersioned :: SnapshotVersion -> Decoder s a
345414

346-
encodeSnapshotMetaData :: SnapshotMetaData -> ByteString
347-
encodeSnapshotMetaData = toLazyByteString . encode . Versioned
348-
349-
decodeSnapshotMetaData :: ByteString -> Either DeserialiseFailure SnapshotMetaData
350-
decodeSnapshotMetaData bs = second (getVersioned . snd) (deserialiseFromBytes decode bs)
351-
352415
newtype Versioned a = Versioned { getVersioned :: a }
353416
deriving stock (Show, Eq)
354417

0 commit comments

Comments
 (0)