Skip to content

Commit 8d78948

Browse files
committed
feat(lsm-tree): add salt
1 parent 0a91ff8 commit 8d78948

File tree

16 files changed

+161
-73
lines changed

16 files changed

+161
-73
lines changed

lsm-tree.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ library
593593
, lsm-tree:control
594594
, lsm-tree:kmerge
595595
, primitive ^>=0.9
596+
, random ^>=1.0 || ^>=1.1 || ^>=1.2 || ^>=1.3
596597
, text ^>=2.1.1
597598
, utf8-string ^>=1.0
598599
, vector ^>=0.13

src/Database/LSMTree.hs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ module Database.LSMTree (
101101
toSnapshotName,
102102
SnapshotLabel (..),
103103

104+
-- * Session Configuration #session_configuration#
105+
Salt,
106+
104107
-- * Table Configuration #table_configuration#
105108
TableConfig (
106109
confMergePolicy,
@@ -242,7 +245,7 @@ import Database.LSMTree.Internal.Serialise.Class (SerialiseKey (..),
242245
import Database.LSMTree.Internal.Snapshot (SnapshotLabel (..))
243246
import Database.LSMTree.Internal.Types (BlobRef (..), Cursor (..),
244247
ResolveAsFirst (..), ResolveValue (..),
245-
ResolveViaSemigroup (..), Session (..), Table (..),
248+
ResolveViaSemigroup (..), Salt, Session (..), Table (..),
246249
resolveAssociativity, resolveCompatibility,
247250
resolveValidOutput)
248251
import Database.LSMTree.Internal.Unsafe (BlobRefInvalidError (..),
@@ -263,6 +266,7 @@ import System.FS.API (FsPath, HasFS (..), MountPoint (..), mkFsPath)
263266
import System.FS.BlockIO.API (HasBlockIO (..), defaultIOCtxParams)
264267
import System.FS.BlockIO.IO (ioHasBlockIO, withIOHasBlockIO)
265268
import System.FS.IO (HandleIO, ioHasFS)
269+
import System.Random (randomIO)
266270

267271
--------------------------------------------------------------------------------
268272
-- Usage Notes
@@ -426,6 +430,7 @@ Throws the following exceptions:
426430
Tracer IO LSMTreeTrace ->
427431
HasFS IO HandleIO ->
428432
HasBlockIO IO HandleIO ->
433+
Salt ->
429434
FsPath ->
430435
(Session IO -> IO a) ->
431436
IO a
@@ -436,12 +441,14 @@ withSession ::
436441
Tracer m LSMTreeTrace ->
437442
HasFS m h ->
438443
HasBlockIO m h ->
444+
-- | The session salt.
445+
Salt ->
439446
-- | The session directory.
440447
FsPath ->
441448
(Session m -> m a) ->
442449
m a
443-
withSession tracer hasFS hasBlockIO sessionDir action = do
444-
Internal.withSession tracer hasFS hasBlockIO sessionDir (action . Session)
450+
withSession tracer hasFS hasBlockIO sessionSalt sessionDir action = do
451+
Internal.withSession tracer hasFS hasBlockIO sessionSalt sessionDir (action . Session)
445452

446453
-- | Variant of 'withSession' that is specialised to 'IO' using the real filesystem.
447454
withSessionIO ::
@@ -453,8 +460,9 @@ withSessionIO tracer sessionDir action = do
453460
let mountPoint = MountPoint sessionDir
454461
let sessionDirFsPath = mkFsPath []
455462
let hasFS = ioHasFS mountPoint
463+
sessionSalt <- randomIO
456464
withIOHasBlockIO hasFS defaultIOCtxParams $ \hasBlockIO ->
457-
withSession tracer hasFS hasBlockIO sessionDirFsPath action
465+
withSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath action
458466

459467
{- |
460468
Open a session from a session directory.
@@ -480,7 +488,7 @@ Throws the following exceptions:
480488
Tracer IO LSMTreeTrace ->
481489
HasFS IO HandleIO ->
482490
HasBlockIO IO HandleIO ->
483-
-- \| The session directory.
491+
Salt ->
484492
FsPath ->
485493
IO (Session IO)
486494
#-}
@@ -490,26 +498,29 @@ openSession ::
490498
Tracer m LSMTreeTrace ->
491499
HasFS m h ->
492500
HasBlockIO m h ->
501+
-- | The session salt.
502+
Salt ->
493503
-- | The session directory.
494504
FsPath ->
495505
m (Session m)
496-
openSession tracer hasFS hasBlockIO sessionDir =
497-
Session <$> Internal.openSession tracer hasFS hasBlockIO sessionDir
506+
openSession tracer hasFS hasBlockIO sessionSalt sessionDir =
507+
Session <$> Internal.openSession tracer hasFS hasBlockIO sessionSalt sessionDir
498508

499509
-- | Variant of 'openSession' that is specialised to 'IO' using the real filesystem.
500510
openSessionIO ::
501511
Tracer IO LSMTreeTrace ->
502-
-- \| The session directory.
512+
-- | The session directory.
503513
FilePath ->
504514
IO (Session IO)
505515
openSessionIO tracer sessionDir = do
506516
let mountPoint = MountPoint sessionDir
507517
let sessionDirFsPath = mkFsPath []
508518
let hasFS = ioHasFS mountPoint
519+
sessionSalt <- randomIO
509520
let acquireHasBlockIO = ioHasBlockIO hasFS defaultIOCtxParams
510521
let releaseHasBlockIO HasBlockIO{close} = close
511522
bracketOnError acquireHasBlockIO releaseHasBlockIO $ \hasBlockIO ->
512-
openSession tracer hasFS hasBlockIO sessionDirFsPath
523+
openSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath
513524

514525
{- |
515526
Close a session.

src/Database/LSMTree/Internal/BloomFilter.hs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import qualified Database.LSMTree.Internal.Vector as P
4848
import qualified Database.LSMTree.Internal.StrictArray as P
4949
#endif
5050

51+
import Database.LSMTree.Internal.Paths (SessionSalt (..))
5152
import Prelude hiding (filter)
5253

5354
-- Bulk query
@@ -108,17 +109,18 @@ type ResIx = Int -- Result index
108109
-- number of keys but this is grown if needed (using a doubling strategy).
109110
--
110111
bloomQueries ::
111-
V.Vector (Bloom SerialisedKey)
112+
SessionSalt
113+
-> V.Vector (Bloom SerialisedKey)
112114
-> V.Vector SerialisedKey
113115
-> VP.Vector RunIxKeyIx
114-
bloomQueries !filters !keys
116+
bloomQueries _sessionSalt !filters !keys
115117
| V.null filters || V.null keys = VP.empty
116-
bloomQueries !filters !keys =
118+
bloomQueries !sessionSalt !filters !keys =
117119
runST (bloomQueries_loop1 filters' keyhashes)
118120
where
119121
filters' = toFiltersArray filters
120122
keyhashes = P.generatePrimArray (V.length keys) $ \i ->
121-
Bloom.hashes (V.unsafeIndex keys i)
123+
Bloom.hashesWithSalt (getSessionSalt sessionSalt) (V.unsafeIndex keys i)
122124

123125
-- loop over all keys
124126
bloomQueries_loop1 ::
@@ -220,15 +222,16 @@ bloomFilterVersion = 1 + fromIntegral Bloom.formatVersion
220222

221223
bloomFilterToLBS :: Bloom a -> LBS.ByteString
222224
bloomFilterToLBS bf =
223-
let (size, ba, off, len) = Bloom.serialise bf
224-
in header size <> byteArrayToLBS ba off len
225+
let (size, salt, ba, off, len) = Bloom.serialise bf
226+
in header size salt <> byteArrayToLBS ba off len
225227
where
226-
header Bloom.BloomSize { sizeBits, sizeHashes } =
227-
-- creates a single 16 byte chunk
228-
B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $
228+
header Bloom.BloomSize { sizeBits, sizeHashes } salt =
229+
-- creates a single 24 byte chunk
230+
B.toLazyByteStringWith (B.safeStrategy 24 B.smallChunkSize) mempty $
229231
B.word32Host bloomFilterVersion
230232
<> B.word32Host (fromIntegral sizeHashes)
231233
<> B.word64Host (fromIntegral sizeBits)
234+
<> B.word64Host salt
232235

233236
byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString
234237
byteArrayToLBS ba off len =
@@ -250,11 +253,12 @@ bloomFilterFromFile ::
250253
-> m (Bloom a)
251254
bloomFilterFromFile hfs h = do
252255
header <- rethrowEOFError "Doesn't contain a header" $
253-
hGetByteArrayExactly hfs h 16
256+
hGetByteArrayExactly hfs h 24
254257

255258
let !version = P.indexByteArray header 0 :: Word32
256259
!nhashes = P.indexByteArray header 1 :: Word32
257260
!nbits = P.indexByteArray header 1 :: Word64
261+
!salt = P.indexByteArray header 2 :: Word64
258262

259263
when (version /= bloomFilterVersion) $ throwFormatError $
260264
if byteSwap32 version == bloomFilterVersion
@@ -274,6 +278,7 @@ bloomFilterFromFile hfs h = do
274278
Bloom.sizeBits = fromIntegral nbits,
275279
Bloom.sizeHashes = fromIntegral nhashes
276280
}
281+
salt
277282
(\buf off len ->
278283
rethrowEOFError "bloom filter file too short" $
279284
void $ hGetBufExactly hfs

src/Database/LSMTree/Internal/Lookup.hs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import Database.LSMTree.Internal.Index (Index)
4343
import qualified Database.LSMTree.Internal.Index as Index (search)
4444
import Database.LSMTree.Internal.Page (PageSpan (..), getNumPages,
4545
pageSpanSize, unPageNo)
46+
import Database.LSMTree.Internal.Paths (SessionSalt)
4647
import Database.LSMTree.Internal.RawBytes (RawBytes (..))
4748
import qualified Database.LSMTree.Internal.RawBytes as RB
4849
import Database.LSMTree.Internal.RawPage
@@ -61,13 +62,14 @@ import System.FS.BlockIO.API
6162
-- associated with each 'IOOp'.
6263
prepLookups ::
6364
Arena s
65+
-> SessionSalt
6466
-> V.Vector (Bloom SerialisedKey)
6567
-> V.Vector Index
6668
-> V.Vector (Handle h)
6769
-> V.Vector SerialisedKey
6870
-> ST s (VP.Vector RunIxKeyIx, V.Vector (IOOp s h))
69-
prepLookups arena blooms indexes kopsFiles ks = do
70-
let !rkixs = bloomQueries blooms ks
71+
prepLookups arena sessionSalt blooms indexes kopsFiles ks = do
72+
let !rkixs = bloomQueries sessionSalt blooms ks
7173
!ioops <- indexSearches arena indexes kopsFiles ks rkixs
7274
pure (rkixs, ioops)
7375

@@ -110,6 +112,7 @@ type LookupAcc m h = V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))
110112
HasBlockIO IO h
111113
-> ArenaManager RealWorld
112114
-> ResolveSerialisedValue
115+
-> SessionSalt
113116
-> WB.WriteBuffer
114117
-> Ref (WBB.WriteBufferBlobs IO h)
115118
-> V.Vector (Ref (Run IO h))
@@ -125,6 +128,7 @@ lookupsIOWithWriteBuffer ::
125128
=> HasBlockIO m h
126129
-> ArenaManager (PrimState m)
127130
-> ResolveSerialisedValue
131+
-> SessionSalt
128132
-> WB.WriteBuffer
129133
-> Ref (WBB.WriteBufferBlobs m h)
130134
-> V.Vector (Ref (Run m h)) -- ^ Runs @rs@
@@ -133,10 +137,10 @@ lookupsIOWithWriteBuffer ::
133137
-> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@
134138
-> V.Vector SerialisedKey
135139
-> m (LookupAcc m h)
136-
lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks =
140+
lookupsIOWithWriteBuffer !hbio !mgr !resolveV !sessionSalt !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks =
137141
assert precondition $
138142
withArena mgr $ \arena -> do
139-
(rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks
143+
(rkixs, ioops) <- ST.stToIO $ prepLookups arena sessionSalt blooms indexes kopsFiles ks
140144
ioress <- submitIO hbio ioops
141145
intraPageLookupsWithWriteBuffer resolveV wb wbblobs rs ks rkixs ioops ioress
142146
where
@@ -152,6 +156,7 @@ lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes
152156
HasBlockIO IO h
153157
-> ArenaManager RealWorld
154158
-> ResolveSerialisedValue
159+
-> SessionSalt
155160
-> V.Vector (Ref (Run IO h))
156161
-> V.Vector (Bloom SerialisedKey)
157162
-> V.Vector Index
@@ -168,16 +173,17 @@ lookupsIO ::
168173
=> HasBlockIO m h
169174
-> ArenaManager (PrimState m)
170175
-> ResolveSerialisedValue
176+
-> SessionSalt
171177
-> V.Vector (Ref (Run m h)) -- ^ Runs @rs@
172178
-> V.Vector (Bloom SerialisedKey) -- ^ The bloom filters inside @rs@
173179
-> V.Vector Index -- ^ The indexes inside @rs@
174180
-> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@
175181
-> V.Vector SerialisedKey
176182
-> m (LookupAcc m h)
177-
lookupsIO !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles !ks =
183+
lookupsIO !hbio !mgr !resolveV !sessionSalt !rs !blooms !indexes !kopsFiles !ks =
178184
assert precondition $
179185
withArena mgr $ \arena -> do
180-
(rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks
186+
(rkixs, ioops) <- ST.stToIO $ prepLookups arena sessionSalt blooms indexes kopsFiles ks
181187
ioress <- submitIO hbio ioops
182188
intraPageLookupsOn resolveV (V.map (const Nothing) ks) rs ks rkixs ioops ioress
183189
where

src/Database/LSMTree/Internal/Merge.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import Data.Traversable (for)
3535
import qualified Data.Vector as V
3636
import Database.LSMTree.Internal.BlobRef (RawBlobRef)
3737
import Database.LSMTree.Internal.Entry
38+
import Database.LSMTree.Internal.Paths (SessionSalt)
3839
import Database.LSMTree.Internal.Readers (Readers)
3940
import qualified Database.LSMTree.Internal.Readers as Readers
4041
import Database.LSMTree.Internal.Run (Run)
@@ -153,6 +154,7 @@ instance IsMergeType TreeMergeType where
153154
IsMergeType t
154155
=> HasFS IO h
155156
-> HasBlockIO IO h
157+
-> SessionSalt
156158
-> RunParams
157159
-> t
158160
-> ResolveSerialisedValue
@@ -165,13 +167,14 @@ new ::
165167
(IsMergeType t, MonadMask m, MonadSTM m, MonadST m)
166168
=> HasFS m h
167169
-> HasBlockIO m h
170+
-> SessionSalt
168171
-> RunParams
169172
-> t
170173
-> ResolveSerialisedValue
171174
-> Run.RunFsPaths
172175
-> V.Vector (Ref (Run m h))
173176
-> m (Maybe (Merge t m h))
174-
new hfs hbio runParams mergeType mergeResolve targetPaths runs = do
177+
new hfs hbio sessionSalt runParams mergeType mergeResolve targetPaths runs = do
175178
let sources = Readers.FromRun <$> V.toList runs
176179
mreaders <- Readers.new mergeResolve Readers.NoOffsetKey sources
177180
-- TODO: Exception safety! If Readers.new fails after already creating some
@@ -180,7 +183,7 @@ new hfs hbio runParams mergeType mergeResolve targetPaths runs = do
180183
for mreaders $ \mergeReaders -> do
181184
-- calculate upper bounds based on input runs
182185
let numEntries = V.foldMap' Run.size runs
183-
mergeBuilder <- Builder.new hfs hbio runParams targetPaths numEntries
186+
mergeBuilder <- Builder.new hfs hbio sessionSalt runParams targetPaths numEntries
184187
mergeState <- newMutVar $! Merging
185188
pure Merge {
186189
mergeIsLastLevel = isLastLevel mergeType

0 commit comments

Comments
 (0)