Skip to content

Commit 24d5af0

Browse files
jorisdralwenkokke
andcommitted
lsm-tree: make the hash salt for bloom filters configurable
Making the hash salt configurable improves security, because bloom filter hashes are not cryptographic hashes. The hash salt can be configured on a session-wide basis only. That is, all bloom filters for all tables in a single session use the same salt. As a result, batch lookup performance is not impacted by the salt. The performance currently relies on being able to compute a hash only once for multiple bloom filters, which is only possible if the salt for each bloom filter is the same. For now the user has the responsibility of passing in the same salt each time a session is restored. We will change this in the next few commits. Co-authored-by: Wen Kokke <[email protected]>
1 parent 170ae9b commit 24d5af0

File tree

15 files changed

+163
-69
lines changed

15 files changed

+163
-69
lines changed

doc/format-run.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,11 @@ It (and remaining fields) are serialised in native byte order.
140140
The remainder of the header for format 1 consists of:
141141
1. The hash function count (32bit)
142142
2. The bit size of the filter (64bit)
143+
3. The hash salt of the filter (64bit)
143144

144145
The fields of the header are serialized in native byte order.
145146

146-
The maximum filter size is 2^48 bits, corresponding to 32 Terabytes.
147+
The maximum filter size is 2^41 bits, corresponding to 32 Terabytes.
147148
The family of hash functions to use is implied by the format version.
148149

149150
The filter bit vector itself is organised as a whole number of 64 bit words.

lsm-tree.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ library
593593
, lsm-tree:control
594594
, lsm-tree:kmerge
595595
, primitive ^>=0.9
596+
, random ^>=1.0 || ^>=1.1 || ^>=1.2 || ^>=1.3
596597
, text ^>=2.1.1
597598
, utf8-string ^>=1.0
598599
, vector ^>=0.13

src/Database/LSMTree.hs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ module Database.LSMTree (
101101
toSnapshotName,
102102
SnapshotLabel (..),
103103

104+
-- * Session Configuration #session_configuration#
105+
Salt,
106+
104107
-- * Table Configuration #table_configuration#
105108
TableConfig (
106109
confMergePolicy,
@@ -242,7 +245,7 @@ import Database.LSMTree.Internal.Serialise.Class (SerialiseKey (..),
242245
import Database.LSMTree.Internal.Snapshot (SnapshotLabel (..))
243246
import Database.LSMTree.Internal.Types (BlobRef (..), Cursor (..),
244247
ResolveAsFirst (..), ResolveValue (..),
245-
ResolveViaSemigroup (..), Session (..), Table (..),
248+
ResolveViaSemigroup (..), Salt, Session (..), Table (..),
246249
resolveAssociativity, resolveCompatibility,
247250
resolveValidOutput)
248251
import Database.LSMTree.Internal.Unsafe (BlobRefInvalidError (..),
@@ -263,6 +266,7 @@ import System.FS.API (FsPath, HasFS (..), MountPoint (..), mkFsPath)
263266
import System.FS.BlockIO.API (HasBlockIO (..), defaultIOCtxParams)
264267
import System.FS.BlockIO.IO (ioHasBlockIO, withIOHasBlockIO)
265268
import System.FS.IO (HandleIO, ioHasFS)
269+
import System.Random (randomIO)
266270

267271
--------------------------------------------------------------------------------
268272
-- Usage Notes
@@ -426,6 +430,7 @@ Throws the following exceptions:
426430
Tracer IO LSMTreeTrace ->
427431
HasFS IO HandleIO ->
428432
HasBlockIO IO HandleIO ->
433+
Salt ->
429434
FsPath ->
430435
(Session IO -> IO a) ->
431436
IO a
@@ -436,12 +441,14 @@ withSession ::
436441
Tracer m LSMTreeTrace ->
437442
HasFS m h ->
438443
HasBlockIO m h ->
444+
-- | The session salt.
445+
Salt ->
439446
-- | The session directory.
440447
FsPath ->
441448
(Session m -> m a) ->
442449
m a
443-
withSession tracer hasFS hasBlockIO sessionDir action = do
444-
Internal.withSession tracer hasFS hasBlockIO sessionDir (action . Session)
450+
withSession tracer hasFS hasBlockIO sessionSalt sessionDir action = do
451+
Internal.withSession tracer hasFS hasBlockIO sessionSalt sessionDir (action . Session)
445452

446453
-- | Variant of 'withSession' that is specialised to 'IO' using the real filesystem.
447454
withSessionIO ::
@@ -453,8 +460,9 @@ withSessionIO tracer sessionDir action = do
453460
let mountPoint = MountPoint sessionDir
454461
let sessionDirFsPath = mkFsPath []
455462
let hasFS = ioHasFS mountPoint
463+
sessionSalt <- randomIO
456464
withIOHasBlockIO hasFS defaultIOCtxParams $ \hasBlockIO ->
457-
withSession tracer hasFS hasBlockIO sessionDirFsPath action
465+
withSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath action
458466

459467
{- |
460468
Open a session from a session directory.
@@ -480,7 +488,7 @@ Throws the following exceptions:
480488
Tracer IO LSMTreeTrace ->
481489
HasFS IO HandleIO ->
482490
HasBlockIO IO HandleIO ->
483-
-- \| The session directory.
491+
Salt ->
484492
FsPath ->
485493
IO (Session IO)
486494
#-}
@@ -490,26 +498,29 @@ openSession ::
490498
Tracer m LSMTreeTrace ->
491499
HasFS m h ->
492500
HasBlockIO m h ->
501+
-- | The session salt.
502+
Salt ->
493503
-- | The session directory.
494504
FsPath ->
495505
m (Session m)
496-
openSession tracer hasFS hasBlockIO sessionDir =
497-
Session <$> Internal.openSession tracer hasFS hasBlockIO sessionDir
506+
openSession tracer hasFS hasBlockIO sessionSalt sessionDir =
507+
Session <$> Internal.openSession tracer hasFS hasBlockIO sessionSalt sessionDir
498508

499509
-- | Variant of 'openSession' that is specialised to 'IO' using the real filesystem.
500510
openSessionIO ::
501511
Tracer IO LSMTreeTrace ->
502-
-- \| The session directory.
512+
-- | The session directory.
503513
FilePath ->
504514
IO (Session IO)
505515
openSessionIO tracer sessionDir = do
506516
let mountPoint = MountPoint sessionDir
507517
let sessionDirFsPath = mkFsPath []
508518
let hasFS = ioHasFS mountPoint
519+
sessionSalt <- randomIO
509520
let acquireHasBlockIO = ioHasBlockIO hasFS defaultIOCtxParams
510521
let releaseHasBlockIO HasBlockIO{close} = close
511522
bracketOnError acquireHasBlockIO releaseHasBlockIO $ \hasBlockIO ->
512-
openSession tracer hasFS hasBlockIO sessionDirFsPath
523+
openSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath
513524

514525
{- |
515526
Close a session.

src/Database/LSMTree/Internal/BloomFilter.hs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ module Database.LSMTree.Internal.BloomFilter (
88
-- * Types
99
Bloom.Bloom,
1010
Bloom.MBloom,
11+
Bloom.Salt,
1112

1213
-- * Bulk query
1314
bloomQueries,
@@ -108,17 +109,18 @@ type ResIx = Int -- Result index
108109
-- number of keys but this is grown if needed (using a doubling strategy).
109110
--
110111
bloomQueries ::
111-
V.Vector (Bloom SerialisedKey)
112+
Bloom.Salt
113+
-> V.Vector (Bloom SerialisedKey)
112114
-> V.Vector SerialisedKey
113115
-> VP.Vector RunIxKeyIx
114-
bloomQueries !filters !keys
116+
bloomQueries !_salt !filters !keys
115117
| V.null filters || V.null keys = VP.empty
116-
bloomQueries !filters !keys =
118+
bloomQueries !salt !filters !keys =
117119
runST (bloomQueries_loop1 filters' keyhashes)
118120
where
119121
filters' = toFiltersArray filters
120122
keyhashes = P.generatePrimArray (V.length keys) $ \i ->
121-
Bloom.hashes (V.unsafeIndex keys i)
123+
Bloom.hashesWithSalt salt (V.unsafeIndex keys i)
122124

123125
-- loop over all keys
124126
bloomQueries_loop1 ::
@@ -220,15 +222,16 @@ bloomFilterVersion = 1 + fromIntegral Bloom.formatVersion
220222

221223
bloomFilterToLBS :: Bloom a -> LBS.ByteString
222224
bloomFilterToLBS bf =
223-
let (size, ba, off, len) = Bloom.serialise bf
224-
in header size <> byteArrayToLBS ba off len
225+
let (size, salt, ba, off, len) = Bloom.serialise bf
226+
in header size salt <> byteArrayToLBS ba off len
225227
where
226-
header Bloom.BloomSize { sizeBits, sizeHashes } =
227-
-- creates a single 16 byte chunk
228-
B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $
228+
header Bloom.BloomSize { sizeBits, sizeHashes } salt =
229+
-- creates a single 24 byte chunk
230+
B.toLazyByteStringWith (B.safeStrategy 24 B.smallChunkSize) mempty $
229231
B.word32Host bloomFilterVersion
230232
<> B.word32Host (fromIntegral sizeHashes)
231233
<> B.word64Host (fromIntegral sizeBits)
234+
<> B.word64Host salt
232235

233236
byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString
234237
byteArrayToLBS ba off len =
@@ -250,11 +253,12 @@ bloomFilterFromFile ::
250253
-> m (Bloom a)
251254
bloomFilterFromFile hfs h = do
252255
header <- rethrowEOFError "Doesn't contain a header" $
253-
hGetByteArrayExactly hfs h 16
256+
hGetByteArrayExactly hfs h 24
254257

255258
let !version = P.indexByteArray header 0 :: Word32
256259
!nhashes = P.indexByteArray header 1 :: Word32
257260
!nbits = P.indexByteArray header 1 :: Word64
261+
!salt = P.indexByteArray header 2 :: Word64
258262

259263
when (version /= bloomFilterVersion) $ throwFormatError $
260264
if byteSwap32 version == bloomFilterVersion
@@ -274,6 +278,7 @@ bloomFilterFromFile hfs h = do
274278
Bloom.sizeBits = fromIntegral nbits,
275279
Bloom.sizeHashes = fromIntegral nhashes
276280
}
281+
salt
277282
(\buf off len ->
278283
rethrowEOFError "bloom filter file too short" $
279284
void $ hGetBufExactly hfs

src/Database/LSMTree/Internal/Lookup.hs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import Control.RefCount
3838
import Database.LSMTree.Internal.BlobRef (WeakBlobRef (..))
3939
import Database.LSMTree.Internal.BloomFilter (Bloom, RunIxKeyIx (..),
4040
bloomQueries)
41+
import qualified Database.LSMTree.Internal.BloomFilter as Bloom
4142
import Database.LSMTree.Internal.Entry
4243
import Database.LSMTree.Internal.Index (Index)
4344
import qualified Database.LSMTree.Internal.Index as Index (search)
@@ -61,13 +62,14 @@ import System.FS.BlockIO.API
6162
-- associated with each 'IOOp'.
6263
prepLookups ::
6364
Arena s
65+
-> Bloom.Salt
6466
-> V.Vector (Bloom SerialisedKey)
6567
-> V.Vector Index
6668
-> V.Vector (Handle h)
6769
-> V.Vector SerialisedKey
6870
-> ST s (VP.Vector RunIxKeyIx, V.Vector (IOOp s h))
69-
prepLookups arena blooms indexes kopsFiles ks = do
70-
let !rkixs = bloomQueries blooms ks
71+
prepLookups arena salt blooms indexes kopsFiles ks = do
72+
let !rkixs = bloomQueries salt blooms ks
7173
!ioops <- indexSearches arena indexes kopsFiles ks rkixs
7274
pure (rkixs, ioops)
7375

@@ -110,6 +112,7 @@ type LookupAcc m h = V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))
110112
HasBlockIO IO h
111113
-> ArenaManager RealWorld
112114
-> ResolveSerialisedValue
115+
-> Bloom.Salt
113116
-> WB.WriteBuffer
114117
-> Ref (WBB.WriteBufferBlobs IO h)
115118
-> V.Vector (Ref (Run IO h))
@@ -125,6 +128,7 @@ lookupsIOWithWriteBuffer ::
125128
=> HasBlockIO m h
126129
-> ArenaManager (PrimState m)
127130
-> ResolveSerialisedValue
131+
-> Bloom.Salt
128132
-> WB.WriteBuffer
129133
-> Ref (WBB.WriteBufferBlobs m h)
130134
-> V.Vector (Ref (Run m h)) -- ^ Runs @rs@
@@ -133,10 +137,10 @@ lookupsIOWithWriteBuffer ::
133137
-> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@
134138
-> V.Vector SerialisedKey
135139
-> m (LookupAcc m h)
136-
lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks =
140+
lookupsIOWithWriteBuffer !hbio !mgr !resolveV !salt !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks =
137141
assert precondition $
138142
withArena mgr $ \arena -> do
139-
(rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks
143+
(rkixs, ioops) <- ST.stToIO $ prepLookups arena salt blooms indexes kopsFiles ks
140144
ioress <- submitIO hbio ioops
141145
intraPageLookupsWithWriteBuffer resolveV wb wbblobs rs ks rkixs ioops ioress
142146
where
@@ -152,6 +156,7 @@ lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes
152156
HasBlockIO IO h
153157
-> ArenaManager RealWorld
154158
-> ResolveSerialisedValue
159+
-> Bloom.Salt
155160
-> V.Vector (Ref (Run IO h))
156161
-> V.Vector (Bloom SerialisedKey)
157162
-> V.Vector Index
@@ -168,16 +173,17 @@ lookupsIO ::
168173
=> HasBlockIO m h
169174
-> ArenaManager (PrimState m)
170175
-> ResolveSerialisedValue
176+
-> Bloom.Salt
171177
-> V.Vector (Ref (Run m h)) -- ^ Runs @rs@
172178
-> V.Vector (Bloom SerialisedKey) -- ^ The bloom filters inside @rs@
173179
-> V.Vector Index -- ^ The indexes inside @rs@
174180
-> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@
175181
-> V.Vector SerialisedKey
176182
-> m (LookupAcc m h)
177-
lookupsIO !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles !ks =
183+
lookupsIO !hbio !mgr !resolveV !salt !rs !blooms !indexes !kopsFiles !ks =
178184
assert precondition $
179185
withArena mgr $ \arena -> do
180-
(rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks
186+
(rkixs, ioops) <- ST.stToIO $ prepLookups arena salt blooms indexes kopsFiles ks
181187
ioress <- submitIO hbio ioops
182188
intraPageLookupsOn resolveV (V.map (const Nothing) ks) rs ks rkixs ioops ioress
183189
where

src/Database/LSMTree/Internal/Merge.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import Data.Primitive.MutVar
3434
import Data.Traversable (for)
3535
import qualified Data.Vector as V
3636
import Database.LSMTree.Internal.BlobRef (RawBlobRef)
37+
import qualified Database.LSMTree.Internal.BloomFilter as Bloom
3738
import Database.LSMTree.Internal.Entry
3839
import Database.LSMTree.Internal.Readers (Readers)
3940
import qualified Database.LSMTree.Internal.Readers as Readers
@@ -153,6 +154,7 @@ instance IsMergeType TreeMergeType where
153154
IsMergeType t
154155
=> HasFS IO h
155156
-> HasBlockIO IO h
157+
-> Bloom.Salt
156158
-> RunParams
157159
-> t
158160
-> ResolveSerialisedValue
@@ -165,13 +167,14 @@ new ::
165167
(IsMergeType t, MonadMask m, MonadSTM m, MonadST m)
166168
=> HasFS m h
167169
-> HasBlockIO m h
170+
-> Bloom.Salt
168171
-> RunParams
169172
-> t
170173
-> ResolveSerialisedValue
171174
-> Run.RunFsPaths
172175
-> V.Vector (Ref (Run m h))
173176
-> m (Maybe (Merge t m h))
174-
new hfs hbio runParams mergeType mergeResolve targetPaths runs = do
177+
new hfs hbio salt runParams mergeType mergeResolve targetPaths runs = do
175178
let sources = Readers.FromRun <$> V.toList runs
176179
mreaders <- Readers.new mergeResolve Readers.NoOffsetKey sources
177180
-- TODO: Exception safety! If Readers.new fails after already creating some
@@ -180,7 +183,7 @@ new hfs hbio runParams mergeType mergeResolve targetPaths runs = do
180183
for mreaders $ \mergeReaders -> do
181184
-- calculate upper bounds based on input runs
182185
let numEntries = V.foldMap' Run.size runs
183-
mergeBuilder <- Builder.new hfs hbio runParams targetPaths numEntries
186+
mergeBuilder <- Builder.new hfs hbio salt runParams targetPaths numEntries
184187
mergeState <- newMutVar $! Merging
185188
pure Merge {
186189
mergeIsLastLevel = isLastLevel mergeType

0 commit comments

Comments
 (0)