Skip to content

Commit 7de86b9

Browse files
committed
Check that the salts of snapshotted bloomfilters matches the session salt
This prevents opening snapshots with the wrong salt, which would lead to incorrect lookup results.
1 parent 3b2d995 commit 7de86b9

File tree

6 files changed

+39
-14
lines changed

6 files changed

+39
-14
lines changed

src/Database/LSMTree/Internal/BloomFilter.hs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import qualified Data.Primitive as P
2929
import qualified Data.Vector as V
3030
import qualified Data.Vector.Primitive as VP
3131
import Data.Word (Word32, Word64, byteSwap32)
32+
import Text.Printf (printf)
3233

3334
import Control.Exception (assert)
3435
import Control.Monad (void, when)
@@ -242,23 +243,25 @@ bloomFilterToLBS bf =
242243

243244
{-# SPECIALISE bloomFilterFromFile ::
244245
HasFS IO h
246+
-> Bloom.Salt
245247
-> Handle h
246248
-> IO (Bloom a) #-}
247249
-- | Read a 'Bloom' from a file.
248250
--
249251
bloomFilterFromFile ::
250252
(PrimMonad m, MonadCatch m)
251253
=> HasFS m h
254+
-> Bloom.Salt -- ^ Expected salt
252255
-> Handle h -- ^ The open file, in read mode
253256
-> m (Bloom a)
254-
bloomFilterFromFile hfs h = do
257+
bloomFilterFromFile hfs expectedSalt h = do
255258
header <- rethrowEOFError "Doesn't contain a header" $
256259
hGetByteArrayExactly hfs h 24
257260

258261
let !version = P.indexByteArray header 0 :: Word32
259262
!nhashes = P.indexByteArray header 1 :: Word32
260263
!nbits = P.indexByteArray header 1 :: Word64
261-
!salt = P.indexByteArray header 2 :: Word64
264+
!salt = P.indexByteArray header 2 :: Bloom.Salt
262265

263266
when (version /= bloomFilterVersion) $ throwFormatError $
264267
if byteSwap32 version == bloomFilterVersion
@@ -268,8 +271,12 @@ bloomFilterFromFile hfs h = do
268271
when (nbits <= 0) $ throwFormatError "Length is zero"
269272

270273
-- limit to 2^48 bits
271-
when (nbits >= 0x1_0000_0000_0000) $ throwFormatError "Too large bloomfilter"
272-
--TODO: get max size from bloomfilter lib
274+
when (nbits >= fromIntegral Bloom.maxSizeBits) $ throwFormatError "Too large bloomfilter"
275+
276+
when (expectedSalt /= salt) $ throwFormatError $
277+
printf "Expected salt does not match actual salt: %d /= %d"
278+
expectedSalt
279+
salt
273280

274281
-- read the filter data from the file directly into the bloom filter
275282
bloom <-

src/Database/LSMTree/Internal/Run.hs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ fromWriteBuffer fs hbio salt params fsPaths buffer blobs = do
269269
-> HasBlockIO IO h
270270
-> RunDataCaching
271271
-> IndexType
272+
-> Bloom.Salt
272273
-> RunFsPaths
273274
-> IO (Ref (Run IO h)) #-}
274275
-- | Load a previously written run from disk, checking each file's checksum
@@ -294,10 +295,11 @@ openFromDisk ::
294295
-> HasBlockIO m h
295296
-> RunDataCaching
296297
-> IndexType
298+
-> Bloom.Salt -- ^ Expected salt
297299
-> RunFsPaths
298300
-> m (Ref (Run m h))
299301
-- TODO: make exception safe
300-
openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do
302+
openFromDisk fs hbio runRunDataCaching indexType expectedSalt runRunFsPaths = do
301303
expectedChecksums <-
302304
CRC.expectValidFile fs (runChecksumsPath runRunFsPaths) CRC.FormatChecksumsFile
303305
. fromChecksumsFile
@@ -312,7 +314,7 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do
312314
let filterPath = forRunFilterRaw paths
313315
checkCRC CacheRunData (forRunFilterRaw expectedChecksums) filterPath
314316
runFilter <- FS.withFile fs filterPath FS.ReadMode $
315-
bloomFilterFromFile fs
317+
bloomFilterFromFile fs expectedSalt
316318

317319
(runNumEntries, runIndex) <-
318320
CRC.expectValidFile fs (forRunIndexRaw paths) CRC.FormatIndexFile

src/Database/LSMTree/Internal/Snapshot.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -586,9 +586,10 @@ snapshotRun hfs hbio snapUc reg (NamedSnapshotDir targetDir) run = do
586586
-> ActionRegistry IO
587587
-> NamedSnapshotDir
588588
-> ActiveDir
589+
-> Bloom.Salt
589590
-> SnapshotRun
590591
-> IO (Ref (Run IO h)) #-}
591-
-- | @'openRun' _ _ uniqCounter _ sourceDir targetDir snaprun@ takes all run
592+
-- | @'openRun' _ _ uniqCounter _ sourceDir targetDir _ snaprun@ takes all run
592593
-- files that are referenced by @snaprun@, and hard links them from @sourceDir@
593594
-- into @targetDir@ with new, unique names (using @uniqCounter@). Each set of
594595
-- (hard linked) files that represents a run is opened and verified, returning
@@ -603,10 +604,12 @@ openRun ::
603604
-> ActionRegistry m
604605
-> NamedSnapshotDir
605606
-> ActiveDir
607+
-> Bloom.Salt
606608
-> SnapshotRun
607609
-> m (Ref (Run m h))
608610
openRun hfs hbio uc reg
609611
(NamedSnapshotDir sourceDir) (ActiveDir targetDir)
612+
expectedSalt
610613
SnapshotRun {
611614
snapRunNumber = runNum,
612615
snapRunCaching = caching,
@@ -618,7 +621,7 @@ openRun hfs hbio uc reg
618621
hardLinkRunFiles hfs hbio reg sourcePaths targetPaths
619622

620623
withRollback reg
621-
(Run.openFromDisk hfs hbio caching indexType targetPaths)
624+
(Run.openFromDisk hfs hbio caching indexType expectedSalt targetPaths)
622625
releaseRef
623626

624627
{-------------------------------------------------------------------------------

src/Database/LSMTree/Internal/Unsafe.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,11 +1541,11 @@ openTableFromSnapshot policyOveride sesh snap label resolve =
15411541
openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths
15421542

15431543
-- Hard link runs into the active directory,
1544-
snapLevels' <- traverse (openRun hfs hbio uc reg snapDir activeDir) snapLevels
1544+
snapLevels' <- traverse (openRun hfs hbio uc reg snapDir activeDir salt) snapLevels
15451545
unionLevel <- case mTreeOpt of
15461546
Nothing -> pure NoUnion
15471547
Just mTree -> do
1548-
snapTree <- traverse (openRun hfs hbio uc reg snapDir activeDir) mTree
1548+
snapTree <- traverse (openRun hfs hbio uc reg snapDir activeDir salt) mTree
15491549
mt <- fromSnapMergingTree hfs hbio salt uc resolve activeDir reg snapTree
15501550
isStructurallyEmpty mt >>= \case
15511551
True ->

test/Test/Database/LSMTree/Internal/BloomFilter.hs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
module Test.Database.LSMTree.Internal.BloomFilter (tests) where
22

33
import Control.DeepSeq (deepseq)
4-
import Control.Exception (displayException)
4+
import Control.Exception (Exception (..), displayException)
55
import Control.Monad (void)
66
import qualified Control.Monad.IOSim as IOSim
77
import Data.Bits ((.&.))
88
import qualified Data.ByteString as BS
99
import qualified Data.ByteString.Builder as BS.Builder
1010
import qualified Data.ByteString.Builder.Extra as BS.Builder
1111
import qualified Data.ByteString.Lazy as LBS
12+
import qualified Data.List as List
1213
import qualified Data.Set as Set
1314
import qualified Data.Vector as V
1415
import qualified Data.Vector.Primitive as VP
@@ -25,6 +26,8 @@ import Test.Tasty.QuickCheck hiding ((.&.))
2526

2627
import qualified Data.BloomFilter.Blocked as Bloom
2728
import Database.LSMTree.Internal.BloomFilter
29+
import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..),
30+
FileFormat (..))
2831
import Database.LSMTree.Internal.Serialise (SerialisedKey,
2932
serialiseKey)
3033

@@ -64,9 +67,18 @@ limitBits b = b .&. 0xffffff
6467
prop_total_deserialisation :: BS.ByteString -> Property
6568
prop_total_deserialisation bs =
6669
case bloomFilterFromBS bs of
67-
Left err -> label (displayException err) $ property True
70+
Left err ->
71+
label (mkLabel err) $ property True
6872
Right bf -> label "parsed successfully" $ property $
6973
bf `deepseq` True
74+
where
75+
mkLabel err = case err of
76+
IOSim.FailureException e
77+
| Just (ErrFileFormatInvalid fsep FormatBloomFilterFile msg) <- fromException e
78+
, let msg' = "Expected salt does not match actual salt"
79+
, msg' `List.isPrefixOf` msg
80+
-> displayException $ ErrFileFormatInvalid fsep FormatBloomFilterFile msg'
81+
_ -> displayException err
7082

7183
-- | Write the bytestring to a file in the mock file system and then use
7284
-- 'bloomFilterFromFile'.
@@ -80,7 +92,7 @@ bloomFilterFromBS bs =
8092
void $ FS.hPutAllStrict hfs h bs
8193
-- deserialise from file
8294
FS.withFile hfs file FS.ReadMode $ \h ->
83-
bloomFilterFromFile hfs h
95+
bloomFilterFromFile hfs testSalt h
8496

8597
-- Length is in bits. A large length would require significant amount of
8698
-- memory, so we make it 'Small'.

test/Test/Database/LSMTree/Internal/Run.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ prop_WriteAndOpen fs hbio wb =
216216
paths' = paths { runNumber = RunNumber 17}
217217
hardLinkRunFiles fs hbio reg paths paths'
218218
loaded <- openFromDisk fs hbio (runParamCaching runParams)
219-
(runParamIndex runParams) (simplePath 17)
219+
(runParamIndex runParams) testSalt
220+
(simplePath 17)
220221

221222
Run.size written @=? Run.size loaded
222223
withRef written $ \written' ->

0 commit comments

Comments
 (0)