diff --git a/bench-unions/Bench/Unions.hs b/bench-unions/Bench/Unions.hs index d5b6609a1..6d0a7b895 100644 --- a/bench-unions/Bench/Unions.hs +++ b/bench-unions/Bench/Unions.hs @@ -292,7 +292,7 @@ doSetup gopts = do createDirectoryIfMissing True $ rootDir gopts -- Populate the specified number of tables - LSM.withSession (rootDir gopts) $ \session -> do + LSM.withOpenSession (rootDir gopts) $ \session -> do -- Create a "baseline" table -- -- We create a single table that *already has* all the same key value pairs @@ -337,7 +337,7 @@ tableRange gopts = -- | Count duplicate keys in all tables that will be unioned together doCollisionAnalysis :: GlobalOpts -> IO () doCollisionAnalysis gopts = do - LSM.withSession (rootDir gopts) $ \session -> do + LSM.withOpenSession (rootDir gopts) $ \session -> do seenRef <- newIORef Set.empty dupRef <- newIORef Set.empty @@ -381,7 +381,7 @@ doRun gopts opts = do withFile dataPath WriteMode $ \h -> do hPutStrLn h "# iteration \t baseline (ops/sec) \t union (ops/sec) \t union debt" - LSM.withSession (rootDir gopts) $ \session -> do + LSM.withOpenSession (rootDir gopts) $ \session -> do -- Load the baseline table LSM.withTableFromSnapshot session baselineTableName label $ \baselineTable -> do diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index 37c6e1ff3..2faff955e 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -108,7 +108,7 @@ benchmarks = do benchmark "bloomQueries" "(this is the batch lookup, less the cost of computing and hashing the keys)" (benchInBatches benchmarkBatchSize rng0 - (\ks -> Bloom.bloomQueries vbs ks `seq` ())) + (\ks -> Bloom.bloomQueries benchSalt vbs ks `seq` ())) (fromIntegralChecked benchmarkNumLookups) hashcost 0 @@ -200,6 +200,8 @@ totalNumEntriesSanityCheck l1 filterSizes = == sum [ 2^l1 * sizeFactor | (_, sizeFactor, _) <- filterSizes ] +benchSalt :: Bloom.Salt +benchSalt = 4 -- | Input environment 
for benchmarking 'Bloom.elemMany'. -- @@ -223,7 +225,10 @@ elemManyEnv :: [BloomFilterSizeInfo] elemManyEnv filterSizes rng0 = stToIO $ do -- create the filters - mbs <- sequence [ Bloom.new bsize | (_, _, bsize) <- filterSizes ] + mbs <- sequence + [ Bloom.new bsize benchSalt + | (_, _, bsize) <- filterSizes + ] -- add elements foldM_ (\rng (i, mb) -> do @@ -264,7 +269,7 @@ benchInBatches !b !rng0 !action = benchMakeHashes :: Vector (Bloom SerialisedKey) -> BatchBench benchMakeHashes !_bs !ks = let khs :: VP.Vector (Bloom.Hashes SerialisedKey) - !khs = V.convert (V.map Bloom.hashes ks) + !khs = V.convert (V.map (Bloom.hashesWithSalt benchSalt) ks) in khs `seq` () -- | This gives us a combined cost of calculating the series of keys, their @@ -273,7 +278,7 @@ benchMakeHashes !_bs !ks = benchElemHashes :: Vector (Bloom SerialisedKey) -> BatchBench benchElemHashes !bs !ks = let khs :: VP.Vector (Bloom.Hashes SerialisedKey) - !khs = V.convert (V.map Bloom.hashes ks) + !khs = V.convert (V.map (Bloom.hashesWithSalt benchSalt) ks) in V.foldl' (\_ b -> VP.foldl' (\_ kh -> Bloom.elemHashes b kh `seq` ()) diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index 69762cdd4..01f8a146c 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -129,6 +129,9 @@ entryBitsWithOverhead = entryBits -- key and value size numEntriesFitInPage :: Fractional a => a numEntriesFitInPage = fromIntegral unusedPageBits / fromIntegral entryBitsWithOverhead +benchSalt :: Bloom.Salt +benchSalt = 4 + benchmarks :: Run.RunDataCaching -> IO () benchmarks !caching = withFS $ \hfs hbio -> do #ifdef NO_IGNORE_ASSERTS @@ -351,7 +354,7 @@ lookupsEnv runSizes keyRng0 hfs hbio caching = do -- create the runs rbs <- sequence - [ RunBuilder.new hfs hbio + [ RunBuilder.new hfs hbio benchSalt RunParams { runParamCaching = caching, runParamAlloc = RunAllocFixed benchmarkNumBitsPerEntry, @@ -428,7 +431,7 @@ benchBloomQueries !bs 
!keyRng !n | n <= 0 = () | otherwise = let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize - in bloomQueries bs ks `seq` + in bloomQueries benchSalt bs ks `seq` benchBloomQueries bs keyRng' (n-benchmarkGenBatchSize) -- | This gives us the combined cost of calculating batches of keys, performing @@ -445,7 +448,7 @@ benchIndexSearches !arenaManager !bs !ics !hs !keyRng !n | n <= 0 = pure () | otherwise = do let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize - !rkixs = bloomQueries bs ks + !rkixs = bloomQueries benchSalt bs ks !_ioops <- withArena arenaManager $ \arena -> stToIO $ indexSearches arena ics hs ks rkixs benchIndexSearches arenaManager bs ics hs keyRng' (n-benchmarkGenBatchSize) @@ -463,7 +466,7 @@ benchPrepLookups !arenaManager !bs !ics !hs !keyRng !n | n <= 0 = pure () | otherwise = do let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize - (!_rkixs, !_ioops) <- withArena arenaManager $ \arena -> stToIO $ prepLookups arena bs ics hs ks + (!_rkixs, !_ioops) <- withArena arenaManager $ \arena -> stToIO $ prepLookups arena benchSalt bs ics hs ks benchPrepLookups arenaManager bs ics hs keyRng' (n-benchmarkGenBatchSize) -- | This gives us the combined cost of calculating batches of keys, and @@ -489,7 +492,7 @@ benchLookupsIO !hbio !arenaManager !resolve !wb !wbblobs !rs !bs !ics !hs = | otherwise = do let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize !_ <- lookupsIOWithWriteBuffer - hbio arenaManager resolve wb wbblobs rs bs ics hs ks + hbio arenaManager resolve benchSalt wb wbblobs rs bs ics hs ks go keyRng' (n-benchmarkGenBatchSize) {------------------------------------------------------------------------------- @@ -524,7 +527,7 @@ classifyLookups !bs !keyRng0 !n0 = | otherwise = unsafePerformIO (putStr ".") `seq` let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize - !rkixs = bloomQueries bs ks + !rkixs = bloomQueries benchSalt bs ks in loop (positives + VP.length rkixs) keyRng' 
(n-benchmarkGenBatchSize) -- | Fill a mutable vector with uniformly random values. diff --git a/bench/macro/lsm-tree-bench-wp8.hs b/bench/macro/lsm-tree-bench-wp8.hs index 5870c933d..e6cccccba 100644 --- a/bench/macro/lsm-tree-bench-wp8.hs +++ b/bench/macro/lsm-tree-bench-wp8.hs @@ -87,6 +87,9 @@ benchTableConfig :: LSM.TableConfig benchTableConfig = LSM.defaultTableConfig {LSM.confFencePointerIndex = LSM.CompactIndex} +benchSalt :: LSM.Salt +benchSalt = 4 + ------------------------------------------------------------------------------- -- Keys and values ------------------------------------------------------------------------------- @@ -413,7 +416,7 @@ doSetup' gopts opts = do let name = LSM.toSnapshotName "bench" - LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session -> do + LSM.withOpenSession (mkTracer gopts) hasFS hasBlockIO benchSalt (FS.mkFsPath []) $ \session -> do tbl <- LSM.newTableWith @IO @K @V @B (mkTableConfigSetup gopts opts benchTableConfig) session forM_ (groupsOfN 256 [ 0 .. 
initialSize gopts ]) $ \batch -> do @@ -575,7 +578,7 @@ doRun gopts opts = do let name = LSM.toSnapshotName "bench" - LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session -> + LSM.withOpenSession (mkTracer gopts) hasFS hasBlockIO benchSalt (FS.mkFsPath []) $ \session -> withLatencyHandle $ \h -> do -- open snapshot -- In checking mode we start with an empty table, since our pure diff --git a/bench/micro/Bench/Database/LSMTree.hs b/bench/micro/Bench/Database/LSMTree.hs index 2b2dc3eb9..b949c24ea 100644 --- a/bench/micro/Bench/Database/LSMTree.hs +++ b/bench/micro/Bench/Database/LSMTree.hs @@ -6,6 +6,7 @@ import Control.DeepSeq import Control.Exception import Control.Tracer import Criterion.Main +import qualified Data.BloomFilter.Hash as Bloom import Data.ByteString.Short (ShortByteString) import qualified Data.ByteString.Short as SBS import Data.Foldable @@ -82,6 +83,9 @@ benchConfig = defaultTableConfig , confFencePointerIndex = CompactIndex } +benchSalt :: Bloom.Salt +benchSalt = 4 + {------------------------------------------------------------------------------- Large Value vs. 
Small Value Blob -------------------------------------------------------------------------------} @@ -135,7 +139,7 @@ benchLargeValueVsSmallValueBlob = initialise inss = do (tmpDir, hfs, hbio) <- mkFiles - s <- openSession nullTracer hfs hbio (FS.mkFsPath []) + s <- openSession nullTracer hfs hbio benchSalt (FS.mkFsPath []) t <- newTableWith benchConfig s V.mapM_ (inserts t) inss pure (tmpDir, hfs, hbio, s, t) @@ -220,7 +224,7 @@ benchCursorScanVsRangeLookupScan = initialise inss = do (tmpDir, hfs, hbio) <- mkFiles - s <- openSession nullTracer hfs hbio (FS.mkFsPath []) + s <- openSession nullTracer hfs hbio benchSalt (FS.mkFsPath []) t <- newTableWith benchConfig s V.mapM_ (inserts t) inss pure (tmpDir, hfs, hbio, s, t) @@ -265,7 +269,7 @@ benchInsertBatches = initialise = do (tmpDir, hfs, hbio) <- mkFiles - s <- openSession nullTracer hfs hbio (FS.mkFsPath []) + s <- openSession nullTracer hfs hbio benchSalt (FS.mkFsPath []) t <- newTableWith _benchConfig s pure (tmpDir, hfs, hbio, s, t) @@ -451,7 +455,7 @@ mkTable :: , Table IO K V3 B3 ) mkTable hfs hbio conf = do - sesh <- openSession nullTracer hfs hbio (FS.mkFsPath []) + sesh <- openSession nullTracer hfs hbio benchSalt (FS.mkFsPath []) t <- newTableWith conf sesh pure (sesh, t) diff --git a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs index 624029fe1..ddf3324d4 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs @@ -45,6 +45,9 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.BloomFilter" [ ] ] +benchSalt :: Bloom.Salt +benchSalt = 4 + -- | Input environment for benchmarking 'Bloom.elem'. 
elemEnv :: Double -- ^ False positive rate @@ -61,7 +64,7 @@ elemEnv fpr nbloom nelemsPositive nelemsNegative = do $ uniformWithoutReplacement @UTxOKey g1 (nbloom + nelemsNegative) ys2 = sampleUniformWithReplacement @UTxOKey g2 nelemsPositive xs zs = shuffle (ys1 ++ ys2) g3 - pure ( Bloom.fromList (Bloom.policyForFPR fpr) (fmap serialiseKey xs) + pure ( Bloom.fromList (Bloom.policyForFPR fpr) benchSalt (fmap serialiseKey xs) , fmap serialiseKey zs ) @@ -86,5 +89,5 @@ constructBloom :: constructBloom fpr m = -- For faster construction, avoid going via lists and use Bloom.create, -- traversing the map inserting the keys - Bloom.create (Bloom.sizeForFPR fpr (Map.size m)) $ \b -> + Bloom.create (Bloom.sizeForFPR fpr (Map.size m)) benchSalt $ \b -> BiFold.bifoldMap (\k -> Bloom.insert b k) (\_v -> pure ()) m diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs b/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs index 9610d90de..5c02b3b72 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs @@ -21,6 +21,7 @@ import Database.LSMTree.Extras.Random (frequency, randomByteStringR, import Database.LSMTree.Extras.UTxO import Database.LSMTree.Internal.Arena (ArenaManager, closeArena, newArena, newArenaManager, withArena) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) import Database.LSMTree.Internal.Index as Index import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches, @@ -84,6 +85,9 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Lookup" [ } ] +benchSalt :: Bloom.Salt +benchSalt = 4 + benchLookups :: Config -> Benchmark benchLookups conf@Config{name} = withEnv $ \ ~(_dir, arenaManager, _hasFS, hasBlockIO, wbblobs, rs, ks) -> @@ -96,23 +100,23 @@ benchLookups conf@Config{name} = -- The bloomfilter is queried for all lookup keys. 
The result is an -- unboxed vector, so only use @whnf@. bench "Bloomfilter query" $ - whnf (\ks' -> bloomQueries blooms ks') ks + whnf (\ks' -> bloomQueries benchSalt blooms ks') ks -- The compact index is only searched for (true and false) positive -- lookup keys. We use whnf here because the result is - , env (pure $ bloomQueries blooms ks) $ \rkixs -> + , env (pure $ bloomQueries benchSalt blooms ks) $ \rkixs -> bench "Compact index search" $ whnfAppIO (\ks' -> withArena arenaManager $ \arena -> stToIO $ indexSearches arena indexes kopsFiles ks' rkixs) ks -- prepLookups combines bloom filter querying and index searching. -- The implementation forces the results to WHNF, so we use -- whnfAppIO here instead of nfAppIO. , bench "Lookup preparation in memory" $ - whnfAppIO (\ks' -> withArena arenaManager $ \arena -> stToIO $ prepLookups arena blooms indexes kopsFiles ks') ks + whnfAppIO (\ks' -> withArena arenaManager $ \arena -> stToIO $ prepLookups arena benchSalt blooms indexes kopsFiles ks') ks -- Submit the IOOps we get from prepLookups to HasBlockIO. We use -- perRunEnv because IOOps contain mutable buffers, so we want fresh -- ones for each run of the benchmark. We manually evaluate the -- result to WHNF since it is unboxed vector. 
, bench "Submit IOOps" $ - perRunEnv (withArena arenaManager $ \arena -> stToIO $ prepLookups arena blooms indexes kopsFiles ks) $ \ ~(_rkixs, ioops) -> do + perRunEnv (withArena arenaManager $ \arena -> stToIO $ prepLookups arena benchSalt blooms indexes kopsFiles ks) $ \ ~(_rkixs, ioops) -> do !_ioress <- FS.submitIO hasBlockIO ioops pure () -- When IO result have been collected, intra-page lookups searches @@ -125,7 +129,7 @@ benchLookups conf@Config{name} = , bench "Perform intra-page lookups" $ perRunEnvWithCleanup ( do arena <- newArena arenaManager - (rkixs, ioops) <- stToIO (prepLookups arena blooms indexes kopsFiles ks) + (rkixs, ioops) <- stToIO (prepLookups arena benchSalt blooms indexes kopsFiles ks) ioress <- FS.submitIO hasBlockIO ioops pure (rkixs, ioops, ioress, arena) ) @@ -141,7 +145,7 @@ benchLookups conf@Config{name} = , bench "Lookups in IO" $ whnfAppIO (\ks' -> lookupsIOWithWriteBuffer hasBlockIO arenaManager resolveV - WB.empty wbblobs + benchSalt WB.empty wbblobs rs blooms indexes kopsFiles ks') ks ] -- TODO: consider adding benchmarks that also use the write buffer @@ -192,7 +196,7 @@ lookupsInBatchesEnv Config {..} = do wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"]) wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0) - r <- Run.fromWriteBuffer hasFS hasBlockIO runParams fsps wb wbblobs + r <- Run.fromWriteBuffer hasFS hasBlockIO benchSalt runParams fsps wb wbblobs let NumEntries nentriesReal = Run.size r assertEqual nentriesReal nentries $ pure () -- 42 to 43 entries per page diff --git a/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs b/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs index 988701b1b..07b26adc3 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/Merge.hs @@ -16,6 +16,7 @@ import Database.LSMTree.Extras.Orphans () import qualified Database.LSMTree.Extras.Random as R 
import Database.LSMTree.Extras.RunData import Database.LSMTree.Extras.UTxO +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact)) import Database.LSMTree.Internal.Merge (MergeType (..)) @@ -220,6 +221,9 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Merge" [ | w <- weights ] +benchSalt :: Bloom.Salt +benchSalt = 4 + runParams :: RunBuilder.RunParams runParams = RunBuilder.RunParams { @@ -273,7 +277,7 @@ merge :: merge fs hbio Config {..} targetPaths runs = do let f = fromMaybe const mergeResolve m <- fromMaybe (error "empty inputs, no merge created") <$> - Merge.new fs hbio runParams mergeType f targetPaths runs + Merge.new fs hbio benchSalt runParams mergeType f targetPaths runs Merge.stepsToCompletion m stepSize fsPath :: FS.FsPath @@ -397,7 +401,7 @@ randomRuns :: randomRuns hasFS hasBlockIO config@Config {..} rng0 = do counter <- inputRunPathsCounter fmap V.fromList $ - mapM (unsafeCreateRun hasFS hasBlockIO runParams fsPath counter) $ + mapM (unsafeCreateRun hasFS hasBlockIO benchSalt runParams fsPath counter) $ zipWith (randomRunData config) nentries @@ -446,5 +450,5 @@ randomRunData Config {..} runentries g0 = -- Each run entry needs a distinct key. randomWord64OutOf :: Int -> Rnd SerialisedKey randomWord64OutOf possibleKeys = - first (serialiseKey . Hash.hash64) + first (serialiseKey . Hash.hashSalt64 benchSalt) . 
uniformR (0, fromIntegral possibleKeys :: Word64) diff --git a/blockio/src-linux/System/FS/BlockIO/Async.hs b/blockio/src-linux/System/FS/BlockIO/Async.hs index 830966dcc..348e4d4bd 100644 --- a/blockio/src-linux/System/FS/BlockIO/Async.hs +++ b/blockio/src-linux/System/FS/BlockIO/Async.hs @@ -56,7 +56,8 @@ ctxParamsConv API.IOCtxParams{API.ioctxBatchSizeLimit, API.ioctxConcurrencyLimit } submitIO :: - HasFS IO HandleIO + HasCallStack + => HasFS IO HandleIO -> I.IOCtx -> V.Vector (IOOp RealWorld HandleIO) -> IO (VU.Vector IOResult) diff --git a/blockio/src/System/FS/BlockIO/Serial.hs b/blockio/src/System/FS/BlockIO/Serial.hs index 8db4e56c4..c8d75019c 100644 --- a/blockio/src/System/FS/BlockIO/Serial.hs +++ b/blockio/src/System/FS/BlockIO/Serial.hs @@ -9,6 +9,7 @@ import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld) import qualified Data.Vector as V import qualified Data.Vector.Unboxed as VU import qualified Data.Vector.Unboxed.Mutable as VUM +import GHC.Stack (HasCallStack) import System.FS.API import qualified System.FS.BlockIO.API as API import System.FS.BlockIO.API (IOOp (..), IOResult (..), LockMode (..)) @@ -55,7 +56,7 @@ serialHasBlockIO hSetNoCache hAdvise hAllocate tryLockFile hSynchronise synchron data IOCtx m = IOCtx { ctxFS :: SomeHasFS m, openVar :: MVar m Bool } {-# SPECIALISE guardIsOpen :: IOCtx IO -> IO () #-} -guardIsOpen :: (MonadMVar m, MonadThrow m) => IOCtx m -> m () +guardIsOpen :: (HasCallStack, MonadMVar m, MonadThrow m) => IOCtx m -> m () guardIsOpen ctx = readMVar (openVar ctx) >>= \b -> unless b $ throwIO (API.mkClosedError (ctxFS ctx) "submitIO") @@ -72,7 +73,7 @@ close ctx = modifyMVar_ (openVar ctx) $ const (pure False) -> IOCtx IO -> V.Vector (IOOp RealWorld h) -> IO (VU.Vector IOResult) #-} submitIO :: - (MonadMVar m, MonadThrow m, PrimMonad m) + (HasCallStack, MonadMVar m, MonadThrow m, PrimMonad m) => HasFS m h -> IOCtx m -> V.Vector (IOOp (PrimState m) h) diff --git a/bloomfilter/bench/bloomfilter-bench.hs 
b/bloomfilter/bench/bloomfilter-bench.hs index 5a7265ed9..ceb2d58b1 100644 --- a/bloomfilter/bench/bloomfilter-bench.hs +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -1,13 +1,11 @@ module Main where +import Criterion.Main (bench, bgroup, defaultMain, env, whnf) import qualified Data.BloomFilter.Blocked as B.Blocked import qualified Data.BloomFilter.Classic as B.Classic -import Data.BloomFilter.Hash (Hashable (..), hash64) - +import Data.BloomFilter.Hash (Hashable (..)) import Data.Word (Word64) -import System.Random - -import Criterion.Main +import System.Random (StdGen, newStdGen, uniform) main :: IO () main = @@ -42,11 +40,13 @@ main = constructBloom_classic :: Int -> Double -> StdGen -> B.Classic.Bloom Word64 constructBloom_classic n fpr g0 = - B.Classic.unfold (B.Classic.sizeForFPR fpr n) (nextElement n) (g0, 0) + let (!salt, !g1) = uniform g0 in + B.Classic.unfold (B.Classic.sizeForFPR fpr n) salt (nextElement n) (g1, 0) constructBloom_blocked :: Int -> Double -> StdGen -> B.Blocked.Bloom Word64 constructBloom_blocked n fpr g0 = - B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) (nextElement n) (g0, 0) + let (!salt, !g1) = uniform g0 in + B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) salt (nextElement n) (g1, 0) {-# INLINE nextElement #-} nextElement :: Int -> (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) diff --git a/bloomfilter/examples/spell.hs b/bloomfilter/examples/spell.hs index a1cf356a4..8a3bc1957 100644 --- a/bloomfilter/examples/spell.hs +++ b/bloomfilter/examples/spell.hs @@ -10,7 +10,10 @@ main :: IO () main = do files <- getArgs dictionary <- readFile "/usr/share/dict/words" - let !bloom = B.fromList (B.policyForFPR 0.01) (words dictionary) + let !bloom = B.fromList (B.policyForFPR 0.01) bSalt (words dictionary) forM_ files $ \file -> putStrLn . unlines . filter (`B.notElem` bloom) . 
words =<< readFile file + +bSalt :: B.Salt +bSalt = 4 diff --git a/bloomfilter/src/Data/BloomFilter/Blocked.hs b/bloomfilter/src/Data/BloomFilter/Blocked.hs index 9e1e2cc51..a94a3d876 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked.hs @@ -16,6 +16,7 @@ module Data.BloomFilter.Blocked ( -- * Types Hash, + Salt, Hashable, -- * Immutable Bloom filters @@ -64,7 +65,7 @@ module Data.BloomFilter.Blocked ( -- * Low level variants Hashes, - hashes, + hashesWithSalt, insertHashes, elemHashes, -- ** Prefetching @@ -92,19 +93,20 @@ import Prelude hiding (elem, notElem) -- Example: -- -- @ ---filter = create (sizeForBits 16 2) $ \mf -> do +-- filter = create (sizeForBits 16 2) 4 $ \mf -> do -- insert mf \"foo\" -- insert mf \"bar\" -- @ -- -- Note that the result of the setup function is not used. create :: BloomSize + -> Salt -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function -> Bloom a {-# INLINE create #-} -create bloomsize body = +create bloomsize bloomsalt body = runST $ do - mb <- new bloomsize + mb <- new bloomsize bloomsalt body mb unsafeFreeze mb @@ -112,14 +114,14 @@ create bloomsize body = -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. insert :: Hashable a => MBloom s a -> a -> ST s () -insert = \ !mb !x -> insertHashes mb (hashes x) +insert = \ !mb !x -> insertHashes mb (hashesWithSalt (mbHashSalt mb) x) {-# INLINE elem #-} -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. elem :: Hashable a => a -> Bloom a -> Bool -elem = \ !x !b -> elemHashes b (hashes x) +elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x) -- | Same as 'elem' but with the opposite argument order: -- @@ -150,12 +152,13 @@ notElem = \x b -> not (x `elem` b) unfold :: forall a b. 
Hashable a => BloomSize + -> Salt -> (b -> Maybe (a, b)) -- ^ seeding function -> b -- ^ initial seed -> Bloom a {-# INLINE unfold #-} -unfold bloomsize f k = - create bloomsize body +unfold bloomsize bloomsalt f k = + create bloomsize bloomsalt body where body :: forall s. MBloom s a -> ST s () body mb = loop k @@ -170,26 +173,29 @@ unfold bloomsize f k = -- For example -- -- @ --- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"] +-- filter = fromList (policyForBits 10) 4 [\"foo\", \"bar\", \"quux\"] -- @ fromList :: (Foldable t, Hashable a) => BloomPolicy + -> Salt -> t a -- ^ values to populate with -> Bloom a -fromList policy xs = - create bsize (\b -> mapM_ (insert b) xs) +fromList policy bloomsalt xs = + create bsize bloomsalt (\b -> mapM_ (insert b) xs) where bsize = sizeForPolicy policy (length xs) {-# SPECIALISE deserialise :: BloomSize + -> Salt -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) -> IO (Bloom a) #-} deserialise :: PrimMonad m => BloomSize + -> Salt -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) -> m (Bloom a) -deserialise bloomsize fill = do - mbloom <- stToPrim $ new bloomsize +deserialise bloomsize bloomsalt fill = do + mbloom <- stToPrim $ new bloomsize bloomsalt Internal.deserialise mbloom fill stToPrim $ unsafeFreeze mbloom @@ -235,7 +241,7 @@ insertMany bloom key n = prepareProbes !i !i_w | i_w < 0x0f && i < n = do k <- key i - let !kh = hashes k + let !kh = hashesWithSalt (mbHashSalt bloom) k prefetchInsert bloom kh P.writePrimArray buf i_w kh prepareProbes (i+1) (i_w+1) @@ -258,7 +264,7 @@ insertMany bloom key n = -- (from the read end of the buffer). 
| i < n = do k <- key i - let !kh = hashes k + let !kh = hashesWithSalt (mbHashSalt bloom) k prefetchInsert bloom kh P.writePrimArray buf i_w kh insertProbe diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs index 61e38fd76..5dc41cdcb 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs @@ -8,18 +8,19 @@ -- the trusted base. module Data.BloomFilter.Blocked.Internal ( -- * Mutable Bloom filters - MBloom, + MBloom (mbHashSalt), new, maxSizeBits, -- * Immutable Bloom filters - Bloom, + Bloom (hashSalt), bloomInvariant, size, -- * Hash-based operations Hashes, - hashes, + Salt, + hashesWithSalt, insertHashes, prefetchInsert, elemHashes, @@ -84,6 +85,7 @@ type MBloom :: Type -> Type -> Type data MBloom s a = MBloom { mbNumBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero , mbNumHashes :: {-# UNPACK #-} !Int + , mbHashSalt :: {-# UNPACK #-} !Salt , mbBitArray :: {-# UNPACK #-} !(MBitArray s) } type role MBloom nominal nominal @@ -100,13 +102,14 @@ instance NFData (MBloom s a) where -- -- The filter size is capped at 'maxSizeBits'. -- -new :: BloomSize -> ST s (MBloom s a) -new BloomSize { sizeBits, sizeHashes } = do +new :: BloomSize -> Salt -> ST s (MBloom s a) +new BloomSize { sizeBits, sizeHashes } mbHashSalt = do let numBlocks = bitsToBlocks (max 1 (min maxSizeBits sizeBits)) mbBitArray <- BitArray.new numBlocks pure MBloom { mbNumBlocks = numBlocks, mbNumHashes = max 1 sizeHashes, + mbHashSalt, mbBitArray } @@ -174,6 +177,7 @@ type Bloom :: Type -> Type data Bloom a = Bloom { numBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero , numHashes :: {-# UNPACK #-} !Int + , hashSalt :: {-# UNPACK #-} !Salt , bitArray :: {-# UNPACK #-} !BitArray } deriving stock Eq @@ -239,9 +243,9 @@ prefetchElem Bloom { numBlocks, bitArray } !h = -- -- See also 'formatVersion' for compatibility advice. 
-- -serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise :: Bloom a -> (BloomSize, Salt, ByteArray, Int, Int) serialise b@Bloom{bitArray} = - (size b, ba, off, len) + (size b, hashSalt b, ba, off, len) where (ba, off, len) = BitArray.serialise bitArray @@ -253,11 +257,12 @@ serialise b@Bloom{bitArray} = -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter may be modified afterwards. freeze :: MBloom s a -> ST s (Bloom a) -freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do +freeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do bitArray <- BitArray.freeze mbBitArray let !bf = Bloom { numBlocks = mbNumBlocks, numHashes = mbNumHashes, + hashSalt = mbHashSalt, bitArray } assert (bloomInvariant bf) $ pure bf @@ -266,11 +271,12 @@ freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do -- mutable filter /must not/ be modified afterwards. For a safer creation -- interface, use 'freeze' or 'create'. unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do +unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do bitArray <- BitArray.unsafeFreeze mbBitArray let !bf = Bloom { numBlocks = mbNumBlocks, numHashes = mbNumHashes, + hashSalt = mbHashSalt, bitArray } assert (bloomInvariant bf) $ pure bf @@ -278,11 +284,12 @@ unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do -- | Copy an immutable Bloom filter to create a mutable one. There is -- no non-copying equivalent. 
thaw :: Bloom a -> ST s (MBloom s a) -thaw Bloom { numBlocks, numHashes, bitArray } = do +thaw Bloom { numBlocks, numHashes, hashSalt, bitArray } = do mbBitArray <- BitArray.thaw bitArray pure MBloom { mbNumBlocks = numBlocks, mbNumHashes = numHashes, + mbHashSalt = hashSalt, mbBitArray } @@ -317,9 +324,9 @@ newtype Hashes a = Hashes Hash deriving newtype Prim type role Hashes nominal -{-# INLINE hashes #-} -hashes :: Hashable a => a -> Hashes a -hashes = Hashes . hash64 +{-# INLINE hashesWithSalt #-} +hashesWithSalt :: Hashable a => Salt -> a -> Hashes a +hashesWithSalt = \ !salt !x -> Hashes (hashSalt64 salt x) {-# INLINE blockIxAndBitGen #-} -- | The scheme for turning 'Hashes' into block and bit indexes is as follows: diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index 833639e4d..15375b532 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -22,6 +22,7 @@ module Data.BloomFilter.Classic ( -- * Types Hash, + Salt, Hashable, -- * Immutable Bloom filters @@ -70,7 +71,7 @@ module Data.BloomFilter.Classic ( -- * Low level variants Hashes, - hashes, + hashesWithSalt, insertHashes, elemHashes, readHashes, @@ -94,32 +95,33 @@ import Prelude hiding (elem, notElem, read) -- Example: -- -- @ ---filter = create (sizeForBits 16 2) $ \mf -> do +--filter = create (sizeForBits 16 2) 4 $ \mf -> do -- insert mf \"foo\" -- insert mf \"bar\" -- @ -- -- Note that the result of the setup function is not used. create :: BloomSize + -> Salt -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function -> Bloom a {-# INLINE create #-} -create bloomsize body = +create bloomsize bloomsalt body = runST $ do - mb <- new bloomsize + mb <- new bloomsize bloomsalt body mb unsafeFreeze mb -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. 
insert :: Hashable a => MBloom s a -> a -> ST s () -insert !mb !x = insertHashes mb (hashes x) +insert !mb !x = insertHashes mb (hashesWithSalt (mbHashSalt mb) x) -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. elem :: Hashable a => a -> Bloom a -> Bool -elem = \ !x !b -> elemHashes b (hashes x) +elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x) -- | Same as 'elem' but with the opposite argument order: -- @@ -142,7 +144,7 @@ notElem = \ x b -> not (x `elem` b) -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. read :: Hashable a => MBloom s a -> a -> ST s Bool -read !mb !x = readHashes mb (hashes x) +read !mb !x = readHashes mb (hashesWithSalt (mbHashSalt mb) x) -- | Build an immutable Bloom filter from a seed value. The seeding -- function populates the filter as follows. @@ -155,12 +157,13 @@ read !mb !x = readHashes mb (hashes x) unfold :: forall a b. Hashable a => BloomSize + -> Salt -> (b -> Maybe (a, b)) -- ^ seeding function -> b -- ^ initial seed -> Bloom a {-# INLINE unfold #-} -unfold bloomsize f k = - create bloomsize body +unfold bloomsize bloomsalt f k = + create bloomsize bloomsalt body where body :: forall s. 
MBloom s a -> ST s () body mb = loop k @@ -177,26 +180,29 @@ unfold bloomsize f k = -- For example -- -- @ --- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"] +-- filt = fromList (policyForBits 10) 4 [\"foo\", \"bar\", \"quux\"] -- @ fromList :: (Foldable t, Hashable a) => BloomPolicy + -> Salt -> t a -- ^ values to populate with -> Bloom a -fromList policy xs = - create bsize (\b -> mapM_ (insert b) xs) +fromList policy bsalt xs = + create bsize bsalt (\b -> mapM_ (insert b) xs) where bsize = sizeForPolicy policy (length xs) {-# SPECIALISE deserialise :: BloomSize + -> Salt -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) -> IO (Bloom a) #-} deserialise :: PrimMonad m => BloomSize + -> Salt -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) -> m (Bloom a) -deserialise bloomsize fill = do - mbloom <- stToPrim $ new bloomsize +deserialise bloomsalt bloomsize fill = do + mbloom <- stToPrim $ new bloomsalt bloomsize Internal.deserialise mbloom fill stToPrim $ unsafeFreeze mbloom diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index 20fb0438e..ed08da5b3 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -7,18 +7,19 @@ -- the trusted base. 
module Data.BloomFilter.Classic.Internal ( -- * Mutable Bloom filters - MBloom, + MBloom (mbHashSalt), new, maxSizeBits, -- * Immutable Bloom filters - Bloom, + Bloom (hashSalt), bloomInvariant, size, -- * Hash-based operations Hashes, - hashes, + Salt, + hashesWithSalt, insertHashes, elemHashes, readHashes, @@ -89,6 +90,7 @@ type MBloom :: Type -> Type -> Type data MBloom s a = MBloom { mbNumBits :: {-# UNPACK #-} !Int -- ^ non-zero , mbNumHashes :: {-# UNPACK #-} !Int + , mbHashSalt :: {-# UNPACK #-} !Salt , mbBitArray :: {-# UNPACK #-} !(MBitArray s) } type role MBloom nominal nominal @@ -103,13 +105,14 @@ instance NFData (MBloom s a) where -- -- The filter size is capped at 'maxSizeBits'. -- -new :: BloomSize -> ST s (MBloom s a) -new BloomSize { sizeBits, sizeHashes } = do +new :: BloomSize -> Salt -> ST s (MBloom s a) +new BloomSize { sizeBits, sizeHashes } mbHashSalt = do let !mbNumBits = max 1 (min maxSizeBits sizeBits) mbBitArray <- BitArray.new mbNumBits pure MBloom { mbNumBits, mbNumHashes = max 1 sizeHashes, + mbHashSalt, mbBitArray } @@ -173,6 +176,7 @@ type Bloom :: Type -> Type data Bloom a = Bloom { numBits :: {-# UNPACK #-} !Int -- ^ non-zero , numHashes :: {-# UNPACK #-} !Int + , hashSalt :: {-# UNPACK #-} !Salt , bitArray :: {-# UNPACK #-} !BitArray } deriving stock Eq @@ -224,9 +228,9 @@ elemHashes Bloom { numBits, numHashes, bitArray } !h = -- -- See also 'formatVersion' for compatibility advice. -- -serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise :: Bloom a -> (BloomSize, Salt, ByteArray, Int, Int) serialise b@Bloom{bitArray} = - (size b, ba, off, len) + (size b, hashSalt b, ba, off, len) where (ba, off, len) = BitArray.serialise bitArray @@ -238,11 +242,12 @@ serialise b@Bloom{bitArray} = -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter may be modified afterwards. 
freeze :: MBloom s a -> ST s (Bloom a) -freeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do +freeze MBloom { mbNumBits, mbNumHashes, mbHashSalt, mbBitArray } = do bitArray <- BitArray.freeze mbBitArray let !bf = Bloom { numBits = mbNumBits, numHashes = mbNumHashes, + hashSalt = mbHashSalt, bitArray } assert (bloomInvariant bf) $ pure bf @@ -251,11 +256,12 @@ freeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do -- mutable filter /must not/ be modified afterwards. For a safer creation -- interface, use 'freeze' or 'create'. unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do +unsafeFreeze MBloom { mbNumBits, mbNumHashes, mbHashSalt, mbBitArray } = do bitArray <- BitArray.unsafeFreeze mbBitArray let !bf = Bloom { numBits = mbNumBits, numHashes = mbNumHashes, + hashSalt = mbHashSalt, bitArray } assert (bloomInvariant bf) $ pure bf @@ -263,11 +269,12 @@ unsafeFreeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do -- | Copy an immutable Bloom filter to create a mutable one. There is -- no non-copying equivalent. thaw :: Bloom a -> ST s (MBloom s a) -thaw Bloom { numBits, numHashes, bitArray } = do +thaw Bloom { numBits, numHashes, hashSalt, bitArray } = do mbBitArray <- BitArray.thaw bitArray pure MBloom { mbNumBits = numBits, mbNumHashes = numHashes, + mbHashSalt = hashSalt, mbBitArray } @@ -429,6 +436,6 @@ evalHashes (Hashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) -- | Create 'Hashes' structure. -- -- It's simply hashes the value twice using seed 0 and 1. 
-hashes :: Hashable a => a -> Hashes a -hashes v = Hashes (hashSalt64 0 v) (hashSalt64 1 v) -{-# INLINE hashes #-} +hashesWithSalt :: Hashable a => Salt -> a -> Hashes a +hashesWithSalt salt v = Hashes (hashSalt64 salt v) (hashSalt64 (salt + 1) v) +{-# INLINE hashesWithSalt #-} diff --git a/bloomfilter/src/Data/BloomFilter/Hash.hs b/bloomfilter/src/Data/BloomFilter/Hash.hs index f778b4799..5cb15ba85 100644 --- a/bloomfilter/src/Data/BloomFilter/Hash.hs +++ b/bloomfilter/src/Data/BloomFilter/Hash.hs @@ -8,6 +8,7 @@ module Data.BloomFilter.Hash ( -- * Basic hash functionality Hash, + Salt, Hashable(..), hash64, hashByteArray, @@ -29,6 +30,9 @@ import qualified XXH3 -- | A hash value is 64 bits wide. type Hash = Word64 +-- | The salt value to be used for hashes. +type Salt = Word64 + ------------------------------------------------------------------------------- -- One shot hashing ------------------------------------------------------------------------------- @@ -40,12 +44,12 @@ type Hash = Word64 class Hashable a where -- | Compute a 64-bit hash of a value. hashSalt64 :: - Word64 -- ^ seed - -> a -- ^ value to hash - -> Word64 + Salt -- ^ seed + -> a -- ^ value to hash + -> Hash -- | Compute a 64-bit hash. -hash64 :: Hashable a => a -> Word64 +hash64 :: Hashable a => a -> Hash hash64 = hashSalt64 0 instance Hashable () where @@ -105,7 +109,7 @@ instance (Hashable a, Hashable b) => Hashable (a, b) where update s (hash64 y) -- | Hash a (part of) 'P.ByteArray'. -hashByteArray :: P.ByteArray -> Int -> Int -> Word64 -> Word64 +hashByteArray :: P.ByteArray -> Int -> Int -> Salt -> Hash hashByteArray = XXH3.xxh3_64bit_withSeed_ba ------------------------------------------------------------------------------- @@ -132,7 +136,7 @@ instance Incremental Char where update s c = update s (fromIntegral (ord c) :: Word32) -- | Calculate incrementally constructed hash. -incrementalHash :: Word64 -> (forall s. HashState s -> ST s ()) -> Word64 +incrementalHash :: Salt -> (forall s. 
HashState s -> ST s ()) -> Hash incrementalHash seed f = runST $ do s <- XXH3.xxh3_64bit_createState XXH3.xxh3_64bit_reset_withSeed s seed diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index dae37ff50..59e5e30cb 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -103,10 +103,10 @@ proxyBlocked = Proxy prop_elem :: forall bloom a. (BloomFilter bloom, Hashable a) => Proxy bloom -> Proxy a - -> a -> [a] -> FPR -> Property -prop_elem proxy _ x xs (FPR q) = + -> B.Salt -> a -> [a] -> FPR -> Property +prop_elem proxy _ salt x xs (FPR q) = let bf :: bloom a - bf = fromList (policyForFPR proxy q) (x:xs) + bf = fromList (policyForFPR proxy q) salt (x:xs) in elem x bf .&&. not (notElem x bf) ------------------------------------------------------------------------------- @@ -257,21 +257,23 @@ prop_insertMany (FPR fpr) keys = bloom_insert === bloom_insertMany where bloom_insert = - Bloom.Blocked.create (Bloom.Blocked.sizeForFPR fpr n) $ \mb -> + Bloom.Blocked.create (Bloom.Blocked.sizeForFPR fpr n) salt $ \mb -> mapM_ (Bloom.Blocked.insert mb) keys bloom_insertMany = - Bloom.Blocked.create (Bloom.Blocked.sizeForFPR fpr n) $ \mb -> + Bloom.Blocked.create (Bloom.Blocked.sizeForFPR fpr n) salt $ \mb -> Bloom.Blocked.insertMany mb (\k -> pure $ keys !! 
k) n !n = length keys + !salt = 4 -- https://xkcd.com/221/ + ------------------------------------------------------------------------------- -- Class to allow testing two filter implementations ------------------------------------------------------------------------------- class BloomFilter bloom where - fromList :: Hashable a => B.BloomPolicy -> [a] -> bloom a + fromList :: Hashable a => B.BloomPolicy -> B.Salt -> [a] -> bloom a elem :: Hashable a => a -> bloom a -> Bool notElem :: Hashable a => a -> bloom a -> Bool diff --git a/bloomfilter/tests/fpr-calc.hs b/bloomfilter/tests/fpr-calc.hs index 8776eb99c..6fb5a467a 100644 --- a/bloomfilter/tests/fpr-calc.hs +++ b/bloomfilter/tests/fpr-calc.hs @@ -2,7 +2,7 @@ module Main (main) where import qualified Data.BloomFilter as B (BitsPerEntry, BloomPolicy, BloomSize, - FPR, Hashable) + FPR, Hashable, Salt) import qualified Data.BloomFilter.Blocked as B.Blocked import qualified Data.BloomFilter.Classic as B.Classic @@ -142,23 +142,26 @@ actualFalsePositiveRate bloomimpl policy n g0 = countFalsePositives :: forall bloom. 
BloomImpl bloom -> B.BloomPolicy -> Int -> StdGen -> Int countFalsePositives BloomImpl{..} policy n g0 = - let (!g0', !g0'') = splitGen g0 + let (!g01, !g02) = splitGen g0 + + -- create a random salt + (!salt, !g03) = uniform g02 -- create a bloom filter from n elements from g0 size = sizeForPolicy policy n xs_b :: bloom Int - !xs_b = unfold size nextElement (g0', 0) + !xs_b = unfold size salt nextElement (g01, 0) -- and a set, so we can make sure we don't count true positives xs_s :: IntSet - !xs_s = IntSet.fromList (unfoldr nextElement (g0', 0)) + !xs_s = IntSet.fromList (unfoldr nextElement (g01, 0)) -- now for a different random sequence (that will mostly not overlap) -- count the number of false positives in length [ () - | y <- unfoldr nextElement (g0'', 0) + | y <- unfoldr nextElement (g03, 0) , y `elem` xs_b -- Bloom filter reports positive , not (y `IntSet.member` xs_s) -- but it is not a true positive ] @@ -177,7 +180,7 @@ data BloomImpl bloom = BloomImpl { policyFPR :: B.BloomPolicy -> B.FPR, sizeForPolicy :: B.BloomPolicy -> Int -> B.BloomSize, unfold :: forall a b. B.Hashable a - => B.BloomSize -> (b -> Maybe (a, b)) -> b -> bloom a, + => B.BloomSize -> B.Salt -> (b -> Maybe (a, b)) -> b -> bloom a, elem :: forall a. B.Hashable a => a -> bloom a -> Bool } diff --git a/doc/format-run.md b/doc/format-run.md index 74e986c2d..3607e669b 100644 --- a/doc/format-run.md +++ b/doc/format-run.md @@ -140,10 +140,11 @@ It (and remaining fields) are serialised in native byte order. The remainder of the header for format 1 consists of: 1. The hash function count (32bit) 2. The bit size of the filter (64bit) + 3. The hash salt of the filter (64bit) The fields of the header are serialized in native byte order. -The maximum filter size is 2^48 bits, corresponding to 32 Terabytes. +The maximum filter size is 2^48 bits, corresponding to 32 Terabytes. The family of hash functions to use is implied by the format version.
The filter bit vector itself is organised as a whole number of 64 bit words. diff --git a/lsm-tree.cabal b/lsm-tree.cabal index f05d07216..6ba33cfdd 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -593,6 +593,8 @@ library , lsm-tree:control , lsm-tree:kmerge , primitive ^>=0.9 + , random ^>=1.0 || ^>=1.1 || ^>=1.2 || ^>=1.3 + , serialise ^>=0.2 , text ^>=2.1.1 , utf8-string ^>=1.0 , vector ^>=0.13 @@ -770,6 +772,7 @@ test-suite lsm-tree-test Database.LSMTree.Model.IO Database.LSMTree.Model.Session Database.LSMTree.Model.Table + Test.Database.LSMTree Test.Database.LSMTree.Class Test.Database.LSMTree.Generators Test.Database.LSMTree.Internal diff --git a/src-extras/Database/LSMTree/Extras.hs b/src-extras/Database/LSMTree/Extras.hs index 94fa3261c..0b9f998dd 100644 --- a/src-extras/Database/LSMTree/Extras.hs +++ b/src-extras/Database/LSMTree/Extras.hs @@ -1,6 +1,7 @@ module Database.LSMTree.Extras ( showPowersOf10 , showPowersOf + , showRangesOf , groupsOfN , vgroupsOfN ) where @@ -26,6 +27,17 @@ showPowersOf factor n ub = fromJust (find (n <) (iterate (* factor) factor)) lb = ub `div` factor +showRangesOf :: Int -> Int -> String +showRangesOf range n + | range <= 0 = error "showRangesOf: range must be larger than 0" + | n == 0 = "n == 0" + | m == 0 = printf "%d < n < %d" lb ub + | otherwise = printf "%d <= n < %d" lb ub + where + m = n `div` range + lb = m * range + ub = (m + 1) * range + -- | Make groups of @n@ elements from a list @xs@ groupsOfN :: Int -> [a] -> [NonEmpty a] groupsOfN n diff --git a/src-extras/Database/LSMTree/Extras/MergingRunData.hs b/src-extras/Database/LSMTree/Extras/MergingRunData.hs index a3d5819db..a3b3a7caf 100644 --- a/src-extras/Database/LSMTree/Extras/MergingRunData.hs +++ b/src-extras/Database/LSMTree/Extras/MergingRunData.hs @@ -23,6 +23,7 @@ import qualified Data.Vector as V import Database.LSMTree.Extras (showPowersOf) import Database.LSMTree.Extras.Generators () import Database.LSMTree.Extras.RunData +import qualified 
Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.MergingRun (MergingRun) import qualified Database.LSMTree.Internal.MergingRun as MR import Database.LSMTree.Internal.Paths @@ -46,15 +47,16 @@ withMergingRun :: => HasFS IO h -> HasBlockIO IO h -> ResolveSerialisedValue + -> Bloom.Salt -> RunBuilder.RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedMergingRunData t -> (Ref (MergingRun t IO h) -> IO a) -> IO a -withMergingRun hfs hbio resolve runParams path counter mrd = do +withMergingRun hfs hbio resolve salt runParams path counter mrd = do bracket - (unsafeCreateMergingRun hfs hbio resolve runParams path counter mrd) + (unsafeCreateMergingRun hfs hbio resolve salt runParams path counter mrd) releaseRef -- | Flush serialised merging run data to disk. @@ -68,24 +70,25 @@ unsafeCreateMergingRun :: => HasFS IO h -> HasBlockIO IO h -> ResolveSerialisedValue + -> Bloom.Salt -> RunBuilder.RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedMergingRunData t -> IO (Ref (MergingRun t IO h)) -unsafeCreateMergingRun hfs hbio resolve runParams path counter = \case +unsafeCreateMergingRun hfs hbio resolve salt runParams path counter = \case CompletedMergeData _ rd -> do - withRun hfs hbio runParams path counter rd $ \run -> do + withRun hfs hbio salt runParams path counter rd $ \run -> do -- slightly hacky, generally it's larger let totalDebt = MR.numEntriesToMergeDebt (Run.size run) MR.newCompleted totalDebt run OngoingMergeData mergeType rds -> do - withRuns hfs hbio runParams path counter (toRunData <$> rds) + withRuns hfs hbio salt runParams path counter (toRunData <$> rds) $ \runs -> do n <- incrUniqCounter counter let fsPaths = RunFsPaths path (RunNumber (uniqueToInt n)) - MR.new hfs hbio resolve runParams mergeType + MR.new hfs hbio resolve salt runParams mergeType fsPaths (V.fromList runs) {------------------------------------------------------------------------------- diff --git a/src-extras/Database/LSMTree/Extras/MergingTreeData.hs 
b/src-extras/Database/LSMTree/Extras/MergingTreeData.hs index 912305bf9..675de359e 100644 --- a/src-extras/Database/LSMTree/Extras/MergingTreeData.hs +++ b/src-extras/Database/LSMTree/Extras/MergingTreeData.hs @@ -24,6 +24,7 @@ import Database.LSMTree.Extras (showPowersOf) import Database.LSMTree.Extras.Generators () import Database.LSMTree.Extras.MergingRunData import Database.LSMTree.Extras.RunData +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import qualified Database.LSMTree.Internal.MergingRun as MR import Database.LSMTree.Internal.MergingTree (MergingTree) import qualified Database.LSMTree.Internal.MergingTree as MT @@ -44,15 +45,16 @@ withMergingTree :: HasFS IO h -> HasBlockIO IO h -> ResolveSerialisedValue + -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedMergingTreeData -> (Ref (MergingTree IO h) -> IO a) -> IO a -withMergingTree hfs hbio resolve runParams path counter mrd = do +withMergingTree hfs hbio resolve salt runParams path counter mrd = do bracket - (unsafeCreateMergingTree hfs hbio resolve runParams path counter mrd) + (unsafeCreateMergingTree hfs hbio resolve salt runParams path counter mrd) releaseRef -- | Flush serialised merging tree data to disk. 
@@ -65,19 +67,20 @@ unsafeCreateMergingTree :: HasFS IO h -> HasBlockIO IO h -> ResolveSerialisedValue + -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedMergingTreeData -> IO (Ref (MergingTree IO h)) -unsafeCreateMergingTree hfs hbio resolve runParams path counter = go +unsafeCreateMergingTree hfs hbio resolve salt runParams path counter = go where go = \case CompletedTreeMergeData rd -> - withRun hfs hbio runParams path counter rd $ \run -> + withRun hfs hbio salt runParams path counter rd $ \run -> MT.newCompletedMerge run OngoingTreeMergeData mrd -> - withMergingRun hfs hbio resolve runParams path counter mrd $ \mr -> + withMergingRun hfs hbio resolve salt runParams path counter mrd $ \mr -> MT.newOngoingMerge mr PendingLevelMergeData prds mtd -> withPreExistingRuns prds $ \prs -> @@ -100,11 +103,11 @@ unsafeCreateMergingTree hfs hbio resolve runParams path counter = go withPreExistingRuns [] act = act [] withPreExistingRuns (PreExistingRunData rd : rest) act = - withRun hfs hbio runParams path counter rd $ \r -> + withRun hfs hbio salt runParams path counter rd $ \r -> withPreExistingRuns rest $ \prs -> act (MT.PreExistingRun r : prs) withPreExistingRuns (PreExistingMergingRunData mrd : rest) act = - withMergingRun hfs hbio resolve runParams path counter mrd $ \mr -> + withMergingRun hfs hbio resolve salt runParams path counter mrd $ \mr -> withPreExistingRuns rest $ \prs -> act (MT.PreExistingMergingRun mr : prs) diff --git a/src-extras/Database/LSMTree/Extras/RunData.hs b/src-extras/Database/LSMTree/Extras/RunData.hs index f560241f3..b313ded60 100644 --- a/src-extras/Database/LSMTree/Extras/RunData.hs +++ b/src-extras/Database/LSMTree/Extras/RunData.hs @@ -47,6 +47,7 @@ import qualified Data.Map.Strict as Map import qualified Data.Vector as V import Database.LSMTree.Extras (showPowersOf10) import Database.LSMTree.Extras.Generators () +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry 
import Database.LSMTree.Internal.MergeSchedule (addWriteBufferEntries) import Database.LSMTree.Internal.Paths @@ -75,29 +76,31 @@ import Test.QuickCheck withRun :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedRunData -> (Ref (Run IO h) -> IO a) -> IO a -withRun hfs hbio runParams path counter rd = do +withRun hfs hbio salt runParams path counter rd = do bracket - (unsafeCreateRun hfs hbio runParams path counter rd) + (unsafeCreateRun hfs hbio salt runParams path counter rd) releaseRef -- | Create a temporary 'Run' using 'unsafeCreateRunAt'. withRunAt :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> RunFsPaths -> SerialisedRunData -> (Ref (Run IO h) -> IO a) -> IO a -withRunAt hfs hbio runParams path rd = do +withRunAt hfs hbio salt runParams path rd = do bracket - (unsafeCreateRunAt hfs hbio runParams path rd) + (unsafeCreateRunAt hfs hbio salt runParams path rd) releaseRef {-# INLINABLE withRuns #-} @@ -105,17 +108,18 @@ withRunAt hfs hbio runParams path rd = do withRuns :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> [SerialisedRunData] -> ([Ref (Run IO h)] -> IO a) -> IO a -withRuns hfs hbio runParams path counter = go +withRuns hfs hbio salt runParams path counter = go where go [] act = act [] go (rd:rds) act = - withRun hfs hbio runParams path counter rd $ \r -> + withRun hfs hbio salt runParams path counter rd $ \r -> go rds $ \rs -> act (r:rs) @@ -124,15 +128,16 @@ withRuns hfs hbio runParams path counter = go unsafeCreateRun :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedRunData -> IO (Ref (Run IO h)) -unsafeCreateRun fs hbio runParams path counter rd = do +unsafeCreateRun fs hbio salt runParams path counter rd = do n <- incrUniqCounter counter let fsPaths = RunFsPaths path (uniqueToRunNumber n) - unsafeCreateRunAt fs hbio runParams fsPaths rd + unsafeCreateRunAt fs hbio salt 
runParams fsPaths rd -- | Flush serialised run data to disk as if it were a write buffer. -- @@ -143,18 +148,19 @@ unsafeCreateRun fs hbio runParams path counter rd = do unsafeCreateRunAt :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> RunFsPaths -> SerialisedRunData -> IO (Ref (Run IO h)) -unsafeCreateRunAt fs hbio runParams fsPaths (RunData m) = do +unsafeCreateRunAt fs hbio salt runParams fsPaths (RunData m) = do -- the WBB file path doesn't have to be at a specific place relative to -- the run we want to create, but fsPaths should already point to a unique -- location, so we just append something to not conflict with that. let blobpath = FS.addExtension (runBlobPath fsPaths) ".wb" bracket (WBB.new fs blobpath) releaseRef $ \wbblobs -> do wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob fs wbblobs)) m - Run.fromWriteBuffer fs hbio runParams fsPaths wb wbblobs + Run.fromWriteBuffer fs hbio salt runParams fsPaths wb wbblobs -- | Create a 'RunFsPaths' using an empty 'FsPath'. The empty path corresponds -- to the "root" or "mount point" of a 'HasFS' instance. 
diff --git a/src/Database/LSMTree.hs b/src/Database/LSMTree.hs index 04cd1c412..8dcc6dbbc 100644 --- a/src/Database/LSMTree.hs +++ b/src/Database/LSMTree.hs @@ -19,10 +19,14 @@ module Database.LSMTree ( -- * Sessions Session, - withSession, - withSessionIO, + withOpenSession, + withOpenSessionIO, + withNewSession, + withRestoreSession, openSession, openSessionIO, + newSession, + restoreSession, closeSession, -- * Tables @@ -101,6 +105,9 @@ module Database.LSMTree ( toSnapshotName, SnapshotLabel (..), + -- * Session Configuration #session_configuration# + Salt, + -- * Table Configuration #table_configuration# TableConfig ( confMergePolicy, @@ -197,8 +204,8 @@ import Control.DeepSeq (NFData (..)) import Control.Exception.Base (assert) import Control.Monad.Class.MonadAsync (MonadAsync) import Control.Monad.Class.MonadST (MonadST) -import Control.Monad.Class.MonadThrow (MonadCatch (..), MonadMask, - MonadThrow (..)) +import Control.Monad.Class.MonadThrow (MonadCatch (..), MonadEvaluate, + MonadMask, MonadThrow (..)) import Control.Monad.Primitive (PrimMonad) import Control.Tracer (Tracer) import Data.Bifunctor (Bifunctor (..)) @@ -242,7 +249,7 @@ import Database.LSMTree.Internal.Serialise.Class (SerialiseKey (..), import Database.LSMTree.Internal.Snapshot (SnapshotLabel (..)) import Database.LSMTree.Internal.Types (BlobRef (..), Cursor (..), ResolveAsFirst (..), ResolveValue (..), - ResolveViaSemigroup (..), Session (..), Table (..), + ResolveViaSemigroup (..), Salt, Session (..), Table (..), resolveAssociativity, resolveCompatibility, resolveValidOutput) import Database.LSMTree.Internal.Unsafe (BlobRefInvalidError (..), @@ -263,6 +270,7 @@ import System.FS.API (FsPath, HasFS (..), MountPoint (..), mkFsPath) import System.FS.BlockIO.API (HasBlockIO (..), defaultIOCtxParams) import System.FS.BlockIO.IO (ioHasBlockIO, withIOHasBlockIO) import System.FS.IO (HandleIO, ioHasFS) +import System.Random (randomIO) 
-------------------------------------------------------------------------------- -- Usage Notes @@ -286,6 +294,7 @@ type IOLike m = , MonadMask m , PrimMonad m , MonadST m + , MonadEvaluate m ) -------------------------------------------------------------------------------- @@ -380,7 +389,7 @@ runExample action = do let createSessionDir = Dir.createDirectoryIfMissing True sessionDir let removeSessionDir = Dir.removeDirectoryRecursive sessionDir bracket_ createSessionDir removeSessionDir $ do - LSMT.withSessionIO mempty sessionDir $ \session -> do + LSMT.withOpenSessionIO mempty sessionDir $ \session -> do LSMT.withTable session $ \table -> action session table :} @@ -395,8 +404,8 @@ runExample action = do {- | Run an action with access to a session opened from a session directory. -If the session directory is empty, a new session is created. -Otherwise, the session directory is opened as an existing session. +If the session directory is empty, a new session is created using the given salt. +Otherwise, the session directory is restored as an existing session ignoring the given salt. If there are no open tables or cursors when the session terminates, then the disk I\/O complexity of this operation is \(O(1)\). Otherwise, 'closeTable' is called for each open table and 'closeCursor' is called for each open cursor. @@ -422,45 +431,153 @@ Throws the following exceptions: If the session directory is malformed. -} {-# SPECIALISE - withSession :: + withOpenSession :: Tracer IO LSMTreeTrace -> HasFS IO HandleIO -> HasBlockIO IO HandleIO -> + Salt -> FsPath -> (Session IO -> IO a) -> IO a #-} -withSession :: +withOpenSession :: forall m h a. (IOLike m, Typeable h) => Tracer m LSMTreeTrace -> HasFS m h -> HasBlockIO m h -> + -- | The session salt. + Salt -> -- | The session directory. FsPath -> (Session m -> m a) -> m a -withSession tracer hasFS hasBlockIO sessionDir action = do - Internal.withSession tracer hasFS hasBlockIO sessionDir (action . 
Session) +withOpenSession tracer hasFS hasBlockIO sessionSalt sessionDir action = do + Internal.withOpenSession tracer hasFS hasBlockIO sessionSalt sessionDir (action . Session) --- | Variant of 'withSession' that is specialised to 'IO' using the real filesystem. -withSessionIO :: +-- | Variant of 'withOpenSession' that is specialised to 'IO' using the real filesystem. +withOpenSessionIO :: Tracer IO LSMTreeTrace -> FilePath -> (Session IO -> IO a) -> IO a -withSessionIO tracer sessionDir action = do +withOpenSessionIO tracer sessionDir action = do let mountPoint = MountPoint sessionDir let sessionDirFsPath = mkFsPath [] let hasFS = ioHasFS mountPoint + sessionSalt <- randomIO withIOHasBlockIO hasFS defaultIOCtxParams $ \hasBlockIO -> - withSession tracer hasFS hasBlockIO sessionDirFsPath action + withOpenSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath action + +{- | +Run an action with access to a new session. + +The session directory must be empty. + +If there are no open tables or cursors when the session terminates, then the disk I\/O complexity of this operation is \(O(1)\). +Otherwise, 'closeTable' is called for each open table and 'closeCursor' is called for each open cursor. +Consequently, the worst-case disk I\/O complexity of this operation depends on the merge policy of the open tables in the session. +The following assumes all tables in the session have the same merge policy: + +['LazyLevelling']: + \(O(o \: T \log_T \frac{n}{B})\). + +The variable \(o\) refers to the number of open tables and cursors in the session. + +This function is exception-safe for both synchronous and asynchronous exceptions. + +It is recommended to use this function instead of 'newSession' and 'closeSession'. + +Throws the following exceptions: + +['SessionDirDoesNotExistError']: + If the session directory does not exist. +['SessionDirLockedError']: + If the session directory is locked by another process. 
+['SessionDirCorruptedError']: + If the session directory is malformed. +-} +{-# SPECIALISE + withNewSession :: + Tracer IO LSMTreeTrace -> + HasFS IO HandleIO -> + HasBlockIO IO HandleIO -> + Salt -> + FsPath -> + (Session IO -> IO a) -> + IO a + #-} +withNewSession :: + forall m h a. + (IOLike m, Typeable h) => + Tracer m LSMTreeTrace -> + HasFS m h -> + HasBlockIO m h -> + -- | The session salt. + Salt -> + -- | The session directory. + FsPath -> + (Session m -> m a) -> + m a +withNewSession tracer hasFS hasBlockIO sessionSalt sessionDir action = do + Internal.withNewSession tracer hasFS hasBlockIO sessionSalt sessionDir (action . Session) + +{- | +Run an action with access to a restored session. + +The session directory must be non-empty: a session must have previously been +opened and closed in this directory. + +If there are no open tables or cursors when the session terminates, then the disk I\/O complexity of this operation is \(O(1)\). +Otherwise, 'closeTable' is called for each open table and 'closeCursor' is called for each open cursor. +Consequently, the worst-case disk I\/O complexity of this operation depends on the merge policy of the open tables in the session. +The following assumes all tables in the session have the same merge policy: + +['LazyLevelling']: + \(O(o \: T \log_T \frac{n}{B})\). + +The variable \(o\) refers to the number of open tables and cursors in the session. + +This function is exception-safe for both synchronous and asynchronous exceptions. + +It is recommended to use this function instead of 'restoreSession' and 'closeSession'. + +Throws the following exceptions: + +['SessionDirDoesNotExistError']: + If the session directory does not exist. +['SessionDirLockedError']: + If the session directory is locked by another process. +['SessionDirCorruptedError']: + If the session directory is malformed. 
+-} +{-# SPECIALISE + withRestoreSession :: + Tracer IO LSMTreeTrace -> + HasFS IO HandleIO -> + HasBlockIO IO HandleIO -> + FsPath -> + (Session IO -> IO a) -> + IO a + #-} +withRestoreSession :: + forall m h a. + (IOLike m, Typeable h) => + Tracer m LSMTreeTrace -> + HasFS m h -> + HasBlockIO m h -> + -- | The session directory. + FsPath -> + (Session m -> m a) -> + m a +withRestoreSession tracer hasFS hasBlockIO sessionDir action = do + Internal.withRestoreSession tracer hasFS hasBlockIO sessionDir (action . Session) {- | Open a session from a session directory. -If the session directory is empty, a new session is created. -Otherwise, the session directory is opened as an existing session. +If the session directory is empty, a new session is created using the given salt. +Otherwise, the session directory is restored as an existing session ignoring the given salt. The worst-case disk I\/O complexity of this operation is \(O(1)\). @@ -480,7 +597,7 @@ Throws the following exceptions: Tracer IO LSMTreeTrace -> HasFS IO HandleIO -> HasBlockIO IO HandleIO -> - -- \| The session directory. + Salt -> FsPath -> IO (Session IO) #-} @@ -490,26 +607,109 @@ openSession :: Tracer m LSMTreeTrace -> HasFS m h -> HasBlockIO m h -> + -- | The session salt. + Salt -> -- | The session directory. FsPath -> m (Session m) -openSession tracer hasFS hasBlockIO sessionDir = - Session <$> Internal.openSession tracer hasFS hasBlockIO sessionDir +openSession tracer hasFS hasBlockIO sessionSalt sessionDir = + Session <$> Internal.openSession tracer hasFS hasBlockIO sessionSalt sessionDir -- | Variant of 'openSession' that is specialised to 'IO' using the real filesystem. openSessionIO :: Tracer IO LSMTreeTrace -> - -- \| The session directory. + -- | The session directory. 
FilePath -> IO (Session IO) openSessionIO tracer sessionDir = do let mountPoint = MountPoint sessionDir let sessionDirFsPath = mkFsPath [] let hasFS = ioHasFS mountPoint + sessionSalt <- randomIO let acquireHasBlockIO = ioHasBlockIO hasFS defaultIOCtxParams let releaseHasBlockIO HasBlockIO{close} = close bracketOnError acquireHasBlockIO releaseHasBlockIO $ \hasBlockIO -> - openSession tracer hasFS hasBlockIO sessionDirFsPath + openSession tracer hasFS hasBlockIO sessionSalt sessionDirFsPath + +{- | +Create a new session. + +The session directory must be empty. + +The worst-case disk I\/O complexity of this operation is \(O(1)\). + +__Warning:__ Sessions hold open resources and must be closed using 'closeSession'. + +Throws the following exceptions: + +['SessionDirDoesNotExistError']: + If the session directory does not exist. +['SessionDirLockedError']: + If the session directory is locked by another process. +['SessionDirCorruptedError']: + If the session directory is malformed. +-} +{-# SPECIALISE + newSession :: + Tracer IO LSMTreeTrace -> + HasFS IO HandleIO -> + HasBlockIO IO HandleIO -> + Salt -> + FsPath -> + IO (Session IO) + #-} +newSession :: + forall m h. + (IOLike m, Typeable h) => + Tracer m LSMTreeTrace -> + HasFS m h -> + HasBlockIO m h -> + -- | The session salt. + Salt -> + -- | The session directory. + FsPath -> + m (Session m) +newSession tracer hasFS hasBlockIO sessionSalt sessionDir = + Session <$> Internal.newSession tracer hasFS hasBlockIO sessionSalt sessionDir + +{- | +Restore a session from a session directory. + +The session directory must be non-empty: a session must have previously been +opened (and closed) in this directory. + +The worst-case disk I\/O complexity of this operation is \(O(1)\). + +__Warning:__ Sessions hold open resources and must be closed using 'closeSession'. + +Throws the following exceptions: + +['SessionDirDoesNotExistError']: + If the session directory does not exist. 
+['SessionDirLockedError']: + If the session directory is locked by another process. +['SessionDirCorruptedError']: + If the session directory is malformed. +-} +{-# SPECIALISE + restoreSession :: + Tracer IO LSMTreeTrace -> + HasFS IO HandleIO -> + HasBlockIO IO HandleIO -> + FsPath -> + IO (Session IO) + #-} +restoreSession :: + forall m h. + (IOLike m, Typeable h) => + Tracer m LSMTreeTrace -> + HasFS m h -> + HasBlockIO m h -> + -- | The session directory. + FsPath -> + m (Session m) +restoreSession tracer hasFS hasBlockIO sessionDir = + Session <$> Internal.restoreSession tracer hasFS hasBlockIO sessionDir {- | Close a session. diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index 8721a8228..f9e6e709d 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -8,6 +8,7 @@ module Database.LSMTree.Internal.BloomFilter ( -- * Types Bloom.Bloom, Bloom.MBloom, + Bloom.Salt, -- * Bulk query bloomQueries, @@ -28,6 +29,7 @@ import qualified Data.Primitive as P import qualified Data.Vector as V import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64, byteSwap32) +import Text.Printf (printf) import Control.Exception (assert) import Control.Monad (void, when) @@ -108,17 +110,18 @@ type ResIx = Int -- Result index -- number of keys but this is grown if needed (using a doubling strategy). 
-- bloomQueries :: - V.Vector (Bloom SerialisedKey) + Bloom.Salt + -> V.Vector (Bloom SerialisedKey) -> V.Vector SerialisedKey -> VP.Vector RunIxKeyIx -bloomQueries !filters !keys +bloomQueries !_salt !filters !keys | V.null filters || V.null keys = VP.empty -bloomQueries !filters !keys = +bloomQueries !salt !filters !keys = runST (bloomQueries_loop1 filters' keyhashes) where filters' = toFiltersArray filters keyhashes = P.generatePrimArray (V.length keys) $ \i -> - Bloom.hashes (V.unsafeIndex keys i) + Bloom.hashesWithSalt salt (V.unsafeIndex keys i) -- loop over all keys bloomQueries_loop1 :: @@ -220,15 +223,16 @@ bloomFilterVersion = 1 + fromIntegral Bloom.formatVersion bloomFilterToLBS :: Bloom a -> LBS.ByteString bloomFilterToLBS bf = - let (size, ba, off, len) = Bloom.serialise bf - in header size <> byteArrayToLBS ba off len + let (size, salt, ba, off, len) = Bloom.serialise bf + in header size salt <> byteArrayToLBS ba off len where - header Bloom.BloomSize { sizeBits, sizeHashes } = - -- creates a single 16 byte chunk - B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $ + header Bloom.BloomSize { sizeBits, sizeHashes } salt = + -- creates a single 24 byte chunk + B.toLazyByteStringWith (B.safeStrategy 24 B.smallChunkSize) mempty $ B.word32Host bloomFilterVersion <> B.word32Host (fromIntegral sizeHashes) <> B.word64Host (fromIntegral sizeBits) + <> B.word64Host salt byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString byteArrayToLBS ba off len = @@ -239,6 +243,7 @@ bloomFilterToLBS bf = {-# SPECIALISE bloomFilterFromFile :: HasFS IO h + -> Bloom.Salt -> Handle h -> IO (Bloom a) #-} -- | Read a 'Bloom' from a file. 
@@ -246,15 +251,17 @@ bloomFilterToLBS bf = bloomFilterFromFile :: (PrimMonad m, MonadCatch m) => HasFS m h + -> Bloom.Salt -- ^ Expected salt -> Handle h -- ^ The open file, in read mode -> m (Bloom a) -bloomFilterFromFile hfs h = do +bloomFilterFromFile hfs expectedSalt h = do header <- rethrowEOFError "Doesn't contain a header" $ - hGetByteArrayExactly hfs h 16 + hGetByteArrayExactly hfs h 24 let !version = P.indexByteArray header 0 :: Word32 !nhashes = P.indexByteArray header 1 :: Word32 !nbits = P.indexByteArray header 1 :: Word64 + !salt = P.indexByteArray header 2 :: Bloom.Salt when (version /= bloomFilterVersion) $ throwFormatError $ if byteSwap32 version == bloomFilterVersion @@ -264,8 +271,12 @@ bloomFilterFromFile hfs h = do when (nbits <= 0) $ throwFormatError "Length is zero" -- limit to 2^48 bits - when (nbits >= 0x1_0000_0000_0000) $ throwFormatError "Too large bloomfilter" - --TODO: get max size from bloomfilter lib + when (nbits >= fromIntegral Bloom.maxSizeBits) $ throwFormatError "Too large bloomfilter" + + when (expectedSalt /= salt) $ throwFormatError $ + printf "Expected salt does not match actual salt: %d /= %d" + expectedSalt + salt -- read the filter data from the file directly into the bloom filter bloom <- @@ -274,6 +285,7 @@ bloomFilterFromFile hfs h = do Bloom.sizeBits = fromIntegral nbits, Bloom.sizeHashes = fromIntegral nhashes } + salt (\buf off len -> rethrowEOFError "bloom filter file too short" $ void $ hGetBufExactly hfs diff --git a/src/Database/LSMTree/Internal/Lookup.hs b/src/Database/LSMTree/Internal/Lookup.hs index c546c02c7..80a2e4bfb 100644 --- a/src/Database/LSMTree/Internal/Lookup.hs +++ b/src/Database/LSMTree/Internal/Lookup.hs @@ -38,6 +38,7 @@ import Control.RefCount import Database.LSMTree.Internal.BlobRef (WeakBlobRef (..)) import Database.LSMTree.Internal.BloomFilter (Bloom, RunIxKeyIx (..), bloomQueries) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry import 
Database.LSMTree.Internal.Index (Index) import qualified Database.LSMTree.Internal.Index as Index (search) @@ -61,13 +62,14 @@ import System.FS.BlockIO.API -- associated with each 'IOOp'. prepLookups :: Arena s + -> Bloom.Salt -> V.Vector (Bloom SerialisedKey) -> V.Vector Index -> V.Vector (Handle h) -> V.Vector SerialisedKey -> ST s (VP.Vector RunIxKeyIx, V.Vector (IOOp s h)) -prepLookups arena blooms indexes kopsFiles ks = do - let !rkixs = bloomQueries blooms ks +prepLookups arena salt blooms indexes kopsFiles ks = do + let !rkixs = bloomQueries salt blooms ks !ioops <- indexSearches arena indexes kopsFiles ks rkixs pure (rkixs, ioops) @@ -110,6 +112,7 @@ type LookupAcc m h = V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h))) HasBlockIO IO h -> ArenaManager RealWorld -> ResolveSerialisedValue + -> Bloom.Salt -> WB.WriteBuffer -> Ref (WBB.WriteBufferBlobs IO h) -> V.Vector (Ref (Run IO h)) @@ -125,6 +128,7 @@ lookupsIOWithWriteBuffer :: => HasBlockIO m h -> ArenaManager (PrimState m) -> ResolveSerialisedValue + -> Bloom.Salt -> WB.WriteBuffer -> Ref (WBB.WriteBufferBlobs m h) -> V.Vector (Ref (Run m h)) -- ^ Runs @rs@ @@ -133,10 +137,10 @@ lookupsIOWithWriteBuffer :: -> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@ -> V.Vector SerialisedKey -> m (LookupAcc m h) -lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks = +lookupsIOWithWriteBuffer !hbio !mgr !resolveV !salt !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks = assert precondition $ withArena mgr $ \arena -> do - (rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks + (rkixs, ioops) <- ST.stToIO $ prepLookups arena salt blooms indexes kopsFiles ks ioress <- submitIO hbio ioops intraPageLookupsWithWriteBuffer resolveV wb wbblobs rs ks rkixs ioops ioress where @@ -152,6 +156,7 @@ lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes HasBlockIO IO h -> ArenaManager RealWorld -> 
ResolveSerialisedValue + -> Bloom.Salt -> V.Vector (Ref (Run IO h)) -> V.Vector (Bloom SerialisedKey) -> V.Vector Index @@ -168,16 +173,17 @@ lookupsIO :: => HasBlockIO m h -> ArenaManager (PrimState m) -> ResolveSerialisedValue + -> Bloom.Salt -> V.Vector (Ref (Run m h)) -- ^ Runs @rs@ -> V.Vector (Bloom SerialisedKey) -- ^ The bloom filters inside @rs@ -> V.Vector Index -- ^ The indexes inside @rs@ -> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@ -> V.Vector SerialisedKey -> m (LookupAcc m h) -lookupsIO !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles !ks = +lookupsIO !hbio !mgr !resolveV !salt !rs !blooms !indexes !kopsFiles !ks = assert precondition $ withArena mgr $ \arena -> do - (rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks + (rkixs, ioops) <- ST.stToIO $ prepLookups arena salt blooms indexes kopsFiles ks ioress <- submitIO hbio ioops intraPageLookupsOn resolveV (V.map (const Nothing) ks) rs ks rkixs ioops ioress where diff --git a/src/Database/LSMTree/Internal/Merge.hs b/src/Database/LSMTree/Internal/Merge.hs index 49bdde025..edf51d491 100644 --- a/src/Database/LSMTree/Internal/Merge.hs +++ b/src/Database/LSMTree/Internal/Merge.hs @@ -34,6 +34,7 @@ import Data.Primitive.MutVar import Data.Traversable (for) import qualified Data.Vector as V import Database.LSMTree.Internal.BlobRef (RawBlobRef) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.Readers (Readers) import qualified Database.LSMTree.Internal.Readers as Readers @@ -153,6 +154,7 @@ instance IsMergeType TreeMergeType where IsMergeType t => HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> t -> ResolveSerialisedValue @@ -165,13 +167,14 @@ new :: (IsMergeType t, MonadMask m, MonadSTM m, MonadST m) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> RunParams -> t -> ResolveSerialisedValue -> Run.RunFsPaths -> V.Vector (Ref (Run m h)) -> m 
(Maybe (Merge t m h)) -new hfs hbio runParams mergeType mergeResolve targetPaths runs = do +new hfs hbio salt runParams mergeType mergeResolve targetPaths runs = do let sources = Readers.FromRun <$> V.toList runs mreaders <- Readers.new mergeResolve Readers.NoOffsetKey sources -- TODO: Exception safety! If Readers.new fails after already creating some @@ -180,7 +183,7 @@ new hfs hbio runParams mergeType mergeResolve targetPaths runs = do for mreaders $ \mergeReaders -> do -- calculate upper bounds based on input runs let numEntries = V.foldMap' Run.size runs - mergeBuilder <- Builder.new hfs hbio runParams targetPaths numEntries + mergeBuilder <- Builder.new hfs hbio salt runParams targetPaths numEntries mergeState <- newMutVar $! Merging pure Merge { mergeIsLastLevel = isLastLevel mergeType diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index 68f5a6637..585809305 100644 --- a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -57,6 +57,7 @@ import Data.Foldable (fold, traverse_) import qualified Data.Vector as V import Database.LSMTree.Internal.Assertions (assert) import Database.LSMTree.Internal.BloomFilter (Bloom) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Entry (Entry, NumEntries (..), unNumEntries) @@ -445,6 +446,7 @@ releaseUnionCache reg (UnionCache mt) = -> HasFS IO h -> HasBlockIO IO h -> SessionRoot + -> Bloom.Salt -> UniqCounter IO -> V.Vector (SerialisedKey, Entry SerialisedValue SerialisedBlob) -> ActionRegistry IO @@ -483,12 +485,13 @@ updatesWithInterleavedFlushes :: -> HasFS m h -> HasBlockIO m h -> SessionRoot + -> Bloom.Salt -> UniqCounter m -> V.Vector (SerialisedKey, Entry SerialisedValue SerialisedBlob) -> ActionRegistry m -> TableContent m h -> m (TableContent m h) -updatesWithInterleavedFlushes tr conf resolve hfs hbio root uc es reg tc 
= do +updatesWithInterleavedFlushes tr conf resolve hfs hbio root salt uc es reg tc = do let wb = tableWriteBuffer tc wbblobs = tableWriteBufferBlobs tc (wb', es') <- addWriteBufferEntries hfs resolve wbblobs maxn wb es @@ -502,14 +505,14 @@ updatesWithInterleavedFlushes tr conf resolve hfs hbio root uc es reg tc = do pure $! tc' -- If the write buffer did reach capacity, then we flush. else do - tc'' <- flushWriteBuffer tr conf resolve hfs hbio root uc reg tc' + tc'' <- flushWriteBuffer tr conf resolve hfs hbio root salt uc reg tc' -- In the fortunate case where we have already performed all the updates, -- return, if V.null es' then pure $! tc'' -- otherwise, keep going else - updatesWithInterleavedFlushes tr conf resolve hfs hbio root uc es' reg tc'' + updatesWithInterleavedFlushes tr conf resolve hfs hbio root salt uc es' reg tc'' where AllocNumEntries (NumEntries -> maxn) = confWriteBufferAlloc conf @@ -563,6 +566,7 @@ addWriteBufferEntries hfs f wbblobs maxn = -> HasFS IO h -> HasBlockIO IO h -> SessionRoot + -> Bloom.Salt -> UniqCounter IO -> ActionRegistry IO -> TableContent IO h @@ -579,11 +583,12 @@ flushWriteBuffer :: -> HasFS m h -> HasBlockIO m h -> SessionRoot + -> Bloom.Salt -> UniqCounter m -> ActionRegistry m -> TableContent m h -> m (TableContent m h) -flushWriteBuffer tr conf resolve hfs hbio root uc reg tc +flushWriteBuffer tr conf resolve hfs hbio root salt uc reg tc | WB.null (tableWriteBuffer tc) = pure tc | otherwise = do !uniq <- incrUniqCounter uc @@ -597,7 +602,7 @@ flushWriteBuffer tr conf resolve hfs hbio root uc reg tc TraceFlushWriteBuffer size (runNumber runPaths) runParams r <- withRollback reg (Run.fromWriteBuffer - hfs hbio + hfs hbio salt runParams runPaths (tableWriteBuffer tc) (tableWriteBufferBlobs tc)) @@ -605,7 +610,7 @@ flushWriteBuffer tr conf resolve hfs hbio root uc reg tc delayedCommit reg (releaseRef (tableWriteBufferBlobs tc)) wbblobs' <- withRollback reg (WBB.new hfs (Paths.tableBlobPath root uniq)) releaseRef - 
levels' <- addRunToLevels tr conf resolve hfs hbio root uc r reg + levels' <- addRunToLevels tr conf resolve hfs hbio root salt uc r reg (tableLevels tc) (tableUnionLevel tc) tableCache' <- rebuildCache reg (tableCache tc) levels' @@ -625,6 +630,7 @@ flushWriteBuffer tr conf resolve hfs hbio root uc reg tc -> HasFS IO h -> HasBlockIO IO h -> SessionRoot + -> Bloom.Salt -> UniqCounter IO -> Ref (Run IO h) -> ActionRegistry IO @@ -644,13 +650,14 @@ addRunToLevels :: -> HasFS m h -> HasBlockIO m h -> SessionRoot + -> Bloom.Salt -> UniqCounter m -> Ref (Run m h) -> ActionRegistry m -> Levels m h -> UnionLevel m h -> m (Levels m h) -addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels ul = do +addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root salt uc r0 reg levels ul = do go (LevelNo 1) (V.singleton r0) levels where -- NOTE: @go@ is based on the @increment@ function from the @@ -732,7 +739,7 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels ul newMerge mergePolicy mergeType ln rs = do ir <- withRollback reg (newIncomingRunAtLevel tr hfs hbio - root uc conf resolve + root salt uc conf resolve mergePolicy mergeType ln rs) releaseIncomingRun -- The runs will end up inside the incoming/merging run, with fresh @@ -756,6 +763,7 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels ul -> HasFS IO h -> HasBlockIO IO h -> SessionRoot + -> Bloom.Salt -> UniqCounter IO -> TableConfig -> ResolveSerialisedValue @@ -770,6 +778,7 @@ newIncomingRunAtLevel :: -> HasFS m h -> HasBlockIO m h -> SessionRoot + -> Bloom.Salt -> UniqCounter m -> TableConfig -> ResolveSerialisedValue @@ -779,7 +788,7 @@ newIncomingRunAtLevel :: -> V.Vector (Ref (Run m h)) -> m (IncomingRun m h) newIncomingRunAtLevel tr hfs hbio - root uc conf resolve + root salt uc conf resolve mergePolicy mergeType ln rs | Just (r, rest) <- V.uncons rs, V.null rest = do @@ -799,7 +808,7 @@ newIncomingRunAtLevel tr hfs hbio runParams 
mergePolicy mergeType bracket - (MR.new hfs hbio resolve runParams mergeType runPaths rs) + (MR.new hfs hbio resolve salt runParams mergeType runPaths rs) releaseRef $ \mr -> assert (MR.totalMergeDebt mr <= maxMergeDebt conf mergePolicy ln) $ let nominalDebt = nominalDebtForLevel conf ln in diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index 10aa7f41f..71000fba7 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -65,6 +65,7 @@ import Data.Primitive.MutVar import Data.Primitive.PrimVar import qualified Data.Vector as V import Database.LSMTree.Internal.Assertions (assert) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry (NumEntries (..)) import Database.LSMTree.Internal.Merge (IsMergeType (..), LevelMergeType (..), Merge, RunParams (..), @@ -132,6 +133,7 @@ instance NFData MergeKnownCompleted where => HasFS IO h -> HasBlockIO IO h -> ResolveSerialisedValue + -> Bloom.Salt -> RunParams -> t -> RunFsPaths @@ -149,12 +151,13 @@ new :: => HasFS m h -> HasBlockIO m h -> ResolveSerialisedValue + -> Bloom.Salt -> RunParams -> t -> RunFsPaths -> V.Vector (Ref (Run m h)) -> m (Ref (MergingRun t m h)) -new hfs hbio resolve runParams ty runPaths inputRuns = +new hfs hbio resolve salt runParams ty runPaths inputRuns = assert (V.length inputRuns > 0) $ do -- there can be empty runs, which we don't want to include in the merge -- TODO: making runs non-empty would involve introducing a constructor @@ -172,7 +175,7 @@ new hfs hbio resolve runParams ty runPaths inputRuns = -- as we do in the prototype. but that would mean that the result -- doesn't follow the supplied @runParams@. 
-- TODO: decide whether that optimisation is okay - r <- Run.newEmpty hfs hbio runParams runPaths + r <- Run.newEmpty hfs hbio salt runParams runPaths unsafeNew (MergeDebt 0) (SpentCredits 0) @@ -181,7 +184,7 @@ new hfs hbio resolve runParams ty runPaths inputRuns = _ -> do rs <- V.mapM dupRun nonEmptyRuns merge <- fromMaybe (error "newMerge: merges can not be empty") - <$> Merge.new hfs hbio runParams ty resolve runPaths rs + <$> Merge.new hfs hbio salt runParams ty resolve runPaths rs unsafeNew (numEntriesToMergeDebt (V.foldMap' Run.size rs)) (SpentCredits 0) diff --git a/src/Database/LSMTree/Internal/MergingTree.hs b/src/Database/LSMTree/Internal/MergingTree.hs index 83630ae17..133a79189 100644 --- a/src/Database/LSMTree/Internal/MergingTree.hs +++ b/src/Database/LSMTree/Internal/MergingTree.hs @@ -36,6 +36,7 @@ import Data.List (foldl') #endif import Data.Vector (Vector) import qualified Data.Vector as V +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry (NumEntries (..)) import Database.LSMTree.Internal.MergingRun (MergeDebt (..), MergingRun) @@ -368,6 +369,7 @@ debtOfNestedMerge debts = HasFS IO h -> HasBlockIO IO h -> ResolveSerialisedValue + -> Bloom.Salt -> Run.RunParams -> MR.CreditThreshold -> SessionRoot @@ -381,6 +383,7 @@ supplyCredits :: => HasFS m h -> HasBlockIO m h -> ResolveSerialisedValue + -> Bloom.Salt -> Run.RunParams -> MR.CreditThreshold -> SessionRoot @@ -388,7 +391,7 @@ supplyCredits :: -> Ref (MergingTree m h) -> MR.MergeCredits -> m MR.MergeCredits -supplyCredits hfs hbio resolve runParams threshold root uc = \mt0 c0 -> do +supplyCredits hfs hbio resolve salt runParams threshold root uc = \mt0 c0 -> do if c0 <= 0 then pure 0 else supplyTree mt0 c0 @@ -437,7 +440,7 @@ supplyCredits hfs hbio resolve runParams threshold root uc = \mt0 c0 -> do withRollback reg -- TODO: the builder's handles aren't cleaned up if we fail -- before fromBuilder closes them - (Run.newEmpty hfs hbio runParams 
runPaths) + (Run.newEmpty hfs hbio salt runParams runPaths) releaseRef pure (CompletedTreeMerge run, credits) @@ -501,7 +504,7 @@ supplyCredits hfs hbio resolve runParams threshold root uc = \mt0 c0 -> do runPaths <- mkFreshRunPaths mr <- withRollback reg - (MR.new hfs hbio resolve runParams mergeType runPaths rs) + (MR.new hfs hbio resolve salt runParams mergeType runPaths rs) releaseRef -- no need for the runs anymore, 'MR.new' made duplicates traverse_ (\r -> delayedCommit reg (releaseRef r)) rs diff --git a/src/Database/LSMTree/Internal/Paths.hs b/src/Database/LSMTree/Internal/Paths.hs index a93bb07cb..87c8439c9 100644 --- a/src/Database/LSMTree/Internal/Paths.hs +++ b/src/Database/LSMTree/Internal/Paths.hs @@ -4,6 +4,8 @@ module Database.LSMTree.Internal.Paths ( SessionRoot (..) , lockFile + , lockFileName + , metadataFile , ActiveDir (..) , activeDir , runPath @@ -75,7 +77,13 @@ newtype SessionRoot = SessionRoot { getSessionRoot :: FsPath } deriving stock Eq lockFile :: SessionRoot -> FsPath -lockFile (SessionRoot dir) = dir mkFsPath ["lock"] +lockFile (SessionRoot dir) = dir mkFsPath [lockFileName] + +lockFileName :: String +lockFileName = "lock" + +metadataFile :: SessionRoot -> FsPath +metadataFile (SessionRoot dir) = dir mkFsPath ["metadata"] newtype ActiveDir = ActiveDir { getActiveDir :: FsPath } diff --git a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index 875e87e25..9dd77cba7 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -44,6 +44,7 @@ import Database.LSMTree.Internal.BlobRef hiding (mkRawBlobRef, import qualified Database.LSMTree.Internal.BlobRef as BlobRef import Database.LSMTree.Internal.BloomFilter (Bloom, bloomFilterFromFile) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry (NumEntries (..)) import Database.LSMTree.Internal.Index (Index, IndexType (..)) 
@@ -185,6 +186,7 @@ setRunDataCaching hbio runKOpsFile NoCacheRunData = do {-# SPECIALISE newEmpty :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> RunFsPaths -> IO (Ref (Run IO h)) #-} @@ -194,11 +196,12 @@ newEmpty :: (MonadST m, MonadSTM m, MonadMask m) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> RunParams -> RunFsPaths -> m (Ref (Run m h)) -newEmpty hfs hbio runParams runPaths = do - builder <- Builder.new hfs hbio runParams runPaths (NumEntries 0) +newEmpty hfs hbio salt runParams runPaths = do + builder <- Builder.new hfs hbio salt runParams runPaths (NumEntries 0) fromBuilder builder {-# SPECIALISE fromBuilder :: @@ -224,6 +227,7 @@ fromBuilder builder = do {-# SPECIALISE fromWriteBuffer :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> RunFsPaths -> WriteBuffer @@ -243,13 +247,14 @@ fromWriteBuffer :: (MonadST m, MonadSTM m, MonadMask m) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> RunParams -> RunFsPaths -> WriteBuffer -> Ref (WriteBufferBlobs m h) -> m (Ref (Run m h)) -fromWriteBuffer fs hbio params fsPaths buffer blobs = do - builder <- Builder.new fs hbio params fsPaths (WB.numEntries buffer) +fromWriteBuffer fs hbio salt params fsPaths buffer blobs = do + builder <- Builder.new fs hbio salt params fsPaths (WB.numEntries buffer) for_ (WB.toList buffer) $ \(k, e) -> Builder.addKeyOp builder k (fmap (WBB.mkRawBlobRef blobs) e) --TODO: the fmap entry here reallocates even when there are no blobs @@ -264,6 +269,7 @@ fromWriteBuffer fs hbio params fsPaths buffer blobs = do -> HasBlockIO IO h -> RunDataCaching -> IndexType + -> Bloom.Salt -> RunFsPaths -> IO (Ref (Run IO h)) #-} -- | Load a previously written run from disk, checking each file's checksum @@ -289,10 +295,11 @@ openFromDisk :: -> HasBlockIO m h -> RunDataCaching -> IndexType + -> Bloom.Salt -- ^ Expected salt -> RunFsPaths -> m (Ref (Run m h)) -- TODO: make exception safe -openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do +openFromDisk 
fs hbio runRunDataCaching indexType expectedSalt runRunFsPaths = do expectedChecksums <- CRC.expectValidFile fs (runChecksumsPath runRunFsPaths) CRC.FormatChecksumsFile . fromChecksumsFile @@ -307,7 +314,7 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do let filterPath = forRunFilterRaw paths checkCRC CacheRunData (forRunFilterRaw expectedChecksums) filterPath runFilter <- FS.withFile fs filterPath FS.ReadMode $ - bloomFilterFromFile fs + bloomFilterFromFile fs expectedSalt (runNumEntries, runIndex) <- CRC.expectValidFile fs (forRunIndexRaw paths) CRC.FormatIndexFile diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index d1d8aed06..977510348 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -36,11 +36,11 @@ module Database.LSMTree.Internal.RunAcc ( import Control.DeepSeq (NFData (..)) import Control.Exception (assert) import Control.Monad.ST.Strict -import Data.BloomFilter.Blocked (Bloom, MBloom) import qualified Data.BloomFilter.Blocked as Bloom import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar, readPrimVar) import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) +import Database.LSMTree.Internal.BloomFilter (Bloom, MBloom) import Database.LSMTree.Internal.Chunk (Chunk) import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) import Database.LSMTree.Internal.Index (Index, IndexAcc, IndexType) @@ -80,10 +80,11 @@ data RunAcc s = RunAcc { new :: NumEntries -> RunBloomFilterAlloc + -> Bloom.Salt -> IndexType -> ST s (RunAcc s) -new nentries alloc indexType = do - mbloom <- newMBloom nentries alloc +new nentries alloc salt indexType = do + mbloom <- newMBloom nentries alloc salt mindex <- Index.newWithDefaults indexType mpageacc <- PageAcc.newPageAcc entryCount <- newPrimVar 0 @@ -343,9 +344,9 @@ instance NFData RunBloomFilterAlloc where rnf (RunAllocFixed a) = rnf a rnf (RunAllocRequestFPR a) = rnf a -newMBloom :: 
NumEntries -> RunBloomFilterAlloc -> ST s (MBloom s a) -newMBloom (NumEntries nentries) alloc = - Bloom.new (Bloom.sizeForPolicy (policy alloc) nentries) +newMBloom :: NumEntries -> RunBloomFilterAlloc -> Bloom.Salt -> ST s (MBloom s a) +newMBloom (NumEntries nentries) alloc salt = + Bloom.new (Bloom.sizeForPolicy (policy alloc) nentries) salt where --TODO: it'd be possible to turn the RunBloomFilterAlloc into a BloomPolicy -- without the NumEntries, and cache the policy, avoiding recalculating the diff --git a/src/Database/LSMTree/Internal/RunBuilder.hs b/src/Database/LSMTree/Internal/RunBuilder.hs index 4f275373b..218187bc3 100644 --- a/src/Database/LSMTree/Internal/RunBuilder.hs +++ b/src/Database/LSMTree/Internal/RunBuilder.hs @@ -27,6 +27,7 @@ import Data.Primitive.PrimVar import Data.Word (Word64) import Database.LSMTree.Internal.BlobRef (RawBlobRef) import Database.LSMTree.Internal.BloomFilter (Bloom) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.ChecksumHandle import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry @@ -96,6 +97,7 @@ instance NFData RunDataCaching where {-# SPECIALISE new :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> RunParams -> RunFsPaths -> NumEntries @@ -107,13 +109,14 @@ new :: (MonadST m, MonadSTM m) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> RunParams -> RunFsPaths -> NumEntries -- ^ an upper bound of the number of entries to be added -> m (RunBuilder m h) -new hfs hbio runBuilderParams@RunParams{..} runBuilderFsPaths numEntries = do +new hfs hbio salt runBuilderParams@RunParams{..} runBuilderFsPaths numEntries = do runBuilderAcc <- ST.stToIO $ - RunAcc.new numEntries runParamAlloc runParamIndex + RunAcc.new numEntries runParamAlloc salt runParamIndex runBuilderBlobOffset <- newPrimVar 0 runBuilderHandles <- traverse (makeHandle hfs) (pathsForRunFiles runBuilderFsPaths) diff --git a/src/Database/LSMTree/Internal/Snapshot.hs 
b/src/Database/LSMTree/Internal/Snapshot.hs index ba2a6fb52..a5681c9b9 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -48,6 +48,7 @@ import Data.Foldable (sequenceA_, traverse_) import Data.String (IsString) import Data.Text (Text) import qualified Data.Vector as V +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.CRC32C (checkCRC) import qualified Database.LSMTree.Internal.CRC32C as CRC @@ -231,6 +232,7 @@ instance NFData r => NFData (SnapPreExistingRun r) where {-# SPECIALISE fromSnapMergingTree :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> UniqCounter IO -> ResolveSerialisedValue -> ActiveDir @@ -245,13 +247,14 @@ fromSnapMergingTree :: forall m h. (MonadMask m, MonadMVar m, MonadSTM m, MonadST m) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> UniqCounter m -> ResolveSerialisedValue -> ActiveDir -> ActionRegistry m -> SnapMergingTree (Ref (Run m h)) -> m (Ref (MT.MergingTree m h)) -fromSnapMergingTree hfs hbio uc resolve dir = +fromSnapMergingTree hfs hbio salt uc resolve dir = go where -- Reference strategy: @@ -291,7 +294,7 @@ fromSnapMergingTree hfs hbio uc resolve dir = go reg (SnapMergingTree (SnapOngoingTreeMerge smrs)) = do mr <- withRollback reg - (fromSnapMergingRun hfs hbio uc resolve dir smrs) + (fromSnapMergingRun hfs hbio salt uc resolve dir smrs) releaseRef mt <- withRollback reg (MT.newOngoingMerge mr) @@ -309,7 +312,7 @@ fromSnapMergingTree hfs hbio uc resolve dir = fromSnapPreExistingRun reg (SnapPreExistingMergingRun smrs) = MT.PreExistingMergingRun <$> withRollback reg - (fromSnapMergingRun hfs hbio uc resolve dir smrs) + (fromSnapMergingRun hfs hbio salt uc resolve dir smrs) releaseRef releasePER (MT.PreExistingRun r) = releaseRef r @@ -583,9 +586,10 @@ snapshotRun hfs hbio snapUc reg (NamedSnapshotDir targetDir) run = do -> ActionRegistry IO -> NamedSnapshotDir -> ActiveDir + -> 
Bloom.Salt -> SnapshotRun -> IO (Ref (Run IO h)) #-} --- | @'openRun' _ _ uniqCounter _ sourceDir targetDir snaprun@ takes all run +-- | @'openRun' _ _ uniqCounter _ sourceDir targetDir _ snaprun@ takes all run -- files that are referenced by @snaprun@, and hard links them from @sourceDir@ -- into @targetDir@ with new, unique names (using @uniqCounter@). Each set of -- (hard linked) files that represents a run is opened and verified, returning @@ -600,10 +604,12 @@ openRun :: -> ActionRegistry m -> NamedSnapshotDir -> ActiveDir + -> Bloom.Salt -> SnapshotRun -> m (Ref (Run m h)) openRun hfs hbio uc reg (NamedSnapshotDir sourceDir) (ActiveDir targetDir) + expectedSalt SnapshotRun { snapRunNumber = runNum, snapRunCaching = caching, @@ -615,7 +621,7 @@ openRun hfs hbio uc reg hardLinkRunFiles hfs hbio reg sourcePaths targetPaths withRollback reg - (Run.openFromDisk hfs hbio caching indexType targetPaths) + (Run.openFromDisk hfs hbio caching indexType expectedSalt targetPaths) releaseRef {------------------------------------------------------------------------------- @@ -625,6 +631,7 @@ openRun hfs hbio uc reg {-# SPECIALISE fromSnapLevels :: HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> UniqCounter IO -> TableConfig -> ResolveSerialisedValue @@ -638,6 +645,7 @@ fromSnapLevels :: forall m h. 
(MonadMask m, MonadMVar m, MonadSTM m, MonadST m) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> UniqCounter m -> TableConfig -> ResolveSerialisedValue @@ -645,7 +653,7 @@ fromSnapLevels :: -> ActiveDir -> SnapLevels (Ref (Run m h)) -> m (Levels m h) -fromSnapLevels hfs hbio uc conf resolve reg dir (SnapLevels levels) = +fromSnapLevels hfs hbio salt uc conf resolve reg dir (SnapLevels levels) = V.iforM levels $ \i -> fromSnapLevel (LevelNo (i+1)) where -- TODO: we may wish to trace the merges created during snapshot restore: @@ -671,7 +679,7 @@ fromSnapLevels hfs hbio uc conf resolve reg dir (SnapLevels levels) = fromSnapIncomingRun ln (SnapIncomingMergingRun mergePolicy nominalDebt nominalCredits smrs) = bracket - (fromSnapMergingRun hfs hbio uc resolve dir smrs) + (fromSnapMergingRun hfs hbio salt uc resolve dir smrs) releaseRef $ \mr -> do ir <- newIncomingMergingRun mergePolicy nominalDebt mr @@ -685,6 +693,7 @@ fromSnapLevels hfs hbio uc conf resolve reg dir (SnapLevels levels) = MR.IsMergeType t => HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> UniqCounter IO -> ResolveSerialisedValue -> ActiveDir @@ -694,20 +703,21 @@ fromSnapMergingRun :: (MonadMask m, MonadMVar m, MonadSTM m, MonadST m, MR.IsMergeType t) => HasFS m h -> HasBlockIO m h + -> Bloom.Salt -> UniqCounter m -> ResolveSerialisedValue -> ActiveDir -> SnapMergingRun t (Ref (Run m h)) -> m (Ref (MR.MergingRun t m h)) -fromSnapMergingRun _ _ _ _ _ (SnapCompletedMerge mergeDebt r) = +fromSnapMergingRun _ _ _ _ _ _ (SnapCompletedMerge mergeDebt r) = MR.newCompleted mergeDebt r -fromSnapMergingRun hfs hbio uc resolve dir +fromSnapMergingRun hfs hbio salt uc resolve dir (SnapOngoingMerge runParams mergeCredits rs mergeType) = do bracketOnError (do uniq <- incrUniqCounter uc let runPaths = runPath dir (uniqueToRunNumber uniq) - MR.new hfs hbio resolve runParams mergeType runPaths rs) + MR.new hfs hbio resolve salt runParams mergeType runPaths rs) releaseRef $ \mr -> do -- When a snapshot is created, 
merge progress is lost, so we have to -- redo merging work here. The MergeCredits in SnapMergingRun tracks diff --git a/src/Database/LSMTree/Internal/Types.hs b/src/Database/LSMTree/Internal/Types.hs index 14dfb213b..ca8031cef 100644 --- a/src/Database/LSMTree/Internal/Types.hs +++ b/src/Database/LSMTree/Internal/Types.hs @@ -2,6 +2,7 @@ {-# LANGUAGE DefaultSignatures #-} module Database.LSMTree.Internal.Types ( + Salt, Session (..), Table (..), BlobRef (..), @@ -18,11 +19,20 @@ import Control.DeepSeq (NFData (..), deepseq) import Data.Kind (Type) import Data.Semigroup (Sum) import Data.Typeable +import Data.Word (Word64) import qualified Database.LSMTree.Internal.BlobRef as Unsafe import Database.LSMTree.Internal.RawBytes (RawBytes (..)) import Database.LSMTree.Internal.Serialise.Class (SerialiseValue (..)) import qualified Database.LSMTree.Internal.Unsafe as Unsafe +{- | +The session salt is used to secure the hash operations in the Bloom filters. + +The value of the salt must be kept secret. +Otherwise, there are no restrictions on the value. +-} +type Salt = Word64 + {- | A session stores context that is shared by multiple tables. diff --git a/src/Database/LSMTree/Internal/Unsafe.hs b/src/Database/LSMTree/Internal/Unsafe.hs index 0ab05717d..4fd754898 100644 --- a/src/Database/LSMTree/Internal/Unsafe.hs +++ b/src/Database/LSMTree/Internal/Unsafe.hs @@ -36,16 +36,20 @@ module Database.LSMTree.Internal.Unsafe ( , Session (..) , SessionState (..) , SessionEnv (..) - , withOpenSession + , withKeepSessionOpen -- ** Implementation of public API - , withSession + , withOpenSession + , withNewSession + , withRestoreSession , openSession + , newSession + , restoreSession , closeSession -- * Table , Table (..) , TableState (..) , TableEnv (..) 
- , withOpenTable + , withKeepTableOpen -- ** Implementation of public API , ResolveSerialisedValue , withTable @@ -82,19 +86,21 @@ module Database.LSMTree.Internal.Unsafe ( , supplyUnionCredits ) where +import qualified Codec.Serialise as S import Control.ActionRegistry import Control.Concurrent.Class.MonadMVar.Strict import Control.Concurrent.Class.MonadSTM (MonadSTM (..)) import Control.Concurrent.Class.MonadSTM.RWVar (RWVar) import qualified Control.Concurrent.Class.MonadSTM.RWVar as RW import Control.DeepSeq -import Control.Monad (forM, unless, void, (<$!>)) +import Control.Monad (forM, unless, void, when, (<$!>)) import Control.Monad.Class.MonadAsync as Async import Control.Monad.Class.MonadST (MonadST (..)) import Control.Monad.Class.MonadThrow import Control.Monad.Primitive import Control.RefCount import Control.Tracer +import qualified Data.BloomFilter.Hash as Bloom import Data.Either (fromRight) import Data.Foldable import Data.List.NonEmpty (NonEmpty (..)) @@ -103,6 +109,8 @@ import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes, maybeToList) import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as Text import Data.Typeable import qualified Data.Vector as V import Database.LSMTree.Internal.Arena (ArenaManager, newArenaManager) @@ -144,6 +152,7 @@ import qualified Database.LSMTree.Internal.WriteBuffer as WB import qualified Database.LSMTree.Internal.WriteBufferBlobs as WBB import qualified System.FS.API as FS import System.FS.API (FsError, FsErrorPath (..), FsPath, HasFS) +import qualified System.FS.API.Lazy as FS import qualified System.FS.BlockIO.API as FS import System.FS.BlockIO.API (HasBlockIO) @@ -154,8 +163,8 @@ import System.FS.BlockIO.API (HasBlockIO) data LSMTreeTrace = -- Session TraceOpenSession FsPath - | TraceNewSession - | TraceRestoreSession + | TraceNewSession FsPath + | TraceRestoreSession FsPath | TraceCloseSession -- Table | TraceNewTable @@ -216,7 +225,7 @@ 
data CursorTrace = data Session m h = Session { -- | The primary purpose of this 'RWVar' is to ensure consistent views of -- the open-/closedness of a session when multiple threads require access - -- to the session's fields (see 'withOpenSession'). We use more + -- to the session's fields (see 'withKeepSessionOpen'). We use more -- fine-grained synchronisation for various mutable parts of an open -- session. -- @@ -240,6 +249,12 @@ data SessionEnv m h = SessionEnv { -- INVARIANT: the session root is never changed during the lifetime of a -- session. sessionRoot :: !SessionRoot + -- | Session-wide salt for bloomfilter hashes + -- + -- INVARIANT: all bloom filters in all tables in the session are created + -- using the same salt, and all bloom filter are queried using that same + -- salt. + , sessionSalt :: !Bloom.Salt , sessionHasFS :: !(HasFS m h) , sessionHasBlockIO :: !(HasBlockIO m h) , sessionLockFile :: !(FS.LockFileHandle m) @@ -273,21 +288,21 @@ data SessionClosedError deriving stock (Show, Eq) deriving anyclass (Exception) -{-# INLINE withOpenSession #-} -{-# SPECIALISE withOpenSession :: +{-# INLINE withKeepSessionOpen #-} +{-# SPECIALISE withKeepSessionOpen :: Session IO h -> (SessionEnv IO h -> IO a) -> IO a #-} --- | 'withOpenSession' ensures that the session stays open for the duration of the --- provided continuation. +-- | 'withKeepSessionOpen' ensures that the session stays open for the duration of +-- the provided continuation. -- -- NOTE: any operation except 'sessionClose' can use this function. 
-withOpenSession :: +withKeepSessionOpen :: (MonadSTM m, MonadThrow m) => Session m h -> (SessionEnv m h -> m a) -> m a -withOpenSession sesh action = RW.withReadAccess (sessionState sesh) $ \case +withKeepSessionOpen sesh action = RW.withReadAccess (sessionState sesh) $ \case SessionClosed -> throwIO ErrSessionClosed SessionOpen seshEnv -> action seshEnv @@ -295,24 +310,6 @@ withOpenSession sesh action = RW.withReadAccess (sessionState sesh) $ \case -- Implementation of public API -- -{-# SPECIALISE withSession :: - Tracer IO LSMTreeTrace - -> HasFS IO h - -> HasBlockIO IO h - -> FsPath - -> (Session IO h -> IO a) - -> IO a #-} --- | See 'Database.LSMTree.withSession'. -withSession :: - (MonadMask m, MonadSTM m, MonadMVar m, PrimMonad m) - => Tracer m LSMTreeTrace - -> HasFS m h - -> HasBlockIO m h - -> FsPath - -> (Session m h -> m a) - -> m a -withSession tr hfs hbio dir = bracket (openSession tr hfs hbio dir) closeSession - -- | The session directory does not exist. data SessionDirDoesNotExistError = ErrSessionDirDoesNotExist !FsErrorPath @@ -327,108 +324,220 @@ data SessionDirLockedError -- | The session directory is corrupted, e.g., it misses required files or contains unexpected files. data SessionDirCorruptedError - = ErrSessionDirCorrupted !FsErrorPath + = ErrSessionDirCorrupted !Text !FsErrorPath deriving stock (Show, Eq) deriving anyclass (Exception) +{-# INLINE withOpenSession #-} +withOpenSession :: + forall m h a. + (MonadSTM m, MonadMVar m, PrimMonad m, MonadMask m, MonadEvaluate m) + => Tracer m LSMTreeTrace + -> HasFS m h + -> HasBlockIO m h + -> Bloom.Salt + -> FsPath -- ^ Path to the session directory + -> (Session m h -> m a) + -> m a +withOpenSession tr hfs hbio salt dir k = do + bracket + (openSession tr hfs hbio salt dir) + closeSession + k + +{-# INLINE withNewSession #-} +withNewSession :: + forall m h a. 
+ (MonadSTM m, MonadMVar m, PrimMonad m, MonadMask m) + => Tracer m LSMTreeTrace + -> HasFS m h + -> HasBlockIO m h + -> Bloom.Salt + -> FsPath -- ^ Path to the session directory + -> (Session m h -> m a) + -> m a +withNewSession tr hfs hbio salt dir k = do + bracket + (newSession tr hfs hbio salt dir) + closeSession + k + +{-# INLINE withRestoreSession #-} +withRestoreSession :: + forall m h a. + (MonadSTM m, MonadMVar m, PrimMonad m, MonadMask m, MonadEvaluate m) + => Tracer m LSMTreeTrace + -> HasFS m h + -> HasBlockIO m h + -> FsPath -- ^ Path to the session directory + -> (Session m h -> m a) + -> m a +withRestoreSession tr hfs hbio dir k = do + bracket + (restoreSession tr hfs hbio dir) + closeSession + k + {-# SPECIALISE openSession :: Tracer IO LSMTreeTrace -> HasFS IO h -> HasBlockIO IO h + -> Bloom.Salt -> FsPath -> IO (Session IO h) #-} -- | See 'Database.LSMTree.openSession'. openSession :: + forall m h. + (MonadSTM m, MonadMVar m, PrimMonad m, MonadMask m, MonadEvaluate m) + => Tracer m LSMTreeTrace + -> HasFS m h + -> HasBlockIO m h + -> Bloom.Salt + -> FsPath -- ^ Path to the session directory + -> m (Session m h) +openSession tr hfs hbio salt dir = do + traceWith tr $ TraceOpenSession dir + + -- This is checked by 'newSession' and 'restoreSession' too, but it does not + -- hurt to check it twice, and it's arguably simpler like this. + dirExists <- FS.doesDirectoryExist hfs dir + unless dirExists $ + throwIO (ErrSessionDirDoesNotExist (FS.mkFsErrorPath hfs dir)) + + b <- isSessionDirEmpty hfs dir + if b then + newSession tr hfs hbio salt dir + else + restoreSession tr hfs hbio dir + +{-# SPECIALISE newSession :: + Tracer IO LSMTreeTrace + -> HasFS IO h + -> HasBlockIO IO h + -> Bloom.Salt + -> FsPath + -> IO (Session IO h) #-} +-- | See 'Database.LSMTree.newSession'. +newSession :: forall m h. 
(MonadSTM m, MonadMVar m, PrimMonad m, MonadMask m) => Tracer m LSMTreeTrace -> HasFS m h - -> HasBlockIO m h -- TODO: could we prevent the user from having to pass this in? + -> HasBlockIO m h + -> Bloom.Salt -> FsPath -- ^ Path to the session directory -> m (Session m h) -openSession tr hfs hbio dir = +newSession tr hfs hbio salt dir = do + traceWith tr $ TraceNewSession dir + -- We can not use modifyWithActionRegistry here, since there is no in-memory -- state to modify. We use withActionRegistry instead, which may have a tiny -- chance of leaking resources if openSession is not called in a masked -- state. withActionRegistry $ \reg -> do - traceWith tr (TraceOpenSession dir) dirExists <- FS.doesDirectoryExist hfs dir unless dirExists $ throwIO (ErrSessionDirDoesNotExist (FS.mkFsErrorPath hfs dir)) - -- List directory contents /before/ trying to acquire a file lock, so that - -- that the lock file does not show up in the listed contents. - dirContents <- FS.listDirectory hfs dir + -- Try to acquire the session file lock as soon as possible to reduce the -- risk of race conditions. -- - -- The lock is only released when an exception is raised, otherwise the lock - -- is included in the returned Session. - elock <- - withRollbackFun reg - (fromRight Nothing) - acquireLock - releaseLock - - case elock of - Left e - | FS.FsResourceAlreadyInUse <- FS.fsErrorType e - , fsep@(FsErrorPath _ fsp) <- FS.fsErrorPath e - , fsp == lockFilePath - -> throwIO (ErrSessionDirLocked fsep) - Left e -> throwIO e -- rethrow unexpected errors - Right Nothing -> throwIO (ErrSessionDirLocked (FS.mkFsErrorPath hfs lockFilePath)) - Right (Just sessionFileLock) -> - if Set.null dirContents then newSession reg sessionFileLock - else restoreSession reg sessionFileLock + -- The lock is only released when an exception is raised, otherwise the + -- lock is included in the returned Session. 
+      sessionFileLock <- acquireSessionLock hfs hbio reg lockFilePath
+
+      -- If we're starting a new session, then the session directory should be
+      -- empty.
+      b <- isSessionDirEmpty hfs dir
+      unless b $ do
+        throwIO $ ErrSessionDirCorrupted
+          (Text.pack "Session directory is non-empty")
+          (FS.mkFsErrorPath hfs dir)
+
+      withRollback_ reg
+        (FS.withFile hfs metadataFilePath (FS.WriteMode FS.MustBeNew) $ \h ->
+          void $ FS.hPutAll hfs h $ S.serialise salt)
+        (FS.removeFile hfs metadataFilePath)
+      withRollback_ reg
+        (FS.createDirectory hfs activeDirPath)
+        (FS.removeDirectoryRecursive hfs activeDirPath)
+      withRollback_ reg
+        (FS.createDirectory hfs snapshotsDirPath)
+        (FS.removeDirectoryRecursive hfs snapshotsDirPath)
+
+      mkSession tr hfs hbio root sessionFileLock salt
   where
     root             = Paths.SessionRoot dir
     lockFilePath     = Paths.lockFile root
+    metadataFilePath = Paths.metadataFile root
     activeDirPath    = Paths.getActiveDir (Paths.activeDir root)
     snapshotsDirPath = Paths.snapshotsDir root
 
-    acquireLock = try @m @FsError $ FS.tryLockFile hbio lockFilePath FS.ExclusiveLock
 
+{-# SPECIALISE restoreSession ::
+     Tracer IO LSMTreeTrace
+  -> HasFS IO h
+  -> HasBlockIO IO h
+  -> FsPath
+  -> IO (Session IO h) #-}
+-- | See 'Database.LSMTree.restoreSession'.
+restoreSession ::
+     forall m h.
+     (MonadSTM m, MonadMVar m, PrimMonad m, MonadMask m, MonadEvaluate m)
+  => Tracer m LSMTreeTrace
+  -> HasFS m h
+  -> HasBlockIO m h
+  -> FsPath -- ^ Path to the session directory
+  -> m (Session m h)
+restoreSession tr hfs hbio dir = do
+    traceWith tr $ TraceRestoreSession dir
 
-  releaseLock = FS.hUnlock
+    -- We can not use modifyWithActionRegistry here, since there is no in-memory
+    -- state to modify. We use withActionRegistry instead, which may have a tiny
+    -- chance of leaking resources if openSession is not called in a masked
+    -- state.
+ withActionRegistry $ \reg -> do + dirExists <- FS.doesDirectoryExist hfs dir + unless dirExists $ + throwIO (ErrSessionDirDoesNotExist (FS.mkFsErrorPath hfs dir)) - mkSession lockFile = do - counterVar <- newUniqCounter 0 - openTablesVar <- newMVar Map.empty - openCursorsVar <- newMVar Map.empty - sessionVar <- RW.new $ SessionOpen $ SessionEnv { - sessionRoot = root - , sessionHasFS = hfs - , sessionHasBlockIO = hbio - , sessionLockFile = lockFile - , sessionUniqCounter = counterVar - , sessionOpenTables = openTablesVar - , sessionOpenCursors = openCursorsVar - } - pure $! Session sessionVar tr - - newSession reg sessionFileLock = do - traceWith tr TraceNewSession - withRollback_ reg - (FS.createDirectory hfs activeDirPath) - (FS.removeDirectoryRecursive hfs activeDirPath) - withRollback_ reg - (FS.createDirectory hfs snapshotsDirPath) - (FS.removeDirectoryRecursive hfs snapshotsDirPath) - mkSession sessionFileLock - - restoreSession _reg sessionFileLock = do - traceWith tr TraceRestoreSession - -- If the layouts are wrong, we throw an exception - checkTopLevelDirLayout - - -- Clear the active directory by removing the directory and recreating - -- it again. - FS.removeDirectoryRecursive hfs activeDirPath - `finally` FS.createDirectoryIfMissing hfs False activeDirPath - - checkActiveDirLayout - checkSnapshotsDirLayout - mkSession sessionFileLock + -- Try to acquire the session file lock as soon as possible to reduce the + -- risk of race conditions. + -- + -- The lock is only released when an exception is raised, otherwise the + -- lock is included in the returned Session. + sessionFileLock <- acquireSessionLock hfs hbio reg lockFilePath + + -- If we're restoring a session, then the session directory should be + -- non-empty. 
+ b <- isSessionDirEmpty hfs dir + when b $ do + throwIO $ ErrSessionDirCorrupted + (Text.pack "Session directory is empty") + (FS.mkFsErrorPath hfs dir) + + -- If the layouts are wrong, we throw an exception + checkTopLevelDirLayout + + salt <- + FS.withFile hfs metadataFilePath FS.ReadMode $ \h -> do + bs <- FS.hGetAll hfs h + evaluate $ S.deserialise bs + + -- Clear the active directory by removing the directory and recreating + -- it again. + FS.removeDirectoryRecursive hfs activeDirPath + `finally` FS.createDirectoryIfMissing hfs False activeDirPath + + checkActiveDirLayout + checkSnapshotsDirLayout + + mkSession tr hfs hbio root sessionFileLock salt + where + root = Paths.SessionRoot dir + lockFilePath = Paths.lockFile root + metadataFilePath = Paths.metadataFile root + activeDirPath = Paths.getActiveDir (Paths.activeDir root) + snapshotsDirPath = Paths.snapshotsDir root -- Check that the active directory and snapshots directory exist. We assume -- the lock file already exists at this point. @@ -437,15 +546,29 @@ openSession tr hfs hbio dir = -- Unexpected files in the top-level directory are ignored for the layout -- check. 
checkTopLevelDirLayout = do + FS.doesFileExist hfs metadataFilePath >>= \b -> + unless b $ throwIO $ + ErrSessionDirCorrupted + (Text.pack "Missing metadata file") + (FS.mkFsErrorPath hfs metadataFilePath) FS.doesDirectoryExist hfs activeDirPath >>= \b -> - unless b $ throwIO (ErrSessionDirCorrupted (FS.mkFsErrorPath hfs activeDirPath)) + unless b $ throwIO $ + ErrSessionDirCorrupted + (Text.pack "Missing active directory") + (FS.mkFsErrorPath hfs activeDirPath) FS.doesDirectoryExist hfs snapshotsDirPath >>= \b -> - unless b $ throwIO (ErrSessionDirCorrupted (FS.mkFsErrorPath hfs snapshotsDirPath)) + unless b $ throwIO $ + ErrSessionDirCorrupted + (Text.pack "Missing snapshot directory") + (FS.mkFsErrorPath hfs snapshotsDirPath) -- The active directory should be empty checkActiveDirLayout = do contents <- FS.listDirectory hfs activeDirPath - unless (Set.null contents) $ throwIO (ErrSessionDirCorrupted (FS.mkFsErrorPath hfs activeDirPath)) + unless (Set.null contents) $ throwIO $ + ErrSessionDirCorrupted + (Text.pack "Active directory is non-empty") + (FS.mkFsErrorPath hfs activeDirPath) -- Nothing to check: snapshots are verified when they are loaded, not when a -- session is restored. @@ -497,11 +620,83 @@ closeSession Session{sessionState, sessionTracer} = do (void . swapMVar (sessionOpenTables seshEnv)) mapM_ (delayedCommit reg . close) tables - delayedCommit reg $ FS.close (sessionHasBlockIO seshEnv) delayedCommit reg $ FS.hUnlock (sessionLockFile seshEnv) pure SessionClosed +{-# SPECIALISE acquireSessionLock :: + HasFS IO h + -> HasBlockIO IO h + -> ActionRegistry IO + -> FsPath + -> IO (FS.LockFileHandle IO) #-} +acquireSessionLock :: + forall m h. 
(MonadSTM m, PrimMonad m, MonadMask m) + => HasFS m h + -> HasBlockIO m h + -> ActionRegistry m + -> FsPath + -> m (FS.LockFileHandle m) +acquireSessionLock hfs hbio reg lockFilePath = do + elock <- + withRollbackFun reg + (fromRight Nothing) + acquireLock + releaseLock + + case elock of + Left e + | FS.FsResourceAlreadyInUse <- FS.fsErrorType e + , fsep@(FsErrorPath _ fsp) <- FS.fsErrorPath e + , fsp == lockFilePath + -> throwIO (ErrSessionDirLocked fsep) + Left e -> throwIO e -- rethrow unexpected errors + Right Nothing -> throwIO (ErrSessionDirLocked (FS.mkFsErrorPath hfs lockFilePath)) + Right (Just sessionFileLock) -> pure sessionFileLock + where + acquireLock = try @m @FsError $ FS.tryLockFile hbio lockFilePath FS.ExclusiveLock + + releaseLock = FS.hUnlock + +{-# SPECIALISE mkSession :: + Tracer IO LSMTreeTrace + -> HasFS IO h + -> HasBlockIO IO h + -> SessionRoot + -> FS.LockFileHandle IO + -> Bloom.Salt + -> IO (Session IO h) #-} +mkSession :: + (PrimMonad m, MonadMVar m, MonadSTM m) + => Tracer m LSMTreeTrace + -> HasFS m h + -> HasBlockIO m h + -> SessionRoot + -> FS.LockFileHandle m + -> Bloom.Salt + -> m (Session m h) +mkSession tr hfs hbio root lockFile salt = do + counterVar <- newUniqCounter 0 + openTablesVar <- newMVar Map.empty + openCursorsVar <- newMVar Map.empty + sessionVar <- RW.new $ SessionOpen $ SessionEnv { + sessionRoot = root + , sessionSalt = salt + , sessionHasFS = hfs + , sessionHasBlockIO = hbio + , sessionLockFile = lockFile + , sessionUniqCounter = counterVar + , sessionOpenTables = openTablesVar + , sessionOpenCursors = openCursorsVar + } + pure $! 
Session sessionVar tr + +{-# INLINE isSessionDirEmpty #-} +isSessionDirEmpty :: Monad m => HasFS m h -> FsPath -> m Bool +isSessionDirEmpty hfs dir = do + dirContents <- FS.listDirectory hfs dir + pure $ Set.null dirContents || dirContents == Set.singleton Paths.lockFileName + {------------------------------------------------------------------------------- Table -------------------------------------------------------------------------------} @@ -513,7 +708,7 @@ data Table m h = Table { tableConfig :: !TableConfig -- | The primary purpose of this 'RWVar' is to ensure consistent views of -- the open-/closedness of a table when multiple threads require access to - -- the table's fields (see 'withOpenTable'). We use more fine-grained + -- the table's fields (see 'withKeepTableOpen'). We use more fine-grained -- synchronisation for various mutable parts of an open table. , tableState :: !(RWVar m (TableState m h)) , tableArenaManager :: !(ArenaManager (PrimState m)) @@ -568,6 +763,11 @@ data TableEnv m h = TableEnv { tableSessionRoot :: TableEnv m h -> SessionRoot tableSessionRoot = sessionRoot . tableSessionEnv +{-# INLINE tableSessionSalt #-} + -- | Inherited from session for ease of access. +tableSessionSalt :: TableEnv m h -> Bloom.Salt +tableSessionSalt = sessionSalt . tableSessionEnv + {-# INLINE tableHasFS #-} -- | Inherited from session for ease of access. tableHasFS :: TableEnv m h -> HasFS m h @@ -597,21 +797,21 @@ data TableClosedError deriving stock (Show, Eq) deriving anyclass (Exception) --- | 'withOpenTable' ensures that the table stays open for the duration of the +-- | 'withKeepTableOpen' ensures that the table stays open for the duration of the -- provided continuation. -- -- NOTE: any operation except 'close' can use this function. 
-{-# INLINE withOpenTable #-} -{-# SPECIALISE withOpenTable :: +{-# INLINE withKeepTableOpen #-} +{-# SPECIALISE withKeepTableOpen :: Table IO h -> (TableEnv IO h -> IO a) -> IO a #-} -withOpenTable :: +withKeepTableOpen :: (MonadSTM m, MonadThrow m) => Table m h -> (TableEnv m h -> m a) -> m a -withOpenTable t action = RW.withReadAccess (tableState t) $ \case +withKeepTableOpen t action = RW.withReadAccess (tableState t) $ \case TableClosed -> throwIO ErrTableClosed TableOpen tEnv -> action tEnv @@ -645,7 +845,7 @@ new :: -> m (Table m h) new sesh conf = do traceWith (sessionTracer sesh) TraceNewTable - withOpenSession sesh $ \seshEnv -> + withKeepSessionOpen sesh $ \seshEnv -> withActionRegistry $ \reg -> do am <- newArenaManager tc <- newEmptyTableContent seshEnv reg @@ -752,7 +952,7 @@ lookups :: -> m (V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))) lookups resolve ks t = do traceWith (tableTracer t) $ TraceLookups (V.length ks) - withOpenTable t $ \tEnv -> + withKeepTableOpen t $ \tEnv -> RW.withReadAccess (tableContent tEnv) $ \tc -> do case tableUnionLevel tc of NoUnion -> lookupsRegular tEnv tc @@ -769,6 +969,7 @@ lookups resolve ks t = do (tableHasBlockIO tEnv) (tableArenaManager t) resolve + (tableSessionSalt tEnv) (tableWriteBuffer tc) (tableWriteBufferBlobs tc) (cachedRuns cache) @@ -799,6 +1000,7 @@ lookups resolve ks t = do (tableHasBlockIO tEnv) (tableArenaManager t) resolve + (tableSessionSalt tEnv) runs (V.mapStrict (\(DeRef r) -> Run.runFilter r) runs) (V.mapStrict (\(DeRef r) -> Run.runIndex r) runs) @@ -862,7 +1064,7 @@ updates :: updates resolve es t = do traceWith (tableTracer t) $ TraceUpdates (V.length es) let conf = tableConfig t - withOpenTable t $ \tEnv -> do + withKeepTableOpen t $ \tEnv -> do let hfs = tableHasFS tEnv modifyWithActionRegistry_ (RW.unsafeAcquireWriteAccess (tableContent tEnv)) @@ -874,6 +1076,7 @@ updates resolve es t = do hfs (tableHasBlockIO tEnv) (tableSessionRoot tEnv) + (tableSessionSalt tEnv) 
(tableSessionUniqCounter tEnv) es reg @@ -907,7 +1110,7 @@ retrieveBlobs :: -> V.Vector (WeakBlobRef m h) -> m (V.Vector SerialisedBlob) retrieveBlobs sesh wrefs = - withOpenSession sesh $ \seshEnv -> + withKeepSessionOpen sesh $ \seshEnv -> let hbio = sessionHasBlockIO seshEnv in handle (\(BlobRef.WeakBlobRefInvalid i) -> throwIO (ErrBlobRefInvalid i)) $ @@ -1013,7 +1216,7 @@ newCursor :: -> OffsetKey -> Table m h -> m (Cursor m h) -newCursor !resolve !offsetKey t = withOpenTable t $ \tEnv -> do +newCursor !resolve !offsetKey t = withKeepTableOpen t $ \tEnv -> do let cursorSession = tableSession t let cursorSessionEnv = tableSessionEnv tEnv cursorId <- uniqueToCursorId <$> @@ -1023,7 +1226,7 @@ newCursor !resolve !offsetKey t = withOpenTable t $ \tEnv -> do -- We acquire a read-lock on the session open-state to prevent races, see -- 'sessionOpenTables'. - withOpenSession cursorSession $ \_ -> do + withKeepSessionOpen cursorSession $ \_ -> do withActionRegistry $ \reg -> do (wb, wbblobs, cursorRuns, cursorUnion) <- dupTableContent reg (tableContent tEnv) @@ -1198,7 +1401,7 @@ saveSnapshot :: -> m () saveSnapshot snap label t = do traceWith (tableTracer t) $ TraceSnapshot snap - withOpenTable t $ \tEnv -> + withKeepTableOpen t $ \tEnv -> withActionRegistry $ \reg -> do -- TODO: use the action registry for all side effects let hfs = tableHasFS tEnv hbio = tableHasBlockIO tEnv @@ -1305,7 +1508,7 @@ openTableFromSnapshot :: openTableFromSnapshot policyOveride sesh snap label resolve = wrapFileCorruptedErrorAsSnapshotCorruptedError snap $ do traceWith (sessionTracer sesh) $ TraceOpenTableFromSnapshot snap policyOveride - withOpenSession sesh $ \seshEnv -> do + withKeepSessionOpen sesh $ \seshEnv -> do withActionRegistry $ \reg -> do let hfs = sessionHasFS seshEnv hbio = sessionHasBlockIO seshEnv @@ -1329,6 +1532,7 @@ openTableFromSnapshot policyOveride sesh snap label resolve = am <- newArenaManager + let salt = sessionSalt seshEnv let activeDir = Paths.activeDir 
(sessionRoot seshEnv) -- Read write buffer @@ -1337,12 +1541,12 @@ openTableFromSnapshot policyOveride sesh snap label resolve = openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths -- Hard link runs into the active directory, - snapLevels' <- traverse (openRun hfs hbio uc reg snapDir activeDir) snapLevels + snapLevels' <- traverse (openRun hfs hbio uc reg snapDir activeDir salt) snapLevels unionLevel <- case mTreeOpt of Nothing -> pure NoUnion Just mTree -> do - snapTree <- traverse (openRun hfs hbio uc reg snapDir activeDir) mTree - mt <- fromSnapMergingTree hfs hbio uc resolve activeDir reg snapTree + snapTree <- traverse (openRun hfs hbio uc reg snapDir activeDir salt) mTree + mt <- fromSnapMergingTree hfs hbio salt uc resolve activeDir reg snapTree isStructurallyEmpty mt >>= \case True -> pure NoUnion @@ -1352,7 +1556,7 @@ openTableFromSnapshot policyOveride sesh snap label resolve = pure (Union mt cache) -- Convert from the snapshot format, restoring merge progress in the process - tableLevels <- fromSnapLevels hfs hbio uc conf resolve reg activeDir snapLevels' + tableLevels <- fromSnapLevels hfs hbio salt uc conf resolve reg activeDir snapLevels' traverse_ (delayedCommit reg . releaseRef) snapLevels' tableCache <- mkLevelsCache reg tableLevels @@ -1388,7 +1592,7 @@ doesSnapshotExist :: => Session m h -> SnapshotName -> m Bool -doesSnapshotExist sesh snap = withOpenSession sesh (doesSnapshotDirExist snap) +doesSnapshotExist sesh snap = withKeepSessionOpen sesh (doesSnapshotDirExist snap) -- | Internal helper: Variant of 'doesSnapshotExist' that does not take a session lock. 
doesSnapshotDirExist :: SnapshotName -> SessionEnv m h -> m Bool @@ -1408,7 +1612,7 @@ deleteSnapshot :: -> m () deleteSnapshot sesh snap = do traceWith (sessionTracer sesh) $ TraceDeleteSnapshot snap - withOpenSession sesh $ \seshEnv -> do + withKeepSessionOpen sesh $ \seshEnv -> do let snapDir = Paths.namedSnapshotDir (sessionRoot seshEnv) snap snapshotExists <- doesSnapshotDirExist snap seshEnv unless snapshotExists $ throwIO (ErrSnapshotDoesNotExist snap) @@ -1422,7 +1626,7 @@ listSnapshots :: -> m [SnapshotName] listSnapshots sesh = do traceWith (sessionTracer sesh) TraceListSnapshots - withOpenSession sesh $ \seshEnv -> do + withKeepSessionOpen sesh $ \seshEnv -> do let hfs = sessionHasFS seshEnv root = sessionRoot seshEnv contents <- FS.listDirectory hfs (Paths.snapshotsDir (sessionRoot seshEnv)) @@ -1450,10 +1654,10 @@ duplicate :: -> m (Table m h) duplicate t@Table{..} = do traceWith tableTracer TraceDuplicate - withOpenTable t $ \TableEnv{..} -> do + withKeepTableOpen t $ \TableEnv{..} -> do -- We acquire a read-lock on the session open-state to prevent races, see -- 'sessionOpenTables'. - withOpenSession tableSession $ \_ -> do + withKeepSessionOpen tableSession $ \_ -> do withActionRegistry $ \reg -> do -- The table contents escape the read access, but we just added references -- to each run so it is safe. 
@@ -1543,7 +1747,7 @@ unionsInOpenSession :: -> m (Table m h) unionsInOpenSession reg sesh seshEnv conf ts = do mts <- forM (NE.toList ts) $ \t -> - withOpenTable t $ \tEnv -> + withKeepTableOpen t $ \tEnv -> RW.withReadAccess (tableContent tEnv) $ \tc -> -- tableContentToMergingTree duplicates all runs and merges -- so the ones from the tableContent here do not escape @@ -1625,6 +1829,7 @@ writeBufferToNewRun :: -> m (Maybe (Ref (Run m h))) writeBufferToNewRun SessionEnv { sessionRoot = root, + sessionSalt = salt, sessionHasFS = hfs, sessionHasBlockIO = hbio, sessionUniqCounter = uc @@ -1640,7 +1845,7 @@ writeBufferToNewRun SessionEnv { let (!runParams, !runPaths) = mergingRunParamsForLevel (Paths.activeDir root) conf uniq (LevelNo 1) Run.fromWriteBuffer - hfs hbio + hfs hbio salt runParams runPaths tableWriteBuffer tableWriteBufferBlobs @@ -1657,7 +1862,7 @@ ensureSessionsMatch :: -> m (Session m h) ensureSessionsMatch (t :| ts) = do let sesh = tableSession t - withOpenSession sesh $ \seshEnv -> do + withKeepSessionOpen sesh $ \seshEnv -> do let root = FS.mkFsErrorPath (sessionHasFS seshEnv) (getSessionRoot (sessionRoot seshEnv)) -- Check that the session roots for all tables are the same. There can only -- be one *open/active* session per directory because of cooperative file @@ -1666,7 +1871,7 @@ ensureSessionsMatch (t :| ts) = do -- the session roots. for_ (zip [1..] ts) $ \(i, t') -> do let sesh' = tableSession t' - withOpenSession sesh' $ \seshEnv' -> do + withKeepSessionOpen sesh' $ \seshEnv' -> do let root' = FS.mkFsErrorPath (sessionHasFS seshEnv') (getSessionRoot (sessionRoot seshEnv')) -- TODO: compare LockFileHandle instead of SessionRoot (?). 
-- We can write an Eq instance for LockFileHandle based on pointer equality, @@ -1694,7 +1899,7 @@ remainingUnionDebt :: => Table m h -> m UnionDebt remainingUnionDebt t = do traceWith (tableTracer t) TraceRemainingUnionDebt - withOpenTable t $ \tEnv -> do + withKeepTableOpen t $ \tEnv -> do RW.withReadAccess (tableContent tEnv) $ \tableContent -> do case tableUnionLevel tableContent of NoUnion -> @@ -1717,7 +1922,7 @@ supplyUnionCredits :: => ResolveSerialisedValue -> Table m h -> UnionCredits -> m UnionCredits supplyUnionCredits resolve t credits = do traceWith (tableTracer t) $ TraceSupplyUnionCredits credits - withOpenTable t $ \tEnv -> do + withKeepTableOpen t $ \tEnv -> do -- We also want to mutate the table content to re-build the union cache, -- but we don't need to hold a writer lock while we work on the tree -- itself. @@ -1738,6 +1943,7 @@ supplyUnionCredits resolve t credits = do (tableHasFS tEnv) (tableHasBlockIO tEnv) resolve + (tableSessionSalt tEnv) (runParamsForLevel conf UnionLevel) thresh (tableSessionRoot tEnv) diff --git a/src/Database/LSMTree/Simple.hs b/src/Database/LSMTree/Simple.hs index 67c4545a1..c2484530d 100644 --- a/src/Database/LSMTree/Simple.hs +++ b/src/Database/LSMTree/Simple.hs @@ -26,7 +26,7 @@ module Database.LSMTree.Simple ( -- * Sessions #sessions# Session, - withSession, + withOpenSession, openSession, closeSession, @@ -160,6 +160,7 @@ import Data.Bifunctor (Bifunctor (..)) import Data.Coerce (coerce) import Data.Kind (Type) import Data.List.NonEmpty (NonEmpty (..)) +import Data.Text (Text) import Data.Typeable (TypeRep) import Data.Vector (Vector) import Data.Void (Void) @@ -214,7 +215,7 @@ runExample action = do tmpDir <- getTemporaryDirectory let sessionDir = tmpDir "doctest_Database_LSMTree_Simple" createDirectoryIfMissing True sessionDir - withSession sessionDir $ \session -> + withOpenSession sessionDir $ \session -> withTable session $ \table -> action session table :} @@ -245,7 +246,7 @@ function that combines the 
two. +------------+--------------------------+-------------------------+-------------------+ | Resource | Bracketed #bracketed# | Allocate #allocate# | Release #release# | +============+==========================+=========================+===================+ -| 'Session' | 'withSession' | 'openSession' | 'closeSession' | +| 'Session' | 'withOpenSession' | 'openSession' | 'closeSession' | +------------+--------------------------+-------------------------+-------------------+ | 'Table' | 'withTable' | 'newTable' | 'closeTable' | + +--------------------------+-------------------------+ + @@ -384,6 +385,9 @@ newtype Session = Session (LSMT.Session IO) {- | Run an action with access to a session opened from a session directory. +If the session directory is empty, a new session is created. +Otherwise, the session directory is restored as an existing session. + If there are no open tables or cursors when the session terminates, then the disk I\/O complexity of this operation is \(O(1)\). Otherwise, 'closeTable' is called for each open table and 'closeCursor' is called for each open cursor. Consequently, the worst-case disk I\/O complexity of this operation depends on the merge policy of the open tables in the session. @@ -407,20 +411,23 @@ Throws the following exceptions: ['SessionDirCorruptedError']: If the session directory is malformed. -} -withSession :: +withOpenSession :: forall a. -- | The session directory. FilePath -> (Session -> IO a) -> IO a -withSession dir action = do +withOpenSession dir action = do let tracer = mempty _convertSessionDirErrors dir $ - LSMT.withSessionIO tracer dir (action . Session) + LSMT.withOpenSessionIO tracer dir (action . Session) {- | Open a session from a session directory. +If the session directory is empty, a new session is created. +Otherwise, the session directory is restored as an existing session. + The worst-case disk I\/O complexity of this operation is \(O(1)\). 
__Warning:__ Sessions hold open resources and must be closed using 'closeSession'. @@ -1549,7 +1556,7 @@ data SessionDirLockedError -- | The session directory is corrupted, e.g., it misses required files or contains unexpected files. data SessionDirCorruptedError - = ErrSessionDirCorrupted !FilePath + = ErrSessionDirCorrupted !Text !FilePath deriving stock (Show, Eq) deriving anyclass (Exception) @@ -1567,7 +1574,7 @@ _convertSessionDirErrors :: _convertSessionDirErrors sessionDir = mapExceptionWithActionRegistry (\(LSMT.ErrSessionDirDoesNotExist _fsErrorPath) -> SomeException $ ErrSessionDirDoesNotExist sessionDir) . mapExceptionWithActionRegistry (\(LSMT.ErrSessionDirLocked _fsErrorPath) -> SomeException $ ErrSessionDirLocked sessionDir) - . mapExceptionWithActionRegistry (\(LSMT.ErrSessionDirCorrupted _fsErrorPath) -> SomeException $ ErrSessionDirCorrupted sessionDir) + . mapExceptionWithActionRegistry (\(LSMT.ErrSessionDirCorrupted reason _fsErrorPath) -> SomeException $ ErrSessionDirCorrupted reason sessionDir) {------------------------------------------------------------------------------- Table union diff --git a/test/Database/LSMTree/Class.hs b/test/Database/LSMTree/Class.hs index 96b1acfba..d4e4bc39c 100644 --- a/test/Database/LSMTree/Class.hs +++ b/test/Database/LSMTree/Class.hs @@ -27,7 +27,7 @@ import Database.LSMTree.Class.Common as Common import qualified Database.LSMTree.Internal.Paths as RIP import qualified Database.LSMTree.Internal.Types as RT (Table (..)) import qualified Database.LSMTree.Internal.Unsafe as RU (SessionEnv (..), - Table (..), withOpenSession) + Table (..), withKeepSessionOpen) import Test.Util.FS (flipRandomBitInRandomFileHardlinkSafe) import Test.Util.QC (Choice) @@ -260,7 +260,7 @@ rCorruptSnapshot :: -> R.Table m k v b -> m () rCorruptSnapshot choice name (RT.Table t) = - RU.withOpenSession (RU.tableSession t) $ \seshEnv -> + RU.withKeepSessionOpen (RU.tableSession t) $ \seshEnv -> let hfs = RU.sessionHasFS seshEnv root = 
RU.sessionRoot seshEnv namedSnapDir = RIP.getNamedSnapshotDir (RIP.namedSnapshotDir root name) diff --git a/test/Database/LSMTree/Class/Common.hs b/test/Database/LSMTree/Class/Common.hs index 747e9a7ae..5f591722e 100644 --- a/test/Database/LSMTree/Class/Common.hs +++ b/test/Database/LSMTree/Class/Common.hs @@ -77,6 +77,9 @@ withSession seshArgs = bracket (openSession seshArgs) closeSession Real instance -------------------------------------------------------------------------------} +testSalt :: R.Salt +testSalt = 4 + instance IsSession R.Session where data SessionArgs R.Session m where SessionArgs :: @@ -85,7 +88,7 @@ instance IsSession R.Session where -> SessionArgs R.Session m openSession (SessionArgs hfs hbio dir) = do - R.openSession nullTracer hfs hbio dir + R.openSession nullTracer hfs hbio testSalt dir closeSession = R.closeSession deleteSnapshot = R.deleteSnapshot listSnapshots = R.listSnapshots diff --git a/test/Main.hs b/test/Main.hs index d3382bca2..0b7842fff 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -4,6 +4,7 @@ module Main (main) where import qualified Control.RefCount +import qualified Test.Database.LSMTree import qualified Test.Database.LSMTree.Class import qualified Test.Database.LSMTree.Generators import qualified Test.Database.LSMTree.Internal @@ -51,7 +52,8 @@ import Test.Tasty main :: IO () main = do defaultMain $ testGroup "lsm-tree" - [ Test.Database.LSMTree.Internal.Arena.tests + [ Test.Database.LSMTree.tests + , Test.Database.LSMTree.Internal.Arena.tests , Test.Database.LSMTree.Class.tests , Test.Database.LSMTree.Generators.tests , Test.Database.LSMTree.Internal.tests diff --git a/test/Test/Database/LSMTree.hs b/test/Test/Database/LSMTree.hs new file mode 100644 index 000000000..a645395b9 --- /dev/null +++ b/test/Test/Database/LSMTree.hs @@ -0,0 +1,313 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} + +module Test.Database.LSMTree (tests) where + +import Control.Exception +import 
Control.Tracer +import Data.Function (on) +import Data.IORef +import Data.Monoid +import Data.Typeable (Typeable) +import qualified Data.Vector as V +import qualified Data.Vector.Algorithms as VA +import Data.Void +import Data.Word +import Database.LSMTree +import Database.LSMTree.Extras (showRangesOf) +import Database.LSMTree.Extras.Generators () +import qualified System.FS.API as FS +import qualified System.FS.BlockIO.API as FS +import Test.QuickCheck +import Test.Tasty +import Test.Tasty.QuickCheck +import Test.Util.FS + +tests :: TestTree +tests = testGroup "Test.Database.LSMTree" [ + testGroup "Session" [ + -- openSession + testProperty "prop_openSession_newSession" prop_openSession_newSession + , testProperty "prop_openSession_restoreSession" prop_openSession_restoreSession + -- happy path + , testProperty "prop_newSession_restoreSession_happyPath" prop_newSession_restoreSession_happyPath + -- missing session directory + , testProperty "prop_sessionDirDoesNotExist" prop_sessionDirDoesNotExist + -- session directory already locked + , testProperty "prop_sessionDirLocked" prop_sessionDirLocked + -- malformed session directory + , testProperty "prop_sessionDirCorrupted" prop_sessionDirCorrupted + -- salt + , testProperty "prop_goodAndBadSessionSalt" prop_goodAndBadSessionSalt + ] + ] + +{------------------------------------------------------------------------------- + Test types and utilities +-------------------------------------------------------------------------------} + +newtype Key = Key Word64 + deriving stock (Show, Eq, Ord) + deriving newtype (Arbitrary, SerialiseKey) + +newtype Value = Value Word64 + deriving stock (Show, Eq) + deriving newtype (Arbitrary, SerialiseValue) + deriving ResolveValue via Sum Word64 + +newtype Blob = Blob Word64 + deriving stock (Show, Eq) + deriving newtype (Arbitrary, SerialiseValue) + +data NewOrRestore = New | Restore + deriving stock (Show, Eq, Bounded, Enum) + +instance Arbitrary NewOrRestore where + arbitrary = 
arbitraryBoundedEnum + shrink = shrinkBoundedEnum + +-- | If 'New', use 'newSession', otherwise if 'Restore', use 'restoreSession'. +-- +-- This allows us to run properties on both 'newSession' and 'restoreSession', +-- without having to write almost identical code twice. +-- +-- In a sense, this is somewhat similar to 'openSession', but whereas +-- 'openSession' would defer to 'newSession' or 'restoreSession' based on the +-- directory contents, here the user gets to pick whether to use 'newSession' or +-- 'restoreSession'. +withNewSessionOrRestoreSession :: + (IOLike m, Typeable h) + => NewOrRestore + -> Tracer m LSMTreeTrace + -> FS.HasFS m h + -> FS.HasBlockIO m h + -> Salt + -> FS.FsPath + -> (Session m -> m a) + -> m a +withNewSessionOrRestoreSession newOrRestore tr hfs hbio salt path = + case newOrRestore of + New -> withNewSession tr hfs hbio salt path + Restore -> withRestoreSession tr hfs hbio path + +{------------------------------------------------------------------------------- + Session: openSession +-------------------------------------------------------------------------------} + +-- | When the session directory is empty, 'openSession' will call 'newSession' +prop_openSession_newSession :: Property +prop_openSession_newSession = + ioProperty $ + withTempIOHasBlockIO "prop_openSession_newSession" $ \hfs hbio -> do + -- Use resultsVar to record which session functions were called + resultsVar <- newIORef [] + withOpenSession + (mkSessionOpenModeTracer resultsVar) hfs hbio + testSalt (FS.mkFsPath []) + $ \_session -> pure () + results <- readIORef resultsVar + -- Check that we first called openSession, then newSession + pure $ results === ["New", "Open"] + where + testSalt = 6 + +-- | When the session directory is non-empty, 'openSession' will call 'restoreSession' +prop_openSession_restoreSession :: Property +prop_openSession_restoreSession = + ioProperty $ + withTempIOHasBlockIO "prop_openSession_restoreSession" $ \hfs hbio -> do + withOpenSession 
nullTracer hfs hbio testSalt (FS.mkFsPath []) + $ \_session1 -> pure () + -- Use resultsVar to record which session functions were called + resultsVar <- newIORef [] + withOpenSession + (mkSessionOpenModeTracer resultsVar) hfs hbio + testSalt (FS.mkFsPath []) + $ \_session2 -> pure () + results <- readIORef resultsVar + -- Check that we first called openSession, then restoreSession + pure $ results === ["Restore", "Open"] + where + testSalt = 6 + +-- | A tracer that records session open, session new, and session restore +-- messages in a mutable variable. +mkSessionOpenModeTracer :: IORef [String] -> Tracer IO LSMTreeTrace +mkSessionOpenModeTracer var = Tracer $ emit $ \case + TraceOpenSession{} -> modifyIORef var ("Open" :) + TraceNewSession{} -> modifyIORef var ("New" :) + TraceRestoreSession{} -> modifyIORef var ("Restore" :) + _ -> pure () + +{------------------------------------------------------------------------------- + Session: happy path +-------------------------------------------------------------------------------} + +prop_newSession_restoreSession_happyPath :: + Positive (Small Int) + -> V.Vector (Key, Value) + -> Property +prop_newSession_restoreSession_happyPath (Positive (Small bufferSize)) ins = + ioProperty $ + withTempIOHasBlockIO "prop_newSession_restoreSession_happyPath" $ \hfs hbio -> do + withNewSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \session1 -> + withTableWith conf session1 $ \(table :: Table IO Key Value Blob) -> do + inserts table $ V.map (\(k, v) -> (k, v, Nothing)) ins + saveSnapshot "snap" "KeyValueBlob" table + withRestoreSession nullTracer hfs hbio (FS.mkFsPath []) $ \session2 -> + withTableFromSnapshot session2 "snap" "KeyValueBlob" + $ \(_ :: Table IO Key Value Blob) -> pure () + where + testSalt = 6 + conf = defaultTableConfig { + confWriteBufferAlloc = AllocNumEntries bufferSize + } + +{------------------------------------------------------------------------------- + Session: missing session directory 
+-------------------------------------------------------------------------------} + +prop_sessionDirDoesNotExist :: NewOrRestore -> Property +prop_sessionDirDoesNotExist newOrRestore = + ioProperty $ + withTempIOHasBlockIO "prop_sessionDirDoesNotExist" $ \hfs hbio -> do + result <- try @SessionDirDoesNotExistError $ + withNewSessionOrRestoreSession + newOrRestore + nullTracer hfs hbio testSalt (FS.mkFsPath ["missing-dir"]) + $ \_session -> pure () + pure + $ counterexample + ("Expecting an ErrSessionDirDoesNotExist error, but got: " ++ show result) + $ case result of + Left ErrSessionDirDoesNotExist{} -> True + _ -> False + where + testSalt = 6 + +{------------------------------------------------------------------------------- + Session: session directory already locked +-------------------------------------------------------------------------------} + +prop_sessionDirLocked :: NewOrRestore -> Property +prop_sessionDirLocked newOrRestore = + ioProperty $ + withTempIOHasBlockIO "prop_sessionDirLocked" $ \hfs hbio -> do + result <- + withNewSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \_session1 -> do + try @SessionDirLockedError $ + withNewSessionOrRestoreSession + newOrRestore + nullTracer hfs hbio testSalt (FS.mkFsPath []) + $ \_session2 -> pure () + pure + $ counterexample + ("Expecting an ErrSessionDirLocked error, but got: " ++ show result) + $ case result of + Left ErrSessionDirLocked{} -> True + _ -> False + where + testSalt = 6 + +{------------------------------------------------------------------------------- + Session: malformed session directory +-------------------------------------------------------------------------------} + +prop_sessionDirCorrupted :: NewOrRestore -> Property +prop_sessionDirCorrupted newOrRestore = + ioProperty $ + withTempIOHasBlockIO "sessionDirCorrupted" $ \hfs hbio -> do + FS.createDirectory hfs (FS.mkFsPath ["unexpected-directory"]) + result <- try @SessionDirCorruptedError $ + withNewSessionOrRestoreSession + 
newOrRestore
+          nullTracer hfs hbio testSalt (FS.mkFsPath [])
+          $ \_session -> pure ()
+      pure
+        $ counterexample
+            ("Expecting an ErrSessionDirCorrupted error, but got: " ++ show result)
+        $ case result of
+            Left ErrSessionDirCorrupted{} -> True
+            _ -> False
+  where
+    testSalt = 6
+
+{-------------------------------------------------------------------------------
+  Session: salt
+-------------------------------------------------------------------------------}
+
+-- | When we call 'openSession' on an existing session directory, then the salt
+-- value we pass in is ignored and the actual salt is restored from a metadata
+-- file instead. This property verifies that we indeed ignore the salt value by
+-- checking that lookups return the right results, which wouldn't happen if the
+-- wrong salt was used.
+--
+-- NOTE: this only tests with /positive/ lookups, i.e., lookups for keys that
+-- are known to exist in the tables.
+prop_goodAndBadSessionSalt ::
+     Positive (Small Int)
+  -> V.Vector (Key, Value)
+  -> Property
+prop_goodAndBadSessionSalt (Positive (Small bufferSize)) ins =
+    checkCoverage $
+    ioProperty $
+    withTempIOHasBlockIO "prop_sessionSalt" $ \hfs hbio -> do
+      -- Open a session and create a snapshot for some arbitrary table contents
+      withOpenSession nullTracer hfs hbio goodSalt sessionDir $ \session ->
+        withTableWith conf session $ \(table :: Table IO Key Value Void) -> do
+          inserts table $ V.map (\(k, v) -> (k, v, Nothing)) insWithoutDupKeys
+          saveSnapshot "snap" "KeyValueBlob" table
+
+      -- Determine the expected results of key lookups
+      let
+        expectedValues :: V.Vector (Maybe Value)
+        expectedValues = V.map (Just .
snd) insWithoutDupKeys + + -- Open the session using the good salt, open the snapshot, perform lookups + goodSaltLookups <- + withOpenSession nullTracer hfs hbio goodSalt sessionDir $ \session -> + withTableFromSnapshot session "snap" "KeyValueBlob" $ \(table :: Table IO Key Value Void) -> do + lookups table $ V.map fst insWithoutDupKeys + + -- Determine the result of key lookups using the good salt + let + goodSaltValues :: V.Vector (Maybe Value) + goodSaltValues = V.map getValue goodSaltLookups + + -- Open the session using a bad salt, open the snapshot, perform lookups + badSaltLookups <- + withOpenSession nullTracer hfs hbio badSalt sessionDir $ \session -> + withTableFromSnapshot session "snap" "KeyValueBlob" $ \(table :: Table IO Key Value Void) -> do + lookups table $ V.map fst insWithoutDupKeys + + -- Determine the result of key lookups using a bad salt + let + badSaltValues :: V.Vector (Maybe Value) + badSaltValues = V.map getValue badSaltLookups + + pure $ + tabulate "number of keys" [ showRangesOf 10 (V.length insWithoutDupKeys) ] $ + -- Regardless of whether the salt we passed to 'openSession' was a good + -- or bad salt, the lookup results are correct. + expectedValues === badSaltValues .&&. + expectedValues === goodSaltValues + where + -- Duplicate keys in inserts make the property more complicated, because + -- keys that are inserted /earlier/ (towards the head of the vector) are + -- overridden by keys that are inserted /later/ (towards the tail of the + -- vector). 
So, we remove duplicate keys instead + insWithoutDupKeys :: V.Vector (Key, Value) + insWithoutDupKeys = VA.nubBy (compare `on` fst) ins + + goodSalt :: Salt + goodSalt = 17 + + badSalt :: Salt + badSalt = 19 + + sessionDir = FS.mkFsPath [] + + conf = defaultTableConfig { + confWriteBufferAlloc = AllocNumEntries bufferSize + } diff --git a/test/Test/Database/LSMTree/Generators.hs b/test/Test/Database/LSMTree/Generators.hs index 10bcdd474..7be339533 100644 --- a/test/Test/Database/LSMTree/Generators.hs +++ b/test/Test/Database/LSMTree/Generators.hs @@ -14,6 +14,7 @@ import Database.LSMTree.Extras.MergingTreeData import Database.LSMTree.Extras.ReferenceImpl import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef (BlobSpan) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry import qualified Database.LSMTree.Internal.Index as Index import qualified Database.LSMTree.Internal.MergingRun as MR @@ -108,6 +109,9 @@ tests = testGroup "Test.Database.LSMTree.Generators" [ ] ] +testSalt :: Bloom.Salt +testSalt = 4 + runParams :: Index.IndexType -> RunBuilder.RunParams runParams indexType = RunBuilder.RunParams { @@ -162,7 +166,7 @@ prop_withRunDoesntLeak hfs hbio rd = do let path = FS.mkFsPath ["something-1"] let fsPaths = RunFsPaths path (RunNumber 0) FS.createDirectory hfs path - withRunAt hfs hbio (runParams indexType) fsPaths rd $ \_run -> do + withRunAt hfs hbio testSalt (runParams indexType) fsPaths rd $ \_run -> do pure (QC.property True) prop_withMergingRunDoesntLeak :: @@ -175,7 +179,7 @@ prop_withMergingRunDoesntLeak hfs hbio mrd = do let path = FS.mkFsPath ["something-2"] FS.createDirectory hfs path counter <- newUniqCounter 0 - withMergingRun hfs hbio resolveVal (runParams indexType) path counter mrd $ + withMergingRun hfs hbio resolveVal testSalt (runParams indexType) path counter mrd $ \_mr -> do pure (QC.property True) @@ -191,7 +195,7 @@ prop_withMergingTreeDoesntLeak hfs hbio mrd = do 
let path = FS.mkFsPath ["something-3"] FS.createDirectory hfs path counter <- newUniqCounter 0 - withMergingTree hfs hbio resolveVal (runParams indexType) path counter mrd $ + withMergingTree hfs hbio resolveVal testSalt (runParams indexType) path counter mrd $ \_tree -> do pure (QC.property True) diff --git a/test/Test/Database/LSMTree/Internal.hs b/test/Test/Database/LSMTree/Internal.hs index cd0a28d17..dfaf33e2c 100644 --- a/test/Test/Database/LSMTree/Internal.hs +++ b/test/Test/Database/LSMTree/Internal.hs @@ -4,20 +4,14 @@ module Test.Database.LSMTree.Internal (tests) where -import Control.Concurrent.Class.MonadMVar (MonadMVar) -import Control.Concurrent.Class.MonadSTM (MonadSTM) -import Control.Exception -import Control.Monad.Class.MonadThrow (MonadMask) -import Control.Monad.Primitive (PrimMonad) import Control.Tracer -import Data.Bifunctor (Bifunctor (..)) import Data.Coerce (coerce) import qualified Data.Map.Strict as Map import Data.Maybe (isJust, mapMaybe) import qualified Data.Vector as V -import Data.Word import Database.LSMTree.Extras.Generators () import Database.LSMTree.Internal.BlobRef +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.Serialise @@ -25,25 +19,21 @@ import Database.LSMTree.Internal.Unsafe import qualified System.FS.API as FS import Test.QuickCheck import Test.Tasty -import Test.Tasty.HUnit import Test.Tasty.QuickCheck import Test.Util.FS tests :: TestTree tests = testGroup "Test.Database.LSMTree.Internal" [ - testGroup "Session" [ - testProperty "newSession" newSession - , testProperty "restoreSession" restoreSession - , testProperty "sessionDirLocked" sessionDirLocked - , testCase "sessionDirCorrupted" sessionDirCorrupted - , testCase "sessionDirDoesNotExist" sessionDirDoesNotExist - ] - , testGroup "Cursor" [ + testGroup "Cursor" [ testProperty "prop_roundtripCursor" $ withMaxSuccess 500 $ 
prop_roundtripCursor ] ] + +testSalt :: Bloom.Salt +testSalt = 4 + testTableConfig :: TableConfig testTableConfig = defaultTableConfig { -- Write buffer size is small on purpose, so that the test actually @@ -51,75 +41,6 @@ testTableConfig = defaultTableConfig { confWriteBufferAlloc = AllocNumEntries 3 } -newSession :: - Positive (Small Int) - -> V.Vector (Word64, Entry Word64 Word64) - -> Property -newSession (Positive (Small bufferSize)) es = - ioProperty $ - withTempIOHasBlockIO "newSession" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \session -> - withTable session conf (updates const es') - where - conf = testTableConfig { - confWriteBufferAlloc = AllocNumEntries bufferSize - } - es' = fmap (bimap serialiseKey (bimap serialiseValue serialiseBlob)) es - -restoreSession :: - Positive (Small Int) - -> V.Vector (Word64, Entry Word64 Word64) - -> Property -restoreSession (Positive (Small bufferSize)) es = - ioProperty $ - withTempIOHasBlockIO "restoreSession" $ \hfs hbio -> do - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \session1 -> - withTable session1 conf (updates const es') - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \session2 -> - withTable session2 conf (updates const es') - where - conf = testTableConfig { - confWriteBufferAlloc = AllocNumEntries bufferSize - } - es' = fmap (bimap serialiseKey (bimap serialiseValue serialiseBlob)) es - -sessionDirLocked :: Property -sessionDirLocked = ioProperty $ - withTempIOHasBlockIO "sessionDirLocked" $ \hfs hbio -> do - bracket (openSession nullTracer hfs hbio (FS.mkFsPath [])) closeSession $ \_sesh1 -> - bracket (try @SessionDirLockedError $ openSession nullTracer hfs hbio (FS.mkFsPath [])) tryCloseSession $ \case - Left (ErrSessionDirLocked _dir) -> pure () - x -> assertFailure $ "Opening a session twice in the same directory \ - \should fail with an ErrSessionDirLocked error, but \ - \it returned this instead: " <> showLeft "Session" x - -sessionDirCorrupted :: Assertion 
-sessionDirCorrupted = - withTempIOHasBlockIO "sessionDirCorrupted" $ \hfs hbio -> do - FS.createDirectory hfs (FS.mkFsPath ["unexpected-directory"]) - bracket (try @SessionDirCorruptedError (openSession nullTracer hfs hbio (FS.mkFsPath []))) tryCloseSession $ \case - Left (ErrSessionDirCorrupted _dir) -> pure () - x -> assertFailure $ "Restoring a session in a directory with a wrong \ - \layout should fail with a ErrSessionDirCorrupted, but \ - \it returned this instead: " <> showLeft "Session" x - -sessionDirDoesNotExist :: Assertion -sessionDirDoesNotExist = withTempIOHasBlockIO "sessionDirDoesNotExist" $ \hfs hbio -> do - bracket (try @SessionDirDoesNotExistError (openSession nullTracer hfs hbio (FS.mkFsPath ["missing-dir"]))) tryCloseSession $ \case - Left (ErrSessionDirDoesNotExist _dir) -> pure () - x -> assertFailure $ "Opening a session in a non-existent directory should \ - \fail with a ErrSessionDirDoesNotExist error, but it \ - \returned this instead: " <> showLeft "Session" x - --- | Internal helper: close a session opened with 'try'. -tryCloseSession :: (MonadMask m, MonadSTM m, MonadMVar m, PrimMonad m) => Either e (Session m h) -> m () -tryCloseSession = either (const $ pure ()) closeSession - -showLeft :: Show a => String -> Either a b -> String -showLeft x = \case - Left e -> show e - Right _ -> x - -- | Check that reading from a cursor returns exactly the entries that have -- been inserted into the table. 
Roughly: -- @@ -140,7 +61,7 @@ prop_roundtripCursor :: -> Property prop_roundtripCursor lb ub kops = ioProperty $ withTempIOHasBlockIO "prop_roundtripCursor" $ \hfs hbio -> do - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sesh -> do + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sesh -> do withTable sesh conf $ \t -> do updates resolve (coerce kops) t fromCursor <- withCursor resolve (toOffsetKey lb) t $ \c -> diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index b07778a87..d527f42d8 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -1,7 +1,7 @@ module Test.Database.LSMTree.Internal.BloomFilter (tests) where import Control.DeepSeq (deepseq) -import Control.Exception (displayException) +import Control.Exception (Exception (..), displayException) import Control.Monad (void) import qualified Control.Monad.IOSim as IOSim import Data.Bits ((.&.)) @@ -9,6 +9,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Builder as BS.Builder import qualified Data.ByteString.Builder.Extra as BS.Builder import qualified Data.ByteString.Lazy as LBS +import qualified Data.List as List import qualified Data.Set as Set import qualified Data.Vector as V import qualified Data.Vector.Primitive as VP @@ -25,6 +26,8 @@ import Test.Tasty.QuickCheck hiding ((.&.)) import qualified Data.BloomFilter.Blocked as Bloom import Database.LSMTree.Internal.BloomFilter +import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..), + FileFormat (..)) import Database.LSMTree.Internal.Serialise (SerialisedKey, serialiseKey) @@ -43,6 +46,9 @@ tests = testGroup "Database.LSMTree.Internal.BloomFilter" prop_bloomQueries ] +testSalt :: Bloom.Salt +testSalt = 4 + roundtrip_prop :: Positive (Small Int) -> Positive Int -> [Word64] -> Property roundtrip_prop (Positive (Small hfN)) (Positive bits) ws = counterexample 
(show bs) $ @@ -52,7 +58,7 @@ roundtrip_prop (Positive (Small hfN)) (Positive bits) ws = where sz = Bloom.BloomSize { sizeBits = limitBits bits, sizeHashes = hfN } - lhs = Bloom.create sz (\b -> mapM_ (Bloom.insert b) ws) + lhs = Bloom.create sz testSalt (\b -> mapM_ (Bloom.insert b) ws) bs = LBS.toStrict (bloomFilterToLBS lhs) limitBits :: Int -> Int @@ -61,9 +67,18 @@ limitBits b = b .&. 0xffffff prop_total_deserialisation :: BS.ByteString -> Property prop_total_deserialisation bs = case bloomFilterFromBS bs of - Left err -> label (displayException err) $ property True + Left err -> + label (mkLabel err) $ property True Right bf -> label "parsed successfully" $ property $ bf `deepseq` True + where + mkLabel err = case err of + IOSim.FailureException e + | Just (ErrFileFormatInvalid fsep FormatBloomFilterFile msg) <- fromException e + , let msg' = "Expected salt does not match actual salt" + , msg' `List.isPrefixOf` msg + -> displayException $ ErrFileFormatInvalid fsep FormatBloomFilterFile msg' + _ -> displayException err -- | Write the bytestring to a file in the mock file system and then use -- 'bloomFilterFromFile'. @@ -77,7 +92,7 @@ bloomFilterFromBS bs = void $ FS.hPutAllStrict hfs h bs -- deserialise from file FS.withFile hfs file FS.ReadMode $ \h -> - bloomFilterFromFile hfs h + bloomFilterFromFile hfs testSalt h -- Length is in bits. A large length would require significant amount of -- memory, so we make it 'Small'. @@ -116,7 +131,7 @@ prop_bloomQueries :: FPR -> Property prop_bloomQueries (FPR fpr) filters keys = let filters' :: [Bloom SerialisedKey] - filters' = map (Bloom.fromList (Bloom.policyForFPR fpr) + filters' = map (Bloom.fromList (Bloom.policyForFPR fpr) testSalt . 
map (\(Small k) -> serialiseKey k)) filters @@ -152,5 +167,4 @@ prop_bloomQueries (FPR fpr) filters keys = referenceResults === map (\(RunIxKeyIx rix kix) -> (rix, kix)) - (VP.toList (bloomQueries (V.fromList filters') - (V.fromList keys'))) + (VP.toList (bloomQueries testSalt (V.fromList filters') (V.fromList keys'))) diff --git a/test/Test/Database/LSMTree/Internal/Lookup.hs b/test/Test/Database/LSMTree/Internal/Lookup.hs index 035f61113..e1b000fcb 100644 --- a/test/Test/Database/LSMTree/Internal/Lookup.hs +++ b/test/Test/Database/LSMTree/Internal/Lookup.hs @@ -123,6 +123,9 @@ runParams indexType = runParamIndex = indexType } +testSalt :: Bloom.Salt +testSalt = 4 + {------------------------------------------------------------------------------- Models -------------------------------------------------------------------------------} @@ -141,7 +144,7 @@ prop_bloomQueriesModel dats = blooms = fmap snd3 runs lookupss = concatMap lookups $ getSmallList dats real = map (\(RunIxKeyIx rix kix) -> (rix,kix)) $ VP.toList $ - bloomQueries (V.fromList blooms) (V.fromList lookupss) + bloomQueries testSalt (V.fromList blooms) (V.fromList lookupss) model = bloomQueriesModel (fmap Map.keysSet runDatas) lookupss -- | A bloom filter is a probablistic set that can return false positives, but @@ -204,6 +207,7 @@ prop_prepLookupsModel dats = real === model ks = V.fromList lookupss (kixs, ioops) <- prepLookups arena + testSalt (V.map snd3 rs) (V.map thrd3 rs) (V.map fst3 rs) ks @@ -245,6 +249,7 @@ prop_inMemRunLookupAndConstruction dat = (kixs, ioops) <- let r = V.singleton (runWithHandle run) in prepLookups arena + testSalt (V.map snd3 r) (V.map thrd3 r) (V.map fst3 r) @@ -332,6 +337,7 @@ prop_roundtripFromWriteBufferLookupIO (SmallList dats) = hbio arenaManager resolveV + testSalt wb wbblobs runs (V.map (\(DeRef r) -> Run.runFilter r) runs) @@ -374,7 +380,7 @@ withWbAndRuns hfs hbio indexType (wbdat:rundats) action = let wb = WB.fromMap wbkops let rds = map (RunData . 
runData) rundats counter <- newUniqCounter 1 - withRuns hfs hbio (runParams indexType) (FS.mkFsPath []) counter rds $ + withRuns hfs hbio testSalt (runParams indexType) (FS.mkFsPath []) counter rds $ \runs -> action wb wbblobs (V.fromList runs) @@ -443,7 +449,7 @@ mkTestRun dat = (rawPages, b, ic) -- one-shot run construction (pages, b, ic) = runST $ do - racc <- Run.new nentries (RunAllocFixed 10) Index.Ordinary + racc <- Run.new nentries (RunAllocFixed 10) testSalt Index.Ordinary let kops = Map.toList dat psopss <- traverse (uncurry (Run.addKeyOp racc)) kops (mp, _ , b', ic', _) <- Run.unsafeFinalise racc diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index 038987b45..7dbca8c1d 100644 --- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -67,6 +67,9 @@ runParams = runParamIndex = Index.Ordinary } +testSalt :: Bloom.Salt +testSalt = 4 + -- | Creating multiple runs from write buffers and merging them leads to the -- same run as merging the write buffers and creating a run. -- @@ -81,13 +84,13 @@ prop_MergeDistributes :: prop_MergeDistributes fs hbio mergeType stepSize (SmallList rds) = do let path = FS.mkFsPath [] counter <- newUniqCounter 0 - withRuns fs hbio runParams path counter rds' $ \runs -> do + withRuns fs hbio testSalt runParams path counter rds' $ \runs -> do let stepsNeeded = sum (map (Map.size . unRunData) rds) fsPathLhs <- RunFsPaths path . 
uniqueToRunNumber <$> incrUniqCounter counter (stepsDone, lhs) <- mergeRuns fs hbio mergeType stepSize fsPathLhs runs let runData = RunData $ mergeWriteBuffers mergeType $ fmap unRunData rds' - withRun fs hbio runParams path counter runData $ \rhs -> do + withRun fs hbio testSalt runParams path counter runData $ \rhs -> do (lhsSize, lhsFilter, lhsIndex, lhsKOps, lhsKOpsFileContent, lhsBlobFileContent) <- getRunContent lhs @@ -159,7 +162,7 @@ prop_AbortMerge fs hbio mergeType (Positive stepSize) (SmallList wbs) = do let path = FS.mkFsPath [] let pathOut = RunFsPaths path (RunNumber 0) counter <- newUniqCounter 1 - withRuns fs hbio runParams path counter wbs' $ \runs -> do + withRuns fs hbio testSalt runParams path counter wbs' $ \runs -> do mergeToClose <- makeInProgressMerge pathOut runs traverse_ Merge.abort mergeToClose @@ -172,7 +175,7 @@ prop_AbortMerge fs hbio mergeType (Positive stepSize) (SmallList wbs) = do wbs' = fmap serialiseRunData wbs makeInProgressMerge path runs = - Merge.new fs hbio runParams mergeType resolveVal + Merge.new fs hbio testSalt runParams mergeType resolveVal path (V.fromList runs) >>= \case Nothing -> pure Nothing -- not in progress Just merge -> do @@ -199,11 +202,11 @@ mergeRuns :: [Ref (Run.Run IO h)] -> IO (Int, Ref (Run.Run IO h)) mergeRuns fs hbio mergeType (Positive stepSize) fsPath runs = do - Merge.new fs hbio runParams mergeType resolveVal + Merge.new fs hbio testSalt runParams mergeType resolveVal fsPath (V.fromList runs) >>= \case Just m -> Merge.stepsToCompletionCounted m stepSize - Nothing -> (,) 0 <$> unsafeCreateRunAt fs hbio runParams fsPath + Nothing -> (,) 0 <$> unsafeCreateRunAt fs hbio testSalt runParams fsPath (RunData Map.empty) type SerialisedEntry = Entry.Entry SerialisedValue SerialisedBlob diff --git a/test/Test/Database/LSMTree/Internal/MergingTree.hs b/test/Test/Database/LSMTree/Internal/MergingTree.hs index 01b15d391..25ea5659c 100644 --- a/test/Test/Database/LSMTree/Internal/MergingTree.hs +++ 
b/test/Test/Database/LSMTree/Internal/MergingTree.hs @@ -19,6 +19,7 @@ import Database.LSMTree.Extras.MergingTreeData import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.Arena (newArenaManager) import Database.LSMTree.Internal.BlobRef +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry (Entry) import qualified Database.LSMTree.Internal.Entry as Entry import qualified Database.LSMTree.Internal.Index as Index @@ -61,6 +62,9 @@ runParams = runParamIndex = Index.Ordinary } +testSalt :: Bloom.Salt +testSalt = 4 + -- | Check that the merging tree constructor functions preserve the property -- that if the inputs are obviously empty, the output is also obviously empty. -- @@ -126,7 +130,7 @@ prop_lookupTree :: prop_lookupTree hfs hbio keys mtd = do let path = FS.mkFsPath [] counter <- newUniqCounter 0 - withMergingTree hfs hbio resolveVal runParams path counter mtd $ \tree -> do + withMergingTree hfs hbio resolveVal testSalt runParams path counter mtd $ \tree -> do arenaManager <- newArenaManager withActionRegistry $ \reg -> do res <- fetchBlobs =<< lookupsIO reg arenaManager tree @@ -169,6 +173,7 @@ prop_lookupTree hfs hbio keys mtd = do hbio mgr resolveVal + testSalt runs (fmap (\(DeRef r) -> Run.runFilter r) runs) (fmap (\(DeRef r) -> Run.runIndex r) runs) @@ -229,7 +234,7 @@ prop_supplyCredits hfs hbio threshold credits mtd = do FS.createDirectory hfs setupPath FS.createDirectory hfs (FS.mkFsPath ["active"]) counter <- newUniqCounter 0 - withMergingTree hfs hbio resolveVal runParams setupPath counter mtd $ \tree -> do + withMergingTree hfs hbio resolveVal testSalt runParams setupPath counter mtd $ \tree -> do (MR.MergeDebt initialDebt, _) <- remainingMergeDebt tree props <- for credits $ \c -> do (MR.MergeDebt debt, _) <- remainingMergeDebt tree @@ -238,7 +243,7 @@ prop_supplyCredits hfs hbio threshold credits mtd = do pure $ property True else do leftovers <- - supplyCredits hfs hbio resolveVal 
runParams threshold root counter tree c + supplyCredits hfs hbio resolveVal testSalt runParams threshold root counter tree c (MR.MergeDebt debt', _) <- remainingMergeDebt tree pure $ -- semi-useful, but mainly tells us in how many steps we supplied diff --git a/test/Test/Database/LSMTree/Internal/Readers.hs b/test/Test/Database/LSMTree/Internal/Readers.hs index f74878693..cc8b8b247 100644 --- a/test/Test/Database/LSMTree/Internal/Readers.hs +++ b/test/Test/Database/LSMTree/Internal/Readers.hs @@ -20,6 +20,7 @@ import Database.LSMTree.Extras (showPowersOf) import Database.LSMTree.Extras.Generators (BiasedKey (..)) import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry import qualified Database.LSMTree.Internal.Index as Index (IndexType (Ordinary)) import Database.LSMTree.Internal.Readers (HasMore (Drained, HasMore), @@ -74,6 +75,9 @@ runParams = runParamIndex = Index.Ordinary } +testSalt :: Bloom.Salt +testSalt = 4 + -------------------------------------------------------------------------------- type SerialisedEntry = Entry SerialisedValue SerialisedBlob @@ -453,7 +457,7 @@ runIO act lu = case act of wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hfs wbblobs)) kops pure $ Readers.FromWriteBuffer wb wbblobs FromRunData rd -> do - r <- unsafeCreateRun hfs hbio runParams (FS.mkFsPath []) counter $ serialiseRunData rd + r <- unsafeCreateRun hfs hbio testSalt runParams (FS.mkFsPath []) counter $ serialiseRunData rd pure $ Readers.FromRun r FromReadersData ty rds -> do Readers.FromReaders ty <$> traverse (fromSourceData hfs hbio counter) rds diff --git a/test/Test/Database/LSMTree/Internal/Run.hs b/test/Test/Database/LSMTree/Internal/Run.hs index 80e551c02..b703b037f 100644 --- a/test/Test/Database/LSMTree/Internal/Run.hs +++ b/test/Test/Database/LSMTree/Internal/Run.hs @@ -16,6 +16,7 @@ import Data.Maybe (fromJust) import qualified 
Data.Primitive.ByteArray as BA import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry import qualified Database.LSMTree.Internal.Index as Index (IndexType (Ordinary)) @@ -98,6 +99,9 @@ runParams = runParamIndex = Index.Ordinary } +testSalt :: Bloom.Salt +testSalt = 4 + -- | Runs in IO, with a real file system. testSingleInsert :: FilePath -> SerialisedKey -> SerialisedValue -> Maybe SerialisedBlob -> IO () testSingleInsert sessionRoot key val mblob = @@ -106,7 +110,7 @@ testSingleInsert sessionRoot key val mblob = -- flush write buffer let e = case mblob of Nothing -> Insert val; Just blob -> InsertWithBlob val blob wb = Map.singleton key e - withRunAt fs hbio runParams (simplePath 42) (RunData wb) $ \_ -> do + withRunAt fs hbio testSalt runParams (simplePath 42) (RunData wb) $ \_ -> do -- check all files have been written let activeDir = sessionRoot bsKOps <- BS.readFile (activeDir "42.keyops") @@ -188,7 +192,7 @@ prop_WriteNumEntries :: -> RunData SerialisedKey SerialisedValue SerialisedBlob -> IO Property prop_WriteNumEntries fs hbio wb@(RunData m) = - withRunAt fs hbio runParams (simplePath 42) wb' $ \run -> do + withRunAt fs hbio testSalt runParams (simplePath 42) wb' $ \run -> do let !runSize = Run.size run pure . 
labelRunData wb' $ @@ -206,13 +210,14 @@ prop_WriteAndOpen :: -> RunData SerialisedKey SerialisedValue SerialisedBlob -> IO Property prop_WriteAndOpen fs hbio wb = - withRunAt fs hbio runParams (simplePath 1337) (serialiseRunData wb) $ \written -> + withRunAt fs hbio testSalt runParams (simplePath 1337) (serialiseRunData wb) $ \written -> withActionRegistry $ \reg -> do let paths = Run.runFsPaths written paths' = paths { runNumber = RunNumber 17} hardLinkRunFiles fs hbio reg paths paths' loaded <- openFromDisk fs hbio (runParamCaching runParams) - (runParamIndex runParams) (simplePath 17) + (runParamIndex runParams) testSalt + (simplePath 17) Run.size written @=? Run.size loaded withRef written $ \written' -> @@ -268,7 +273,7 @@ prop_WriteRunEqWriteWriteBuffer hfs hbio rd = do let rdPaths = simplePath 1337 let rdKOpsFile = Paths.runKOpsPath rdPaths let rdBlobFile = Paths.runBlobPath rdPaths - withRunAt hfs hbio runParams rdPaths srd $ \_run -> do + withRunAt hfs hbio testSalt runParams rdPaths srd $ \_run -> do -- Serialise run data as write buffer: let f (SerialisedValue x) (SerialisedValue y) = SerialisedValue (x <> y) let inPaths = WrapRunFsPaths $ simplePath 1111 diff --git a/test/Test/Database/LSMTree/Internal/RunAcc.hs b/test/Test/Database/LSMTree/Internal/RunAcc.hs index 131c354ff..36c49b92b 100644 --- a/test/Test/Database/LSMTree/Internal/RunAcc.hs +++ b/test/Test/Database/LSMTree/Internal/RunAcc.hs @@ -47,6 +47,9 @@ tests = testGroup "Database.LSMTree.Internal.RunAcc" [ ] where largerTestCases = localOption (QuickCheckMaxSize 500) . 
localOption (QuickCheckTests 10000) +testSalt :: Bloom.Salt +testSalt = 4 + {------------------------------------------------------------------------------- RunAcc -------------------------------------------------------------------------------} @@ -57,7 +60,7 @@ test_singleKeyRun = do !e = InsertWithBlob (SerialisedValue' (VP.fromList [48, 19])) (BlobSpan 55 77) (addRes, (mp, mc, b, ic, _numEntries)) <- stToIO $ do - racc <- new (NumEntries 1) (RunAllocFixed 10) Index.Ordinary + racc <- new (NumEntries 1) (RunAllocFixed 10) testSalt Index.Ordinary addRes <- addKeyOp racc k e (addRes,) <$> unsafeFinalise racc diff --git a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs index d52920858..69caa50e1 100644 --- a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs +++ b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs @@ -54,6 +54,9 @@ tests = testGroup "Database.LSMTree.Internal.RunBloomFilterAlloc" [ prop_arbitraryAndShrinkPreserveInvariant noTags numEntriesInvariant ] +testSalt :: Bloom.Salt +testSalt = 4 + {------------------------------------------------------------------------------- Properties -------------------------------------------------------------------------------} @@ -294,7 +297,7 @@ type BloomMaker a = [a] -> Bloom a -- filter according to 'RunBloomFilterAlloc'. 
mkBloomFromAlloc :: Hashable a => RunBloomFilterAlloc -> BloomMaker a mkBloomFromAlloc alloc xs = runST $ do - mb <- newMBloom n alloc + mb <- newMBloom n alloc testSalt mapM_ (Bloom.insert mb) xs Bloom.unsafeFreeze mb where diff --git a/test/Test/Database/LSMTree/Internal/RunBuilder.hs b/test/Test/Database/LSMTree/Internal/RunBuilder.hs index 5d6d52457..7f59dfc28 100644 --- a/test/Test/Database/LSMTree/Internal/RunBuilder.hs +++ b/test/Test/Database/LSMTree/Internal/RunBuilder.hs @@ -4,6 +4,7 @@ module Test.Database.LSMTree.Internal.RunBuilder (tests) where import Control.Monad.Class.MonadThrow import Data.Foldable (traverse_) +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry (NumEntries (..)) import qualified Database.LSMTree.Internal.Index as Index import Database.LSMTree.Internal.Paths (RunFsPaths (..)) @@ -50,13 +51,16 @@ runParams = runParamIndex = Index.Ordinary } +testSalt :: Bloom.Salt +testSalt = 4 + -- | 'new' in an existing directory should be successful. prop_newInExistingDir :: HasFS IO h -> FS.HasBlockIO IO h -> IO Property prop_newInExistingDir hfs hbio = do let runDir = FS.mkFsPath ["a", "b", "c"] FS.createDirectoryIfMissing hfs True runDir bracket - (try (RunBuilder.new hfs hbio runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0))) + (try (RunBuilder.new hfs hbio testSalt runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0))) (traverse_ RunBuilder.close) $ pure . \case Left e@FS.FsError{} -> counterexample ("expected a success, but got: " <> show e) $ property False @@ -67,7 +71,7 @@ prop_newInNonExistingDir :: HasFS IO h -> FS.HasBlockIO IO h -> IO Property prop_newInNonExistingDir hfs hbio = do let runDir = FS.mkFsPath ["a", "b", "c"] bracket - (try (RunBuilder.new hfs hbio runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0))) + (try (RunBuilder.new hfs hbio testSalt runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0))) (traverse_ RunBuilder.close) $ pure . 
\case Left FS.FsError{} -> property True Right _ -> @@ -81,10 +85,10 @@ prop_newTwice :: HasFS IO h -> FS.HasBlockIO IO h -> IO Property prop_newTwice hfs hbio = do let runDir = FS.mkFsPath [] bracket - (RunBuilder.new hfs hbio runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0)) + (RunBuilder.new hfs hbio testSalt runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0)) RunBuilder.close $ \_ -> bracket - (try (RunBuilder.new hfs hbio runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0))) + (try (RunBuilder.new hfs hbio testSalt runParams (RunFsPaths runDir (RunNumber 17)) (NumEntries 0))) (traverse_ RunBuilder.close) $ pure . \case Left FS.FsError{} -> property True Right _ -> diff --git a/test/Test/Database/LSMTree/Internal/RunReader.hs b/test/Test/Database/LSMTree/Internal/RunReader.hs index d77d6bf86..a80709b5c 100644 --- a/test/Test/Database/LSMTree/Internal/RunReader.hs +++ b/test/Test/Database/LSMTree/Internal/RunReader.hs @@ -11,6 +11,7 @@ import qualified Data.Map as Map import Database.LSMTree.Extras.Generators (BiasedKey (..)) import Database.LSMTree.Extras.RunData import Database.LSMTree.Internal.BlobRef +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry (Entry) import qualified Database.LSMTree.Internal.Index as Index (IndexType (Ordinary)) import Database.LSMTree.Internal.Run (Run) @@ -73,6 +74,9 @@ runParams = runParamIndex = Index.Ordinary } +testSalt :: Bloom.Salt +testSalt = 4 + -- | Creating a run from a write buffer and reading from the run yields the -- original elements. -- @@ -89,7 +93,7 @@ prop_readAtOffset :: -> Maybe BiasedKey -> IO Property prop_readAtOffset fs hbio rd offsetKey = - withRunAt fs hbio runParams (simplePath 42) rd' $ \run -> do + withRunAt fs hbio testSalt runParams (simplePath 42) rd' $ \run -> do rhs <- readKOps (coerce offsetKey) run pure . 
labelRunData rd' $ @@ -133,7 +137,7 @@ prop_readAtOffsetIdempotence :: -> Maybe BiasedKey -> IO Property prop_readAtOffsetIdempotence fs hbio rd offsetKey = - withRunAt fs hbio runParams (simplePath 42) rd' $ \run -> do + withRunAt fs hbio testSalt runParams (simplePath 42) rd' $ \run -> do lhs <- readKOps (coerce offsetKey) run rhs <- readKOps (coerce offsetKey) run @@ -157,7 +161,7 @@ prop_readAtOffsetReadHead :: -> RunData BiasedKey SerialisedValue SerialisedBlob -> IO Property prop_readAtOffsetReadHead fs hbio rd = - withRunAt fs hbio runParams (simplePath 42) rd' $ \run -> do + withRunAt fs hbio testSalt runParams (simplePath 42) rd' $ \run -> do lhs <- readKOps Nothing run rhs <- case lhs of [] -> pure [] diff --git a/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs b/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs index 7d0a3bee6..6f1138221 100644 --- a/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs +++ b/test/Test/Database/LSMTree/Internal/Snapshot/FS.hs @@ -11,6 +11,7 @@ import qualified Data.Vector as V import Data.Word import Database.LSMTree.Extras (showPowersOf10) import Database.LSMTree.Extras.Generators () +import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Config.Override (OverrideDiskCachePolicy (..)) @@ -159,6 +160,9 @@ instance Arbitrary TestErrors where Snapshot corruption -------------------------------------------------------------------------------} +testSalt :: Bloom.Salt +testSalt = 4 + -- TODO: an alternative to generating a Choice a priori is to run the monadic -- code in @PropertyM (IOSim s)@, and then we can do quantification inside the -- monadic property using @pick@. 
This complicates matters, however, because @@ -173,7 +177,7 @@ prop_flipSnapshotBit :: prop_flipSnapshotBit (Positive (Small bufferSize)) es pickFileBit = runSimOrThrow $ withSimHasBlockIO propNoOpenHandles MockFS.empty $ \hfs hbio _fsVar -> - withSession nullTracer hfs hbio root $ \s -> + withOpenSession nullTracer hfs hbio testSalt root $ \s -> withTable s conf $ \t -> do -- Create a table, populate it, and create a snapshot updates resolve es' t diff --git a/test/Test/Database/LSMTree/StateMachine.hs b/test/Test/Database/LSMTree/StateMachine.hs index 109d98434..ac1dd67b3 100644 --- a/test/Test/Database/LSMTree/StateMachine.hs +++ b/test/Test/Database/LSMTree/StateMachine.hs @@ -106,7 +106,7 @@ import NoThunks.Class import Prelude hiding (init) import System.Directory (removeDirectoryRecursive) import System.FS.API (FsError (..), HasFS, MountPoint (..), mkFsPath) -import System.FS.BlockIO.API (HasBlockIO, defaultIOCtxParams) +import System.FS.BlockIO.API (HasBlockIO, close, defaultIOCtxParams) import System.FS.BlockIO.IO (ioHasBlockIO) import System.FS.IO (HandleIO, ioHasFS) import qualified System.FS.Sim.Error as FSSim @@ -287,16 +287,17 @@ instance Arbitrary R.FencePointerIndexType where propLockstep_RealImpl_RealFS_IO :: Tracer IO R.LSMTreeTrace + -> QC.Fixed R.Salt -> Actions (Lockstep (ModelState R.Table)) -> QC.Property -propLockstep_RealImpl_RealFS_IO tr = +propLockstep_RealImpl_RealFS_IO tr (QC.Fixed salt) = runActionsBracket (Proxy @(ModelState R.Table)) CheckCleanup CheckRefs acquire release - (\r (_, session, errsVar, logVar) -> do + (\r (_, session, _, errsVar, logVar) -> do faultsVar <- newMutVar [] let env :: RealEnv R.Table IO @@ -313,18 +314,19 @@ propLockstep_RealImpl_RealFS_IO tr = ) tagFinalState' where - acquire :: IO (FilePath, Class.Session R.Table IO, StrictTVar IO Errors, StrictTVar IO ErrorsLog) + acquire :: IO (FilePath, Class.Session R.Table IO, HasBlockIO IO HandleIO, StrictTVar IO Errors, StrictTVar IO ErrorsLog) acquire = do (tmpDir, 
hasFS, hasBlockIO) <- createSystemTempDirectory "prop_lockstepIO_RealImpl_RealFS" - session <- R.openSession tr hasFS hasBlockIO (mkFsPath []) + session <- R.openSession tr hasFS hasBlockIO salt (mkFsPath []) errsVar <- newTVarIO FSSim.emptyErrors logVar <- newTVarIO emptyLog - pure (tmpDir, session, errsVar, logVar) + pure (tmpDir, session, hasBlockIO, errsVar, logVar) - release :: (FilePath, Class.Session R.Table IO, StrictTVar IO Errors, StrictTVar IO ErrorsLog) -> IO Property - release (tmpDir, !session, _, _) = do + release :: (FilePath, Class.Session R.Table IO, HasBlockIO IO HandleIO, StrictTVar IO Errors, StrictTVar IO ErrorsLog) -> IO Property + release (tmpDir, !session, hasBlockIO, _, _) = do !prop <- propNoThunks session R.closeSession session + close hasBlockIO removeDirectoryRecursive tmpDir pure prop @@ -333,14 +335,15 @@ propLockstep_RealImpl_MockFS_IO :: -> CheckCleanup -> CheckFS -> CheckRefs + -> QC.Fixed R.Salt -> Actions (Lockstep (ModelState R.Table)) -> QC.Property -propLockstep_RealImpl_MockFS_IO tr cleanupFlag fsFlag refsFlag = +propLockstep_RealImpl_MockFS_IO tr cleanupFlag fsFlag refsFlag (QC.Fixed salt) = runActionsBracket (Proxy @(ModelState R.Table)) cleanupFlag refsFlag - (acquire_RealImpl_MockFS tr) + (acquire_RealImpl_MockFS tr salt) (release_RealImpl_MockFS fsFlag) (\r (_, session, errsVar, logVar) -> do faultsVar <- newMutVar [] @@ -372,14 +375,15 @@ propLockstep_RealImpl_MockFS_IOSim :: -> CheckCleanup -> CheckFS -> CheckRefs + -> QC.Fixed R.Salt -> Actions (Lockstep (ModelState R.Table)) -> QC.Property -propLockstep_RealImpl_MockFS_IOSim tr cleanupFlag fsFlag refsFlag actions = +propLockstep_RealImpl_MockFS_IOSim tr cleanupFlag fsFlag refsFlag (QC.Fixed salt) actions = monadicIOSim_ prop where prop :: forall s. 
PropertyM (IOSim s) Property prop = do - (fsVar, session, errsVar, logVar) <- QC.run (acquire_RealImpl_MockFS tr) + (fsVar, session, errsVar, logVar) <- QC.run (acquire_RealImpl_MockFS tr salt) faultsVar <- QC.run $ newMutVar [] let env :: RealEnv R.Table (IOSim s) @@ -405,13 +409,14 @@ propLockstep_RealImpl_MockFS_IOSim tr cleanupFlag fsFlag refsFlag actions = acquire_RealImpl_MockFS :: R.IOLike m => Tracer m R.LSMTreeTrace + -> R.Salt -> m (StrictTMVar m MockFS, Class.Session R.Table m, StrictTVar m Errors, StrictTVar m ErrorsLog) -acquire_RealImpl_MockFS tr = do +acquire_RealImpl_MockFS tr salt = do fsVar <- newTMVarIO MockFS.empty errsVar <- newTVarIO FSSim.emptyErrors logVar <- newTVarIO emptyLog (hfs, hbio) <- simErrorHasBlockIOLogged fsVar errsVar logVar - session <- R.openSession tr hfs hbio (mkFsPath []) + session <- R.openSession tr hfs hbio salt (mkFsPath []) pure (fsVar, session, errsVar, logVar) -- | Flag that turns on\/off file system checks. @@ -445,7 +450,7 @@ getAllSessionTables :: => R.Session m -> m [SomeTable m] getAllSessionTables (R.Types.Session s) = do - R.Unsafe.withOpenSession s $ \seshEnv -> do + R.Unsafe.withKeepSessionOpen s $ \seshEnv -> do ts <- readMVar (R.Unsafe.sessionOpenTables seshEnv) pure ((\x -> SomeTable (R.Types.Table x)) <$> Map.elems ts) @@ -454,7 +459,7 @@ getAllSessionCursors :: => R.Session m -> m [SomeCursor m] getAllSessionCursors (R.Types.Session s) = - R.Unsafe.withOpenSession s $ \seshEnv -> do + R.Unsafe.withKeepSessionOpen s $ \seshEnv -> do cs <- readMVar (R.Unsafe.sessionOpenCursors seshEnv) pure ((\x -> SomeCursor (R.Types.Cursor x)) <$> Map.elems cs) @@ -517,7 +522,7 @@ handleSessionDirLockedError = \case handleSessionDirCorruptedError :: SessionDirCorruptedError -> Model.Err handleSessionDirCorruptedError = \case - ErrSessionDirCorrupted _dir -> Model.ErrSessionDirCorrupted + ErrSessionDirCorrupted _reason _dir -> Model.ErrSessionDirCorrupted handleSessionClosedError :: SessionClosedError -> Model.Err 
handleSessionClosedError = \case diff --git a/test/Test/Database/LSMTree/StateMachine/DL.hs b/test/Test/Database/LSMTree/StateMachine/DL.hs index 82750a560..3e709117c 100644 --- a/test/Test/Database/LSMTree/StateMachine/DL.hs +++ b/test/Test/Database/LSMTree/StateMachine/DL.hs @@ -58,7 +58,7 @@ prop_example = -- Run the example ... forAllDL dl_example $ -- ... with the given lockstep property - propLockstep_RealImpl_MockFS_IO tr CheckCleanup CheckFS CheckRefs + propLockstep_RealImpl_MockFS_IO tr CheckCleanup CheckFS CheckRefs (QC.Fixed 17) where -- To enable tracing, use something like @show `contramap` stdoutTracer@ -- instead @@ -142,11 +142,11 @@ test_noSwallowedExceptions = -- exceptions safe. When we generate injected errors for these errors by default -- (in @arbitraryWithVars@), the swallowed exception assertion automatically -- runs for those actions as well. -prop_noSwallowedExceptions :: Property -prop_noSwallowedExceptions = forAllDL dl_noSwallowExceptions runner +prop_noSwallowedExceptions :: QC.Fixed Salt -> Property +prop_noSwallowedExceptions salt = forAllDL dl_noSwallowExceptions runner where -- disable all file system and reference checks - runner = propLockstep_RealImpl_MockFS_IO tr NoCheckCleanup NoCheckFS NoCheckRefs + runner = propLockstep_RealImpl_MockFS_IO tr NoCheckCleanup NoCheckFS NoCheckRefs salt tr = nullTracer -- | Run any number of actions using the default actions generator, and finally diff --git a/test/Test/Database/LSMTree/UnitTests.hs b/test/Test/Database/LSMTree/UnitTests.hs index 33ee0f5ee..bd2397322 100644 --- a/test/Test/Database/LSMTree/UnitTests.hs +++ b/test/Test/Database/LSMTree/UnitTests.hs @@ -39,10 +39,13 @@ tests = , testCase "unit_union_blobref_invalidation" unit_union_blobref_invalidation ] +testSalt :: R.Salt +testSalt = 4 + unit_blobs :: (String -> IO ()) -> Assertion unit_blobs info = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> do + withOpenSession 
nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> do table <- newTable @_ @ByteString @(ResolveAsFirst ByteString) @ByteString sess inserts table [("key1", ResolveAsFirst "value1", Just "blob1")] @@ -60,7 +63,7 @@ unit_blobs info = unit_closed_table :: Assertion unit_closed_table = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> do + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> do table <- newTable @_ @Key1 @Value1 @Blob1 sess inserts table [(Key1 42, Value1 42, Nothing)] r1 <- lookups table [Key1 42] @@ -75,7 +78,7 @@ unit_closed_table = unit_closed_cursor :: Assertion unit_closed_cursor = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> do + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> do table <- newTable @_ @Key1 @Value1 @Blob1 sess inserts table [(Key1 42, Value1 42, Nothing), (Key1 43, Value1 43, Nothing)] cur <- newCursor table @@ -93,7 +96,7 @@ unit_closed_cursor = unit_twoTableTypes :: Assertion unit_twoTableTypes = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> do + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> do let tableConfig = defaultTableConfig { confWriteBufferAlloc = AllocNumEntries 10 @@ -128,7 +131,7 @@ unit_twoTableTypes = unit_snapshots :: Assertion unit_snapshots = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> do + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> do table <- newTable @_ @Key1 @Value1 @Blob1 sess assertException (ErrSnapshotDoesNotExist snap2) $ @@ -156,7 +159,7 @@ unit_snapshots = unit_unions_1 :: Assertion unit_unions_1 = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) 
$ \sess -> withTable @_ @Key1 @Value1 @Blob1 sess $ \table -> do inserts table [(Key1 17, Value1 42, Nothing)] @@ -181,7 +184,7 @@ unit_unions_1 = unit_union_credits :: Assertion unit_union_credits = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> withTable @_ @Key1 @Value1 @Blob1 sess $ \table -> do inserts table [(Key1 17, Value1 42, Nothing)] @@ -197,7 +200,7 @@ unit_union_credits = unit_union_credit_0 :: Assertion unit_union_credit_0 = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> withTable @_ @Key1 @Value1 @Blob1 sess $ \table -> do inserts table [(Key1 17, Value1 42, Nothing)] @@ -219,7 +222,7 @@ unit_union_credit_0 = unit_union_blobref_invalidation :: Assertion unit_union_blobref_invalidation = withTempIOHasBlockIO "test" $ \hfs hbio -> - withSession nullTracer hfs hbio (FS.mkFsPath []) $ \sess -> do + withOpenSession nullTracer hfs hbio testSalt (FS.mkFsPath []) $ \sess -> do t1 <- newTableWith @_ @Key1 @Value1 @Blob1 config sess for_ ([0..99] :: [Word64]) $ \i -> inserts t1 [(Key1 i, Value1 i, Just (Blob1 i))]