Skip to content

Commit a320d80

Browse files
authored
Merge pull request #538 from IntersectMBO/mheinzel/fix-benchmarks
Fix micro benchmarks
2 parents e394fce + a7114d1 commit a320d80

File tree

2 files changed

+54
-54
lines changed

2 files changed

+54
-54
lines changed

bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
module Bench.Database.LSMTree.Internal.Lookup (benchmarks) where
44

5-
import Control.Exception (assert)
65
import Control.Monad
76
import Control.Monad.ST.Strict (stToIO)
87
import Control.RefCount
@@ -11,16 +10,16 @@ import Criterion.Main (Benchmark, bench, bgroup, env, envWithCleanup,
1110
import Data.Arena (ArenaManager, closeArena, newArena,
1211
newArenaManager, withArena)
1312
import Data.Bifunctor (Bifunctor (..))
13+
import Data.ByteString (ByteString)
1414
import qualified Data.List as List
1515
import Data.Map.Strict (Map)
1616
import qualified Data.Map.Strict as Map
1717
import Data.Maybe (fromMaybe)
1818
import qualified Data.Vector as V
1919
import Database.LSMTree.Extras.Orphans ()
20-
import Database.LSMTree.Extras.Random (frequency,
20+
import Database.LSMTree.Extras.Random (frequency, randomByteStringR,
2121
sampleUniformWithReplacement, uniformWithoutReplacement)
2222
import Database.LSMTree.Extras.UTxO
23-
import Database.LSMTree.Internal.BlobRef (BlobSpan (..))
2423
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
2524
import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches,
2625
intraPageLookups, lookupsIO, prepLookups)
@@ -34,6 +33,7 @@ import Database.LSMTree.Internal.Serialise
3433
import qualified Database.LSMTree.Internal.WriteBuffer as WB
3534
import qualified Database.LSMTree.Internal.WriteBufferBlobs as WBB
3635
import GHC.Exts (RealWorld)
36+
import GHC.Stack (HasCallStack)
3737
import Prelude hiding (getContents)
3838
import System.Directory (removeDirectoryRecursive)
3939
import qualified System.FS.API as FS
@@ -83,7 +83,7 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Lookup" [
8383

8484
benchLookups :: Config -> Benchmark
8585
benchLookups conf@Config{name} =
86-
withEnv $ \ ~(_dir, arenaManager, hasFS, hasBlockIO, rs, ks) ->
86+
withEnv $ \ ~(_dir, arenaManager, _hasFS, hasBlockIO, wbblobs, rs, ks) ->
8787
env ( pure ( V.map (\(DeRef r) -> Run.runFilter r) rs
8888
, V.map (\(DeRef r) -> Run.runIndex r) rs
8989
, V.map (\(DeRef r) -> Run.runKOpsFile r) rs
@@ -126,28 +126,24 @@ benchLookups conf@Config{name} =
126126
( do arena <- newArena arenaManager
127127
(rkixs, ioops) <- stToIO (prepLookups arena blooms indexes kopsFiles ks)
128128
ioress <- FS.submitIO hasBlockIO ioops
129-
wbblobs <- WBB.new hasFS (FS.mkFsPath [])
130-
pure (rkixs, ioops, ioress, arena, wbblobs)
129+
pure (rkixs, ioops, ioress, arena)
131130
)
132-
(\(_, _, _, arena, wbblobs) -> do
133-
closeArena arenaManager arena
134-
releaseRef wbblobs)
135-
(\ ~(rkixs, ioops, ioress, _, wbblobs_unused) -> do
136-
!_ <- intraPageLookups resolveV WB.empty wbblobs_unused
131+
(\(_, _, _, arena) -> closeArena arenaManager arena)
132+
(\ ~(rkixs, ioops, ioress, _) -> do
133+
!_ <- intraPageLookups resolveV WB.empty wbblobs
137134
rs ks rkixs ioops ioress
138135
pure ())
139136
-- The whole shebang: lookup preparation, doing the IO, and then
140137
-- performing intra-page-lookups. Again, we evaluate the result to
141138
-- WHNF because it is the same result that intraPageLookups produces
142139
-- (see above).
143-
, let wb_unused = WB.empty in
144-
env (WBB.new hasFS (FS.mkFsPath [])) $ \wbblobs_unused ->
145-
bench "Lookups in IO" $
140+
, bench "Lookups in IO" $
146141
whnfAppIO (\ks' -> lookupsIO hasBlockIO arenaManager resolveV
147-
wb_unused wbblobs_unused
142+
WB.empty wbblobs
148143
rs blooms indexes kopsFiles ks') ks
149144
]
150-
--TODO: consider adding benchmarks that also use the write buffer
145+
-- TODO: consider adding benchmarks that also use the write buffer
146+
-- (then we can't just use 'WB.empty', but must take it from the env)
151147
where
152148
withEnv = envWithCleanup
153149
(lookupsInBatchesEnv conf)
@@ -181,6 +177,7 @@ lookupsInBatchesEnv ::
181177
, ArenaManager RealWorld
182178
, FS.HasFS IO FS.HandleIO
183179
, FS.HasBlockIO IO FS.HandleIO
180+
, Ref (WBB.WriteBufferBlobs IO FS.HandleIO)
184181
, V.Vector (Ref (Run IO FS.HandleIO))
185182
, V.Vector SerialisedKey
186183
)
@@ -191,19 +188,19 @@ lookupsInBatchesEnv Config {..} = do
191188
(storedKeys, lookupKeys) <- lookupsEnv (mkStdGen 17) nentries npos nneg
192189
let hasFS = FS.ioHasFS (FS.MountPoint benchTmpDir)
193190
hasBlockIO <- FS.ioHasBlockIO hasFS (fromMaybe FS.defaultIOCtxParams ioctxps)
194-
let wb = WB.fromMap storedKeys
195-
fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
196-
wbblobs <- WBB.new hasFS (FS.mkFsPath [])
191+
wbblobs <- WBB.new hasFS (FS.mkFsPath ["0.wbblobs"])
192+
wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob hasFS wbblobs)) storedKeys
193+
let fsps = RunFsPaths (FS.mkFsPath []) (RunNumber 0)
197194
r <- Run.fromWriteBuffer hasFS hasBlockIO caching (RunAllocFixed 10) fsps wb wbblobs
198195
let NumEntries nentriesReal = Run.size r
199-
assert (nentriesReal == nentries) $ pure ()
200-
let npagesReal = Run.sizeInPages r
201-
assert (getNumPages npagesReal * 42 <= nentriesReal) $ pure ()
202-
assert (getNumPages npagesReal * 43 >= nentriesReal) $ pure ()
196+
assertEqual nentriesReal nentries $ pure ()
197+
-- 42 to 43 entries per page
198+
assertEqual (nentriesReal `div` getNumPages (Run.sizeInPages r)) 42 $ pure ()
203199
pure ( benchTmpDir
204200
, arenaManager
205201
, hasFS
206202
, hasBlockIO
203+
, wbblobs
207204
, V.singleton r
208205
, lookupKeys
209206
)
@@ -213,13 +210,15 @@ lookupsInBatchesCleanup ::
213210
, ArenaManager RealWorld
214211
, FS.HasFS IO FS.HandleIO
215212
, FS.HasBlockIO IO FS.HandleIO
213+
, Ref (WBB.WriteBufferBlobs IO FS.HandleIO)
216214
, V.Vector (Ref (Run IO FS.HandleIO))
217215
, V.Vector SerialisedKey
218216
)
219217
-> IO ()
220-
lookupsInBatchesCleanup (tmpDir, _arenaManager, _hasFS, hasBlockIO, rs, _) = do
218+
lookupsInBatchesCleanup (tmpDir, _arenaManager, _hasFS, hasBlockIO, wbblobs, rs, _) = do
221219
FS.close hasBlockIO
222220
forM_ rs releaseRef
221+
releaseRef wbblobs
223222
removeDirectoryRecursive tmpDir
224223

225224
-- | Generate keys to store and keys to lookup
@@ -228,8 +227,8 @@ lookupsEnv ::
228227
-> Int -- ^ Number of stored key\/operation pairs
229228
-> Int -- ^ Number of positive lookups
230229
-> Int -- ^ Number of negative lookups
231-
-> IO ( Map SerialisedKey (Entry SerialisedValue BlobSpan)
232-
, V.Vector (SerialisedKey)
230+
-> IO ( Map SerialisedKey (Entry SerialisedValue SerialisedBlob)
231+
, V.Vector SerialisedKey
233232
)
234233
lookupsEnv g nentries npos nneg = do
235234
let (g1, g') = R.split g
@@ -242,25 +241,26 @@ lookupsEnv g nentries npos nneg = do
242241
lookups <- generate $ shuffle (negLookups ++ posLookups)
243242

244243
let entries' = Map.mapKeys serialiseKey
245-
$ Map.map (bimap serialiseValue id) entries
244+
$ Map.map (bimap serialiseValue serialiseBlob) entries
246245
lookups' = V.fromList $ fmap serialiseKey lookups
247-
assert (Map.size entries' == nentries) $ pure ()
248-
assert (length lookups' == npos + nneg) $ pure ()
246+
assertEqual (Map.size entries') (nentries) $ pure ()
247+
assertEqual (length lookups') (npos + nneg) $ pure ()
249248
pure (entries', lookups')
250249

251250
-- TODO: tweak distribution
252-
randomEntry :: StdGen -> (Entry UTxOValue BlobSpan, StdGen)
251+
randomEntry :: StdGen -> (Entry UTxOValue ByteString, StdGen)
253252
randomEntry g = frequency [
254253
(20, \g' -> let (!v, !g'') = uniform g' in (Insert v, g''))
255254
, (1, \g' -> let (!v, !g'') = uniform g'
256-
(!b, !g''') = genBlobSpan g''
255+
(!b, !g''') = randomByteStringR (0, 2000) g'' -- < 2kB
257256
in (InsertWithBlob v b, g'''))
258257
, (2, \g' -> let (!v, !g'') = uniform g' in (Mupdate v, g''))
259258
, (2, \g' -> (Delete, g'))
260259
] g
261260

262-
genBlobSpan :: RandomGen g => g -> (BlobSpan, g)
263-
genBlobSpan !g =
264-
let (off, !g') = uniform g
265-
(len, !g'') = uniform g'
266-
in (BlobSpan off len, g'')
261+
-- | Assertions on the generated environment should also be checked for release
262+
-- builds, so don't use 'Control.Exception.assert'.
263+
assertEqual :: (HasCallStack, Eq a, Show a) => a -> a -> b -> b
264+
assertEqual x y
265+
| x == y = id
266+
| otherwise = error $ show x ++ " /= " ++ show y

bench/micro/Bench/Database/LSMTree/Internal/Merge.hs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
module Bench.Database.LSMTree.Internal.Merge (benchmarks) where
22

3-
import Control.Monad (when, zipWithM)
3+
import Control.Monad (zipWithM)
44
import Control.RefCount
55
import Criterion.Main (Benchmark, bench, bgroup)
66
import qualified Criterion.Main as Cr
77
import Data.Bifunctor (first)
88
import qualified Data.BloomFilter.Hash as Hash
99
import Data.Foldable (traverse_)
10+
import Data.IORef
1011
import qualified Data.List as List
1112
import qualified Data.Map.Strict as Map
1213
import Data.Maybe (fromMaybe)
@@ -19,8 +20,7 @@ import Database.LSMTree.Extras.UTxO
1920
import Database.LSMTree.Internal.Entry
2021
import Database.LSMTree.Internal.Merge (MergeType (..))
2122
import qualified Database.LSMTree.Internal.Merge as Merge
22-
import Database.LSMTree.Internal.Paths (RunFsPaths (..),
23-
pathsForRunFiles, runChecksumsPath)
23+
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
2424
import Database.LSMTree.Internal.Run (Run)
2525
import qualified Database.LSMTree.Internal.Run as Run
2626
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
@@ -114,7 +114,7 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Merge" [
114114
{ name = "insert-large-keys-x4" -- potentially long keys
115115
, nentries = (totalEntries `div` 10) `splitInto` 4
116116
, finserts = 1
117-
, randomKey = first serialiseKey . R.randomByteStringR (6, 4000)
117+
, randomKey = first serialiseKey . R.randomByteStringR (8, 4000)
118118
}
119119
, benchMerge configWord64
120120
{ name = "insert-mixed-vals-x4" -- potentially long values
@@ -201,28 +201,28 @@ benchMerge conf@Config{name} =
201201
-- thread `runs` through the environment, too.
202202
-- 2. It forces the result to normal form, which would traverse the
203203
-- whole run, so we force to WHNF ourselves and just return `()`.
204+
205+
-- We make sure to immediately close resulting runs so we don't run
206+
-- out of file handles or disk space. However, we don't want it to
207+
-- be part of the measurement, as it includes deleting files.
208+
-- Therefore, ... TODO
204209
Cr.perRunEnvWithCleanup
205-
(pure (runs, outputRunPaths))
206-
(const (removeOutputRunFiles hasFS)) $ \(runs', p) -> do
207-
!run <- merge hasFS hasBlockIO conf p runs'
208-
-- Make sure to immediately close resulting runs so we don't run
209-
-- out of file handles. Ideally this would not be measured, but at
210-
-- least it's pretty cheap.
211-
releaseRef run
210+
((runs,) <$> newIORef Nothing)
211+
(releaseRun . snd) $ \(runs', ref) -> do
212+
!run <- merge hasFS hasBlockIO conf outputRunPaths runs'
213+
writeIORef ref $ Just $ releaseRef run
212214
]
213215
where
214216
withEnv =
215217
Cr.envWithCleanup
216218
(mergeEnv conf)
217219
mergeEnvCleanup
218220

219-
-- We need to keep the input runs, but remove the freshly created one.
220-
removeOutputRunFiles :: FS.HasFS IO FS.HandleIO -> IO ()
221-
removeOutputRunFiles hasFS = do
222-
traverse_ (FS.removeFile hasFS) (pathsForRunFiles outputRunPaths)
223-
exists <- FS.doesFileExist hasFS (runChecksumsPath outputRunPaths)
224-
when exists $
225-
FS.removeFile hasFS (runChecksumsPath outputRunPaths)
221+
releaseRun :: IORef (Maybe (IO ())) -> IO ()
222+
releaseRun ref =
223+
readIORef ref >>= \case
224+
Nothing -> pure ()
225+
Just release -> release
226226

227227
merge ::
228228
FS.HasFS IO FS.HandleIO

0 commit comments

Comments
 (0)