Skip to content

Commit 6c49199

Browse files
committed
Measure latency for batches of updates and lookups in the WP8 benchmark
We add a new `measure-batch-latency` flag that, if enabled, produces a file with datapoints about the that can be rendered, for example using `gnuplot`. In sequential mode, the datapoints consist of latency for lookups, and latency for updates separately. In pipelined mode, there are two additional datapoints for the sync latency between threads. When the flag is disabled, all functions related to latency measurements default to no-ops. These functions are always inlined, and should be optimised away nicely.
1 parent 31983c6 commit 6c49199

File tree

2 files changed

+166
-37
lines changed

2 files changed

+166
-37
lines changed

bench/macro/lsm-tree-bench-wp8.hs

Lines changed: 154 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE CPP #-}
12
{-# LANGUAGE DuplicateRecordFields #-}
23
{-# LANGUAGE OverloadedStrings #-}
34
{-# OPTIONS_GHC -Wno-orphans #-}
@@ -40,17 +41,18 @@ import Control.Concurrent (getNumCapabilities)
4041
import Control.Concurrent.Async
4142
import Control.Concurrent.MVar
4243
import Control.DeepSeq (force)
43-
import Control.Exception (evaluate)
44+
import Control.Exception
4445
import Control.Monad (forM_, unless, void, when)
4546
import Control.Monad.Trans.State.Strict (runState, state)
4647
import Control.Tracer
4748
import qualified Data.ByteString.Short as BS
4849
import qualified Data.Foldable as Fold
4950
import qualified Data.IntSet as IS
50-
import Data.IORef (modifyIORef', newIORef, readIORef, writeIORef)
51+
import Data.IORef
5152
import qualified Data.List.NonEmpty as NE
5253
import Data.Map.Strict (Map)
5354
import qualified Data.Map.Strict as Map
55+
import Data.Monoid
5456
import qualified Data.Primitive as P
5557
import qualified Data.Vector as V
5658
import Data.Void (Void)
@@ -565,30 +567,31 @@ doRun gopts opts = do
565567
name <- maybe (fail "invalid snapshot name") return $
566568
LSM.mkSnapshotName "bench"
567569

568-
LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session -> do
570+
LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session ->
571+
withLatencyHandle $ \h -> do
569572
-- open snapshot
570573
-- In checking mode we start with an empty table, since our pure
571574
-- reference version starts with empty (as it's not practical or
572575
-- necessary for testing to load the whole snapshot).
573576
tbl <- if check opts
574-
then LSM.new @IO @K @V @B session (mkTableConfigRun gopts LSM.defaultTableConfig)
575-
else LSM.open @IO @K @V @B session (mkTableConfigOverride gopts) name
577+
then LSM.new @IO @K @V @B session (mkTableConfigRun gopts LSM.defaultTableConfig)
578+
else LSM.open @IO @K @V @B session (mkTableConfigOverride gopts) name
576579

577580
-- In checking mode, compare each output against a pure reference.
578581
checkvar <- newIORef $ pureReference
579-
(initialSize gopts) (batchSize opts)
580-
(batchCount opts) (seed opts)
582+
(initialSize gopts) (batchSize opts)
583+
(batchCount opts) (seed opts)
581584
let fcheck | not (check opts) = \_ _ -> return ()
582-
| otherwise = \b y -> do
585+
| otherwise = \b y -> do
583586
(x:xs) <- readIORef checkvar
584587
unless (x == y) $
585588
fail $ "lookup result mismatch in batch " ++ show b
586589
writeIORef checkvar xs
587590

588591
let benchmarkIterations
589-
| pipelined opts = pipelinedIterations
592+
| pipelined opts = pipelinedIterations h
590593
| lookuponly opts= sequentialIterationsLO
591-
| otherwise = sequentialIterations
594+
| otherwise = sequentialIterations h
592595
!progressInterval = max 1 ((batchCount opts) `div` 100)
593596
madeProgress b = b `mod` progressInterval == 0
594597
(time, _, _) <- timed_ $ do
@@ -611,33 +614,39 @@ doRun gopts opts = do
611614
type LookupResults = V.Vector (K, LSM.LookupResult V ())
612615

613616
{-# INLINE sequentialIteration #-}
614-
sequentialIteration :: (Int -> LookupResults -> IO ())
617+
sequentialIteration :: LatencyHandle
618+
-> (Int -> LookupResults -> IO ())
615619
-> Int
616620
-> Int
617621
-> LSM.Table IO K V B
618622
-> Int
619623
-> MCG.MCG
620624
-> IO MCG.MCG
621-
sequentialIteration output !initialSize !batchSize !tbl !b !g = do
625+
sequentialIteration h output !initialSize !batchSize !tbl !b !g =
626+
withTimedBatch h b $ \tref -> do
622627
let (!g', ls, is) = generateBatch initialSize batchSize g b
623628

624629
-- lookups
625-
results <- LSM.lookups ls tbl
630+
results <- timeLatency tref $ LSM.lookups ls tbl
626631
output b (V.zip ls (fmap (fmap (const ())) results))
627632

628633
-- deletes and inserts
629-
LSM.updates is tbl
634+
_ <- timeLatency tref $ LSM.updates is tbl
630635

631636
-- continue to the next batch
632637
return g'
633638

634-
sequentialIterations :: (Int -> LookupResults -> IO ())
639+
640+
sequentialIterations :: LatencyHandle
641+
-> (Int -> LookupResults -> IO ())
635642
-> Int -> Int -> Int -> Word64
636643
-> LSM.Table IO K V B
637644
-> IO ()
638-
sequentialIterations output !initialSize !batchSize !batchCount !seed !tbl =
645+
sequentialIterations h output !initialSize !batchSize !batchCount !seed !tbl = do
646+
createGnuplotExampleFileSequential
647+
hPutHeaderSequential h
639648
void $ forFoldM_ g0 [ 0 .. batchCount - 1 ] $ \b g ->
640-
sequentialIteration output initialSize batchSize tbl b g
649+
sequentialIteration h output initialSize batchSize tbl b g
641650
where
642651
g0 = initGen initialSize batchSize batchCount seed
643652

@@ -718,7 +727,8 @@ And the initial setup looks like this:
718727
Updates (db_3) tx_2
719728
4. Sync ! (db_3, updates) 2. Sync ? (db_3, updates)
720729
-}
721-
pipelinedIteration :: (Int -> LookupResults -> IO ())
730+
pipelinedIteration :: LatencyHandle
731+
-> (Int -> LookupResults -> IO ())
722732
-> Int
723733
-> Int
724734
-> MVar (LSM.Table IO K V B, Map K (LSM.Update V B))
@@ -728,33 +738,39 @@ pipelinedIteration :: (Int -> LookupResults -> IO ())
728738
-> LSM.Table IO K V B
729739
-> Int
730740
-> IO (LSM.Table IO K V B)
731-
pipelinedIteration output !initialSize !batchSize
741+
pipelinedIteration h output !initialSize !batchSize
732742
!syncTblIn !syncTblOut
733743
!syncRngIn !syncRngOut
734-
!tbl_n !b = do
744+
!tbl_n !b =
745+
withTimedBatch h b $ \tref -> do
735746
g <- takeMVar syncRngIn
736747
let (!g', !ls, !is) = generateBatch initialSize batchSize g b
737748

738749
-- 1: perform the lookups
739-
lrs <- LSM.lookups ls tbl_n
750+
lrs <- timeLatency tref $ LSM.lookups ls tbl_n
740751

741752
-- 2. sync: receive updates and new table
742-
putMVar syncRngOut g'
743-
(tbl_n1, delta) <- takeMVar syncTblIn
753+
tbl_n1 <- timeLatency tref $ do
754+
putMVar syncRngOut g'
755+
(tbl_n1, delta) <- takeMVar syncTblIn
744756

745-
-- At this point, after syncing, our peer is guaranteed to no longer be
746-
-- using tbl_n. They used it to generate tbl_n+1 (which they gave us).
747-
LSM.close tbl_n
748-
output b $! applyUpdates delta (V.zip ls lrs)
757+
-- At this point, after syncing, our peer is guaranteed to no longer be
758+
-- using tbl_n. They used it to generate tbl_n+1 (which they gave us).
759+
LSM.close tbl_n
760+
output b $! applyUpdates delta (V.zip ls lrs)
761+
pure tbl_n1
749762

750763
-- 3. perform the inserts and report outputs (in any order)
751-
tbl_n2 <- LSM.duplicate tbl_n1
752-
LSM.updates is tbl_n2
764+
tbl_n2 <- timeLatency tref $ do
765+
tbl_n2 <- LSM.duplicate tbl_n1
766+
LSM.updates is tbl_n2
767+
pure tbl_n2
753768

754769
-- 4. sync: send the updates and new table
755-
let delta' :: Map K (LSM.Update V B)
756-
!delta' = Map.fromList (V.toList is)
757-
putMVar syncTblOut (tbl_n2, delta')
770+
timeLatency tref $ do
771+
let delta' :: Map K (LSM.Update V B)
772+
!delta' = Map.fromList (V.toList is)
773+
putMVar syncTblOut (tbl_n2, delta')
758774

759775
return tbl_n2
760776
where
@@ -767,11 +783,14 @@ pipelinedIteration output !initialSize !batchSize
767783
Nothing -> (k, fmap (const ()) lr)
768784
Just u -> (k, updateToLookupResult u)
769785

770-
pipelinedIterations :: (Int -> LookupResults -> IO ())
786+
pipelinedIterations :: LatencyHandle
787+
-> (Int -> LookupResults -> IO ())
771788
-> Int -> Int -> Int -> Word64
772789
-> LSM.Table IO K V B
773790
-> IO ()
774-
pipelinedIterations output !initialSize !batchSize !batchCount !seed tbl_0 = do
791+
pipelinedIterations h output !initialSize !batchSize !batchCount !seed tbl_0 = do
792+
createGnuplotExampleFilePipelined
793+
hPutHeaderPipelined h
775794
n <- getNumCapabilities
776795
printf "INFO: the pipelined benchmark is running with %d capabilities.\n" n
777796

@@ -794,14 +813,14 @@ pipelinedIterations output !initialSize !batchSize !batchCount !seed tbl_0 = do
794813

795814
threadA =
796815
forFoldM_ tbl_1 [ 2, 4 .. batchCount - 1 ] $ \b tbl_n ->
797-
pipelinedIteration output initialSize batchSize
816+
pipelinedIteration h output initialSize batchSize
798817
syncTblB2A syncTblA2B -- in, out
799818
syncRngB2A syncRngA2B -- in, out
800819
tbl_n b
801820

802821
threadB =
803822
forFoldM_ tbl_0 [ 1, 3 .. batchCount - 1 ] $ \b tbl_n ->
804-
pipelinedIteration output initialSize batchSize
823+
pipelinedIteration h output initialSize batchSize
805824
syncTblA2B syncTblB2A -- in, out
806825
syncRngA2B syncRngB2A -- in, out
807826
tbl_n b
@@ -862,6 +881,105 @@ batchOverlaps initialSize batchSize batchCount seed =
862881

863882
g0 = initGen initialSize batchSize batchCount seed
864883

884+
-------------------------------------------------------------------------------
885+
-- measure batch latency
886+
-------------------------------------------------------------------------------
887+
888+
_mEASURE_BATCH_LATENCY :: Bool
889+
#ifdef MEASURE_BATCH_LATENCY
890+
_mEASURE_BATCH_LATENCY = True
891+
#else
892+
_mEASURE_BATCH_LATENCY = False
893+
#endif
894+
895+
type LatencyHandle = Handle
896+
897+
type TimeRef = IORef [Integer]
898+
899+
withLatencyHandle :: (LatencyHandle -> IO a) -> IO a
900+
withLatencyHandle k
901+
| _mEASURE_BATCH_LATENCY = withFile "latency.dat" WriteMode k
902+
| otherwise = k (error "LatencyHandle: do not use")
903+
904+
{-# INLINE hPutHeaderSequential #-}
905+
hPutHeaderSequential :: LatencyHandle -> IO ()
906+
hPutHeaderSequential h
907+
| _mEASURE_BATCH_LATENCY = do
908+
hPutStrLn h "# batch number \t lookup time (ns) \t update time (ns)"
909+
| otherwise = pure ()
910+
911+
{-# INLINE createGnuplotExampleFileSequential #-}
912+
createGnuplotExampleFileSequential :: IO ()
913+
createGnuplotExampleFileSequential
914+
| _mEASURE_BATCH_LATENCY = do
915+
withFile "latency.gp" WriteMode $ \h -> do
916+
mapM_ (hPutStrLn h) [
917+
"set title \"Latency (sequential)\""
918+
, ""
919+
, "set xlabel \"Batch number\""
920+
, ""
921+
, "set logscale y"
922+
, "set ylabel \"Time (nanoseconds)\""
923+
, ""
924+
, "plot \"latency.dat\" using 1:2 title 'lookups' axis x1y1, \\"
925+
, " \"latency.dat\" using 1:3 title 'updates' axis x1y1"
926+
]
927+
| otherwise = pure ()
928+
929+
{-# INLINE hPutHeaderPipelined #-}
930+
hPutHeaderPipelined :: LatencyHandle -> IO ()
931+
hPutHeaderPipelined h
932+
| _mEASURE_BATCH_LATENCY = do
933+
hPutStr h "# batch number"
934+
hPutStr h "\t lookup time (ns) \t sync receive time (ns) "
935+
hPutStrLn h "\t update time (ns) \t sync send time (ns)"
936+
| otherwise = pure ()
937+
938+
{-# INLINE createGnuplotExampleFilePipelined #-}
939+
createGnuplotExampleFilePipelined :: IO ()
940+
createGnuplotExampleFilePipelined
941+
| _mEASURE_BATCH_LATENCY =
942+
withFile "latency.gp" WriteMode $ \h -> do
943+
mapM_ (hPutStrLn h) [
944+
"set title \"Latency (pipelined)\""
945+
, ""
946+
, "set xlabel \"Batch number\""
947+
, ""
948+
, "set logscale y"
949+
, "set ylabel \"Time (nanoseconds)\""
950+
, ""
951+
, "plot \"latency.dat\" using 1:2 title 'lookups' axis x1y1, \\"
952+
, " \"latency.dat\" using 1:3 title 'sync receive' axis x1y1, \\"
953+
, " \"latency.dat\" using 1:4 title 'updates' axis x1y1, \\"
954+
, " \"latency.dat\" using 1:5 title 'sync send' axis x1y1"
955+
]
956+
| otherwise = pure ()
957+
958+
{-# INLINE withTimedBatch #-}
959+
withTimedBatch :: LatencyHandle -> Int -> (TimeRef -> IO a) -> IO a
960+
withTimedBatch h b k
961+
| _mEASURE_BATCH_LATENCY = do
962+
tref <- newIORef []
963+
x <- k tref
964+
ts <- readIORef tref
965+
let s = shows b
966+
. getDual (foldMap (\t -> Dual (showString "\t" <> shows t)) ts)
967+
hPutStrLn h (s "")
968+
pure x
969+
| otherwise = k (error "TimeRef: do not use")
970+
971+
{-# INLINE timeLatency #-}
972+
timeLatency :: TimeRef -> IO a -> IO a
973+
timeLatency tref k
974+
| _mEASURE_BATCH_LATENCY = do
975+
t1 <- Clock.getTime Clock.Monotonic
976+
x <- k
977+
t2 <- Clock.getTime Clock.Monotonic
978+
let !t = Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)
979+
modifyIORef tref (t :)
980+
pure x
981+
| otherwise = k
982+
865983
-------------------------------------------------------------------------------
866984
-- main
867985
-------------------------------------------------------------------------------

lsm-tree.cabal

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,8 +520,19 @@ library mcg
520520
, base
521521
, primes
522522

523+
flag measure-batch-latency
524+
description:
525+
Measure the latency for individual batches of updates and lookups
526+
527+
default: False
528+
manual: True
529+
530+
common measure-batch-latency
531+
if flag(measure-batch-latency)
532+
cpp-options: -DMEASURE_BATCH_LATENCY
533+
523534
benchmark lsm-tree-bench-wp8
524-
import: language, warnings, wno-x-partial
535+
import: language, warnings, wno-x-partial, measure-batch-latency
525536
type: exitcode-stdio-1.0
526537
hs-source-dirs: bench/macro
527538
main-is: lsm-tree-bench-wp8.hs

0 commit comments

Comments
 (0)