Skip to content

Commit 320473e

Browse files
authored
Merge pull request #328 from IntersectMBO/jdral/wp8-collect-statistics
Collect `RTSStats` and `/proc/self/io` measurements in WP8 benchmark
2 parents b15520e + 28e2c34 commit 320473e

File tree

2 files changed

+154
-22
lines changed

2 files changed

+154
-22
lines changed

bench/macro/lsm-tree-bench-wp8.hs

Lines changed: 153 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ I. The benchmark should be able to run in two modes, using the
3535
batches), or fully pipelined (in batches).
3636
3737
TODO 2024-04-29 consider alternative methods of implementing key generation
38-
TODO 2024-07-05 pipelined mode needs the 'duplicate' operation. It has been
39-
tested for correctness with the model implementation.
4038
-}
4139
module Main (main) where
4240

@@ -59,7 +57,8 @@ import Data.Traversable (mapAccumL)
5957
import Data.Tuple (swap)
6058
import qualified Data.Vector as V
6159
import Data.Void (Void)
62-
import Data.Word (Word64)
60+
import Data.Word (Word32, Word64)
61+
import qualified GHC.Stats as GHC
6362
import qualified MCG
6463
import qualified Options.Applicative as O
6564
import Prelude hiding (lookup)
@@ -69,7 +68,9 @@ import qualified System.FS.BlockIO.API as FS
6968
import qualified System.FS.BlockIO.IO as FsIO
7069
import qualified System.FS.IO as FsIO
7170
import System.IO
71+
import System.Mem (performMajorGC)
7272
import Text.Printf (printf)
73+
import Text.Show.Pretty
7374

7475
-- We should be able to write this benchmark
7576
-- using only use public lsm-tree interface
@@ -191,31 +192,167 @@ runOptsP = pure RunOpts
191192
<*> O.switch (O.long "pipelined" <> O.help "Use pipelined mode")
192193

193194
-------------------------------------------------------------------------------
194-
-- clock
195+
-- measurements
195196
-------------------------------------------------------------------------------
196197

197-
timed :: IO a -> IO (a, Double)
198+
timed :: IO a -> IO (a, Double, RTSStatsDiff Triple, ProcIODiff)
198199
timed action = do
200+
!p1 <- getProcIO
201+
performMajorGC
202+
s1 <- GHC.getRTSStats
199203
t1 <- Clock.getTime Clock.Monotonic
200204
x <- action
201205
t2 <- Clock.getTime Clock.Monotonic
206+
performMajorGC
207+
s2 <- GHC.getRTSStats
208+
!p2 <- getProcIO
202209
let !t = fromIntegral (Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)) * 1e-9
203-
return (x, t)
204-
205-
timed_ :: IO () -> IO Double
210+
!s = s2 `diffRTSStats` s1
211+
!p = p2 `diffProcIO` p1
212+
printf "Running time: %.03f sec\n" t
213+
printf "/proc/self/io after vs. before: %s\n" (ppShow p)
214+
printf "RTSStats after vs. before: %s\n" (ppShow s)
215+
return (x, t, s, p)
216+
217+
timed_ :: IO () -> IO (Double, RTSStatsDiff Triple, ProcIODiff)
206218
timed_ action = do
207-
t1 <- Clock.getTime Clock.Monotonic
208-
action
209-
t2 <- Clock.getTime Clock.Monotonic
210-
return $! fromIntegral (Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)) * 1e-9
219+
((), t, sdiff, pdiff) <- timed action
220+
pure (t, sdiff, pdiff)
221+
222+
-- | This /should/ include the statistics of any child processes.
223+
getProcIO :: IO ProcIO
224+
getProcIO = do
225+
s <- readFile "/proc/self/io"
226+
let ss = concatMap words $ lines s
227+
pure $ parse ss
228+
where
229+
parse [
230+
"rchar:", rcharS
231+
, "wchar:", wcharS
232+
, "syscr:", syscrS
233+
, "syscw:", syscwS
234+
, "read_bytes:", read_bytesS
235+
, "write_bytes:", write_bytesS
236+
, "cancelled_write_bytes:", cancellled_write_bytesS
237+
] = ProcIO {
238+
rchar = read rcharS
239+
, wchar = read wcharS
240+
, syscr = read syscrS
241+
, syscw = read syscwS
242+
, read_bytes = read read_bytesS
243+
, write_bytes = read write_bytesS
244+
, cancelled_write_bytes = read cancellled_write_bytesS
245+
}
246+
parse s = error $ "getProcIO: parse of /proc/self/io failed. Input is " <> show s
247+
248+
diffProcIO :: ProcIO -> ProcIO -> ProcIODiff
249+
diffProcIO after before = ProcIODiff ProcIO {
250+
rchar = subtractOn rchar
251+
, wchar = subtractOn wchar
252+
, syscr = subtractOn syscr
253+
, syscw = subtractOn syscw
254+
, read_bytes = subtractOn read_bytes
255+
, write_bytes = subtractOn write_bytes
256+
, cancelled_write_bytes = subtractOn cancelled_write_bytes
257+
}
258+
where
259+
subtractOn f = f after - f before
260+
261+
newtype ProcIODiff = ProcIODiff ProcIO
262+
deriving stock Show
263+
264+
-- | See the @/proc/[pid]/io@ section in @man proc@
265+
data ProcIO = ProcIO {
266+
rchar :: !Integer
267+
, wchar :: !Integer
268+
, syscr :: !Integer
269+
, syscw :: !Integer
270+
, read_bytes :: !Integer
271+
, write_bytes :: !Integer
272+
, cancelled_write_bytes :: !Integer
273+
}
274+
deriving stock Show
275+
276+
-- | 'diffRTSStats a b = b - a'
277+
diffRTSStats :: GHC.RTSStats -> GHC.RTSStats -> RTSStatsDiff Triple
278+
diffRTSStats after before = RTSStatsDiff {
279+
gcs = subtractOn GHC.gcs
280+
, major_gcs = subtractOn GHC.major_gcs
281+
, allocated_bytes = subtractOn GHC.allocated_bytes
282+
, max_live_bytes = subtractOn GHC.max_live_bytes
283+
, max_large_objects_bytes = subtractOn GHC.max_large_objects_bytes
284+
, max_compact_bytes = subtractOn GHC.max_compact_bytes
285+
, max_slop_bytes = subtractOn GHC.max_slop_bytes
286+
, max_mem_in_use_bytes = subtractOn GHC.max_mem_in_use_bytes
287+
, cumulative_live_bytes = subtractOn GHC.cumulative_live_bytes
288+
, copied_bytes = subtractOn GHC.copied_bytes
289+
, par_copied_bytes = subtractOn GHC.par_copied_bytes
290+
, cumulative_par_balanced_copied_bytes = subtractOn GHC.cumulative_par_balanced_copied_bytes
291+
, init_cpu_ns = subtractOn GHC.init_cpu_ns
292+
, init_elapsed_ns = subtractOn GHC.init_elapsed_ns
293+
, mutator_cpu_ns = subtractOn GHC.mutator_cpu_ns
294+
, mutator_elapsed_ns = subtractOn GHC.mutator_elapsed_ns
295+
, gc_cpu_ns = subtractOn GHC.gc_cpu_ns
296+
, gc_elapsed_ns = subtractOn GHC.gc_elapsed_ns
297+
, cpu_ns = subtractOn GHC.cpu_ns
298+
, elapsed_ns = subtractOn GHC.elapsed_ns
299+
}
300+
where
301+
subtractOn :: Num a => (GHC.RTSStats -> a) -> Triple a
302+
subtractOn f = Triple {before = x, after = y, difference = y - x}
303+
where x = f before
304+
y = f after
305+
306+
-- | A difference datatype for 'GHC.RTSStats'.
307+
--
308+
-- Most fields, like 'GHC.gcs' or 'GHC.cpu_ns', are an aggregate sum, and so a
309+
-- diff can be computed by pointwise subtraction.
310+
--
311+
-- Others fields, like 'GHC.max_live_bytes' only record the maximum value thus
312+
-- far seen. We report a triplet containing the maximum before and after, and
313+
-- their difference.
314+
data RTSStatsDiff f = RTSStatsDiff {
315+
gcs :: !(f Word32)
316+
, major_gcs :: !(f Word32)
317+
, allocated_bytes :: !(f Word64)
318+
, max_live_bytes :: !(f Word64)
319+
, max_large_objects_bytes :: !(f Word64)
320+
, max_compact_bytes :: !(f Word64)
321+
, max_slop_bytes :: !(f Word64)
322+
, max_mem_in_use_bytes :: !(f Word64)
323+
, cumulative_live_bytes :: !(f Word64)
324+
, copied_bytes :: !(f Word64)
325+
, par_copied_bytes :: !(f Word64)
326+
, cumulative_par_balanced_copied_bytes :: !(f Word64)
327+
, init_cpu_ns :: !(f GHC.RtsTime)
328+
, init_elapsed_ns :: !(f GHC.RtsTime)
329+
, mutator_cpu_ns :: !(f GHC.RtsTime)
330+
, mutator_elapsed_ns :: !(f GHC.RtsTime)
331+
, gc_cpu_ns :: !(f GHC.RtsTime)
332+
, gc_elapsed_ns :: !(f GHC.RtsTime)
333+
, cpu_ns :: !(f GHC.RtsTime)
334+
, elapsed_ns :: !(f GHC.RtsTime)
335+
}
336+
337+
deriving stock instance Show (RTSStatsDiff Triple)
338+
339+
data Triple a = Triple {
340+
before :: !a
341+
, after :: !a
342+
, difference :: !a
343+
}
344+
deriving stock Show
211345

212346
-------------------------------------------------------------------------------
213347
-- setup
214348
-------------------------------------------------------------------------------
215349

216-
-- https://input-output-hk.github.io/fs-sim
217350
doSetup :: GlobalOpts -> SetupOpts -> IO ()
218351
doSetup gopts opts = do
352+
void $ timed_ $ doSetup' gopts opts
353+
354+
doSetup' :: GlobalOpts -> SetupOpts -> IO ()
355+
doSetup' gopts opts = do
219356
let mountPoint :: FS.MountPoint
220357
mountPoint = FS.MountPoint (rootDir gopts)
221358

@@ -250,8 +387,7 @@ doSetup gopts opts = do
250387

251388
doDryRun :: GlobalOpts -> RunOpts -> IO ()
252389
doDryRun gopts opts = do
253-
time <- timed_ $ doDryRun' gopts opts
254-
printf "Batch generation: %.03f sec\n" time
390+
void $ timed_ $ doDryRun' gopts opts
255391

256392
doDryRun' :: GlobalOpts -> RunOpts -> IO ()
257393
doDryRun' gopts opts = do
@@ -271,7 +407,6 @@ doDryRun' gopts opts = do
271407
printf "Probability of a duplicate: %5f\n" p
272408
printf "Expected number of duplicates (extreme upper bound): %5f out of %f\n" q n
273409

274-
-- TODO: open session to measure that as well.
275410
let g0 = initGen (initialSize gopts) (batchSize opts) (batchCount opts) (seed opts)
276411

277412
keysRef <- newIORef $
@@ -417,22 +552,18 @@ doRun gopts opts = do
417552
| otherwise = sequentialIterations
418553
!progressInterval = max 1 ((batchCount opts) `div` 100)
419554
madeProgress b = b `mod` progressInterval == 0
420-
time <- timed_ $
555+
(time, _, _) <- timed_ $ do
421556
benchmarkIterations
422557
(\b y -> fcheck b y >> when (madeProgress b) (putChar '.'))
423558
(initialSize gopts)
424559
(batchSize opts)
425560
(batchCount opts)
426561
(seed opts)
427562
tbl
563+
putStrLn ""
428564

429-
putStrLn ""
430-
printf "Proper run: %.03f sec\n" time
431565
let ops = batchCount opts * batchSize opts
432566
printf "Operations per second: %7.01f ops/sec\n" (fromIntegral ops / time)
433-
-- TODO: collect more statistic, save them in dry-run,
434-
-- TODO: make the results human comprehensible.
435-
436567

437568
-------------------------------------------------------------------------------
438569
-- sequential

lsm-tree.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,7 @@ benchmark lsm-tree-bench-wp8
494494
, lsm-tree:blockio-api
495495
, lsm-tree:mcg
496496
, optparse-applicative
497+
, pretty-show
497498
, vector
498499

499500
ghc-options: -rtsopts -with-rtsopts=-T -threaded

0 commit comments

Comments
 (0)