@@ -35,8 +35,6 @@ I. The benchmark should be able to run in two modes, using the
3535 batches), or fully pipelined (in batches).
3636
3737TODO 2024-04-29 consider alternative methods of implementing key generation
38- TODO 2024-07-05 pipelined mode needs the 'duplicate' operation. It has been
39- tested for correctness with the model implementation.
4038-}
4139module Main (main ) where
4240
@@ -59,7 +57,8 @@ import Data.Traversable (mapAccumL)
5957import Data.Tuple (swap )
6058import qualified Data.Vector as V
6159import Data.Void (Void )
62- import Data.Word (Word64 )
60+ import Data.Word (Word32 , Word64 )
61+ import qualified GHC.Stats as GHC
6362import qualified MCG
6463import qualified Options.Applicative as O
6564import Prelude hiding (lookup )
@@ -69,7 +68,9 @@ import qualified System.FS.BlockIO.API as FS
6968import qualified System.FS.BlockIO.IO as FsIO
7069import qualified System.FS.IO as FsIO
7170import System.IO
71+ import System.Mem (performMajorGC )
7272import Text.Printf (printf )
73+ import Text.Show.Pretty
7374
7475-- We should be able to write this benchmark
7576-- using only use public lsm-tree interface
@@ -191,31 +192,167 @@ runOptsP = pure RunOpts
191192 <*> O. switch (O. long " pipelined" <> O. help " Use pipelined mode" )
192193
193194-------------------------------------------------------------------------------
194- -- clock
195+ -- measurements
195196-------------------------------------------------------------------------------
196197
197- timed :: IO a -> IO (a , Double )
198+ timed :: IO a -> IO (a , Double , RTSStatsDiff Triple , ProcIODiff )
198199timed action = do
200+ ! p1 <- getProcIO
201+ performMajorGC
202+ s1 <- GHC. getRTSStats
199203 t1 <- Clock. getTime Clock. Monotonic
200204 x <- action
201205 t2 <- Clock. getTime Clock. Monotonic
206+ performMajorGC
207+ s2 <- GHC. getRTSStats
208+ ! p2 <- getProcIO
202209 let ! t = fromIntegral (Clock. toNanoSecs (Clock. diffTimeSpec t2 t1)) * 1e-9
203- return (x, t)
204-
205- timed_ :: IO () -> IO Double
210+ ! s = s2 `diffRTSStats` s1
211+ ! p = p2 `diffProcIO` p1
212+ printf " Running time: %.03f sec\n " t
213+ printf " /proc/self/io after vs. before: %s\n " (ppShow p)
214+ printf " RTSStats after vs. before: %s\n " (ppShow s)
215+ return (x, t, s, p)
216+
217+ timed_ :: IO () -> IO (Double , RTSStatsDiff Triple , ProcIODiff )
206218timed_ action = do
207- t1 <- Clock. getTime Clock. Monotonic
208- action
209- t2 <- Clock. getTime Clock. Monotonic
210- return $! fromIntegral (Clock. toNanoSecs (Clock. diffTimeSpec t2 t1)) * 1e-9
219+ (() , t, sdiff, pdiff) <- timed action
220+ pure (t, sdiff, pdiff)
221+
222+ -- | This /should/ include the statistics of any child processes.
223+ getProcIO :: IO ProcIO
224+ getProcIO = do
225+ s <- readFile " /proc/self/io"
226+ let ss = concatMap words $ lines s
227+ pure $ parse ss
228+ where
229+ parse [
230+ " rchar:" , rcharS
231+ , " wchar:" , wcharS
232+ , " syscr:" , syscrS
233+ , " syscw:" , syscwS
234+ , " read_bytes:" , read_bytesS
235+ , " write_bytes:" , write_bytesS
236+ , " cancelled_write_bytes:" , cancellled_write_bytesS
237+ ] = ProcIO {
238+ rchar = read rcharS
239+ , wchar = read wcharS
240+ , syscr = read syscrS
241+ , syscw = read syscwS
242+ , read_bytes = read read_bytesS
243+ , write_bytes = read write_bytesS
244+ , cancelled_write_bytes = read cancellled_write_bytesS
245+ }
246+ parse s = error $ " getProcIO: parse of /proc/self/io failed. Input is " <> show s
247+
248+ diffProcIO :: ProcIO -> ProcIO -> ProcIODiff
249+ diffProcIO after before = ProcIODiff ProcIO {
250+ rchar = subtractOn rchar
251+ , wchar = subtractOn wchar
252+ , syscr = subtractOn syscr
253+ , syscw = subtractOn syscw
254+ , read_bytes = subtractOn read_bytes
255+ , write_bytes = subtractOn write_bytes
256+ , cancelled_write_bytes = subtractOn cancelled_write_bytes
257+ }
258+ where
259+ subtractOn f = f after - f before
260+
261+ newtype ProcIODiff = ProcIODiff ProcIO
262+ deriving stock Show
263+
264+ -- | See the @/proc/[pid]/io@ section in @man proc@
265+ data ProcIO = ProcIO {
266+ rchar :: ! Integer
267+ , wchar :: ! Integer
268+ , syscr :: ! Integer
269+ , syscw :: ! Integer
270+ , read_bytes :: ! Integer
271+ , write_bytes :: ! Integer
272+ , cancelled_write_bytes :: ! Integer
273+ }
274+ deriving stock Show
275+
276+ -- | 'diffRTSStats a b = b - a'
277+ diffRTSStats :: GHC. RTSStats -> GHC. RTSStats -> RTSStatsDiff Triple
278+ diffRTSStats after before = RTSStatsDiff {
279+ gcs = subtractOn GHC. gcs
280+ , major_gcs = subtractOn GHC. major_gcs
281+ , allocated_bytes = subtractOn GHC. allocated_bytes
282+ , max_live_bytes = subtractOn GHC. max_live_bytes
283+ , max_large_objects_bytes = subtractOn GHC. max_large_objects_bytes
284+ , max_compact_bytes = subtractOn GHC. max_compact_bytes
285+ , max_slop_bytes = subtractOn GHC. max_slop_bytes
286+ , max_mem_in_use_bytes = subtractOn GHC. max_mem_in_use_bytes
287+ , cumulative_live_bytes = subtractOn GHC. cumulative_live_bytes
288+ , copied_bytes = subtractOn GHC. copied_bytes
289+ , par_copied_bytes = subtractOn GHC. par_copied_bytes
290+ , cumulative_par_balanced_copied_bytes = subtractOn GHC. cumulative_par_balanced_copied_bytes
291+ , init_cpu_ns = subtractOn GHC. init_cpu_ns
292+ , init_elapsed_ns = subtractOn GHC. init_elapsed_ns
293+ , mutator_cpu_ns = subtractOn GHC. mutator_cpu_ns
294+ , mutator_elapsed_ns = subtractOn GHC. mutator_elapsed_ns
295+ , gc_cpu_ns = subtractOn GHC. gc_cpu_ns
296+ , gc_elapsed_ns = subtractOn GHC. gc_elapsed_ns
297+ , cpu_ns = subtractOn GHC. cpu_ns
298+ , elapsed_ns = subtractOn GHC. elapsed_ns
299+ }
300+ where
301+ subtractOn :: Num a => (GHC. RTSStats -> a ) -> Triple a
302+ subtractOn f = Triple {before = x, after = y, difference = y - x}
303+ where x = f before
304+ y = f after
305+
306+ -- | A difference datatype for 'GHC.RTSStats'.
307+ --
308+ -- Most fields, like 'GHC.gcs' or 'GHC.cpu_ns', are an aggregate sum, and so a
309+ -- diff can be computed by pointwise subtraction.
310+ --
311+ -- Others fields, like 'GHC.max_live_bytes' only record the maximum value thus
312+ -- far seen. We report a triplet containing the maximum before and after, and
313+ -- their difference.
314+ data RTSStatsDiff f = RTSStatsDiff {
315+ gcs :: ! (f Word32 )
316+ , major_gcs :: ! (f Word32 )
317+ , allocated_bytes :: ! (f Word64 )
318+ , max_live_bytes :: ! (f Word64 )
319+ , max_large_objects_bytes :: ! (f Word64 )
320+ , max_compact_bytes :: ! (f Word64 )
321+ , max_slop_bytes :: ! (f Word64 )
322+ , max_mem_in_use_bytes :: ! (f Word64 )
323+ , cumulative_live_bytes :: ! (f Word64 )
324+ , copied_bytes :: ! (f Word64 )
325+ , par_copied_bytes :: ! (f Word64 )
326+ , cumulative_par_balanced_copied_bytes :: ! (f Word64 )
327+ , init_cpu_ns :: ! (f GHC. RtsTime )
328+ , init_elapsed_ns :: ! (f GHC. RtsTime )
329+ , mutator_cpu_ns :: ! (f GHC. RtsTime )
330+ , mutator_elapsed_ns :: ! (f GHC. RtsTime )
331+ , gc_cpu_ns :: ! (f GHC. RtsTime )
332+ , gc_elapsed_ns :: ! (f GHC. RtsTime )
333+ , cpu_ns :: ! (f GHC. RtsTime )
334+ , elapsed_ns :: ! (f GHC. RtsTime )
335+ }
336+
337+ deriving stock instance Show (RTSStatsDiff Triple )
338+
339+ data Triple a = Triple {
340+ before :: ! a
341+ , after :: ! a
342+ , difference :: ! a
343+ }
344+ deriving stock Show
211345
212346-------------------------------------------------------------------------------
213347-- setup
214348-------------------------------------------------------------------------------
215349
216- -- https://input-output-hk.github.io/fs-sim
217350doSetup :: GlobalOpts -> SetupOpts -> IO ()
218351doSetup gopts opts = do
352+ void $ timed_ $ doSetup' gopts opts
353+
354+ doSetup' :: GlobalOpts -> SetupOpts -> IO ()
355+ doSetup' gopts opts = do
219356 let mountPoint :: FS. MountPoint
220357 mountPoint = FS. MountPoint (rootDir gopts)
221358
@@ -250,8 +387,7 @@ doSetup gopts opts = do
250387
251388doDryRun :: GlobalOpts -> RunOpts -> IO ()
252389doDryRun gopts opts = do
253- time <- timed_ $ doDryRun' gopts opts
254- printf " Batch generation: %.03f sec\n " time
390+ void $ timed_ $ doDryRun' gopts opts
255391
256392doDryRun' :: GlobalOpts -> RunOpts -> IO ()
257393doDryRun' gopts opts = do
@@ -271,7 +407,6 @@ doDryRun' gopts opts = do
271407 printf " Probability of a duplicate: %5f\n " p
272408 printf " Expected number of duplicates (extreme upper bound): %5f out of %f\n " q n
273409
274- -- TODO: open session to measure that as well.
275410 let g0 = initGen (initialSize gopts) (batchSize opts) (batchCount opts) (seed opts)
276411
277412 keysRef <- newIORef $
@@ -417,22 +552,18 @@ doRun gopts opts = do
417552 | otherwise = sequentialIterations
418553 ! progressInterval = max 1 ((batchCount opts) `div` 100 )
419554 madeProgress b = b `mod` progressInterval == 0
420- time <- timed_ $
555+ ( time, _, _) <- timed_ $ do
421556 benchmarkIterations
422557 (\ b y -> fcheck b y >> when (madeProgress b) (putChar ' .' ))
423558 (initialSize gopts)
424559 (batchSize opts)
425560 (batchCount opts)
426561 (seed opts)
427562 tbl
563+ putStrLn " "
428564
429- putStrLn " "
430- printf " Proper run: %.03f sec\n " time
431565 let ops = batchCount opts * batchSize opts
432566 printf " Operations per second: %7.01f ops/sec\n " (fromIntegral ops / time)
433- -- TODO: collect more statistic, save them in dry-run,
434- -- TODO: make the results human comprehensible.
435-
436567
437568-------------------------------------------------------------------------------
438569-- sequential
0 commit comments