Skip to content

Commit 63be78b

Browse files
committed
Sort: Make sorting parameters configurable
1 parent 6dd43c1 commit 63be78b

File tree

1 file changed

+50
-24
lines changed

1 file changed

+50
-24
lines changed

src/GHC/RTS/Events/Sort.hs

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
-- This module provides a routine for sorting events in constant-space via
44
-- on-disk merge sort.
55
module GHC.RTS.Events.Sort
6-
( GHC.RTS.Events.Sort.sortEvents
6+
( sortEvents
7+
, sortEvents'
8+
, SortParams(..)
9+
, defaultSortParams
710
) where
811

912
import Data.Traversable
@@ -20,24 +23,11 @@ import Data.Binary.Put as P
2023
import qualified Data.ByteString.Lazy as BSL
2124
import qualified Data.Sequence as S
2225

23-
import GHC.RTS.Events
26+
import GHC.RTS.Events hiding (sortEvents)
2427
import GHC.RTS.Events.Binary (putEventLog)
2528

2629
type SortedChunk = FilePath
2730

28-
-- | The chunk size which the input eventlog is broken into (in events). This
29-
-- determines the upper-bound on memory usage during the sorting process.
30-
--
31-
-- This value is a reasonable trade-off between memory and computation,
32-
-- requiring approximately 100MBytes while sorting a "typical" eventlog.
33-
cHUNK_SIZE :: Int
34-
cHUNK_SIZE = 500*1000
35-
36-
-- | Maximum number of chunks to merge at once. Determined by the largest
37-
-- number of file descriptors we can safely open at once.
38-
fAN_IN :: Int
39-
fAN_IN = 256
40-
4131
newtype OnTime = OnTime Event
4232

4333
instance Ord OnTime where
@@ -46,39 +36,75 @@ instance Ord OnTime where
4636
instance Eq OnTime where
4737
(==) = coerce ((==) `on` evTime)
4838

39+
-- | Parameters which determine the behavior of the merge sort.
40+
data SortParams = SortParams
41+
{ -- | The chunk size which the input eventlog is broken into (in events). This
42+
-- determines the upper-bound on memory usage during the sorting process.
43+
--
44+
-- This value is a reasonable trade-off between memory and computation,
45+
-- requiring approximately 100MBytes while sorting a "typical" eventlog.
46+
chunkSize :: !Int
47+
48+
-- | Maximum number of chunks to merge at once. Determined by the largest
49+
-- number of file descriptors we can safely open at once.
50+
, maxFanIn :: !Int
51+
}
52+
53+
-- | A reasonable set of sorting parameters.
54+
defaultSortParams :: SortParams
55+
defaultSortParams =
56+
SortParams { chunkSize = 500*1000
57+
, maxFanIn = 256
58+
}
59+
4960
-- | @sortEvents tmpDir outPath eventlog@ sorts @eventlog@ via on-disk merge
5061
-- sort, using @tmpDir@ for intermediate data. The sorted eventlog is written
5162
-- to @eventlog@.
52-
sortEvents :: FilePath -- ^ temporary directory
53-
-> FilePath -- ^ output eventlog file path
54-
-> EventLog -- ^ eventlog to sort
55-
-> IO ()
56-
sortEvents _tmpDir _outPath (EventLog _ (Data [])) = fail "sortEvents: no events"
57-
sortEvents tmpDir outPath (EventLog hdr (Data events0)) = do
63+
sortEvents
64+
:: FilePath -- ^ temporary directory
65+
-> FilePath -- ^ output eventlog file path
66+
-> EventLog -- ^ eventlog to sort
67+
-> IO ()
68+
sortEvents = sortEvents' defaultSortParams
69+
70+
-- | @sortEvents' params tmpDir outPath eventlog@ sorts
71+
-- @eventlog@ via on-disk merge sort, using @tmpDir@ for
72+
-- intermediate data. The sorted eventlog is written to
73+
-- @eventlog@.
74+
sortEvents'
75+
:: SortParams
76+
-> FilePath -- ^ temporary directory
77+
-> FilePath -- ^ output eventlog file path
78+
-> EventLog -- ^ eventlog to sort
79+
-> IO ()
80+
sortEvents' _params _tmpDir _outPath (EventLog _ (Data [])) = fail "sortEvents: no events"
81+
sortEvents' params tmpDir outPath (EventLog hdr (Data events0)) = do
5882
chunks <- toSortedChunks events0
5983
hdl <- openBinaryFile outPath WriteMode
6084
mergeChunks' hdl chunks
6185
hClose hdl
6286
return ()
6387
where
88+
SortParams chunkSize fanIn = params
89+
6490
toSortedChunks :: [Event] -> IO (S.Seq SortedChunk)
6591
toSortedChunks =
6692
fmap S.fromList
6793
. mapM (writeTempChunk . sortEventsInMem)
68-
. chunksOf cHUNK_SIZE
94+
. chunksOf chunkSize
6995

7096
mergeChunks' :: Handle -> S.Seq SortedChunk -> IO ()
7197
mergeChunks' destFile chunks
7298
| S.null chunks =
7399
fail "sortEvents: this can't happen"
74-
| S.length chunks <= fAN_IN = do
100+
| S.length chunks <= fanIn = do
75101
events <- mapM readChunk chunks
76102
let sorted = mergeSort $ toList (coerce events :: S.Seq [OnTime])
77103
writeChunk destFile (coerce sorted)
78104
mapM_ removeFile chunks
79105
hClose destFile
80106
| otherwise = do
81-
chunksss <- flip mapM (nChunks fAN_IN chunks) $ \fps -> do
107+
chunksss <- flip mapM (nChunks fanIn chunks) $ \fps -> do
82108
(fp, hdl) <- createTempChunk
83109
mergeChunks' hdl fps
84110
return fp

0 commit comments

Comments
 (0)