Skip to content

Commit 73ef6f3

Browse files
committed
Add GHC.RTS.Events.Sort
1 parent 1b1728a commit 73ef6f3

File tree

3 files changed

+162
-1
lines changed

3 files changed

+162
-1
lines changed

ghc-events.cabal

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,10 @@ library
8888
bytestring >= 0.10.4,
8989
array >= 0.2 && < 0.6,
9090
text >= 0.11.2.3 && < 1.3,
91-
vector >= 0.7 && < 0.13
91+
vector >= 0.7 && < 0.13,
92+
directory
9293
exposed-modules: GHC.RTS.Events,
94+
GHC.RTS.Events.Sort
9395
GHC.RTS.Events.Incremental
9496
GHC.RTS.Events.Merge
9597
GHC.RTS.Events.Analysis

src/GHC/RTS/Events/Incremental.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ module GHC.RTS.Events.Incremental
1313
, readHeader
1414
, readEvents
1515
, readEventLog
16+
17+
-- * Low-level API
18+
, mkEventDecoder
1619
) where
1720
import Control.Monad
1821
import Data.Either

src/GHC/RTS/Events/Sort.hs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
-- | Constant-space sorting.
2+
--
3+
-- This module provides a routine for sorting events in constant-space via
4+
-- on-disk merge sort.
5+
module GHC.RTS.Events.Sort
6+
( GHC.RTS.Events.Sort.sortEvents
7+
) where
8+
9+
import Data.Traversable
10+
import Data.Coerce
11+
import Data.Function (on)
12+
import Data.List (sortBy, minimumBy)
13+
import Data.Maybe
14+
import Data.Foldable (toList)
15+
import System.IO
16+
import System.Directory
17+
import Prelude
18+
19+
import Data.Binary.Put as P
20+
import qualified Data.ByteString.Lazy as BSL
21+
import qualified Data.Sequence as S
22+
23+
import GHC.RTS.Events
24+
import GHC.RTS.Events.Binary (putEventLog)
25+
26+
type SortedChunk = FilePath
27+
28+
-- | The chunk size which the input eventlog is broken into (in events). This
29+
-- determines the upper-bound on memory usage during the sorting process.
30+
--
31+
-- This value is a reasonable trade-off between memory and computation,
32+
-- requiring approximately 100MBytes while sorting a "typical" eventlog.
33+
cHUNK_SIZE :: Int
34+
cHUNK_SIZE = 500*1000
35+
36+
-- | Maximum number of chunks to merge at once. Determined by the largest
37+
-- number of file descriptors we can safely open at once.
38+
fAN_IN :: Int
39+
fAN_IN = 256
40+
41+
newtype OnTime = OnTime Event
42+
43+
instance Ord OnTime where
44+
compare = coerce (compare `on` evTime)
45+
46+
instance Eq OnTime where
47+
(==) = coerce ((==) `on` evTime)
48+
49+
-- | @sortEvents tmpDir outPath eventlog@ sorts @eventlog@ via on-disk merge
50+
-- sort, using @tmpDir@ for intermediate data. The sorted eventlog is written
51+
-- to @eventlog@.
52+
sortEvents :: FilePath -- ^ temporary directory
53+
-> FilePath -- ^ output eventlog file path
54+
-> EventLog -- ^ eventlog to sort
55+
-> IO ()
56+
sortEvents _tmpDir _outPath (EventLog _ (Data [])) = fail "sortEvents: no events"
57+
sortEvents tmpDir outPath (EventLog hdr (Data events0)) = do
58+
chunks <- toSortedChunks events0
59+
hdl <- openBinaryFile outPath WriteMode
60+
mergeChunks' hdl chunks
61+
hClose hdl
62+
return ()
63+
where
64+
toSortedChunks :: [Event] -> IO (S.Seq SortedChunk)
65+
toSortedChunks =
66+
fmap S.fromList
67+
. mapM (writeTempChunk . sortEventsInMem)
68+
. chunksOf cHUNK_SIZE
69+
70+
mergeChunks' :: Handle -> S.Seq SortedChunk -> IO ()
71+
mergeChunks' destFile chunks
72+
| S.null chunks =
73+
fail "sortEvents: this can't happen"
74+
| S.length chunks <= fAN_IN = do
75+
events <- mapM readChunk chunks
76+
let sorted = mergeSort $ toList (coerce events :: S.Seq [OnTime])
77+
writeChunk destFile (coerce sorted)
78+
| otherwise = do
79+
chunksss <- flip mapM (nChunks fAN_IN chunks) $ \fps -> do
80+
(fp, hdl) <- createTempChunk
81+
mergeChunks' hdl fps
82+
mapM_ removeFile fps
83+
return fp
84+
mergeChunks' destFile (S.fromList chunksss)
85+
86+
readChunk :: SortedChunk -> IO [Event]
87+
readChunk fp = do
88+
result <- readEventLogFromFile fp
89+
case result of
90+
Left err -> fail $ "sortEvents: error reading chunk: " ++ fp ++ ": " ++ err
91+
Right (EventLog _ (Data events)) -> return events
92+
93+
createTempChunk :: IO (FilePath, Handle)
94+
createTempChunk =
95+
openBinaryTempFile tmpDir "chunk"
96+
97+
writeTempChunk :: [Event] -> IO FilePath
98+
writeTempChunk evs = do
99+
(fp, hdl) <- createTempChunk
100+
writeChunk hdl evs
101+
hClose hdl
102+
return fp
103+
104+
writeChunk :: Handle -> [Event] -> IO ()
105+
writeChunk hdl events =
106+
BSL.hPutStr hdl
107+
$ P.runPut
108+
$ putEventLog
109+
$ EventLog hdr
110+
$ Data events
111+
112+
blkMarker = Event 0 (EventBlock 0 0 0) Nothing
113+
114+
-- | An unordered set.
115+
type Bag a = [a]
116+
117+
-- | Break a list in chunks of the given size.
118+
chunksOf :: Int -> [a] -> [[a]]
119+
chunksOf _ [] = []
120+
chunksOf n xs =
121+
let (ys, rest) = splitAt n xs
122+
in ys : chunksOf n rest
123+
124+
-- | Break a 'S.Seq' into \(n\) roughly-even chunks.
125+
nChunks :: Int -> S.Seq a -> [S.Seq a]
126+
nChunks n xs0 = go xs0
127+
where
128+
go xs = let (x,y) = S.splitAt len xs in x : go y
129+
len = S.length xs0 `div` n + 1
130+
131+
-- | Merge the given lists into sorted order.
132+
mergeSort :: Ord a => Bag [a] -> [a]
133+
mergeSort = go
134+
where
135+
go [] = []
136+
go xss =
137+
case catMaybes $ mapZipper f xss of
138+
[] -> []
139+
xs -> minimumBy (compare `on` head) xs
140+
141+
f :: Ord a => Bag [a] -> [a] -> Maybe [a]
142+
f _ [] = Nothing
143+
f rest (x:xs) = Just $ x : go (xs : rest)
144+
145+
mapZipper :: (Bag a -> a -> b) -> Bag a -> [b]
146+
mapZipper f = go []
147+
where
148+
--go :: Bag a -> Bag [a] -> [b]
149+
go _prevs [] = []
150+
go prevs (x:nexts) =
151+
f (prevs ++ nexts) x : go (x : prevs) nexts
152+
153+
sortEventsInMem :: [Event] -> [Event]
154+
sortEventsInMem =
155+
sortBy (compare `on` evTime)
156+

0 commit comments

Comments
 (0)