|
| 1 | +-- | Constant-space sorting. |
| 2 | +-- |
| 3 | +-- This module provides a routine for sorting events in constant-space via |
| 4 | +-- on-disk merge sort. |
| 5 | +module GHC.RTS.Events.Sort |
| 6 | + ( GHC.RTS.Events.Sort.sortEvents |
| 7 | + ) where |
| 8 | + |
| 9 | +import Data.Traversable |
| 10 | +import Data.Coerce |
| 11 | +import Data.Function (on) |
| 12 | +import Data.List (sortBy, minimumBy) |
| 13 | +import Data.Maybe |
| 14 | +import Data.Foldable (toList) |
| 15 | +import System.IO |
| 16 | +import System.Directory |
| 17 | +import Prelude |
| 18 | + |
| 19 | +import Data.Binary.Put as P |
| 20 | +import qualified Data.ByteString.Lazy as BSL |
| 21 | +import qualified Data.Sequence as S |
| 22 | + |
| 23 | +import GHC.RTS.Events |
| 24 | +import GHC.RTS.Events.Binary (putEventLog) |
| 25 | + |
| 26 | +type SortedChunk = FilePath |
| 27 | + |
| 28 | +-- | The chunk size which the input eventlog is broken into (in events). This |
| 29 | +-- determines the upper-bound on memory usage during the sorting process. |
| 30 | +-- |
| 31 | +-- This value is a reasonable trade-off between memory and computation, |
| 32 | +-- requiring approximately 100MBytes while sorting a "typical" eventlog. |
| 33 | +cHUNK_SIZE :: Int |
| 34 | +cHUNK_SIZE = 500*1000 |
| 35 | + |
| 36 | +-- | Maximum number of chunks to merge at once. Determined by the largest |
| 37 | +-- number of file descriptors we can safely open at once. |
| 38 | +fAN_IN :: Int |
| 39 | +fAN_IN = 256 |
| 40 | + |
| 41 | +newtype OnTime = OnTime Event |
| 42 | + |
| 43 | +instance Ord OnTime where |
| 44 | + compare = coerce (compare `on` evTime) |
| 45 | + |
| 46 | +instance Eq OnTime where |
| 47 | + (==) = coerce ((==) `on` evTime) |
| 48 | + |
| 49 | +-- | @sortEvents tmpDir outPath eventlog@ sorts @eventlog@ via on-disk merge |
| 50 | +-- sort, using @tmpDir@ for intermediate data. The sorted eventlog is written |
| 51 | +-- to @eventlog@. |
| 52 | +sortEvents :: FilePath -- ^ temporary directory |
| 53 | + -> FilePath -- ^ output eventlog file path |
| 54 | + -> EventLog -- ^ eventlog to sort |
| 55 | + -> IO () |
| 56 | +sortEvents _tmpDir _outPath (EventLog _ (Data [])) = fail "sortEvents: no events" |
| 57 | +sortEvents tmpDir outPath (EventLog hdr (Data events0)) = do |
| 58 | + chunks <- toSortedChunks events0 |
| 59 | + hdl <- openBinaryFile outPath WriteMode |
| 60 | + mergeChunks' hdl chunks |
| 61 | + hClose hdl |
| 62 | + return () |
| 63 | + where |
| 64 | + toSortedChunks :: [Event] -> IO (S.Seq SortedChunk) |
| 65 | + toSortedChunks = |
| 66 | + fmap S.fromList |
| 67 | + . mapM (writeTempChunk . sortEventsInMem) |
| 68 | + . chunksOf cHUNK_SIZE |
| 69 | + |
| 70 | + mergeChunks' :: Handle -> S.Seq SortedChunk -> IO () |
| 71 | + mergeChunks' destFile chunks |
| 72 | + | S.null chunks = |
| 73 | + fail "sortEvents: this can't happen" |
| 74 | + | S.length chunks <= fAN_IN = do |
| 75 | + events <- mapM readChunk chunks |
| 76 | + let sorted = mergeSort $ toList (coerce events :: S.Seq [OnTime]) |
| 77 | + writeChunk destFile (coerce sorted) |
| 78 | + | otherwise = do |
| 79 | + chunksss <- flip mapM (nChunks fAN_IN chunks) $ \fps -> do |
| 80 | + (fp, hdl) <- createTempChunk |
| 81 | + mergeChunks' hdl fps |
| 82 | + mapM_ removeFile fps |
| 83 | + return fp |
| 84 | + mergeChunks' destFile (S.fromList chunksss) |
| 85 | + |
| 86 | + readChunk :: SortedChunk -> IO [Event] |
| 87 | + readChunk fp = do |
| 88 | + result <- readEventLogFromFile fp |
| 89 | + case result of |
| 90 | + Left err -> fail $ "sortEvents: error reading chunk: " ++ fp ++ ": " ++ err |
| 91 | + Right (EventLog _ (Data events)) -> return events |
| 92 | + |
| 93 | + createTempChunk :: IO (FilePath, Handle) |
| 94 | + createTempChunk = |
| 95 | + openBinaryTempFile tmpDir "chunk" |
| 96 | + |
| 97 | + writeTempChunk :: [Event] -> IO FilePath |
| 98 | + writeTempChunk evs = do |
| 99 | + (fp, hdl) <- createTempChunk |
| 100 | + writeChunk hdl evs |
| 101 | + hClose hdl |
| 102 | + return fp |
| 103 | + |
| 104 | + writeChunk :: Handle -> [Event] -> IO () |
| 105 | + writeChunk hdl events = |
| 106 | + BSL.hPutStr hdl |
| 107 | + $ P.runPut |
| 108 | + $ putEventLog |
| 109 | + $ EventLog hdr |
| 110 | + $ Data events |
| 111 | + |
| 112 | + blkMarker = Event 0 (EventBlock 0 0 0) Nothing |
| 113 | + |
| 114 | +-- | An unordered set. |
| 115 | +type Bag a = [a] |
| 116 | + |
| 117 | +-- | Break a list in chunks of the given size. |
| 118 | +chunksOf :: Int -> [a] -> [[a]] |
| 119 | +chunksOf _ [] = [] |
| 120 | +chunksOf n xs = |
| 121 | + let (ys, rest) = splitAt n xs |
| 122 | + in ys : chunksOf n rest |
| 123 | + |
| 124 | +-- | Break a 'S.Seq' into \(n\) roughly-even chunks. |
| 125 | +nChunks :: Int -> S.Seq a -> [S.Seq a] |
| 126 | +nChunks n xs0 = go xs0 |
| 127 | + where |
| 128 | + go xs = let (x,y) = S.splitAt len xs in x : go y |
| 129 | + len = S.length xs0 `div` n + 1 |
| 130 | + |
| 131 | +-- | Merge the given lists into sorted order. |
| 132 | +mergeSort :: Ord a => Bag [a] -> [a] |
| 133 | +mergeSort = go |
| 134 | + where |
| 135 | + go [] = [] |
| 136 | + go xss = |
| 137 | + case catMaybes $ mapZipper f xss of |
| 138 | + [] -> [] |
| 139 | + xs -> minimumBy (compare `on` head) xs |
| 140 | + |
| 141 | + f :: Ord a => Bag [a] -> [a] -> Maybe [a] |
| 142 | + f _ [] = Nothing |
| 143 | + f rest (x:xs) = Just $ x : go (xs : rest) |
| 144 | + |
| 145 | +mapZipper :: (Bag a -> a -> b) -> Bag a -> [b] |
| 146 | +mapZipper f = go [] |
| 147 | + where |
| 148 | + --go :: Bag a -> Bag [a] -> [b] |
| 149 | + go _prevs [] = [] |
| 150 | + go prevs (x:nexts) = |
| 151 | + f (prevs ++ nexts) x : go (x : prevs) nexts |
| 152 | + |
| 153 | +sortEventsInMem :: [Event] -> [Event] |
| 154 | +sortEventsInMem = |
| 155 | + sortBy (compare `on` evTime) |
| 156 | + |
0 commit comments