Skip to content

Commit af3d0c7

Browse files
alt-romesBodigrim
authored andcommitted
Improve deserialisation performance
Use the lower-level array-construction primitives to avoid intermediate allocations and perform much better at deserialisation. On Cabal (which uses tar for the hackage index), we observed: * Deserialisation of IntTries go from 1.5s to 200ms, with 10GB of allocations going down to roughly 600MB. * StringTable deserialization go from 700ms to 50ms, with 4GB of allocations going down to 80MB. Unfortunately, the newGenArray primitive was only introduced in array 0.5.6. Since we can't update the bound to force such a recent version of array, we implement the beToLe function using unboxed array primitives that have been long available, rather than newGenArray.
1 parent 0ce2ead commit af3d0c7

File tree

5 files changed

+125
-54
lines changed

5 files changed

+125
-54
lines changed

Codec/Archive/Tar/Index/IntTrie.hs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
22
{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-}
3+
{-# LANGUAGE MagicHash #-}
4+
{-# LANGUAGE UnboxedTuples #-}
35
{-# OPTIONS_HADDOCK hide #-}
46

57
module Codec.Archive.Tar.Index.IntTrie (
@@ -55,6 +57,9 @@ import Data.IntMap.Strict (IntMap)
5557

5658
import Data.List hiding (lookup, insert)
5759
import Data.Function (on)
60+
import GHC.IO
61+
62+
import Codec.Archive.Tar.Index.Utils
5863

5964
-- | A compact mapping from sequences of nats to nats.
6065
--
@@ -338,19 +343,11 @@ deserialise bs
338343
, let lenArr = readWord32BE bs 0
339344
lenTotal = 4 + 4 * fromIntegral lenArr
340345
, BS.length bs >= 4 + 4 * fromIntegral lenArr
341-
, let !arr = A.array (0, lenArr-1)
342-
[ (i, readWord32BE bs off)
343-
| (i, off) <- zip [0..lenArr-1] [4,8 .. lenTotal - 4] ]
344-
!bs' = BS.drop lenTotal bs
345-
= Just (IntTrie arr, bs')
346+
, let !bs_without_len = BS.unsafeDrop 4 bs
347+
!bs_remaining = BS.unsafeDrop lenTotal bs
348+
!arr = unsafePerformIO $ beToLe lenArr bs_without_len
349+
= Just (IntTrie arr, bs_remaining)
346350

347351
| otherwise
348352
= Nothing
349353

350-
readWord32BE :: BS.ByteString -> Int -> Word32
351-
readWord32BE bs i =
352-
assert (i >= 0 && i+3 <= BS.length bs - 1) $
353-
fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
354-
+ fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
355-
+ fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
356-
+ fromIntegral (BS.unsafeIndex bs (i + 3))

Codec/Archive/Tar/Index/Internal.hs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ import Codec.Archive.Tar.Read as Tar
6363
import qualified Codec.Archive.Tar.Index.StringTable as StringTable
6464
import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder)
6565
import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
66+
import Codec.Archive.Tar.Index.Utils (readWord32BE)
6667
import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder)
6768
import Codec.Archive.Tar.PackAscii
6869

@@ -496,28 +497,20 @@ deserialise bs
496497

497498
| let ver = readWord32BE bs 0
498499
, ver == 1
499-
= do let !finalOffset = readWord32BE bs 4
500-
(stringTable, bs') <- StringTable.deserialiseV1 (BS.drop 8 bs)
500+
= do let !finalOffset = readWord32BE bs 1
501+
(stringTable, bs') <- StringTable.deserialiseV1 (BS.unsafeDrop 8 bs)
501502
(intTrie, bs'') <- IntTrie.deserialise bs'
502503
return (TarIndex stringTable intTrie finalOffset, bs'')
503504

504505
| let ver = readWord32BE bs 0
505506
, ver == 2
506-
= do let !finalOffset = readWord32BE bs 4
507-
(stringTable, bs') <- StringTable.deserialiseV2 (BS.drop 8 bs)
507+
= do let !finalOffset = readWord32BE bs 1
508+
(stringTable, bs') <- StringTable.deserialiseV2 (BS.unsafeDrop 8 bs)
508509
(intTrie, bs'') <- IntTrie.deserialise bs'
509510
return (TarIndex stringTable intTrie finalOffset, bs'')
510511

511512
| otherwise = Nothing
512513

513-
readWord32BE :: BS.ByteString -> Int -> Word32
514-
readWord32BE bs i =
515-
assert (i >= 0 && i+3 <= BS.length bs - 1) $
516-
fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
517-
+ fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
518-
+ fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
519-
+ fromIntegral (BS.unsafeIndex bs (i + 3))
520-
521514
toStrict :: LBS.ByteString -> BS.ByteString
522515
toStrict = LBS.toStrict
523516

Codec/Archive/Tar/Index/StringTable.hs

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import Data.Monoid ((<>))
3737
import Control.Exception (assert)
3838

3939
import qualified Data.Array.Unboxed as A
40+
import qualified Data.Array.Base as A
4041
import Data.Array.Unboxed ((!))
4142
import qualified Data.Map.Strict as Map
4243
import Data.Map.Strict (Map)
@@ -45,6 +46,10 @@ import qualified Data.ByteString.Unsafe as BS
4546
import qualified Data.ByteString.Lazy as LBS
4647
import Data.ByteString.Builder as BS
4748
import Data.ByteString.Builder.Extra as BS (byteStringCopy)
49+
import GHC.IO (unsafePerformIO)
50+
51+
import Unsafe.Coerce (unsafeCoerce)
52+
import Codec.Archive.Tar.Index.Utils
4853

4954
-- | An efficient mapping from strings to a dense set of integers.
5055
--
@@ -169,10 +174,10 @@ deserialiseV1 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
169174
deserialiseV1 bs
170175
| BS.length bs >= 8
171176
, let lenStrs = fromIntegral (readWord32BE bs 0)
172-
lenArr = fromIntegral (readWord32BE bs 4)
177+
lenArr = fromIntegral (readWord32BE bs 1)
173178
lenTotal= 8 + lenStrs + 4 * lenArr
174179
, BS.length bs >= lenTotal
175-
, let strs = BS.take lenStrs (BS.drop 8 bs)
180+
, let strs = BS.unsafeTake lenStrs (BS.unsafeDrop 8 bs)
176181
arr = A.array (0, fromIntegral lenArr - 1)
177182
[ (i, readWord32BE bs off)
178183
| (i, off) <- zip [0 .. fromIntegral lenArr - 1]
@@ -194,41 +199,32 @@ deserialiseV2 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
194199
deserialiseV2 bs
195200
| BS.length bs >= 8
196201
, let lenStrs = fromIntegral (readWord32BE bs 0)
197-
lenArr = fromIntegral (readWord32BE bs 4)
202+
lenArr = fromIntegral (readWord32BE bs 1)
198203
lenTotal= 8 -- the two length prefixes
199204
+ lenStrs
200205
+ 4 * lenArr
201206
+(4 * (lenArr - 1)) * 2 -- offsets array is 1 longer
202207
, BS.length bs >= lenTotal
203-
, let strs = BS.take lenStrs (BS.drop 8 bs)
204-
offs = A.listArray (0, fromIntegral lenArr - 1)
205-
[ readWord32BE bs off
206-
| off <- offsets offsOff ]
207-
-- the second two arrays are 1 shorter
208-
ids = A.listArray (0, fromIntegral lenArr - 2)
209-
[ readInt32BE bs off
210-
| off <- offsets idsOff ]
211-
ixs = A.listArray (0, fromIntegral lenArr - 2)
212-
[ readInt32BE bs off
213-
| off <- offsets ixsOff ]
214-
offsOff = 8 + lenStrs
215-
idsOff = offsOff + 4 * lenArr
216-
ixsOff = idsOff + 4 * (lenArr-1)
217-
offsets from = [from,from+4 .. from + 4 * (lenArr - 1)]
208+
, let strs = BS.unsafeTake lenStrs (BS.unsafeDrop 8 bs)
209+
offs_bs = BS.unsafeDrop (8 + lenStrs) bs
210+
ids_bs = BS.unsafeDrop (lenArr * 4) offs_bs
211+
ixs_bs = BS.unsafeDrop ((lenArr - 1) * 4) ids_bs
212+
213+
castArray :: A.UArray i Word32 -> A.UArray i Int32
214+
castArray (A.UArray a b c d) = (A.UArray a b c d)
215+
216+
-- Bangs are crucial for this to work in spite of unsafePerformIO!
217+
(offs, ids, ixs) = unsafePerformIO $ do
218+
!r1 <- beToLe (fromIntegral lenArr) offs_bs
219+
!r2 <- castArray <$> beToLe (fromIntegral lenArr - 1) ids_bs
220+
!r3 <- castArray <$> beToLe (fromIntegral lenArr - 1) ixs_bs
221+
return (r1, r2, r3)
222+
223+
218224
!stringTable = StringTable strs offs ids ixs
219-
!bs' = BS.drop lenTotal bs
220-
= Just (stringTable, bs')
225+
!bs_left = BS.drop lenTotal bs
226+
= Just (stringTable, bs_left)
221227

222228
| otherwise
223229
= Nothing
224230

225-
readInt32BE :: BS.ByteString -> Int -> Int32
226-
readInt32BE bs i = fromIntegral (readWord32BE bs i)
227-
228-
readWord32BE :: BS.ByteString -> Int -> Word32
229-
readWord32BE bs i =
230-
assert (i >= 0 && i+3 <= BS.length bs - 1) $
231-
fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
232-
+ fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
233-
+ fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
234-
+ fromIntegral (BS.unsafeIndex bs (i + 3))

Codec/Archive/Tar/Index/Utils.hs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
{-# LANGUAGE MagicHash, UnboxedTuples, BangPatterns, CPP #-}
2+
module Codec.Archive.Tar.Index.Utils where
3+
4+
import Data.ByteString as BS
5+
import Control.Exception (assert)
6+
7+
import Data.ByteString.Internal (ByteString(..), unsafeWithForeignPtr, accursedUnutterablePerformIO)
8+
import GHC.Int (Int(..), Int32)
9+
import GHC.Word (Word32(..), byteSwap32)
10+
import Foreign.Storable (peek)
11+
import GHC.Ptr (castPtr, plusPtr, Ptr)
12+
import GHC.Exts
13+
import GHC.IO (IO(..), unsafePerformIO)
14+
import Data.Array.Base
15+
import Data.Array.IO.Internals (unsafeFreezeIOUArray)
16+
import Control.DeepSeq (NFData(..))
17+
import GHC.Storable
18+
import GHC.ByteOrder
19+
20+
#include <ghcautoconf.h>
21+
22+
-- | Construct a `UArray Word32 Word32` from a ByteString of 32bit big endian
23+
-- words.
24+
--
25+
-- Note: If using `unsafePerformIO`, be sure to force the result of running the
26+
-- IO action right away... (e.g. see calls to beToLe in StringTable)
27+
beToLe :: (Integral i, Num i) => i
28+
-- ^ The total array length (the number of 32bit words in the array)
29+
-> BS.ByteString
30+
-- ^ The bytestring from which the UArray is constructed.
31+
-- The content must start in the first byte! (i.e. the meta-data words
32+
-- that shouldn't be part of the array, must have been dropped already)
33+
-> IO (UArray i Word32)
34+
beToLe lenArr (BS fptr _) = do
35+
unsafeWithForeignPtr fptr $ \ptr -> do
36+
let ptr' = castPtr ptr :: Ptr Word32
37+
!(I# lenBytes#) = fromIntegral (lenArr * 4)
38+
39+
-- In spirit, the following does this, but we can't use `newGenArray`
40+
-- because it only has been introduced in later versions of array:
41+
-- @@
42+
-- unsafeFreezeIOUArray =<<
43+
-- newGenArray (0, lenArr - 1) (\offset -> do
44+
-- byteSwap32 <$> peek (ptr' `plusPtr` (fromIntegral offset * 4)))
45+
-- @@
46+
IO $ \rw0 ->
47+
case newByteArray# lenBytes# rw0 of
48+
(# rw1, mba# #) ->
49+
50+
let loop :: Int -> State# RealWorld -> State# RealWorld
51+
loop !offset st
52+
| offset < fromIntegral lenArr
53+
= let IO getV = readWord32OffPtrBE ptr' offset
54+
!(I# o#) = offset
55+
in case getV st of
56+
(# st', W32# v# #) ->
57+
loop (offset + 1) (writeWord32Array# mba# o# v# st')
58+
| otherwise = st
59+
60+
in case unsafeFreezeByteArray# mba# (loop 0 rw1) of
61+
(# rw2, ba# #) -> (# rw2, UArray 0 (lenArr - 1) (fromIntegral lenArr) ba# #)
62+
63+
{-# SPECIALISE beToLe :: Word32 -> BS.ByteString -> IO (UArray Word32 Word32) #-}
64+
{-# SPECIALISE beToLe :: Int32 -> BS.ByteString -> IO (UArray Int32 Word32) #-}
65+
66+
readInt32BE :: BS.ByteString -> Int -> Int32
67+
readInt32BE bs i = fromIntegral (readWord32BE bs i)
68+
{-# INLINE readInt32BE #-}
69+
70+
readWord32OffPtrBE :: Ptr Word32 -> Int -> IO Word32
71+
readWord32OffPtrBE ptr i = do
72+
#if defined(WORDS_BIGENDIAN)
73+
readWord32OffPtr ptr i
74+
#else
75+
byteSwap32 <$> readWord32OffPtr ptr i
76+
#endif
77+
78+
readWord32BE :: BS.ByteString -> Int -> Word32
79+
readWord32BE (BS fptr len) i =
80+
assert (i >= 0 && i+3 <= len - 1) $
81+
accursedUnutterablePerformIO $
82+
unsafeWithForeignPtr fptr $ \ptr -> do
83+
readWord32OffPtrBE (castPtr ptr) i
84+
{-# INLINE readWord32BE #-}

tar.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ library tar-internal
7575
Codec.Archive.Tar.Index.StringTable
7676
Codec.Archive.Tar.Index.IntTrie
7777
Codec.Archive.Tar.Index.Internal
78+
Codec.Archive.Tar.Index.Utils
7879

7980
other-extensions:
8081
BangPatterns

0 commit comments

Comments
 (0)