-
Notifications
You must be signed in to change notification settings - Fork 7
Add quick flag and use unsigned unlifted Word# #198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
78bfc07
465628f
e657f4e
10a467f
aeb8068
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,9 @@ | ||
| {-# LANGUAGE BangPatterns #-} | ||
| {-# LANGUAGE CPP #-} | ||
| {-# LANGUAGE MagicHash #-} | ||
| {-# LANGUAGE PatternSynonyms #-} | ||
| {-# LANGUAGE QuasiQuotes #-} | ||
| {-# LANGUAGE TemplateHaskell #-} | ||
|
|
||
| module Data.LLVM.BitCode.BitString | ||
| ( | ||
|
|
@@ -15,17 +18,24 @@ module Data.LLVM.BitCode.BitString | |
| , NumBits, NumBytes, pattern Bits', pattern Bytes' | ||
| , bitCount, bitCount# | ||
| , bitsToBytes, bytesToBits | ||
| , bitMask | ||
| , addBitCounts | ||
| , subtractBitCounts | ||
| ) | ||
| where | ||
|
|
||
| #ifdef QUICK | ||
| import Data.Bits ( Bits ) | ||
| import Numeric ( showIntAtBase ) | ||
| #else | ||
| import Data.Bits ( bit, bitSizeMaybe, Bits ) | ||
| import GHC.Exts | ||
| import Numeric ( showIntAtBase, showHex ) | ||
| #endif | ||
| import GHC.Exts | ||
|
|
||
| import Prelude hiding (take,drop,splitAt) | ||
|
|
||
|
|
||
| ---------------------------------------------------------------------- | ||
| -- Define some convenience newtypes to clarify whether the count of bits or count | ||
| -- of bytes is being referenced, and to convert between the two. | ||
|
|
@@ -69,26 +79,27 @@ bytesToBits (NumBytes (I# n#)) = NumBits (I# (n# `uncheckedIShiftL#` 3#)) | |
|
|
||
| data BitString = BitString | ||
| { bsLength :: !NumBits | ||
| , bsData :: !Int | ||
| , bsData :: !Word | ||
| -- Note: the bsData was originally an Integer, which allows an essentially | ||
| -- unlimited size value. However, this adds some overhead to various | ||
| -- computations, and since LLVM Bitcode is unlikely to ever represent values | ||
| -- greater than the native size (64 bits) as discrete values. By changing | ||
| -- this to @Int@, the use of unboxed calculations is enabled for better | ||
| -- performance. | ||
| -- | ||
| -- The use of Int is potentially unsound because GHC only guarantees it's a | ||
| -- signed integer of at least 32-bits. However current implementations in | ||
| -- all environments where it's reasonable to use this parser have a 64-bit | ||
| -- Int implementation. This can be verified via: | ||
| -- this to @Word@ (which is verified to be 64 bits), the use of unboxed | ||
| -- calculations is enabled for better performance. | ||
| -- | ||
| -- > import Data.Bits | ||
| -- > bitSizeMaybe (maxBound :: Int) >= Just 64 | ||
| -- | ||
| -- There's no good location here to automate this check (perhaps | ||
| -- GetBits.hs:runGetBits?), which is why it isn't verified at runtime. | ||
| -- Note that Word is used instead of Word64; in GHC pre 9.x, Word64 was | ||
| -- intended to represent a 64-bit value on a 32-bit system. | ||
| } deriving (Show, Eq) | ||
|
|
||
| -- Verify a Word is 64-bits (at compile time) | ||
| $(return $ if isTrue# ((int2Word# 3#) `eqWord#` | ||
| (((int2Word# 0xF0#) `uncheckedShiftL#` 58#) | ||
| `uncheckedShiftRL#` 62#)) | ||
| then [] | ||
| else error "Word type must be 64-bits!" | ||
| ) | ||
|
Comment on lines
+94
to
+100
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm very leery about making this library completely unbuildable on 32-bit architectures. Why not just use |
||
|
|
||
|
|
||
| -- | Create an empty BitString | ||
|
|
||
| emptyBitString :: BitString | ||
|
|
@@ -100,31 +111,36 @@ emptyBitString = BitString (NumBits 0) 0 | |
| -- BitString. | ||
|
|
||
| joinBitString :: BitString -> BitString -> BitString | ||
| joinBitString (BitString (Bits' (I# szA#)) (I# a#)) | ||
| (BitString (Bits' (I# szB#)) (I# b#)) = | ||
| joinBitString (BitString (Bits' (I# szA#)) (W# a#)) | ||
| (BitString (Bits' (I# szB#)) (W# b#)) = | ||
| BitString { bsLength = NumBits (I# (szA# +# szB#)) | ||
| , bsData = I# (a# `orI#` (b# `uncheckedIShiftL#` szA#)) | ||
| , bsData = W# (a# `or#` (b# `uncheckedShiftL#` szA#)) | ||
| } | ||
|
|
||
| bitMask :: NumBits -> Word# | ||
| bitMask (Bits' (I# len#)) = | ||
| ((int2Word# 1#) `uncheckedShiftL#` len#) `minusWord#` (int2Word# 1#) | ||
|
|
||
|
|
||
| -- | Given a number of bits to take, and an @Integer@, create a @BitString@. | ||
|
|
||
| toBitString :: NumBits -> Int -> BitString | ||
| toBitString len@(Bits' (I# len#)) (I# val#) = | ||
| let !mask# = (1# `uncheckedIShiftL#` len#) -# 1# | ||
| in BitString len (I# (val# `andI#` mask#)) | ||
| toBitString :: NumBits -> Word -> BitString | ||
| toBitString len (W# val#) = BitString len (W# (val# `and#` (bitMask len))) | ||
|
|
||
|
|
||
| -- | Extract the referenced Integer value from a BitString | ||
|
|
||
| bitStringValue :: BitString -> Int | ||
| bitStringValue :: BitString -> Word | ||
| bitStringValue = bsData | ||
|
|
||
|
|
||
| -- | Extract a target (Num) value of the desired type from a BitString (using | ||
| -- fromInteger to perform the target type conversion). | ||
|
|
||
| fromBitString :: (Num a, Bits a) => BitString -> a | ||
| #ifdef QUICK | ||
| fromBitString (BitString _ i) = x | ||
| #else | ||
|
Comment on lines
+141
to
+143
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not a fan of sprinkling CPP everywhere throughout the library like this, since (1) it makes adds a lot of lexical noise, and (2) it makes it more difficult to remember to keep the two versions of every function in sync, since you have to remember to build with both Rather this going with this approach, why not do something like this instead? fromBitString :: (Num a, Bits a) => BitString -> a
fromBitString (BitString l i)
| quickMode = x
| otherwise =
case bitSizeMaybe x of
...Where: quickMode :: Bool
#if defined(QUICK)
quickMode = True
#else
quickMode = False
#endifThis way, we can continue to typecheck both versions of each function, decreasing the chances of bitrot. Since |
||
| fromBitString (BitString l i) = | ||
| case bitSizeMaybe x of | ||
| Nothing -> x | ||
|
|
@@ -137,6 +153,7 @@ fromBitString (BitString l i) = | |
| , "(mask=0x" <> showHex i ")" | ||
| , "could not be parsed into type with only", show n, "bits" | ||
| ]) | ||
| #endif | ||
| where | ||
| x = fromInteger ival -- use Num to convert the Integer to the target type | ||
| ival = toInteger i -- convert input to an Integer for ^^ | ||
|
|
@@ -164,10 +181,10 @@ take n bs@(BitString l i) | |
| -- return the remaining as a smaller BitString. | ||
|
|
||
| drop :: NumBits -> BitString -> BitString | ||
| drop !n !(BitString l i) | ||
| drop !n !(BitString l v) | ||
| | n >= l = emptyBitString | ||
| | otherwise = | ||
| let !(I# n#) = bitCount n | ||
| !(I# l#) = bitCount l | ||
| !(I# i#) = i | ||
| in BitString (NumBits (I# (l# -# n#))) (I# (i# `uncheckedIShiftRL#` n#)) | ||
| !(W# v#) = v | ||
| in BitString (NumBits (I# (l# -# n#))) (W# (v# `uncheckedShiftRL#` n#)) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,7 @@ | ||
| {-# LANGUAGE BangPatterns #-} | ||
| {-# LANGUAGE CPP #-} | ||
| {-# LANGUAGE GeneralizedNewtypeDeriving #-} | ||
| {-# LANGUAGE MagicHash #-} | ||
| {-# LANGUAGE PatternGuards #-} | ||
|
|
||
| module Data.LLVM.BitCode.Bitstream ( | ||
|
|
@@ -16,27 +19,34 @@ module Data.LLVM.BitCode.Bitstream ( | |
| , parseMetadataStringLengths | ||
| ) where | ||
|
|
||
| import Data.LLVM.BitCode.BitString as BS | ||
| import Data.LLVM.BitCode.GetBits | ||
|
|
||
| import Control.Monad ( unless, replicateM, guard ) | ||
| import Data.Bits ( Bits ) | ||
| import qualified Data.ByteString as S | ||
| import qualified Data.ByteString.Lazy as L | ||
| import Data.LLVM.BitCode.BitString as BS | ||
| import Data.LLVM.BitCode.GetBits | ||
| import qualified Data.Map as Map | ||
| import Data.Word ( Word8, Word16, Word32 ) | ||
| #ifdef QUICK | ||
| import GHC.Exts | ||
| #endif | ||
| import GHC.Word | ||
|
|
||
|
|
||
| -- Primitive Reads ------------------------------------------------------------- | ||
|
|
||
| -- | Parse a @Bool@ out of a single bit. | ||
| boolean :: GetBits Bool | ||
| boolean = ((1 :: Word8) ==) . fromBitString <$> fixed (Bits' 1) | ||
| boolean = do i <- fixedWord (Bits' 1) | ||
| return $ 1 == i | ||
|
|
||
|
|
||
| -- | Parse a Num type out of n-bits. | ||
| numeric :: (Num a, Bits a) => NumBits -> GetBits a | ||
| #ifdef QUICK | ||
| numeric n = fromInteger . toInteger <$> fixedWord n | ||
| #else | ||
| numeric n = fromBitString <$> fixed n | ||
| #endif | ||
|
|
||
|
|
||
| -- | Get a @BitString@ formatted as vbr. | ||
|
|
@@ -52,13 +62,52 @@ vbr n = loop emptyBitString | |
| then loop acc' | ||
| else return acc' | ||
|
|
||
| #ifdef QUICK | ||
| vbrWord :: NumBits -> GetBits Word | ||
| vbrWord n@(Bits' (I# n#)) = | ||
| let !contBitMask# = (int2Word# 1#) `uncheckedShiftL#` (n# -# 1#) | ||
| loop = do ic <- fixedWord n | ||
| let !(W# ic#) = ic | ||
| if isTrue# ((ic# `and#` contBitMask#) `eqWord#` (int2Word# 0#)) | ||
| then return ic | ||
| else do nxt <- loop | ||
| let !(W# nxt#) = nxt | ||
| let nxtshft# = nxt# `uncheckedShiftL#` (n# -# 1#) | ||
| return (W# ((ic# `xor#` contBitMask#)`or#` nxtshft#)) | ||
| in loop | ||
| #endif | ||
|
|
||
|
|
||
| -- | Process a variable-bit encoded integer. | ||
| vbrNum :: (Num a, Bits a) => NumBits -> GetBits a | ||
| #ifdef QUICK | ||
| vbrNum = fmap (fromInteger . toInteger) . vbrWord | ||
| #else | ||
| vbrNum n = fromBitString <$> vbr n | ||
| #endif | ||
|
|
||
| -- | Decode a 6-bit encoded character. | ||
| char6 :: GetBits Word8 | ||
| char6 = do | ||
| #ifdef QUICK | ||
| (W# w#) <- fixedWord (Bits' 6) | ||
| let !i# = word2Int# w# | ||
| #if MIN_VERSION_base(4,16,0) | ||
| let wordToWord8 = wordToWord8# | ||
| #else | ||
| let wordToWord8 :: Word# -> Word# | ||
| wordToWord8 !a# = a# | ||
| #endif | ||
| if isTrue# (i# <=# 25#) | ||
| then return (W8# (wordToWord8 (w# `plusWord#` (int2Word# 97#)))) | ||
| else if isTrue# (i# <=# 51#) | ||
| then return (W8# (wordToWord8 (w# `plusWord#` (int2Word# 39#)))) | ||
| else if isTrue# (i# <=# 61#) | ||
| then return (W8# (wordToWord8 (w# `minusWord#` (int2Word# 4#)))) | ||
| else if isTrue# (i# ==# 62#) | ||
| then return (fromIntegral (fromEnum '.')) | ||
| else return (fromIntegral (fromEnum '_')) | ||
| #else | ||
| word <- numeric $ Bits' 6 | ||
| case word of | ||
| n | 0 <= n && n <= 25 -> return (n + 97) | ||
|
|
@@ -67,6 +116,7 @@ char6 = do | |
| 62 -> return (fromIntegral (fromEnum '.')) | ||
| 63 -> return (fromIntegral (fromEnum '_')) | ||
| _ -> fail "invalid char6" | ||
| #endif | ||
|
|
||
|
|
||
| -- Bitstream Parsing ----------------------------------------------------------- | ||
|
|
@@ -86,10 +136,9 @@ parseBitCodeBitstreamLazy :: L.ByteString -> Either String Bitstream | |
| parseBitCodeBitstreamLazy = runGetBits getBitCodeBitstream . L.toStrict | ||
|
|
||
| -- | The magic constant at the beginning of all llvm-bitcode files. | ||
| bcMagicConst :: BitString | ||
| bcMagicConst = toBitString (Bits' 8) 0x42 | ||
| `joinBitString` | ||
| toBitString (Bits' 8) 0x43 | ||
|
|
||
| bcMagicConst :: Word | ||
| bcMagicConst = 0x4342 | ||
|
|
||
| -- | Parse a @Bitstream@ from either a normal bitcode file, or a wrapped | ||
| -- bitcode. | ||
|
|
@@ -108,21 +157,18 @@ getBitCodeBitstream = label "llvm-bitstream" $ do | |
| skip $ Bits' 32 -- CPUType | ||
| isolate size getBitstream | ||
|
|
||
| bcWrapperMagicConst :: BitString | ||
| bcWrapperMagicConst = | ||
| foldr1 joinBitString [ byte 0xDE, byte 0xC0, byte 0x17, byte 0x0B] | ||
| where | ||
| byte = toBitString (Bits' 8) | ||
| bcWrapperMagicConst :: Word | ||
| bcWrapperMagicConst = 0x0b16c0de | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wait, did this code change? The previous version used |
||
|
|
||
| guardWrapperMagic :: GetBits () | ||
| guardWrapperMagic = do | ||
| magic <- fixed (Bits' 32) | ||
| magic <- fixedWord (Bits' 32) | ||
| guard (magic == bcWrapperMagicConst) | ||
|
|
||
| -- | Parse a @Bitstream@. | ||
| getBitstream :: GetBits Bitstream | ||
| getBitstream = label "bitstream" $ do | ||
| bc <- fixed $ Bits' 16 | ||
| bc <- fixedWord $ Bits' 16 | ||
| unless (bc == bcMagicConst) (fail "Invalid magic number") | ||
| appMagic <- numeric $ Bits' 16 | ||
| entries <- getTopLevelEntries | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as I can tell, the
QuasiQuotesextension is never used anywhere in this module, justTemplateHaskell.