Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm-pretty-bc-parser.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ Flag regressions
Description: Enable regression testing build
Default: False

Flag quick
Description: Disable traces and validation of LLVM input; minimal parsing checking
Default: False

Source-repository head
type: git
location: http://github.com/galoisinc/llvm-pretty-bc-parser
Expand Down Expand Up @@ -60,6 +64,9 @@ Library
-O2
-funbox-strict-fields

if flag(quick)
CPP-Options: -DQUICK

Build-depends: array >= 0.3,
base >= 4.8 && < 5,
binary >= 0.8,
Expand Down
67 changes: 42 additions & 25 deletions src/Data/LLVM/BitCode/BitString.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE QuasiQuotes #-}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, the QuasiQuotes extension is never used anywhere in this module, just TemplateHaskell.

{-# LANGUAGE TemplateHaskell #-}

module Data.LLVM.BitCode.BitString
(
Expand All @@ -15,17 +18,24 @@ module Data.LLVM.BitCode.BitString
, NumBits, NumBytes, pattern Bits', pattern Bytes'
, bitCount, bitCount#
, bitsToBytes, bytesToBits
, bitMask
, addBitCounts
, subtractBitCounts
)
where

#ifdef QUICK
import Data.Bits ( Bits )
import Numeric ( showIntAtBase )
#else
import Data.Bits ( bit, bitSizeMaybe, Bits )
import GHC.Exts
import Numeric ( showIntAtBase, showHex )
#endif
import GHC.Exts

import Prelude hiding (take,drop,splitAt)


----------------------------------------------------------------------
-- Define some convenience newtypes to clarify whether the count of bits or count
-- of bytes is being referenced, and to convert between the two.
Expand Down Expand Up @@ -69,26 +79,27 @@ bytesToBits (NumBytes (I# n#)) = NumBits (I# (n# `uncheckedIShiftL#` 3#))

data BitString = BitString
{ bsLength :: !NumBits
, bsData :: !Int
, bsData :: !Word
-- Note: the bsData was originally an Integer, which allows an essentially
-- unlimited size value. However, this adds some overhead to various
-- computations, and since LLVM Bitcode is unlikely to ever represent values
-- greater than the native size (64 bits) as discrete values. By changing
-- this to @Int@, the use of unboxed calculations is enabled for better
-- performance.
--
-- The use of Int is potentially unsound because GHC only guarantees it's a
-- signed integer of at least 32-bits. However current implementations in
-- all environments where it's reasonable to use this parser have a 64-bit
-- Int implementation. This can be verified via:
-- this to @Word@ (which is verified to be 64 bits), the use of unboxed
-- calculations is enabled for better performance.
--
-- > import Data.Bits
-- > bitSizeMaybe (maxBound :: Int) >= Just 64
--
-- There's no good location here to automate this check (perhaps
-- GetBits.hs:runGetBits?), which is why it isn't verified at runtime.
-- Note that Word is used instead of Word64; in GHC pre 9.x, Word64 was
-- intended to represent a 64-bit value on a 32-bit system.
} deriving (Show, Eq)

-- Verify a Word is 64-bits (at compile time)
$(return $ if isTrue# ((int2Word# 3#) `eqWord#`
(((int2Word# 0xF0#) `uncheckedShiftL#` 58#)
`uncheckedShiftRL#` 62#))
then []
else error "Word type must be 64-bits!"
)
Comment on lines +94 to +100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm very leery about making this library completely unbuildable on 32-bit architectures. Why not just use Word64 instead?



-- | Create an empty BitString

emptyBitString :: BitString
Expand All @@ -100,31 +111,36 @@ emptyBitString = BitString (NumBits 0) 0
-- BitString.

joinBitString :: BitString -> BitString -> BitString
joinBitString (BitString (Bits' (I# szA#)) (I# a#))
(BitString (Bits' (I# szB#)) (I# b#)) =
joinBitString (BitString (Bits' (I# szA#)) (W# a#))
(BitString (Bits' (I# szB#)) (W# b#)) =
BitString { bsLength = NumBits (I# (szA# +# szB#))
, bsData = I# (a# `orI#` (b# `uncheckedIShiftL#` szA#))
, bsData = W# (a# `or#` (b# `uncheckedShiftL#` szA#))
}

bitMask :: NumBits -> Word#
bitMask (Bits' (I# len#)) =
((int2Word# 1#) `uncheckedShiftL#` len#) `minusWord#` (int2Word# 1#)


-- | Given a number of bits to take, and an @Integer@, create a @BitString@.

toBitString :: NumBits -> Int -> BitString
toBitString len@(Bits' (I# len#)) (I# val#) =
let !mask# = (1# `uncheckedIShiftL#` len#) -# 1#
in BitString len (I# (val# `andI#` mask#))
toBitString :: NumBits -> Word -> BitString
toBitString len (W# val#) = BitString len (W# (val# `and#` (bitMask len)))


-- | Extract the referenced Integer value from a BitString

bitStringValue :: BitString -> Int
bitStringValue :: BitString -> Word
bitStringValue = bsData


-- | Extract a target (Num) value of the desired type from a BitString (using
-- fromInteger to perform the target type conversion).

fromBitString :: (Num a, Bits a) => BitString -> a
#ifdef QUICK
fromBitString (BitString _ i) = x
#else
Comment on lines +141 to +143
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a fan of sprinkling CPP everywhere throughout the library like this, since (1) it makes adds a lot of lexical noise, and (2) it makes it more difficult to remember to keep the two versions of every function in sync, since you have to remember to build with both -fquick and -f-quick unless you want to risk bitrot.

Rather this going with this approach, why not do something like this instead?

fromBitString :: (Num a, Bits a) => BitString -> a
fromBitString (BitString l i)
  | quickMode = x
  | otherwise =
    case bitSizeMaybe x of
      ...

Where:

quickMode :: Bool
#if defined(QUICK)
quickMode = True
#else
quickMode = False
#endif

This way, we can continue to typecheck both versions of each function, decreasing the chances of bitrot. Since quickMode will statically be known to be True or False, the optimizer will be able to make swift work of it.

fromBitString (BitString l i) =
case bitSizeMaybe x of
Nothing -> x
Expand All @@ -137,6 +153,7 @@ fromBitString (BitString l i) =
, "(mask=0x" <> showHex i ")"
, "could not be parsed into type with only", show n, "bits"
])
#endif
where
x = fromInteger ival -- use Num to convert the Integer to the target type
ival = toInteger i -- convert input to an Integer for ^^
Expand Down Expand Up @@ -164,10 +181,10 @@ take n bs@(BitString l i)
-- return the remaining as a smaller BitString.

drop :: NumBits -> BitString -> BitString
drop !n !(BitString l i)
drop !n !(BitString l v)
| n >= l = emptyBitString
| otherwise =
let !(I# n#) = bitCount n
!(I# l#) = bitCount l
!(I# i#) = i
in BitString (NumBits (I# (l# -# n#))) (I# (i# `uncheckedIShiftRL#` n#))
!(W# v#) = v
in BitString (NumBits (I# (l# -# n#))) (W# (v# `uncheckedShiftRL#` n#))
78 changes: 62 additions & 16 deletions src/Data/LLVM/BitCode/Bitstream.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE PatternGuards #-}

module Data.LLVM.BitCode.Bitstream (
Expand All @@ -16,27 +19,34 @@ module Data.LLVM.BitCode.Bitstream (
, parseMetadataStringLengths
) where

import Data.LLVM.BitCode.BitString as BS
import Data.LLVM.BitCode.GetBits

import Control.Monad ( unless, replicateM, guard )
import Data.Bits ( Bits )
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.LLVM.BitCode.BitString as BS
import Data.LLVM.BitCode.GetBits
import qualified Data.Map as Map
import Data.Word ( Word8, Word16, Word32 )
#ifdef QUICK
import GHC.Exts
#endif
import GHC.Word


-- Primitive Reads -------------------------------------------------------------

-- | Parse a @Bool@ out of a single bit.
boolean :: GetBits Bool
boolean = ((1 :: Word8) ==) . fromBitString <$> fixed (Bits' 1)
boolean = do i <- fixedWord (Bits' 1)
return $ 1 == i


-- | Parse a Num type out of n-bits.
numeric :: (Num a, Bits a) => NumBits -> GetBits a
#ifdef QUICK
numeric n = fromInteger . toInteger <$> fixedWord n
#else
numeric n = fromBitString <$> fixed n
#endif


-- | Get a @BitString@ formatted as vbr.
Expand All @@ -52,13 +62,52 @@ vbr n = loop emptyBitString
then loop acc'
else return acc'

#ifdef QUICK
vbrWord :: NumBits -> GetBits Word
vbrWord n@(Bits' (I# n#)) =
let !contBitMask# = (int2Word# 1#) `uncheckedShiftL#` (n# -# 1#)
loop = do ic <- fixedWord n
let !(W# ic#) = ic
if isTrue# ((ic# `and#` contBitMask#) `eqWord#` (int2Word# 0#))
then return ic
else do nxt <- loop
let !(W# nxt#) = nxt
let nxtshft# = nxt# `uncheckedShiftL#` (n# -# 1#)
return (W# ((ic# `xor#` contBitMask#)`or#` nxtshft#))
in loop
#endif


-- | Process a variable-bit encoded integer.
vbrNum :: (Num a, Bits a) => NumBits -> GetBits a
#ifdef QUICK
vbrNum = fmap (fromInteger . toInteger) . vbrWord
#else
vbrNum n = fromBitString <$> vbr n
#endif

-- | Decode a 6-bit encoded character.
char6 :: GetBits Word8
char6 = do
#ifdef QUICK
(W# w#) <- fixedWord (Bits' 6)
let !i# = word2Int# w#
#if MIN_VERSION_base(4,16,0)
let wordToWord8 = wordToWord8#
#else
let wordToWord8 :: Word# -> Word#
wordToWord8 !a# = a#
#endif
if isTrue# (i# <=# 25#)
then return (W8# (wordToWord8 (w# `plusWord#` (int2Word# 97#))))
else if isTrue# (i# <=# 51#)
then return (W8# (wordToWord8 (w# `plusWord#` (int2Word# 39#))))
else if isTrue# (i# <=# 61#)
then return (W8# (wordToWord8 (w# `minusWord#` (int2Word# 4#))))
else if isTrue# (i# ==# 62#)
then return (fromIntegral (fromEnum '.'))
else return (fromIntegral (fromEnum '_'))
#else
word <- numeric $ Bits' 6
case word of
n | 0 <= n && n <= 25 -> return (n + 97)
Expand All @@ -67,6 +116,7 @@ char6 = do
62 -> return (fromIntegral (fromEnum '.'))
63 -> return (fromIntegral (fromEnum '_'))
_ -> fail "invalid char6"
#endif


-- Bitstream Parsing -----------------------------------------------------------
Expand All @@ -86,10 +136,9 @@ parseBitCodeBitstreamLazy :: L.ByteString -> Either String Bitstream
parseBitCodeBitstreamLazy = runGetBits getBitCodeBitstream . L.toStrict

-- | The magic constant at the beginning of all llvm-bitcode files.
bcMagicConst :: BitString
bcMagicConst = toBitString (Bits' 8) 0x42
`joinBitString`
toBitString (Bits' 8) 0x43

bcMagicConst :: Word
bcMagicConst = 0x4342

-- | Parse a @Bitstream@ from either a normal bitcode file, or a wrapped
-- bitcode.
Expand All @@ -108,21 +157,18 @@ getBitCodeBitstream = label "llvm-bitstream" $ do
skip $ Bits' 32 -- CPUType
isolate size getBitstream

bcWrapperMagicConst :: BitString
bcWrapperMagicConst =
foldr1 joinBitString [ byte 0xDE, byte 0xC0, byte 0x17, byte 0x0B]
where
byte = toBitString (Bits' 8)
bcWrapperMagicConst :: Word
bcWrapperMagicConst = 0x0b16c0de
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, did this code change? The previous version used 17, whereas this version uses 16.


guardWrapperMagic :: GetBits ()
guardWrapperMagic = do
magic <- fixed (Bits' 32)
magic <- fixedWord (Bits' 32)
guard (magic == bcWrapperMagicConst)

-- | Parse a @Bitstream@.
getBitstream :: GetBits Bitstream
getBitstream = label "bitstream" $ do
bc <- fixed $ Bits' 16
bc <- fixedWord $ Bits' 16
unless (bc == bcMagicConst) (fail "Invalid magic number")
appMagic <- numeric $ Bits' 16
entries <- getTopLevelEntries
Expand Down
Loading