GaloisInc · kquick · Sep 12, 2022 · Sep 12, 2022 · Sep 13, 2022 · Sep 13, 2022
diff --git a/llvm-pretty-bc-parser.cabal b/llvm-pretty-bc-parser.cabal
@@ -24,6 +24,10 @@ Flag regressions
   Description:         Enable regression testing build
   Default:             False
 
+Flag quick
+  Description:         Disable traces and validation of LLVM input; minimal parsing checking
+  Default:             False
+
 Source-repository head
   type:                git
   location:            http://github.com/galoisinc/llvm-pretty-bc-parser
@@ -60,6 +64,9 @@ Library
                        -O2
                        -funbox-strict-fields
 
+  if flag(quick)
+    CPP-Options:       -DQUICK
+
   Build-depends:       array       >= 0.3,
                        base        >= 4.8 && < 5,
                        binary      >= 0.8,

diff --git a/src/Data/LLVM/BitCode/BitString.hs b/src/Data/LLVM/BitCode/BitString.hs
@@ -1,6 +1,9 @@
 {-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE MagicHash #-}
 {-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE TemplateHaskell #-}
 
 module Data.LLVM.BitCode.BitString
   (
@@ -15,17 +18,24 @@ module Data.LLVM.BitCode.BitString
   , NumBits, NumBytes, pattern Bits', pattern Bytes'
   , bitCount, bitCount#
   , bitsToBytes, bytesToBits
+  , bitMask
   , addBitCounts
   , subtractBitCounts
   )
 where
 
+#ifdef QUICK
+import Data.Bits ( Bits )
+import Numeric ( showIntAtBase )
+#else
 import Data.Bits ( bit, bitSizeMaybe, Bits )
-import GHC.Exts
 import Numeric ( showIntAtBase, showHex )
+#endif
+import GHC.Exts
 
 import Prelude hiding (take,drop,splitAt)
 
+
 ----------------------------------------------------------------------
 -- Define some convenience newtypes to clarify whether the count of bits or count
 -- of bytes is being referenced, and to convert between the two.
@@ -69,26 +79,27 @@ bytesToBits (NumBytes (I# n#)) = NumBits (I# (n# `uncheckedIShiftL#` 3#))
 
 data BitString = BitString
   { bsLength :: !NumBits
-  , bsData   :: !Int
+  , bsData   :: !Word
     -- Note: the bsData was originally an Integer, which allows an essentially
     -- unlimited size value.  However, this adds some overhead to various
     -- computations, and since LLVM Bitcode is unlikely to ever represent values
     -- greater than the native size (64 bits) as discrete values.  By changing
-    -- this to @Int@, the use of unboxed calculations is enabled for better
-    -- performance.
-    --
-    -- The use of Int is potentially unsound because GHC only guarantees it's a
-    -- signed integer of at least 32-bits.  However current implementations in
-    -- all environments where it's reasonable to use this parser have a 64-bit
-    -- Int implementation.  This can be verified via:
+    -- this to @Word@ (which is verified to be 64 bits), the use of unboxed
+    -- calculations is enabled for better performance.
     --
-    --  > import Data.Bits
-    --  > bitSizeMaybe (maxBound :: Int) >= Just 64
-    --
-    -- There's no good location here to automate this check (perhaps
-    -- GetBits.hs:runGetBits?), which is why it isn't verified at runtime.
+    -- Note that Word is used instead of Word64; in GHC pre 9.x, Word64 was
+    -- intended to represent a 64-bit value on a 32-bit system.
   } deriving (Show, Eq)
 
+-- Verify a Word is 64-bits (at compile time)
+$(return $ if isTrue# ((int2Word# 3#) `eqWord#`
+                       (((int2Word# 0xF0#) `uncheckedShiftL#` 58#)
+                        `uncheckedShiftRL#` 62#))
+           then []
+           else error "Word type must be 64-bits!"
+ )
+
+
 -- | Create an empty BitString
 
 emptyBitString :: BitString
@@ -100,31 +111,36 @@ emptyBitString = BitString (NumBits 0) 0
 -- BitString.
 
 joinBitString :: BitString -> BitString -> BitString
-joinBitString (BitString (Bits' (I# szA#)) (I# a#))
-              (BitString (Bits' (I# szB#)) (I# b#)) =
+joinBitString (BitString (Bits' (I# szA#)) (W# a#))
+              (BitString (Bits' (I# szB#)) (W# b#)) =
   BitString { bsLength = NumBits (I# (szA# +# szB#))
-            , bsData = I# (a# `orI#` (b# `uncheckedIShiftL#` szA#))
+            , bsData = W# (a# `or#` (b# `uncheckedShiftL#` szA#))
             }
 
+bitMask :: NumBits -> Word#
+bitMask (Bits' (I# len#)) =
+  ((int2Word# 1#) `uncheckedShiftL#` len#) `minusWord#` (int2Word# 1#)
+
 
 -- | Given a number of bits to take, and an @Integer@, create a @BitString@.
 
-toBitString :: NumBits -> Int -> BitString
-toBitString len@(Bits' (I# len#)) (I# val#) =
-  let !mask# = (1# `uncheckedIShiftL#` len#) -# 1#
-  in BitString len (I# (val# `andI#` mask#))
+toBitString :: NumBits -> Word -> BitString
+toBitString len (W# val#) = BitString len (W# (val# `and#` (bitMask len)))
 
 
 -- | Extract the referenced Integer value from a BitString
 
-bitStringValue :: BitString -> Int
+bitStringValue :: BitString -> Word
 bitStringValue = bsData
 
 
 -- | Extract a target (Num) value of the desired type from a BitString (using
 -- fromInteger to perform the target type conversion).
 
 fromBitString :: (Num a, Bits a) => BitString -> a
+#ifdef QUICK
+fromBitString (BitString _ i) = x
+#else
 fromBitString (BitString l i) =
   case bitSizeMaybe x of
     Nothing -> x
@@ -137,6 +153,7 @@ fromBitString (BitString l i) =
            , "(mask=0x" <> showHex i ")"
            , "could not be parsed into type with only", show n, "bits"
            ])
+#endif
  where
  x    = fromInteger ival  -- use Num to convert the Integer to the target type
  ival = toInteger i  -- convert input to an Integer for ^^
@@ -164,10 +181,10 @@ take n bs@(BitString l i)
 -- return the remaining as a smaller BitString.
 
 drop :: NumBits -> BitString -> BitString
-drop !n !(BitString l i)
+drop !n !(BitString l v)
   | n >= l    = emptyBitString
   | otherwise =
       let !(I# n#) = bitCount n
           !(I# l#) = bitCount l
-          !(I# i#) = i
-      in BitString (NumBits (I# (l# -# n#))) (I# (i# `uncheckedIShiftRL#` n#))
+          !(W# v#) = v
+      in BitString (NumBits (I# (l# -# n#))) (W# (v# `uncheckedShiftRL#` n#))
diff --git a/src/Data/LLVM/BitCode/Bitstream.hs b/src/Data/LLVM/BitCode/Bitstream.hs
@@ -1,4 +1,7 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MagicHash #-}
 {-# LANGUAGE PatternGuards #-}
 
 module Data.LLVM.BitCode.Bitstream (
@@ -16,27 +19,34 @@ module Data.LLVM.BitCode.Bitstream (
   , parseMetadataStringLengths
   ) where
 
-import           Data.LLVM.BitCode.BitString as BS
-import           Data.LLVM.BitCode.GetBits
-
 import           Control.Monad ( unless, replicateM, guard )
 import           Data.Bits ( Bits )
 import qualified Data.ByteString as S
 import qualified Data.ByteString.Lazy as L
+import           Data.LLVM.BitCode.BitString as BS
+import           Data.LLVM.BitCode.GetBits
 import qualified Data.Map as Map
-import           Data.Word ( Word8, Word16, Word32 )
+#ifdef QUICK
+import           GHC.Exts
+#endif
+import           GHC.Word
 
 
 -- Primitive Reads -------------------------------------------------------------
 
 -- | Parse a @Bool@ out of a single bit.
 boolean :: GetBits Bool
-boolean  = ((1 :: Word8) ==) . fromBitString <$> fixed (Bits' 1)
+boolean = do i <- fixedWord (Bits' 1)
+             return $ 1 == i
 
 
 -- | Parse a Num type out of n-bits.
 numeric :: (Num a, Bits a) => NumBits -> GetBits a
+#ifdef QUICK
+numeric n = fromInteger . toInteger <$> fixedWord n
+#else
 numeric n = fromBitString <$> fixed n
+#endif
 
 
 -- | Get a @BitString@ formatted as vbr.
@@ -52,13 +62,52 @@ vbr n = loop emptyBitString
        then loop acc'
        else return acc'
 
+#ifdef QUICK
+vbrWord :: NumBits -> GetBits Word
+vbrWord n@(Bits' (I# n#)) =
+  let !contBitMask# = (int2Word# 1#) `uncheckedShiftL#` (n# -# 1#)
+      loop = do ic <- fixedWord n
+                let !(W# ic#) = ic
+                if isTrue# ((ic# `and#` contBitMask#) `eqWord#` (int2Word# 0#))
+                  then return ic
+                  else do nxt <- loop
+                          let !(W# nxt#) = nxt
+                          let nxtshft# = nxt# `uncheckedShiftL#` (n# -# 1#)
+                          return (W# ((ic# `xor#` contBitMask#)`or#` nxtshft#))
+  in loop
+#endif
+
+
 -- | Process a variable-bit encoded integer.
 vbrNum :: (Num a, Bits a) => NumBits -> GetBits a
+#ifdef QUICK
+vbrNum = fmap (fromInteger . toInteger) . vbrWord
+#else
 vbrNum n = fromBitString <$> vbr n
+#endif
 
 -- | Decode a 6-bit encoded character.
 char6 :: GetBits Word8
 char6  = do
+#ifdef QUICK
+  (W# w#) <- fixedWord (Bits' 6)
+  let !i# = word2Int# w#
+#if MIN_VERSION_base(4,16,0)
+  let wordToWord8 = wordToWord8#
+#else
+  let wordToWord8 :: Word# -> Word#
+      wordToWord8 !a# = a#
+#endif
+  if isTrue# (i# <=# 25#)
+  then return (W8# (wordToWord8 (w# `plusWord#` (int2Word# 97#))))
+  else if isTrue# (i# <=# 51#)
+       then return (W8# (wordToWord8 (w# `plusWord#` (int2Word# 39#))))
+       else if isTrue# (i# <=# 61#)
+            then return (W8# (wordToWord8 (w# `minusWord#` (int2Word# 4#))))
+            else if isTrue# (i# ==# 62#)
+                 then return (fromIntegral (fromEnum '.'))
+                 else return (fromIntegral (fromEnum '_'))
+#else
   word <- numeric $ Bits' 6
   case word of
     n | 0  <= n && n <= 25 -> return (n + 97)
@@ -67,6 +116,7 @@ char6  = do
     62                     -> return (fromIntegral (fromEnum '.'))
     63                     -> return (fromIntegral (fromEnum '_'))
     _                      -> fail "invalid char6"
+#endif
 
 
 -- Bitstream Parsing -----------------------------------------------------------
@@ -86,10 +136,9 @@ parseBitCodeBitstreamLazy :: L.ByteString -> Either String Bitstream
 parseBitCodeBitstreamLazy = runGetBits getBitCodeBitstream . L.toStrict
 
 -- | The magic constant at the beginning of all llvm-bitcode files.
-bcMagicConst :: BitString
-bcMagicConst  = toBitString (Bits' 8) 0x42
-                `joinBitString`
-                toBitString (Bits' 8) 0x43
+
+bcMagicConst :: Word
+bcMagicConst = 0x4342
 
 -- | Parse a @Bitstream@ from either a normal bitcode file, or a wrapped
 -- bitcode.
@@ -108,21 +157,18 @@ getBitCodeBitstream  = label "llvm-bitstream" $ do
       skip $ Bits' 32 -- CPUType
       isolate size getBitstream
 
-bcWrapperMagicConst :: BitString
-bcWrapperMagicConst  =
-  foldr1 joinBitString [ byte 0xDE, byte 0xC0, byte 0x17, byte 0x0B]
-  where
-  byte = toBitString (Bits' 8)
+bcWrapperMagicConst :: Word
+bcWrapperMagicConst = 0x0b16c0de
 
 guardWrapperMagic :: GetBits ()
 guardWrapperMagic  = do
-  magic <- fixed (Bits' 32)
+  magic <- fixedWord (Bits' 32)
   guard (magic == bcWrapperMagicConst)
 
 -- | Parse a @Bitstream@.
 getBitstream :: GetBits Bitstream
 getBitstream  = label "bitstream" $ do
-  bc       <- fixed $ Bits' 16
+  bc       <- fixedWord $ Bits' 16
   unless (bc == bcMagicConst) (fail "Invalid magic number")
   appMagic <- numeric $ Bits' 16
   entries  <- getTopLevelEntries