Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

* API additions and behavior changes:
* Data.ByteString.Short now provides `lazyToShort` and `lazyFromShort`.

* New TH splices: `Data.ByteString.literalFromOctetString` and `Data.ByteString.literalFromHex`
* These validate input strings prior to generating corresponding
compile-time literal ByteStrings.
<!--
* Bug fixes:
* Deprecations:
Expand Down
2 changes: 2 additions & 0 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ module Data.ByteString (
toStrict,
fromFilePath,
toFilePath,
literalFromOctetString,
literalFromHex,

-- * Basic interface
cons,
Expand Down
113 changes: 111 additions & 2 deletions Data/ByteString/Internal/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

{-# OPTIONS_HADDOCK not-home #-}

{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnliftedFFITypes #-}
Expand Down Expand Up @@ -42,6 +43,7 @@ module Data.ByteString.Internal.Type (
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
unsafePackAddress, unsafePackLenAddress,
unsafePackLiteral, unsafePackLenLiteral,
literalFromOctetString, literalFromHex,

-- * Low level imperative construction
empty,
Expand Down Expand Up @@ -152,8 +154,9 @@ import Data.String (IsString(..))

import Control.Exception (assert, throw, Exception)

import Data.Bits ((.&.))
import Data.Bits ((.|.), (.&.), complement, shiftL)
import Data.Char (ord)
import Data.Foldable (foldr')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unqualified foldr' briefly confused me. (Actually, why are these quote-generators defined in D.B.Internal.Type instead of the exposed Data.ByteString?)

import Data.Word

import Data.Data (Data(..), mkConstr, mkDataType, Constr, DataType, Fixity(Prefix), constrIndex)
Expand Down Expand Up @@ -197,6 +200,14 @@ import GHC.ForeignPtr (unsafeWithForeignPtr)

import qualified Language.Haskell.TH.Lib as TH
import qualified Language.Haskell.TH.Syntax as TH
import Language.Haskell.TH.Syntax (Lift, TExp)
#if __GLASGOW_HASKELL__ >= 900
import Language.Haskell.TH.Syntax (Code, Quote)
#endif

#if !MIN_VERSION_base(4,13,0)
import Control.Monad.Fail (MonadFail)
#endif

#if !HS_unsafeWithForeignPtr_AVAILABLE
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
Expand Down Expand Up @@ -359,7 +370,7 @@ byteStringDataType :: DataType
byteStringDataType = mkDataType "Data.ByteString.ByteString" [packConstr]

-- | @since 0.11.2.0
instance TH.Lift ByteString where
instance Lift ByteString where
#if MIN_VERSION_template_haskell(2,16,0)
-- template-haskell-2.16 first ships with ghc-8.10
lift (BS ptr len) = [| unsafePackLenLiteral |]
Expand Down Expand Up @@ -530,6 +541,104 @@ packUptoLenChars len cs0 =
go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs
in go p0 cs0

#if __GLASGOW_HASKELL__ < 900
type Quote m = (TH.Q ~ m)
type Code m a = m (TExp a)
#endif

liftTyped :: forall a m. (MonadFail m, Quote m, Lift a) => a -> Code m a
#if MIN_VERSION_template_haskell(2,17,0)
liftTyped = TH.liftTyped

liftCode :: forall a m. (MonadFail m, Quote m) => m (TExp a) -> Code m a
liftCode = TH.liftCode
#else
liftTyped = TH.unsafeTExpCoerce . TH.lift

liftCode :: forall a m. (MonadFail m, Quote m) => m TH.Exp -> Code m a
liftCode = TH.unsafeTExpCoerce
#endif

data S2W = Octets {-# UNPACK #-} !Int [Word8]
-- ^ Decoded some octets (<= 0xFF)
| Hichar {-# UNPACK #-} !Int {-# UNPACK #-} !Word
-- ^ Found a high char (> 0xFF)

data H2W = Hex {-# UNPACK #-} !Int [Word8]
-- ^ Decoded some full bytes (nibble pairs)
| Odd {-# UNPACK #-} !Int {-# UNPACK #-} !Word [Word8]
-- ^ Decoded a nibble plus some full bytes
| Bad {-# UNPACK #-} !Int {-# UNPACK #-} !Word
-- ^ Found a non hex-digit character

-- | Template Haskell splice to convert string constants to compile-time
-- ByteString literals. Unlike the 'IsString' instance, the input string
-- is validated to ensure that each character is a valid /octet/, i.e. is
-- at most @0xFF@ (255).
--
-- Example:
--
-- > :set -XTemplateHaskell
-- > ehloCmd :: ByteString
-- > ehloCmd = $$(literalFromOctetString "EHLO")
--
literalFromOctetString :: (MonadFail m, Quote m) => String -> Code m ByteString
literalFromOctetString "" = [||empty||]
literalFromOctetString s = case foldr' op (Octets 0 []) s of
Octets n ws -> liftTyped (unsafePackLenBytes n ws)
Hichar i w -> liftCode $ fail $ "non-octet character '\\" ++
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TeofilC Would this liftCode $ fail $ ... stuff require any adjustments to your template-haskell-lift plans?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the headsup. This should be fine.

show w ++ "' at offset: " ++ show i
where
op :: Char -> S2W -> S2W
op (fromIntegral . fromEnum -> !(w :: Word)) acc
| w <= 0xff = case acc of
Octets i ws -> Octets (i + 1) (fromIntegral w : ws)
Hichar i w' -> Hichar (i + 1) w'
| otherwise = Hichar 0 w

-- | Template Haskell splice to convert hex-encoded string constants to compile-time
-- ByteString literals. The input string is validated to ensure that it consists of
-- an even number of valid hexadecimal digits (case insensitive).
--
-- Example:
--
-- > :set -XTemplateHaskell
-- > ehloCmd :: ByteString
-- > ehloCmd = $$(literalFromHex "45484c4F")
--
literalFromHex :: (MonadFail m, Quote m) => String -> Code m ByteString
literalFromHex "" = [||empty||]
literalFromHex s =
case foldr' op (Hex 0 []) s of
Hex n ws -> liftTyped (unsafePackLenBytes n ws)
Odd i _ _ -> liftCode $ fail $ "Odd input length: " ++ show (1 + 2 * i)
Bad i w -> liftCode $ fail $ "Non-hexadecimal character '\\" ++
show w ++ "' at offset: " ++ show i
where
-- Convert char to decimal digit value if result in [0, 9].
-- Otherwise, for hex digits, it remains to:
-- - fold upper and lower case by masking 0x20,
-- - subtract another 0x11 (0x41 total),
-- - check that result in [0,5]
-- - add 0xa
--
c2d :: Char -> Word
c2d c = fromIntegral (fromEnum c) - 0x30

op :: Char -> H2W -> H2W
op (c2d -> !(d :: Word)) acc
| d <= 9 = case acc of
Hex i ws -> Odd i d ws
Odd i lo ws -> Hex (i+1) $ fromIntegral ((d `shiftL` 4 .|. lo)) : ws
Bad i w -> Bad (i + 1) w
| l <- (d .&. complement 0x20) - 0x11
, l <= 5
, x <- l + 0xa = case acc of
Hex i ws -> Odd i (l + 0xa) ws
Odd i lo ws -> Hex (i+ 1) $ fromIntegral (x `shiftL` 4 .|. lo) : ws
Bad i w -> Bad (i + 1) w
| otherwise = Bad 0 (d + 0x30)

-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
-- we would like to write a tight loop that just blasts the list into memory, on
-- the other hand we want it to be unpacked lazily so we don't end up with a
Expand Down
8 changes: 8 additions & 0 deletions tests/Lift.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ testSuite = testGroup "Lift"
let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in
bs === $$(TH.liftTyped $ BS.pack [0,1,2,3,0,1,2,3])
#endif

, testProperty "literalFromOctetString" $
let bs = "EHLO" :: BS.ByteString in
bs === $$(BS.literalFromOctetString "EHLO")

, testProperty "literalFromHex" $
let bs = "EHLO" :: BS.ByteString in
bs === $$(BS.literalFromHex "45484c4F")
]

, testGroup "lazy"
Expand Down