haskell · vdukhovni · Aug 16, 2025 · clyring · Sep 14, 2025 · clyring
diff --git a/Changelog.md b/Changelog.md
@@ -7,7 +7,9 @@
 
 * API additions and behavior changes:
   * Data.ByteString.Short now provides `lazyToShort` and `lazyFromShort`.
-
+  * New TH splices: `Data.ByteString.literalFromOctetString` and `Data.ByteString.literalFromHex`
+    * These validate input strings prior to generating corresponding
+      compile-time literal ByteStrings.
 <!--
 * Bug fixes:
 * Deprecations:

diff --git a/Data/ByteString.hs b/Data/ByteString.hs
@@ -67,6 +67,8 @@ module Data.ByteString (
         toStrict,
         fromFilePath,
         toFilePath,
+        literalFromOctetString,
+        literalFromHex,
 
         -- * Basic interface
         cons,

diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
@@ -3,6 +3,7 @@
 
 {-# OPTIONS_HADDOCK not-home #-}
 
+{-# LANGUAGE ConstraintKinds #-}
 {-# LANGUAGE TemplateHaskellQuotes #-}
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE UnliftedFFITypes #-}
@@ -42,6 +43,7 @@ module Data.ByteString.Internal.Type (
         unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
         unsafePackAddress, unsafePackLenAddress,
         unsafePackLiteral, unsafePackLenLiteral,
+        literalFromOctetString, literalFromHex,
 
         -- * Low level imperative construction
         empty,
@@ -152,8 +154,9 @@ import Data.String              (IsString(..))
 
 import Control.Exception        (assert, throw, Exception)
 
-import Data.Bits                ((.&.))
+import Data.Bits                ((.|.), (.&.), complement, shiftL)
 import Data.Char                (ord)
+import Data.Foldable            (foldr')
 import Data.Word
 
 import Data.Data                (Data(..), mkConstr, mkDataType, Constr, DataType, Fixity(Prefix), constrIndex)
@@ -197,6 +200,14 @@ import GHC.ForeignPtr           (unsafeWithForeignPtr)
 
 import qualified Language.Haskell.TH.Lib as TH
 import qualified Language.Haskell.TH.Syntax as TH
+import Language.Haskell.TH.Syntax (Lift, TExp)
+#if __GLASGOW_HASKELL__ >= 900
+import Language.Haskell.TH.Syntax (Code, Quote)
+#endif
+
+#if !MIN_VERSION_base(4,13,0)
+import Control.Monad.Fail (MonadFail)
+#endif
 
 #if !HS_unsafeWithForeignPtr_AVAILABLE
 unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
@@ -359,7 +370,7 @@ byteStringDataType :: DataType
 byteStringDataType = mkDataType "Data.ByteString.ByteString" [packConstr]
 
 -- | @since 0.11.2.0
-instance TH.Lift ByteString where
+instance Lift ByteString where
 #if MIN_VERSION_template_haskell(2,16,0)
 -- template-haskell-2.16 first ships with ghc-8.10
   lift (BS ptr len) = [| unsafePackLenLiteral |]
@@ -530,6 +541,104 @@ packUptoLenChars len cs0 =
           go !p (c:cs)          = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs
       in go p0 cs0
 
+#if __GLASGOW_HASKELL__ < 900
+type Quote m = (TH.Q ~ m)
+type Code m a = m (TExp a)
+#endif
+
+liftTyped :: forall a m. (MonadFail m, Quote m, Lift a) => a -> Code m a
+#if MIN_VERSION_template_haskell(2,17,0)
+liftTyped = TH.liftTyped
+
+liftCode :: forall a m. (MonadFail m, Quote m) => m (TExp a) -> Code m a
+liftCode = TH.liftCode
+#else
+liftTyped = TH.unsafeTExpCoerce . TH.lift
+
+liftCode :: forall a m. (MonadFail m, Quote m) => m TH.Exp -> Code m a
+liftCode = TH.unsafeTExpCoerce
+#endif
+
+data S2W = Octets {-# UNPACK #-} !Int [Word8]
+           -- ^ Decoded some octets (<= 0xFF)
+         | Hichar {-# UNPACK #-} !Int {-# UNPACK #-} !Word
+           -- ^ Found a high char (> 0xFF)
+
+data H2W = Hex {-# UNPACK #-} !Int [Word8]
+           -- ^ Decoded some full bytes (nibble pairs)
+         | Odd {-# UNPACK #-} !Int {-# UNPACK #-} !Word [Word8]
+           -- ^ Decoded a nibble plus some full bytes
+         | Bad {-# UNPACK #-} !Int {-# UNPACK #-} !Word
+           -- ^ Found a non hex-digit character
+
+-- | Template Haskell splice to convert string constants to compile-time
+-- ByteString literals.  Unlike the 'IsString' instance, the input string
+-- is validated to ensure that each character is a valid /octet/, i.e. is
+-- at most @0xFF@ (255).
+--
+-- Example:
+--
+-- > :set -XTemplateHaskell
+-- > ehloCmd :: ByteString
+-- > ehloCmd = $$(literalFromOctetString "EHLO")
+--
+literalFromOctetString :: (MonadFail m, Quote m) => String -> Code m ByteString
+literalFromOctetString "" = [||empty||]
+literalFromOctetString s = case foldr' op (Octets 0 []) s of
+    Octets n ws -> liftTyped (unsafePackLenBytes n ws)
+    Hichar i w  -> liftCode $ fail $ "non-octet character '\\" ++
+        show w ++ "' at offset: " ++ show i
+  where
+    op :: Char -> S2W -> S2W
+    op (fromIntegral . fromEnum -> !(w :: Word)) acc
+        | w <= 0xff = case acc of
+            Octets i ws -> Octets (i + 1) (fromIntegral w : ws)
+            Hichar i w' -> Hichar (i + 1) w'
+        | otherwise = Hichar 0 w
+
+-- | Template Haskell splice to convert hex-encoded string constants to compile-time
+-- ByteString literals.  The input string is validated to ensure that it consists of
+-- an even number of valid hexadecimal digits (case insensitive).
+--
+-- Example:
+--
+-- > :set -XTemplateHaskell
+-- > ehloCmd :: ByteString
+-- > ehloCmd = $$(literalFromHex "45484c4F")
+--
+literalFromHex :: (MonadFail m, Quote m) => String -> Code m ByteString
+literalFromHex "" = [||empty||]
+literalFromHex s =
+    case foldr' op (Hex 0 []) s of
+        Hex n ws  -> liftTyped (unsafePackLenBytes n ws)
+        Odd i _ _ -> liftCode $ fail $ "Odd input length: " ++ show (1 + 2 * i)
+        Bad i w   -> liftCode $ fail $ "Non-hexadecimal character '\\" ++
+            show w ++ "' at offset: " ++ show i
+  where
+    -- Convert char to decimal digit value if result in [0, 9].
+    -- Otherwise, for hex digits, it remains to:
+    --   - fold upper and lower case by masking 0x20,
+    --   - subtract another 0x11 (0x41 total),
+    --   - check that result in [0,5]
+    --   - add 0xa
+    --
+    c2d :: Char -> Word
+    c2d c = fromIntegral (fromEnum c) - 0x30
+
+    op :: Char -> H2W -> H2W
+    op (c2d -> !(d :: Word)) acc
+        | d <= 9 = case acc of
+            Hex i ws    -> Odd i d ws
+            Odd i lo ws -> Hex (i+1) $ fromIntegral ((d `shiftL` 4 .|. lo)) : ws
+            Bad i w     -> Bad (i + 1) w
+        | l <- (d .&. complement 0x20) - 0x11
+        , l <= 5
+        , x <- l + 0xa = case acc of
+            Hex i ws    -> Odd i (l + 0xa) ws
+            Odd i lo ws -> Hex (i+ 1) $ fromIntegral (x `shiftL` 4 .|. lo) : ws
+            Bad i w     -> Bad (i + 1) w
+        | otherwise = Bad 0 (d + 0x30)
+
 -- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
 -- we would like to write a tight loop that just blasts the list into memory, on
 -- the other hand we want it to be unpacked lazily so we don't end up with a

diff --git a/tests/Lift.hs b/tests/Lift.hs
@@ -31,6 +31,14 @@ testSuite = testGroup "Lift"
         let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in
         bs === $$(TH.liftTyped $ BS.pack [0,1,2,3,0,1,2,3])
 #endif
+
+    , testProperty "literalFromOctetString" $
+        let bs = "EHLO" :: BS.ByteString in
+        bs === $$(BS.literalFromOctetString "EHLO")
+
+    , testProperty "literalFromHex" $
+        let bs = "EHLO" :: BS.ByteString in
+        bs === $$(BS.literalFromHex "45484c4F")
     ]
 
   , testGroup "lazy"