@@ -143,7 +143,7 @@ import qualified Data.ByteString.Short.Internal as Sh
143143import qualified GHC.IO.Buffer as IO (Buffer (.. ), newByteBuffer )
144144import GHC.IO.Handle.Internals (wantWritableHandle , flushWriteBuffer )
145145import GHC.IO.Handle.Types (Handle__ , haByteBuffer , haBufferMode )
146- import GHC.Exts
146+ import GHC.Ptr ( Ptr ( .. ))
147147import System.IO (hFlush , BufferMode (.. ), Handle )
148148import Data.IORef
149149
@@ -878,6 +878,11 @@ ascLiteralCopy = \ !ip !len -> builder $ \k br -> do
878878 let ! ipe = ip `plusPtr` len
879879 wrappedBytesCopyStep (BufferRange ip ipe) k br
880880
881+ -- | The overlong two-byte sequence @0xC0 0x80@ that GHC uses to represent @NUL@ in string
882+ -- literals. This is the only part of "modified UTF-8" GHC adopts (it does not also implement CESU-8).
883+ modifiedUtf8NUL :: CString
884+ modifiedUtf8NUL = Ptr "\xc0\x80"#
885+
881886-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
882887-- encoded strings that may contain embedded overlong-encodings (as the
883888-- two-byte sequence @0xC0 0x80@) of null characters.
@@ -886,7 +891,7 @@ ascLiteralCopy = \ !ip !len -> builder $ \k br -> do
886891{-# INLINABLE modUtf8LitCopy #-}
887892modUtf8LitCopy :: Ptr Word8 -> Int -> Builder
888893modUtf8LitCopy = \ !ip !len -> builder $ \k br -> do
889- nullAt <- c_strstr (castPtr ip) (Ptr "\xc0\x80"#)
894+ nullAt <- c_strstr (castPtr ip) modifiedUtf8NUL -- find the first overlong NUL (@0xC0 0x80@); presumably C strstr, bound outside this hunk
890895 modUtf8_step ip len nullAt k br
891896
892897modUtf8_step :: Ptr Word8 -> Int -> Ptr Word8 -> BuildStep r -> BuildStep r
@@ -907,7 +912,7 @@ modUtf8_step !ip !len !nullAt k (BufferRange op0 ope)
907912 len' = len - used
908913 ! ip' = ip `plusPtr` used
909914 ! op' = op0 `plusPtr` (nullFree + 1 )
910- nullAt' <- c_strstr ip' ( Ptr " \xc0\x80 " # )
915+ nullAt' <- c_strstr ip' modifiedUtf8NUL
911916 modUtf8_step ip' len' nullAt' k (BufferRange op' ope)
912917 | avail > 0 = do
913918 -- avail <= nullFree
0 commit comments