11{-# LANGUAGE ScopedTypeVariables, CPP, BangPatterns, RankNTypes, TupleSections #-}
2- {-# LANGUAGE Unsafe #-}
2+ {-# LANGUAGE MagicHash, ViewPatterns, Unsafe #-}
33{-# OPTIONS_HADDOCK not-home #-}
44-- | Copyright : (c) 2010 - 2011 Simon Meier
55-- License : BSD3-style (see LICENSE)
@@ -84,6 +84,8 @@ module Data.ByteString.Builder.Internal (
8484 -- , sizedChunksInsert
8585
8686 , byteStringCopy
87+ , ascLiteralCopy
88+ , modUtf8LitCopy
8789 , byteStringInsert
8890 , byteStringThreshold
8991
@@ -127,6 +129,7 @@ module Data.ByteString.Builder.Internal (
127129) where
128130
129131import Control.Arrow (second )
132+ import Control.Monad (when )
130133
131134#if !(MIN_VERSION_base(4,11,0))
132135import Data.Semigroup (Semigroup ((<>) ))
@@ -140,10 +143,12 @@ import qualified Data.ByteString.Short.Internal as Sh
140143import qualified GHC.IO.Buffer as IO (Buffer (.. ), newByteBuffer )
141144import GHC.IO.Handle.Internals (wantWritableHandle , flushWriteBuffer )
142145import GHC.IO.Handle.Types (Handle__ , haByteBuffer , haBufferMode )
146+ import GHC.Ptr (Ptr (.. ))
143147import System.IO (hFlush , BufferMode (.. ), Handle )
144148import Data.IORef
145149
146150import Foreign
151+ import Foreign.C.String (CString )
147152import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr )
148153import System.IO.Unsafe (unsafeDupablePerformIO )
149154
@@ -857,6 +862,75 @@ byteStringInsert :: S.ByteString -> Builder
857862byteStringInsert =
858863 \ bs -> builder $ \ k (BufferRange op _) -> return $ insertChunk op bs k
859864
865+
866+ ------------------------------------------------------------------------------
867+ -- Raw CString encoding
868+ ------------------------------------------------------------------------------
869+
870+ -- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
871+ -- strings that are free of embedded (overlong-encoded as the two-byte sequence
872+ -- @0xC0 0x80@) null characters.
873+ --
874+ -- @since 0.11.5.0
875+ {-# INLINABLE ascLiteralCopy #-}
876+ ascLiteralCopy :: Ptr Word8 -> Int -> Builder
877+ ascLiteralCopy = \ ! ip ! len -> builder $ \ k br -> do
878+ let ! ipe = ip `plusPtr` len
879+ wrappedBytesCopyStep (BufferRange ip ipe) k br
880+
881+ -- | GHC represents @NUL@ in string literals via an overlong 2-byte encoding,
882+ -- which is part of "modified UTF-8" (GHC does not also implement CESU-8).
883+ modifiedUtf8NUL :: CString
884+ modifiedUtf8NUL = Ptr " \xc0\x80 " #
885+
886+ -- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
887+ -- encoded strings that may contain embedded overlong-encodings (as the
888+ -- two-byte sequence @0xC0 0x80@) of null characters.
889+ --
890+ -- @since 0.11.5.0
891+ {-# INLINABLE modUtf8LitCopy #-}
892+ modUtf8LitCopy :: Ptr Word8 -> Int -> Builder
893+ modUtf8LitCopy = \ ! ip ! len -> builder $ \ k br -> do
894+ nullAt <- c_strstr (castPtr ip) modifiedUtf8NUL
895+ modUtf8_step ip len nullAt k br
896+
897+ modUtf8_step :: Ptr Word8 -> Int -> Ptr Word8 -> BuildStep r -> BuildStep r
898+ modUtf8_step ! ip ! len ((== nullPtr) -> True ) k br =
899+ -- Contains no encoded nulls, use simple copy codepath
900+ wrappedBytesCopyStep (BufferRange ip ipe) k br
901+ where
902+ ! ipe = ip `plusPtr` len
903+ modUtf8_step ! ip ! len ! nullAt k (BufferRange op0 ope)
904+ -- Copy as much of the null-free portion of the string as fits into the
905+ -- available buffer space. If the string is long enough, we may have asked
906+ -- for less than its full length, filling the buffer with the rest will go
907+ -- into the next builder step.
908+ | avail > nullFree = do
909+ when (nullFree > 0 ) (copyBytes op0 ip nullFree)
910+ pokeElemOff op0 nullFree 0
911+ let used = nullFree + 2
912+ len' = len - used
913+ ! ip' = ip `plusPtr` used
914+ ! op' = op0 `plusPtr` (nullFree + 1 )
915+ nullAt' <- c_strstr ip' modifiedUtf8NUL
916+ modUtf8_step ip' len' nullAt' k (BufferRange op' ope)
917+ | avail > 0 = do
918+ -- avail <= nullFree
919+ copyBytes op0 ip avail
920+ let len' = len - avail
921+ ! ip' = ip `plusPtr` avail
922+ ! op' = op0 `plusPtr` avail
923+ return $ bufferFull 1 op' (modUtf8_step ip' len' nullAt k)
924+ | otherwise =
925+ return $ bufferFull 1 op0 (modUtf8_step ip len nullAt k)
926+ where
927+ ! avail = ope `minusPtr` op0
928+ ! nullFree = nullAt `minusPtr` ip
929+
930+ foreign import ccall unsafe " string.h strstr" c_strstr
931+ :: CString -> CString -> IO (Ptr Word8 )
932+
933+
860934-- Short bytestrings
861935------------------------------------------------------------------------------
862936
0 commit comments