11{-# LANGUAGE  ScopedTypeVariables, CPP, BangPatterns, RankNTypes, TupleSections #-}
2- {-# LANGUAGE  Unsafe #-}
2+ {-# LANGUAGE  MagicHash, ViewPatterns,  Unsafe #-}
33{-# OPTIONS_HADDOCK  not-home #-}
44--  |  Copyright : (c) 2010 - 2011 Simon Meier 
55--  License     : BSD3-style (see LICENSE) 
@@ -84,6 +84,8 @@ module Data.ByteString.Builder.Internal (
8484  --  , sizedChunksInsert
8585
8686  , byteStringCopy 
87+   , ascLiteralCopy 
88+   , modUtf8LitCopy 
8789  , byteStringInsert 
8890  , byteStringThreshold 
8991
@@ -127,6 +129,7 @@ module Data.ByteString.Builder.Internal (
127129) where 
128130
129131import            Control.Arrow  (second )
132+ import            Control.Monad  (when )
130133
131134import            Data.Semigroup  (Semigroup (.. ))
132135
@@ -138,10 +141,12 @@ import qualified Data.ByteString.Short.Internal as Sh
138141import  qualified  GHC.IO.Buffer  as  IO  (Buffer (.. ), newByteBuffer )
139142import            GHC.IO.Handle.Internals  (wantWritableHandle , flushWriteBuffer )
140143import            GHC.IO.Handle.Types  (Handle__ , haByteBuffer , haBufferMode )
144+ import            GHC.Ptr  (Ptr (.. ))
141145import            System.IO  (hFlush , BufferMode (.. ), Handle )
142146import            Data.IORef 
143147
144148import            Foreign 
149+ import            Foreign.C.String  (CString )
145150import            Foreign.ForeignPtr.Unsafe  (unsafeForeignPtrToPtr )
146151import            System.IO.Unsafe  (unsafeDupablePerformIO )
147152
@@ -874,6 +879,75 @@ byteStringInsert :: S.ByteString -> Builder
874879byteStringInsert = 
875880    \ bs ->  builder $  \ k (BufferRange  op _) ->  return  $  insertChunk op bs k
876881
882+ 
883+ ------------------------------------------------------------------------------ 
884+ --  Raw CString encoding
885+ ------------------------------------------------------------------------------ 
886+ 
887+ --  |  Builder for raw 'Addr#' pointers to null-terminated primitive ASCII 
888+ --  strings that are free of embedded (overlong-encoded as the two-byte sequence 
889+ --  @0xC0 0x80@) null characters. 
890+ -- 
891+ --  @since 0.11.5.0 
892+ {-# INLINABLE  ascLiteralCopy #-}
893+ ascLiteralCopy  ::  Ptr  Word8  ->  Int   ->  Builder 
894+ ascLiteralCopy =  \  ! ip ! len ->  builder $  \ k br ->  do 
895+     let  ! ipe =  ip `plusPtr`  len
896+     wrappedBytesCopyStep (BufferRange  ip ipe) k br
897+ 
898+ --  |  GHC represents @NUL@ in string literals via an overlong 2-byte encoding, 
899+ --  which is part of "modified UTF-8" (GHC does not also implement CESU-8). 
900+ modifiedUtf8NUL  ::  CString 
901+ modifiedUtf8NUL =  Ptr  " \xc0\x80 " # 
902+ 
903+ --  |  Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8 
904+ --  encoded strings that may contain embedded overlong-encodings (as the 
905+ --  two-byte sequence @0xC0 0x80@) of null characters. 
906+ -- 
907+ --  @since 0.11.5.0 
908+ {-# INLINABLE  modUtf8LitCopy #-}
909+ modUtf8LitCopy  ::  Ptr  Word8  ->  Int   ->  Builder 
910+ modUtf8LitCopy =  \  ! ip ! len ->  builder $  \ k br ->  do 
911+     nullAt <-  c_strstr (castPtr ip) modifiedUtf8NUL
912+     modUtf8_step ip len nullAt k br
913+ 
914+ modUtf8_step  ::  Ptr  Word8  ->  Int   ->  Ptr  Word8  ->  BuildStep  r  ->  BuildStep  r 
915+ modUtf8_step ! ip ! len ((==  nullPtr) ->  True  ) k br = 
916+     --  Contains no encoded nulls, use simple copy codepath
917+     wrappedBytesCopyStep (BufferRange  ip ipe) k br
918+   where 
919+     ! ipe =  ip `plusPtr`  len
920+ modUtf8_step ! ip ! len ! nullAt k (BufferRange  op0 ope)
921+     --  Copy as much of the null-free portion of the string as fits into the
922+     --  available buffer space. If the string is long enough, we may have asked
923+     --  for less than its full length, filling the buffer with the rest will go
924+     --  into the next builder step.
925+     |  avail >  nullFree =  do 
926+         when (nullFree >  0 ) (copyBytes op0 ip nullFree)
927+         pokeElemOff op0 nullFree 0 
928+         let  used =  nullFree +  2 
929+             len' =  len -  used
930+             ! ip' =  ip `plusPtr`  used
931+             ! op' =  op0 `plusPtr`  (nullFree +  1 )
932+         nullAt' <-  c_strstr ip' modifiedUtf8NUL
933+         modUtf8_step ip' len' nullAt' k (BufferRange  op' ope)
934+     |  avail >  0  =  do 
935+         --  avail <= nullFree
936+         copyBytes op0 ip avail
937+         let  len' =  len -  avail
938+             ! ip' =  ip `plusPtr`  avail
939+             ! op' =  op0 `plusPtr`  avail
940+         return  $  bufferFull 1  op' (modUtf8_step ip' len' nullAt k)
941+     |  otherwise  = 
942+         return  $  bufferFull 1  op0 (modUtf8_step ip len nullAt k)
943+   where 
944+     ! avail =  ope `minusPtr`  op0
945+     ! nullFree =  nullAt `minusPtr`  ip
946+ 
947+ foreign  import  ccall unsafe  " string.h strstr"   c_strstr
948+     ::  CString  ->  CString  ->  IO   (Ptr  Word8 )
949+ 
950+ 
877951--  Short bytestrings
878952------------------------------------------------------------------------------ 
879953
0 commit comments