Skip to content

Commit a140979

Browse files
committed
Deprecate cstring{,Utf8} in Builder/Prim.hs
These become aliases to cstringLiteral and cstringUtf8Literal in Builder/Internal.hs.
1 parent e6cc4a2 commit a140979

File tree

5 files changed

+59
-29
lines changed

5 files changed

+59
-29
lines changed

Data/ByteString/Builder.hs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -440,18 +440,18 @@ char8 :: Char -> Builder
440440
char8 = P.primFixed P.char8
441441

442442
-- | Char8 encode a 'String'.
443-
{-# INLINE [1] string8 #-} -- phased to allow P.cstring rewrite
443+
{-# INLINE [1] string8 #-} -- phased to allow cstringLiteral rewrite
444444
string8 :: String -> Builder
445445
string8 = P.primMapListFixed P.char8
446446

447447
-- GHC desugars string literals with unpackCString# which the simplifier tends
448448
-- to promptly turn into build (unpackFoldrCString# s), so we match on both.
449449
{-# RULES
450450
"string8/unpackCString#" forall s.
451-
string8 (unpackCString# s) = P.cstring s
451+
string8 (unpackCString# s) = cstringLiteral s
452452

453453
"string8/unpackFoldrCString#" forall s.
454-
string8 (build (unpackFoldrCString# s)) = P.cstring s
454+
string8 (build (unpackFoldrCString# s)) = cstringLiteral s
455455
#-}
456456

457457
------------------------------------------------------------------------------
@@ -467,19 +467,19 @@ charUtf8 = P.primBounded P.charUtf8
467467
--
468468
-- Note that 'stringUtf8' performs no codepoint validation and consequently may
469469
-- emit invalid UTF-8 if asked (e.g. single surrogates).
470-
{-# INLINE [1] stringUtf8 #-} -- phased to allow P.cstring rewrite
470+
{-# INLINE [1] stringUtf8 #-} -- phased to allow cstringLiteral rewrite
471471
stringUtf8 :: String -> Builder
472472
stringUtf8 = P.primMapListBounded P.charUtf8
473473

474474
{-# RULES
475475
"stringUtf8/unpackCStringUtf8#" forall s.
476-
stringUtf8 (unpackCStringUtf8# s) = P.cstringUtf8 s
476+
stringUtf8 (unpackCStringUtf8# s) = cstringUtf8Literal s
477477

478478
"stringUtf8/unpackCString#" forall s.
479-
stringUtf8 (unpackCString# s) = P.cstring s
479+
stringUtf8 (unpackCString# s) = cstringLiteral s
480480

481481
"stringUtf8/unpackFoldrCString#" forall s.
482-
stringUtf8 (build (unpackFoldrCString# s)) = P.cstring s
482+
stringUtf8 (build (unpackFoldrCString# s)) = cstringLiteral s
483483
#-}
484484

485485
instance IsString Builder where

Data/ByteString/Builder/Internal.hs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ module Data.ByteString.Builder.Internal (
8686
, byteStringCopy
8787
, byteStringInsert
8888
, byteStringThreshold
89-
, cstring
90-
, cstringUtf8
89+
, cstringLiteral
90+
, cstringUtf8Literal
9191

9292
, lazyByteStringCopy
9393
, lazyByteStringInsert
@@ -867,13 +867,14 @@ byteStringInsert =
867867
-- Raw CString encoding
868868
------------------------------------------------------------------------------
869869

870-
-- | A null-terminated ASCII encoded 'Foreign.C.String.CString'.
871-
-- Null characters are not representable.
870+
-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
871+
-- strings that are free of embedded (overlong-encoded as the two-byte sequence
872+
-- @0xC0 0x80@) null characters.
872873
--
873874
-- @since 0.11.5.0
874-
{-# INLINABLE cstring #-}
875-
cstring :: Addr# -> Builder
876-
cstring = \addr -> builder $ \k br -> do
875+
{-# INLINABLE cstringLiteral #-}
876+
cstringLiteral :: Addr# -> Builder
877+
cstringLiteral = \addr -> builder $ \k br -> do
877878
let ip = Ptr addr
878879
#if __GLASGOW_HASKELL__ >= 811
879880
ipe = Ptr (addr `plusAddr#` (cstringLength# addr))
@@ -882,20 +883,21 @@ cstring = \addr -> builder $ \k br -> do
882883
#endif
883884
wrappedBytesCopyStep (BufferRange ip ipe) k br
884885

885-
-- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'.
886-
-- Null characters can be encoded as @0xc0 0x80@.
886+
-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
887+
-- encoded strings that may contain embedded overlong-encodings (as the
888+
-- two-byte sequence @0xC0 0x80@) of null characters.
887889
--
888890
-- @since 0.11.5.0
889-
cstringUtf8 :: Addr# -> Builder
890-
cstringUtf8 = \addr0 -> builder $ \k br -> do
891+
{-# INLINABLE cstringUtf8Literal #-}
892+
cstringUtf8Literal :: Addr# -> Builder
893+
cstringUtf8Literal = \addr0 -> builder $ \k br -> do
891894
#if __GLASGOW_HASKELL__ >= 811
892895
let len = cstringLength# addr0
893896
#else
894897
(I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0)
895898
#endif
896899
nullAt <- c_strstr (Ptr addr0) (Ptr "\xc0\x80"#)
897900
cstringUtf8_step addr0 len nullAt k br
898-
{-# INLINABLE cstringUtf8 #-}
899901

900902
cstringUtf8_step :: Addr# -> Int# -> Ptr Word8 -> BuildStep r -> BuildStep r
901903
cstringUtf8_step addr len ((== nullPtr) -> True) k br =

Data/ByteString/Builder/Prim.hs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,8 @@ module Data.ByteString.Builder.Prim (
433433
-- a decimal number with UTF-8 encoded characters.
434434
, charUtf8
435435

436-
, cstring -- Backwards-compatibility re-exports from Internal.hs
437-
, cstringUtf8 -- these no longer make use of the BoundPrim API.
436+
, cstring
437+
, cstringUtf8
438438

439439
{-
440440
-- * Testing support
@@ -468,9 +468,7 @@ import Data.ByteString.Builder.Prim.ASCII
468468

469469
import Foreign
470470
import Foreign.C.Types
471-
import Foreign.C.String (CString)
472471
import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
473-
import GHC.Int (Int (..))
474472
import GHC.Word (Word8 (..))
475473
import GHC.Exts
476474
import GHC.IO
@@ -665,6 +663,26 @@ primMapLazyByteStringBounded w =
665663
L.foldrChunks (\x b -> primMapByteStringBounded w x `mappend` b) mempty
666664

667665

666+
-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
667+
-- strings that are free of embedded (overlong-encoded as the two-byte sequence
668+
-- @0xC0 0x80@) null characters.
669+
--
670+
-- @since 0.11.0.0
671+
{-# DEPRECATED cstring "Use cstringLiteral instead" #-}
672+
cstring :: Addr# -> Builder
673+
cstring = cstringLiteral
674+
{-# INLINE cstring #-}
675+
676+
-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
677+
-- encoded strings that may contain embedded overlong-encodings (as the
678+
-- two-byte sequence @0xC0 0x80@) of null characters.
679+
--
680+
-- @since 0.11.0.0
681+
{-# DEPRECATED cstringUtf8 "Use cstringUtf8Literal instead" #-}
682+
cstringUtf8 :: Addr# -> Builder
683+
cstringUtf8 = cstringUtf8Literal
684+
{-# INLINE cstringUtf8 #-}
685+
668686
------------------------------------------------------------------------------
669687
-- Char8 encoding
670688
------------------------------------------------------------------------------

bench/BenchAll.hs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,13 @@ import Data.ByteString.Builder.Extra (byteStringCopy,
3333
import Data.ByteString.Builder.Internal (ensureFree)
3434
import Data.ByteString.Builder.Prim (BoundedPrim, FixedPrim,
3535
(>$<))
36+
import qualified Data.ByteString.Builder.Internal as BI
3637
import qualified Data.ByteString.Builder.Prim as P
3738
import qualified Data.ByteString.Builder.Prim.Internal as PI
3839

3940
import Foreign
41+
import GHC.Exts (Addr#)
42+
import GHC.Ptr (Ptr(..))
4043

4144
import System.Random
4245

@@ -247,6 +250,13 @@ largeTraversalInput = S.concat (replicate 10 byteStringData)
247250
smallTraversalInput :: S.ByteString
248251
smallTraversalInput = S8.pack "The quick brown fox"
249252

253+
asciiLit, utf8Lit :: Ptr Word8
254+
asciiLit = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
255+
utf8Lit = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xc0\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
256+
257+
addrOf :: Ptr Word8 -> Addr#
258+
addrOf (Ptr addr) = addr
259+
250260
main :: IO ()
251261
main = do
252262
defaultMain
@@ -256,11 +266,11 @@ main = do
256266
, benchB' "ensureFree 8" () (const (ensureFree 8))
257267
, benchB' "intHost 1" 1 intHost
258268
, benchB' "UTF-8 String (naive)" "hello world\0" fromString
259-
, benchB' "UTF-8 String" () $ \() -> P.cstringUtf8 "hello world\0"#
269+
, benchB' "UTF-8 String" () $ \() -> BI.cstringUtf8Literal "hello world\xc0\x80"#
260270
, benchB' "String (naive)" "hello world!" fromString
261-
, benchB' "String" () $ \() -> P.cstring "hello world!"#
262-
, benchB' "AsciiLit64" () $ \() -> P.cstring "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
263-
, benchB' "Utf8Lit64" () $ \() -> P.cstringUtf8 "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xc0\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
271+
, benchB' "String" () $ \() -> BI.cstringLiteral "hello world!"#
272+
, benchB' "AsciiLit" () $ \() -> BI.cstringLiteral (addrOf asciiLit)
273+
, benchB' "Utf8Lit" () $ \() -> BI.cstringUtf8Literal (addrOf utf8Lit)
264274
]
265275

266276
, bgroup "Encoding wrappers"

tests/builder/Data/ByteString/Builder/Tests.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -994,11 +994,11 @@ testsUtf8 =
994994
testCString :: [TestTree]
995995
testCString =
996996
[ testProperty "cstring" $
997-
toLazyByteString (BI.cstring "hello world!"#) ==
997+
toLazyByteString (BI.cstringLiteral "hello world!"#) ==
998998
LC.pack "hello" `L.append` L.singleton 0x20
999999
`L.append` LC.pack "world!"
10001000
, testProperty "cstringUtf8" $
1001-
toLazyByteString (BI.cstringUtf8 "hello\xc0\x80\xc0\x80world\xc0\x80!"#) ==
1001+
toLazyByteString (BI.cstringUtf8Literal "hello\xc0\x80\xc0\x80world\xc0\x80!"#) ==
10021002
LC.pack "hello" `L.append` L.singleton 0x00
10031003
`L.append` L.singleton 0x00
10041004
`L.append` LC.pack "world"

0 commit comments

Comments
 (0)