Skip to content

Commit 160cec6

Browse files
authored
Fix #280 (surrogate code points in Builder) (#281)
* Add regression test for #280 * Proper handling of invalid scalar values in Builder functions
1 parent 020b94c commit 160cec6

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

src/Data/Text/Internal/Builder.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ import Data.Monoid (Monoid(..))
6262
#if !MIN_VERSION_base(4,11,0) && MIN_VERSION_base(4,9,0)
6363
import Data.Semigroup (Semigroup(..))
6464
#endif
65-
import Data.Text.Internal (Text(..))
65+
import Data.Text.Internal (Text(..), safe)
6666
import Data.Text.Internal.Lazy (smallChunkSize)
6767
import Data.Text.Unsafe (inlineInterleaveST)
6868
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
@@ -138,7 +138,7 @@ empty = Builder (\ k buf -> k buf)
138138
-- * @'toLazyText' ('singleton' c) = 'L.singleton' c@
139139
--
140140
singleton :: Char -> Builder
141-
singleton c = writeAtMost 2 $ \ marr o -> unsafeWrite marr o c
141+
singleton c = writeAtMost 2 $ \ marr o -> unsafeWrite marr o (safe c)
142142
{-# INLINE singleton #-}
143143

144144
------------------------------------------------------------------------
@@ -190,7 +190,7 @@ fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
190190
ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
191191
return $ t : ts
192192
| otherwise = do
193-
n <- unsafeWrite marr (o+u) c
193+
n <- unsafeWrite marr (o+u) (safe c)
194194
loop marr o (u+n) (l-n) cs
195195
in loop p0 o0 u0 l0 str
196196
where

tests/Tests/Regressions.hs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import qualified Data.Text.Encoding as TE
2323
import qualified Data.Text.Internal as T
2424
import qualified Data.Text.IO as T
2525
import qualified Data.Text.Lazy as LT
26+
import qualified Data.Text.Lazy.Builder as TB
2627
import qualified Data.Text.Lazy.Encoding as LE
2728
import qualified Data.Text.Unsafe as T
2829
import qualified Test.Tasty as F
@@ -101,6 +102,18 @@ t227 =
101102
(T.length $ T.filter isLetter $ T.take (-3) "Hello! How are you doing today?")
102103
0
103104

105+
t280_fromString :: IO ()
106+
t280_fromString =
107+
assertEqual "TB.fromString performs replacement on invalid scalar values"
108+
(TB.toLazyText (TB.fromString "\xD800"))
109+
(LT.pack "\xFFFD")
110+
111+
t280_singleton :: IO ()
112+
t280_singleton =
113+
assertEqual "TB.singleton performs replacement on invalid scalar values"
114+
(TB.toLazyText (TB.singleton '\xD800'))
115+
(LT.pack "\xFFFD")
116+
104117
-- See GitHub issue #301
105118
-- This tests whether the "TEXT take . drop -> unfused" rule is applied to the
106119
-- slice function. When the slice function is fused, a new array will be
@@ -129,5 +142,7 @@ tests = F.testGroup "Regressions"
129142
, F.testCase "t197" t197
130143
, F.testCase "t221" t221
131144
, F.testCase "t227" t227
145+
, F.testCase "t280/fromString" t280_fromString
146+
, F.testCase "t280/singleton" t280_singleton
132147
, F.testCase "t301" t301
133148
]

0 commit comments

Comments
 (0)