Skip to content

Commit 271b3a6

Browse files
committed
Bump version and update changelog
1 parent 7532a9f commit 271b3a6

File tree

10 files changed

+65
-6
lines changed

10 files changed

+65
-6
lines changed

changelog.md

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,24 @@
1-
### 1.3
2-
1+
### 2.0
2+
3+
* [Switch internal representation of text from UTF-16 to UTF-8](https://github.com/haskell/text/pull/365):
4+
* Functions in `Data.Text.Array` now operate over arrays of `Word8` instead of `Word16`.
5+
* Rename constructors of `Array` and `MArray` to `ByteArray` and `MutableByteArray`.
6+
* Rename functions and types in `Data.Text.Foreign` to reflect switch
7+
from `Word16` to `Word8`.
8+
* Rename slicing functions in `Data.Text.Unsafe` to reflect switch
9+
from `Word16` to `Word8`.
10+
* Rename `Data.Text.Internal.Unsafe.Char.unsafeChr` to `unsafeChr16`.
11+
* Change semantics and order of arguments of `Data.Text.Array.copyI`:
12+
pass length, not end offset.
13+
* Extend `Data.Text.Internal.Encoding.Utf8` to provide more UTF-8 related routines.
14+
* Extend interface of `Data.Text.Array` with more utility functions.
15+
* Add `instance Show Data.Text.Unsafe.Iter`.
16+
* Add `Data.Text.measureOff`.
17+
* Extend `Data.Text.Unsafe` with `iterArray` and `reverseIterArray`.
18+
* Export `Data.Text.Internal.Lazy.equal`.
19+
* Export `Data.Text.Internal.append`.
20+
* Add `Data.Text.Internal.Private.spanAscii_`.
21+
* Replacement characters in `decodeUtf8With` are no longer limited to Basic Multilingual Plane.
322
* [Disable implicit fusion rules](https://github.com/haskell/text/pull/348)
423
* [Add `Data.Text.Encoding.decodeUtf8Lenient`](https://github.com/haskell/text/pull/342)
524
* [Remove `Data.Text.Internal.Unsafe.Shift`](https://github.com/haskell/text/pull/343)

src/Data/Text.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,8 @@ take n t@(Text arr off len)
12251225
--
12261226
-- This function is used to implement 'take', 'drop', 'splitAt' and 'length'
12271227
-- and is useful on its own in streaming and parsing libraries.
1228+
--
1229+
-- @since 2.0
12281230
measureOff :: Int -> Text -> Int
12291231
measureOff !n (Text (A.ByteArray arr) off len) = if len == 0 then 0 else
12301232
cSsizeToInt $ unsafeDupablePerformIO $

src/Data/Text/Array.hs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ new (I# len#)
8080
{-# INLINE new #-}
8181

8282
-- | Create an uninitialized mutable pinned array.
83+
--
84+
-- @since 2.0
8385
newPinned :: forall s. Int -> ST s (MArray s)
8486
newPinned (I# len#)
8587
#if defined(ASSERTS)
@@ -90,13 +92,15 @@ newPinned (I# len#)
9092
(# s2#, marr# #) -> (# s2#, MutableByteArray marr# #)
9193
{-# INLINE newPinned #-}
9294

95+
-- | @since 2.0
9396
newFilled :: Int -> Int -> ST s (MArray s)
9497
newFilled (I# len#) (I# c#) = ST $ \s1# ->
9598
case newByteArray# len# s1# of
9699
(# s2#, marr# #) -> case setByteArray# marr# 0# len# c# s2# of
97100
s3# -> (# s3#, MutableByteArray marr# #)
98101
{-# INLINE newFilled #-}
99102

103+
-- | @since 2.0
100104
tile :: MArray s -> Int -> ST s ()
101105
tile marr tileLen = do
102106
totalLen <- getSizeofMArray marr
@@ -130,8 +134,10 @@ unsafeIndex (ByteArray arr) i@(I# i#) =
130134
case indexWord8Array# arr i# of r# -> (W8# r#)
131135
{-# INLINE unsafeIndex #-}
132136

133-
-- sizeofMutableByteArray# is deprecated, because it is unsafe in the presence of
134-
-- shrinkMutableByteArray# and resizeMutableByteArray#.
137+
-- | 'sizeofMutableByteArray#' is deprecated, because it is unsafe in the presence of
138+
-- 'shrinkMutableByteArray#' and 'resizeMutableByteArray#'.
139+
--
140+
-- @since 2.0
135141
getSizeofMArray :: MArray s -> ST s Int
136142
getSizeofMArray (MutableByteArray marr) = ST $ \s0# ->
137143
case getSizeofMutableByteArray# marr s0# of
@@ -185,12 +191,14 @@ run2 k = runST (do
185191
return (arr,b))
186192
{-# INLINE run2 #-}
187193

194+
-- | @since 2.0
188195
resizeM :: MArray s -> Int -> ST s (MArray s)
189196
resizeM (MutableByteArray ma) i@(I# i#) = ST $ \s1# ->
190197
case resizeMutableByteArray# ma i# s1# of
191198
(# s2#, newArr #) -> (# s2#, MutableByteArray newArr #)
192199
{-# INLINE resizeM #-}
193200

201+
-- | @since 2.0
194202
shrinkM ::
195203
#if defined(ASSERTS)
196204
HasCallStack =>
@@ -253,6 +261,8 @@ copyI count@(I# count#) (MutableByteArray dst#) dstOff@(I# dstOff#) (ByteArray s
253261
{-# INLINE copyI #-}
254262

255263
-- | Copy from pointer.
264+
--
265+
-- @since 2.0
256266
copyFromPointer
257267
:: MArray s -- ^ Destination
258268
-> Int -- ^ Destination offset
@@ -270,6 +280,8 @@ copyFromPointer (MutableByteArray dst#) dstOff@(I# dstOff#) (Ptr src#) count@(I#
270280
{-# INLINE copyFromPointer #-}
271281

272282
-- | Copy to pointer.
283+
--
284+
-- @since 2.0
273285
copyToPointer
274286
:: Array -- ^ Source
275287
-> Int -- ^ Source offset
@@ -293,6 +305,8 @@ equal src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count == 0
293305
{-# INLINE equal #-}
294306

295307
-- | Compare portions of two arrays. No bounds checking is performed.
308+
--
309+
-- @since 2.0
296310
compare :: Array -> Int -> Array -> Int -> Int -> Ordering
297311
compare src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count `Prelude.compare` 0
298312
{-# INLINE compare #-}

src/Data/Text/Encoding.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ decodeUtf8With2 onErr bs1@(B.length -> len1) bs2@(B.length -> len2) = runST $ do
234234
, bs <- B.drop (srcOff - len1) (B.take guessUtf8Boundary bs2)
235235
, isValidBS bs = do
236236
withBS bs $ \fp _ -> unsafeIOToST $ unsafeWithForeignPtr fp $ \src ->
237-
unsafeSTToIO $ A.copyP dst dstOff src (len1 + guessUtf8Boundary - srcOff)
237+
unsafeSTToIO $ A.copyFromPointer dst dstOff src (len1 + guessUtf8Boundary - srcOff)
238238
inner (len1 + guessUtf8Boundary) (dstOff + (len1 + guessUtf8Boundary - srcOff))
239239

240240
| dstOff + 4 > dstLen = do

src/Data/Text/Foreign.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ import qualified Data.Text.Array as A
5959
-- the functions in the 'Data.Text.Encoding' module.
6060

6161
-- | A type representing a number of UTF-8 code units.
62+
--
63+
-- @since 2.0
6264
newtype I8 = I8 Int
6365
deriving (Bounded, Enum, Eq, Integral, Num, Ord, Read, Real, Show)
6466

@@ -86,6 +88,8 @@ fromPtr ptr (I8 len) = unsafeSTToIO $ do
8688
-- If @n@ would cause the 'Text' to end inside a code point, the
8789
-- end of the prefix will be advanced by several additional 'Word8' units
8890
-- to maintain its validity.
91+
--
92+
-- @since 2.0
8993
takeWord8 :: I8 -> Text -> Text
9094
takeWord8 = (fst .) . splitAtWord8
9195

@@ -95,6 +99,8 @@ takeWord8 = (fst .) . splitAtWord8
9599
-- If @n@ would cause the 'Text' to begin inside a code point, the
96100
-- beginning of the suffix will be advanced by several additional 'Word8'
97101
-- unit to maintain its validity.
102+
--
103+
-- @since 2.0
98104
dropWord8 :: I8 -> Text -> Text
99105
dropWord8 = (snd .) . splitAtWord8
100106

src/Data/Text/Internal/Encoding/Utf8.hs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ between x y z = x >= y && x <= z
6868
-- | ord c < 0x800 = 2
6969
-- | ord c < 0x10000 = 3
7070
-- | otherwise = 4
71+
72+
-- | @since 2.0
7173
utf8Length :: Char -> Int
7274
utf8Length (C# c) = I# ((1# +# geChar# c (chr# 0x80#)) +# (geChar# c (chr# 0x800#) +# geChar# c (chr# 0x10000#)))
7375
{-# INLINE utf8Length #-}
@@ -82,6 +84,8 @@ utf8Length (C# c) = I# ((1# +# geChar# c (chr# 0x80#)) +# (geChar# c (chr# 0x800
8284
-- c `xor` I# (c# <=# 0#) is a branchless equivalent of c `max` 1.
8385
-- It is crucial to write c# <=# 0# and not c# ==# 0#, otherwise
8486
-- GHC is tempted to "optimize" by introduction of branches.
87+
88+
-- | @since 2.0
8589
utf8LengthByLeader :: Word8 -> Int
8690
utf8LengthByLeader w = c `xor` I# (c# <=# 0#)
8791
where
@@ -256,11 +260,13 @@ updateState (ByteClass c) (DecoderState s) = DecoderState (W8# el#)
256260

257261
newtype CodePoint = CodePoint Int
258262

263+
-- | @since 2.0
259264
data DecoderResult
260265
= Accept !Char
261266
| Incomplete !DecoderState !CodePoint
262267
| Reject
263268

269+
-- | @since 2.0
264270
utf8DecodeStart :: Word8 -> DecoderResult
265271
utf8DecodeStart w
266272
| st == utf8AcceptState = Accept (chr (word8ToInt w))
@@ -271,6 +277,7 @@ utf8DecodeStart w
271277
st = updateState cl utf8AcceptState
272278
cp = word8ToInt $ (0xff `shiftR` word8ToInt cl') .&. w
273279

280+
-- | @since 2.0
274281
utf8DecodeContinue :: Word8 -> DecoderState -> CodePoint -> DecoderResult
275282
utf8DecodeContinue w st (CodePoint cp)
276283
| st' == utf8AcceptState = Accept (chr cp')

src/Data/Text/Internal/Private.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ span_ p t@(Text arr off len) = (# hd,tl #)
3838

3939
-- | For the sake of performance this function does not check
4040
-- that a char is in ASCII range; it is a responsibility of @p@.
41+
--
42+
-- @since 2.0
4143
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
4244
spanAscii_ p (Text arr off len) = (# hd, tl #)
4345
where hd = text arr off k

src/Data/Text/Internal/Unsafe/Char.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ ord :: Char -> Int
3939
ord (C# c#) = I# (ord# c#)
4040
{-# INLINE ord #-}
4141

42+
-- | @since 2.0
4243
unsafeChr16 :: Word16 -> Char
4344
unsafeChr16 (W16# w#) = C# (chr# (word2Int# (word16ToWord# w#)))
4445
{-# INLINE unsafeChr16 #-}

src/Data/Text/Unsafe.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ iter ::
7979
iter (Text arr off _len) i = iterArray arr (off + i)
8080
{-# INLINE iter #-}
8181

82+
-- | @since 2.0
8283
iterArray :: A.Array -> Int -> Iter
8384
iterArray arr j = Iter chr l
8485
where m0 = A.unsafeIndex arr j
@@ -107,6 +108,7 @@ reverseIter :: Text -> Int -> Iter
107108
reverseIter (Text arr off _len) i = reverseIterArray arr (off + i)
108109
{-# INLINE reverseIter #-}
109110

111+
-- | @since 2.0
110112
reverseIterArray :: A.Array -> Int -> Iter
111113
reverseIterArray arr j
112114
| m0 < 0x80 = Iter (unsafeChr8 m0) (-1)
@@ -139,16 +141,22 @@ reverseIter_ (Text arr off _len) i
139141
-- | /O(1)/ Return the length of a 'Text' in units of 'Word8'. This
140142
-- is useful for sizing a target array appropriately before using
141143
-- 'unsafeCopyToPtr'.
144+
--
145+
-- @since 2.0
142146
lengthWord8 :: Text -> Int
143147
lengthWord8 (Text _arr _off len) = len
144148
{-# INLINE lengthWord8 #-}
145149

146150
-- | /O(1)/ Unchecked take of 'k' 'Word8's from the front of a 'Text'.
151+
--
152+
-- @since 2.0
147153
takeWord8 :: Int -> Text -> Text
148154
takeWord8 k (Text arr off _len) = Text arr off k
149155
{-# INLINE takeWord8 #-}
150156

151157
-- | /O(1)/ Unchecked drop of 'k' 'Word8's from the front of a 'Text'.
158+
--
159+
-- @since 2.0
152160
dropWord8 :: Int -> Text -> Text
153161
dropWord8 k (Text arr off len) = Text arr (off+k) (len-k)
154162
{-# INLINE dropWord8 #-}

text.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cabal-version: 2.2
22
name: text
3-
version: 1.2.5.0
3+
version: 2.0
44

55
homepage: https://github.com/haskell/text
66
bug-reports: https://github.com/haskell/text/issues

0 commit comments

Comments
 (0)