Skip to content

Commit 04ff3b7

Browse files
committed
Speed up Data.Text.intersperse
1 parent 0d2c43e commit 04ff3b7

File tree

1 file changed

+59
-3
lines changed

1 file changed

+59
-3
lines changed

src/Data/Text.hs

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ import Data.Binary (Binary(get, put))
225225
import Data.Monoid (Monoid(..))
226226
import Data.Semigroup (Semigroup(..))
227227
import Data.String (IsString(..))
228-
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4)
228+
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4)
229229
import qualified Data.Text.Internal.Fusion as S
230230
import qualified Data.Text.Internal.Fusion.Common as S
231231
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
@@ -669,8 +669,61 @@ intercalate t = concat . L.intersperse t
669669
-- "S.H.I.E.L.D"
670670
--
671671
-- Performs replacement on invalid scalar values.
672-
intersperse :: Char -> Text -> Text
673-
intersperse c t = unstream (S.intersperse (safe c) (stream t))
672+
intersperse :: Char -> Text -> Text
673+
intersperse c t@(Text src o l) = if l == 0 then mempty else runST $ do
674+
let !cLen = utf8Length c
675+
dstLen = l + length t P.* cLen
676+
677+
dst <- A.new dstLen
678+
679+
let writeSep = case cLen of
680+
1 -> \dstOff ->
681+
A.unsafeWrite dst dstOff (ord8 c)
682+
2 -> let (c0, c1) = ord2 c in \dstOff -> do
683+
A.unsafeWrite dst dstOff c0
684+
A.unsafeWrite dst (dstOff + 1) c1
685+
3 -> let (c0, c1, c2) = ord3 c in \dstOff -> do
686+
A.unsafeWrite dst dstOff c0
687+
A.unsafeWrite dst (dstOff + 1) c1
688+
A.unsafeWrite dst (dstOff + 2) c2
689+
_ -> let (c0, c1, c2, c3) = ord4 c in \dstOff -> do
690+
A.unsafeWrite dst dstOff c0
691+
A.unsafeWrite dst (dstOff + 1) c1
692+
A.unsafeWrite dst (dstOff + 2) c2
693+
A.unsafeWrite dst (dstOff + 3) c3
694+
let go !srcOff !dstOff = if srcOff >= o + l then return () else do
695+
let m0 = A.unsafeIndex src srcOff
696+
m1 = A.unsafeIndex src (srcOff + 1)
697+
m2 = A.unsafeIndex src (srcOff + 2)
698+
m3 = A.unsafeIndex src (srcOff + 3)
699+
!d = utf8LengthByLeader m0
700+
case d of
701+
1 -> do
702+
A.unsafeWrite dst dstOff m0
703+
writeSep (dstOff + 1)
704+
go (srcOff + 1) (dstOff + 1 + cLen)
705+
2 -> do
706+
A.unsafeWrite dst dstOff m0
707+
A.unsafeWrite dst (dstOff + 1) m1
708+
writeSep (dstOff + 2)
709+
go (srcOff + 2) (dstOff + 2 + cLen)
710+
3 -> do
711+
A.unsafeWrite dst dstOff m0
712+
A.unsafeWrite dst (dstOff + 1) m1
713+
A.unsafeWrite dst (dstOff + 2) m2
714+
writeSep (dstOff + 3)
715+
go (srcOff + 3) (dstOff + 3 + cLen)
716+
_ -> do
717+
A.unsafeWrite dst dstOff m0
718+
A.unsafeWrite dst (dstOff + 1) m1
719+
A.unsafeWrite dst (dstOff + 2) m2
720+
A.unsafeWrite dst (dstOff + 3) m3
721+
writeSep (dstOff + 4)
722+
go (srcOff + 4) (dstOff + 4 + cLen)
723+
724+
go o 0
725+
arr <- A.unsafeFreeze dst
726+
return (Text arr 0 (dstLen - cLen))
674727
{-# INLINE [1] intersperse #-}
675728

676729
-- | /O(n)/ Reverse the characters of a string.
@@ -1958,6 +2011,9 @@ copy (Text arr off len) = Text (A.run go) 0 len
19582011
A.copyI len marr 0 arr off
19592012
return marr
19602013

2014+
ord8 :: Char -> Word8
2015+
ord8 = P.fromIntegral . ord
2016+
19612017
intToCSize :: Int -> CSize
19622018
intToCSize = P.fromIntegral
19632019

0 commit comments

Comments
 (0)