Skip to content

Commit 8baf7a1

Browse files
alexbiehl authored and Lysxia committed
Use copyToPointer in encodeUtf8Builder
Instead of copying each character individually, we use copyToPointer.
1 parent 3488190 commit 8baf7a1

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

src/Data/Text/Encoding.hs

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -391,7 +391,37 @@ decodeUtf8Lenient = decodeUtf8With lenientDecode
391391
--
392392
-- @since 1.1.0.0
393393
encodeUtf8Builder :: Text -> B.Builder
encodeUtf8Builder =
    -- Kept as an explicit lambda (manual eta-expansion) so that inlining
    -- behaves as intended.
    \t -> B.builder (copyWhole t)
  where
    -- Fast path: if advancing the write pointer by @n@ bytes stays within
    -- the buffer, copy the whole payload with a single 'A.copyToPointer'
    -- call and hand the advanced range to the continuation. This branch is
    -- deliberately non-recursive so the common case compiles to better
    -- code; the chunked fallback lives in 'textCopyStep'.
    copyWhole t@(Text src start n) !cont range@(B.BufferRange dst dstEnd)
      | fits = do
          unsafeSTToIO (A.copyToPointer src start dst n)
          cont (B.BufferRange dst' dstEnd)
      | otherwise = textCopyStep t cont range
      where
        -- Write position after the copy, if it were performed in one go.
        dst' = dst `plusPtr` n
        fits = dst' <= dstEnd
{-# INLINE encodeUtf8Builder #-}
408+
409+
-- | Build step that copies the payload of a 'Text' into the output buffer
-- piecewise. Each invocation copies as much as fits into the current
-- 'B.BufferRange'; if input is left over it signals 'B.bufferFull' and
-- resumes from the first uncopied offset once a new buffer is supplied.
-- When everything has been copied, the continuation @k@ is run on the
-- remaining buffer range.
textCopyStep :: Text -> B.BuildStep a -> B.BuildStep a
textCopyStep (Text src start n) cont =
    loop start (start + n)
  where
    -- @pos@ is the next source offset to copy, @end@ the offset one past
    -- the last source element.
    loop !pos !end (B.BufferRange dst dstEnd) =
      if room < todo
        then do
          -- Not enough space: fill the buffer completely, then ask for a
          -- fresh one (at least 1 byte) and continue from the new offset.
          unsafeSTToIO (A.copyToPointer src pos dst room)
          let !pos' = pos + room
          return $ B.bufferFull 1 dstEnd (loop pos' end)
        else do
          -- Everything left fits: copy it and continue with the rest of
          -- the buffer.
          unsafeSTToIO (A.copyToPointer src pos dst todo)
          let !rest = B.BufferRange (dst `plusPtr` todo) dstEnd
          cont rest
      where
        -- Bytes still free in the output buffer.
        room = dstEnd `minusPtr` dst
        -- Input still to be copied.
        todo = end - pos
395425

396426
-- | Encode text using UTF-8 encoding and escape the ASCII characters using
397427
-- a 'BP.BoundedPrim'.

0 commit comments

Comments
 (0)