Skip to content

Commit 55358a4

Browse files
committed
Speed up lines and unlines
1 parent 634c565 commit 55358a4

File tree

2 files changed

+41
-35
lines changed

2 files changed

+41
-35
lines changed

src/Data/Text.hs

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1754,42 +1754,29 @@ isAsciiSpace :: Word8 -> Bool
17541754
isAsciiSpace w = w .&. 0x50 == 0 && w < 0x80 && (w == 0x20 || w - 0x09 < 5)
17551755
{-# INLINE isAsciiSpace #-}
17561756

1757-
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
1758-
-- newline 'Char's. The resulting strings do not contain newlines.
1757+
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at newline characters
1758+
-- @'\\n'@ (LF, line feed). The resulting strings do not contain newlines.
1759+
--
1760+
-- 'lines' __does not__ treat @'\\r'@ (CR, carriage return) as a newline character.
17591761
lines :: Text -> [Text]
1760-
lines ps | null ps = []
1761-
| otherwise = h : if null t
1762-
then []
1763-
else lines (unsafeTail t)
1764-
where (# h,t #) = span_ (/= '\n') ps
1762+
lines (Text arr@(A.ByteArray arr#) off len) = go off
1763+
where
1764+
go !n
1765+
| n >= len + off = []
1766+
| delta < 0 = [Text arr n (len + off - n)]
1767+
| otherwise = Text arr n delta : go (n + delta + 1)
1768+
where
1769+
delta = cSsizeToInt $ unsafeDupablePerformIO $
1770+
memchr arr# (intToCSize n) (intToCSize (len + off - n)) 0x0A
17651771
{-# INLINE lines #-}
17661772

1767-
{-
1768-
-- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
1769-
-- boundaries.
1770-
--
1771-
-- A line boundary is considered to be either a line feed, a carriage
1772-
-- return immediately followed by a line feed, or a carriage return.
1773-
-- This accounts for both Unix and Windows line ending conventions,
1774-
-- and for the old convention used on Mac OS 9 and earlier.
1775-
lines' :: Text -> [Text]
1776-
lines' ps | null ps = []
1777-
| otherwise = h : case uncons t of
1778-
Nothing -> []
1779-
Just (c,t')
1780-
| c == '\n' -> lines t'
1781-
| c == '\r' -> case uncons t' of
1782-
Just ('\n',t'') -> lines t''
1783-
_ -> lines t'
1784-
where (h,t) = span notEOL ps
1785-
notEOL c = c /= '\n' && c /= '\r'
1786-
{-# INLINE lines' #-}
1787-
-}
1773+
foreign import ccall unsafe "_hs_text_memchr" memchr
1774+
:: ByteArray# -> CSize -> CSize -> Word8 -> IO CSsize
17881775

17891776
-- | /O(n)/ Joins lines, after appending a terminating newline to
17901777
-- each.
17911778
unlines :: [Text] -> Text
1792-
unlines = concat . L.map (`snoc` '\n')
1779+
unlines = concat . L.foldr (\t acc -> t : singleton '\n' : acc) []
17931780
{-# INLINE unlines #-}
17941781

17951782
-- | /O(n)/ Joins words using single space characters.

src/Data/Text/Lazy.hs

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,13 +1411,32 @@ chunksOf k = go
14111411
| otherwise -> a : go b
14121412
{-# INLINE chunksOf #-}
14131413

1414-
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
1415-
-- newline 'Char's. The resulting strings do not contain newlines.
1414+
-- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at newline characters
1415+
-- @'\\n'@ (LF, line feed). The resulting strings do not contain newlines.
1416+
--
1417+
-- 'lines' __does not__ treat @'\\r'@ (CR, carriage return) as a newline character.
14161418
lines :: Text -> [Text]
14171419
lines Empty = []
1418-
lines t = let (l,t') = break ((==) '\n') t
1419-
in l : if null t' then []
1420-
else lines (tail t')
1420+
lines (Chunk c cs)
1421+
| hasNlEnd c = P.map fromStrict (T.lines c) ++ lines cs
1422+
| otherwise = case T.lines c of
1423+
[] -> error "lines: unexpected empty chunk"
1424+
l : ls -> go l ls cs
1425+
where
1426+
go l [] Empty = [fromStrict l]
1427+
go l [] (Chunk x xs) = case T.lines x of
1428+
[] -> error "lines: unexpected empty chunk"
1429+
[xl]
1430+
| hasNlEnd x -> chunk l (fromStrict xl) : lines xs
1431+
| otherwise -> go (l `T.append` xl) [] xs
1432+
xl : yl : yls -> chunk l (fromStrict xl) :
1433+
if hasNlEnd x
1434+
then P.map fromStrict (yl : yls) ++ lines xs
1435+
else go yl yls xs
1436+
go l (m : ms) xs = fromStrict l : go m ms xs
1437+
1438+
hasNlEnd :: T.Text -> Bool
1439+
hasNlEnd (T.Text arr off len) = A.unsafeIndex arr (off + len - 1) == 0x0A
14211440

14221441
-- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
14231442
-- representing white space.
@@ -1428,7 +1447,7 @@ words = L.filter (not . null) . split isSpace
14281447
-- | /O(n)/ Joins lines, after appending a terminating newline to
14291448
-- each.
14301449
unlines :: [Text] -> Text
1431-
unlines = concat . L.map (`snoc` '\n')
1450+
unlines = concat . L.foldr (\t acc -> t : singleton '\n' : acc) []
14321451
{-# INLINE unlines #-}
14331452

14341453
-- | /O(n)/ Joins words using single space characters.

0 commit comments

Comments
 (0)