@@ -1754,42 +1754,29 @@ isAsciiSpace :: Word8 -> Bool
1754
1754
-- Accepts exactly the bytes 0x20 and 0x09..0x0D. The first two guards are
-- cheap rejections: none of the accepted bytes has bit 0x40 or 0x10 set, and
-- none of them is 0x80 or above.
isAsciiSpace w
  | w .&. 0x50 /= 0 = False
  | w >= 0x80 = False
    -- On 'Word8' the subtraction wraps around for w < 0x09, so the comparison
    -- below holds only for 0x09 <= w <= 0x0D.
  | otherwise = w == 0x20 || w - 0x09 < 5
{-# INLINE isAsciiSpace #-}
1756
1756
1757
-- | /O(n)/ Splits a 'Text' into its lines, using the line feed character
-- @'\\n'@ (LF) as the only separator. The separators themselves are not
-- part of the results.
--
-- A carriage return @'\\r'@ (CR) is __not__ a line break for 'lines': it is
-- kept as ordinary content of the line it occurs in.
--
-- An empty input yields an empty list, and a final newline does not produce
-- a trailing empty line.
lines :: Text -> [Text]
lines (Text arr@(A.ByteArray arr#) off len) = splitFrom off
  where
    -- One past the last position of this slice inside the underlying array.
    end = off + len

    -- Produce the lines contained in the positions [start, end).
    splitFrom !start
      | remaining <= 0 = []
        -- No further LF: everything that is left forms the last line.
      | lineLen < 0 = [Text arr start remaining]
        -- Skip the LF itself (+ 1) before looking for the next line.
      | otherwise = Text arr start lineLen : splitFrom (start + lineLen + 1)
      where
        remaining = end - start
        -- Searches for the byte 0x0A starting at 'start'. A non-negative
        -- result is used as the length of the current line; a negative one
        -- is taken to mean that no LF was found.
        -- NOTE(review): presumably the C routine only reads the array, which
        -- is what makes running it outside of IO acceptable -- confirm
        -- against the C source.
        lineLen = cSsizeToInt $ unsafeDupablePerformIO $
          memchr arr# (intToCSize start) (intToCSize remaining) 0x0A
{-# INLINE lines #-}
1766
1772
1767
- {-
1768
- -- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
1769
- -- boundaries.
1770
- --
1771
- -- A line boundary is considered to be either a line feed, a carriage
1772
- -- return immediately followed by a line feed, or a carriage return.
1773
- -- This accounts for both Unix and Windows line ending conventions,
1774
- -- and for the old convention used on Mac OS 9 and earlier.
1775
- lines' :: Text -> [Text]
1776
- lines' ps | null ps = []
1777
- | otherwise = h : case uncons t of
1778
- Nothing -> []
1779
- Just (c,t')
1780
- | c == '\n' -> lines t'
1781
- | c == '\r' -> case uncons t' of
1782
- Just ('\n',t'') -> lines t''
1783
- _ -> lines t'
1784
- where (h,t) = span notEOL ps
1785
- notEOL c = c /= '\n' && c /= '\r'
1786
- {- # INLINE lines' #-}
1787
- -}
1773
-- FFI binding that makes the C symbol named in the entity string below
-- available as 'memchr'.
--
-- NOTE(review): judging by its call site in 'lines', the arguments are the
-- array to scan, a start offset, the number of positions to examine and the
-- byte to look for; a non-negative result is used as the distance from the
-- start offset to the match, and a negative result is treated as "not
-- found". Confirm against the C implementation.
--
-- NOTE(review): the import is @unsafe@; per the GHC documentation on
-- unlifted FFI types this is what permits passing a possibly unpinned
-- ByteArray# to foreign code -- confirm that the C routine never calls back
-- into Haskell.
+ foreign import ccall unsafe " _hs_text_memchr" memchr
1774
+ :: ByteArray # -> CSize -> CSize -> Word8 -> IO CSsize
1788
1775
1789
1776
-- | /O(n)/ Concatenates a list of lines into a single 'Text', placing a
-- newline @'\\n'@ after every element, including the last one.
unlines :: [Text] -> Text
unlines = concat . L.foldr terminate []
  where
    -- Put a line and its terminating newline in front of the already
    -- processed remainder of the list.
    terminate line rest = line : singleton '\n' : rest
{-# INLINE unlines #-}
1794
1781
1795
1782
-- | /O(n)/ Joins words using single space characters.
0 commit comments