@@ -247,6 +247,7 @@ import qualified Data.ByteString.Lazy as L
247247import qualified Data.ByteString as S (ByteString ) -- typename only
248248import qualified Data.ByteString as B
249249import qualified Data.ByteString.Unsafe as B
250+ import Data.List.NonEmpty (NonEmpty (.. ))
250251import Data.ByteString.Lazy.Internal
251252import Data.ByteString.Lazy.ReadInt
252253import Data.ByteString.Lazy.ReadNat
@@ -856,59 +857,50 @@ unzip :: [(Char, Char)] -> (ByteString, ByteString)
856857unzip ls = (pack (fmap fst ls), pack (fmap snd ls))
857858{-# INLINE unzip #-}
858859
859- -- | 'lines' breaks a ByteString up into a list of ByteStrings at
860+ -- | 'lines' lazily splits a ByteString into a list of ByteStrings at
860861-- newline Chars (@'\\n'@). The resulting strings do not contain newlines.
861- --
862- -- As of bytestring 0.9.0.3, this function is stricter than its
863- -- list cousin.
862+ -- The first chunk of the result is only strict in the first chunk of the
863+ -- input.
864864--
865865-- Note that it __does not__ regard CR (@'\\r'@) as a newline character.
866866--
867867lines :: ByteString -> [ByteString ]
868868lines Empty = []
869- lines (Chunk c0 cs0) = loop0 c0 cs0
870- where
871- -- this is a really performance sensitive function but the
872- -- chunked representation makes the general case a bit expensive
873- -- however assuming a large chunk size and normalish line lengths
874- -- we will find line endings much more frequently than chunk
875- -- endings so it makes sense to optimise for that common case.
876- -- So we partition into two special cases depending on whether we
877- -- are keeping back a list of chunks that will eventually be output
878- -- once we get to the end of the current line.
879-
880- -- the common special case where we have no existing chunks of
881- -- the current line
882- loop0 :: S. ByteString -> ByteString -> [ByteString ]
883- loop0 c cs =
884- case B. elemIndex (c2w ' \n ' ) c of
885- Nothing -> case cs of
886- Empty | B. null c -> []
887- | otherwise -> [Chunk c Empty ]
888- (Chunk c' cs')
889- | B. null c -> loop0 c' cs'
890- | otherwise -> loop c' [c] cs'
891-
892- Just n | n /= 0 -> Chunk (B. unsafeTake n c) Empty
893- : loop0 (B. unsafeDrop (n+ 1 ) c) cs
894- | otherwise -> Empty
895- : loop0 (B. unsafeTail c) cs
896-
897- -- the general case when we are building a list of chunks that are
898- -- part of the same line
899- loop :: S. ByteString -> [S. ByteString ] -> ByteString -> [ByteString ]
900- loop c line cs =
901- case B. elemIndex (c2w ' \n ' ) c of
902- Nothing ->
903- case cs of
904- Empty -> let ! c' = revChunks (c : line)
905- in [c']
906-
907- (Chunk c' cs') -> loop c' (c : line) cs'
908-
909- Just n ->
910- let ! c' = revChunks (B. unsafeTake n c : line)
911- in c' : loop0 (B. unsafeDrop (n+ 1 ) c) cs
869+ lines (Chunk c0 cs0) = unNE $! go c0 cs0
870+ where
871+ -- Natural NonEmpty -> List
872+ unNE :: NonEmpty a -> [a ]
873+ unNE (a :| b) = a : b
874+
875+ -- Strict in the first argument, lazy in the second.
876+ consNE :: ByteString -> NonEmpty ByteString -> NonEmpty ByteString
877+ consNE ! a b = a :| (unNE $! b)
878+
879+ -- Note invariant: The initial chunk is non-empty on input, and we
880+ -- need to be sure to maintain this in internal recursive calls.
881+ go :: S. ByteString -> ByteString -> NonEmpty ByteString
882+ go c cs = case B. elemIndex (c2w ' \n ' ) c of
883+ Just n
884+ | n1 <- n + 1
885+ , n1 < B. length c -> consNE c' $ go (B. unsafeDrop n1 c) cs
886+ -- 'c' was a multi-line chunk
887+ | otherwise -> c' :| lines cs
888+ -- 'c' was a single-line chunk
889+ where
890+ ! c' = chunk (B. unsafeTake n c) Empty
891+
892+ -- Initial chunk with no newline becomes first chunk of
893+ -- first line of result, with the rest of the result lazy!
894+ -- In particular, we don't strictly pattern match on 'cs'.
895+ --
896+ -- We can form `Chunk c ...` because the invariant is maintained
897+ -- here and also by using `chunk` in the definition of `c'` above.
898+ Nothing -> let ~ (l:| ls) = lazyRest cs
899+ in Chunk c l :| ls
900+ where
901+ lazyRest :: ByteString -> NonEmpty ByteString
902+ lazyRest (Chunk c' cs') = go c' cs'
903+ lazyRest Empty = Empty :| []
912904
913905-- | 'unlines' joins lines, appending a terminating newline after each.
914906--
@@ -950,10 +942,3 @@ hPutStrLn h ps = hPut h ps >> hPut h (L.singleton 0x0a)
950942--
951943putStrLn :: ByteString -> IO ()
952944putStrLn = hPutStrLn stdout
953-
954- -- ---------------------------------------------------------------------
955- -- Internal utilities
956-
957- -- reverse a list of possibly-empty chunks into a lazy ByteString
958- revChunks :: [S. ByteString ] -> ByteString
959- revChunks = List. foldl' (flip chunk) Empty
0 commit comments