@@ -2000,39 +2000,16 @@ final class Stream[+F[_], +O] private[fs2] (private[fs2] val underlying: Pull[F,
    Stream.force(fstream)
  }
-  /** Interleaves the two inputs nondeterministically. The output stream
-    * halts after BOTH `s1` and `s2` terminate normally, or in the event
-    * of an uncaught failure on either `s1` or `s2`. Has the property that
-    * `merge(Stream.empty, s) == s` and `merge(raiseError(e), s)` will
-    * eventually terminate with `raiseError(e)`, possibly after emitting some
-    * elements of `s` first.
-    *
-    * The implementation always tries to pull one chunk from each side
-    * before waiting for it to be consumed by resulting stream.
-    * As such, there may be up to two chunks (one from each stream)
-    * waiting to be processed while the resulting stream
-    * is processing elements.
-    *
-    * Also note that if either side produces empty chunk,
-    * the processing on that side continues,
-    * w/o downstream requiring to consume result.
+  /** Implementation of [[merge]] that additionally allows specifying how each chunk's output stream is combined with its finalizer.
+    * This can be used to control how chunks are emitted downstream; see [[mergeAndAwaitDownstream]] for an example.
    *
-    * If either side does not emit anything (i.e. as result of drain) that side
-    * will continue to run even when the resulting stream did not ask for more data.
-    *
-    * Note that even when this is equivalent to `Stream(this, that).parJoinUnbounded`,
-    * this implementation is little more efficient
-    *
-    * @example {{{
-    * scala> import scala.concurrent.duration._, cats.effect.IO, cats.effect.unsafe.implicits.global
-    * scala> val s1 = Stream.awakeEvery[IO](500.millis).scan(0)((acc, _) => acc + 1)
-    * scala> val s = s1.merge(Stream.sleep_[IO](250.millis) ++ s1)
-    * scala> s.take(6).compile.toVector.unsafeRunSync()
-    * res0: Vector[Int] = Vector(0, 0, 1, 1, 2, 2)
-    * }}}
+    * @param f The function that combines a chunk's output stream with the finalizer for that chunk.
+    *          This controls when the next chunk is pulled from upstream.
    */
-  def merge[F2[x] >: F[x], O2 >: O](
+  private def merge_[F2[x] >: F[x], O2 >: O](
      that: Stream[F2, O2]
+  )(
+      f: (Stream[F2, O2], F2[Unit]) => Stream[F2, O2]
  )(implicit F: Concurrent[F2]): Stream[F2, O2] =
    Stream.force {
      // `State` describes the state of an upstream stream (`this` and `that` are both upstream streams)
@@ -2063,12 +2040,10 @@ final class Stream[+F[_], +O] private[fs2] (private[fs2] val underlying: Pull[F,
            case (Some(r1), Some(r2)) => CompositeFailure.fromResults(r1, r2)
          }
        def run(s: Stream[F2, O2]): F2[Unit] =
-          // `guard` ensures we do not pull another chunk until the previous one has been consumed downstream.
+          // `guard` ensures we do not pull another chunk until `f` has released the previous one downstream.
          Semaphore[F2](1).flatMap { guard =>
-            def sendChunk(chk: Chunk[O2]): F2[Unit] = {
-              val outStr = Stream.chunk(chk).onFinalize(guard.release)
-              output.send(outStr) >> guard.acquire
-            }
+            def sendChunk(chk: Chunk[O2]): F2[Unit] =
+              output.send(f(Stream.chunk(chk), guard.release)) >> guard.acquire

            (Stream.exec(guard.acquire) ++ s.chunks.foreach(sendChunk))
              // Stop when the other upstream has errored or the downstream has completed.
@@ -2103,6 +2078,65 @@ final class Stream[+F[_], +O] private[fs2] (private[fs2] val underlying: Pull[F,
      }
    }

+  /** Like [[merge]], but ensures that each chunk is fully consumed downstream before the next chunk is pulled from the same side.
+    * This loses the equivalence with `Stream(this, that).parJoinUnbounded`, but is useful when the merged streams must never be
+    * read ahead of downstream.
+    *
+    * @note Beware of possible deadlocks in `this` or `that`, notably in parallel processing: the next chunk is not pulled
+    *       until the previous chunk has been fully processed and its scope released.
+    *
+    * @example {{{
+    * scala> import scala.concurrent.duration._, cats.effect.IO, cats.effect.unsafe.implicits.global
+    * scala> import cats.effect._
+    * scala> Ref.of[IO, Int](0).flatMap { ref =>
+    *      |   fs2.Stream.never[IO].mergeAndAwaitDownstream(fs2.Stream.repeatEval(ref.get)).evalMap(value => {
+    *      |     IO.sleep(1.second) >> ref.set(value + 1) as value
+    *      |   }).take(6).compile.toVector
+    *      | }.unsafeRunSync()
+    * res0: Vector[Int] = Vector(0, 1, 2, 3, 4, 5)
+    * }}}
+    */
+  def mergeAndAwaitDownstream[F2[x] >: F[x], O2 >: O](
+      that: Stream[F2, O2]
+  )(implicit F: Concurrent[F2]): Stream[F2, O2] =
+    merge_(that) { case (s, fin) => s.onFinalize(fin) }
+
+  /** Interleaves the two inputs nondeterministically. The output stream
+    * halts after BOTH `s1` and `s2` terminate normally, or in the event
+    * of an uncaught failure on either `s1` or `s2`. Has the property that
+    * `merge(Stream.empty, s) == s` and `merge(raiseError(e), s)` will
+    * eventually terminate with `raiseError(e)`, possibly after emitting some
+    * elements of `s` first.
+    *
+    * The implementation always tries to pull one chunk from each side
+    * before waiting for it to be consumed by the resulting stream.
+    * As such, there may be up to two chunks (one from each stream)
+    * waiting to be processed while the resulting stream
+    * is processing elements.
+    *
+    * Also note that if either side produces an empty chunk,
+    * the processing on that side continues,
+    * without requiring downstream to consume the result.
+    *
+    * If either side does not emit anything (i.e. as a result of drain), that side
+    * will continue to run even when the resulting stream has not asked for more data.
+    *
+    * Note that even though this is equivalent to `Stream(this, that).parJoinUnbounded`,
+    * this implementation is a little more efficient.
+    *
+    * @example {{{
+    * scala> import scala.concurrent.duration._, cats.effect.IO, cats.effect.unsafe.implicits.global
+    * scala> val s1 = Stream.awakeEvery[IO](500.millis).scan(0)((acc, _) => acc + 1)
+    * scala> val s = s1.merge(Stream.sleep_[IO](250.millis) ++ s1)
+    * scala> s.take(6).compile.toVector.unsafeRunSync()
+    * res0: Vector[Int] = Vector(0, 0, 1, 1, 2, 2)
+    * }}}
+    */
+  def merge[F2[x] >: F[x], O2 >: O](
+      that: Stream[F2, O2]
+  )(implicit F: Concurrent[F2]): Stream[F2, O2] =
+    merge_(that) { case (s, fin) => Stream.exec(fin) ++ s }
+
  /** Like `merge`, but halts as soon as _either_ branch halts. */
  def mergeHaltBoth[F2[x] >: F[x]: Concurrent, O2 >: O](
      that: Stream[F2, O2]
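To make the guard mechanism in this patch easier to follow outside the diff, here is a minimal, self-contained sketch of the same technique for a single upstream, written against fs2 3.x and cats-effect 3. It is not part of the patch: the object `MergeGuardSketch` and the helpers `throttled`, `source`, and `consume` are made-up names for illustration. The `combine` hook plays the role of the `f` parameter of `merge_`: it decides where the guard release goes, either before the chunk (merge-style, allowing one chunk of read-ahead per side) or in the chunk's finalizer (mergeAndAwaitDownstream-style, no read-ahead).

```scala
import scala.concurrent.duration._
import cats.effect.{IO, IOApp}
import cats.effect.std.Semaphore
import fs2.Stream
import fs2.concurrent.Channel

// Illustrative sketch only; names and structure are not part of the fs2 patch above.
object MergeGuardSketch extends IOApp.Simple {

  // Push the chunks of `source` through a synchronous channel from a background
  // fiber, one chunk at a time. `combine` receives each chunk (as a stream) plus
  // the guard-release action and decides when the release happens, i.e. when the
  // producer may pull the next chunk from `source`.
  def throttled[O](source: Stream[IO, O])(
      combine: (Stream[IO, O], IO[Unit]) => Stream[IO, O]
  ): Stream[IO, O] =
    Stream.eval(Channel.synchronous[IO, Stream[IO, O]]).flatMap { chan =>
      Stream.eval(Semaphore[IO](1)).flatMap { guard =>
        val produce =
          guard.acquire >> // start with the permit taken, as the patched `merge_` does
            source.chunks
              .evalMap { chk =>
                // send the chunk together with its release hook, then wait for the release
                chan.send(combine(Stream.chunk(chk), guard.release)) >> guard.acquire
              }
              .compile
              .drain >> chan.close.void
        chan.stream.flatten.concurrently(Stream.eval(produce))
      }
    }

  // Logs each pull so we can see when the producer asks `source` for more data.
  def source(label: String): Stream[IO, Int] =
    Stream.range(0, 3).evalTap(i => IO.println(s"$label: pulled $i"))

  // A slow consumer: each element takes 200ms to process.
  def consume(label: String)(s: Stream[IO, Int]): IO[Unit] =
    s.evalMap(i => IO.sleep(200.millis) >> IO.println(s"$label: consumed $i")).compile.drain

  def run: IO[Unit] =
    for {
      _ <- IO.println("merge-style: release before the chunk, one chunk of read-ahead")
      _ <- consume("eager")(throttled(source("eager"))((s, fin) => Stream.exec(fin) ++ s))
      _ <- IO.println("mergeAndAwaitDownstream-style: release in the chunk's finalizer, no read-ahead")
      _ <- consume("awaited")(throttled(source("awaited"))((s, fin) => s.onFinalize(fin)))
    } yield ()
}
```

Running this sketch should show the eager variant logging "pulled 1" while "consumed 0" is still pending (the producer runs one chunk ahead of downstream), whereas the awaited variant should strictly alternate pulls and consumptions, which is the behavior `mergeAndAwaitDownstream` documents for each side of the merge.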