Skip to content

Commit ac848e8

Browse files
elizarovqwwdfsad
authored andcommitted
Fixed exception handling for publish builder
* Test that ensures proper parent cancellation * Stress test to ensure that exception in publisher always handled by parent * Also fixed race between request & signal completion that did not reproduce before
1 parent 8e2fd07 commit ac848e8

File tree

5 files changed

+183
-47
lines changed

5 files changed

+183
-47
lines changed

binary-compatibility-validator/reference-public-api/kotlinx-coroutines-core.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ public final class kotlinx/coroutines/CoroutineExceptionHandlerKt {
144144
public static final fun CoroutineExceptionHandler (Lkotlin/jvm/functions/Function2;)Lkotlinx/coroutines/CoroutineExceptionHandler;
145145
public static final fun handleCoroutineException (Lkotlin/coroutines/CoroutineContext;Ljava/lang/Throwable;Lkotlinx/coroutines/Job;)V
146146
public static synthetic fun handleCoroutineException$default (Lkotlin/coroutines/CoroutineContext;Ljava/lang/Throwable;Lkotlinx/coroutines/Job;ILjava/lang/Object;)V
147+
public static final fun handleExceptionViaHandler (Lkotlin/coroutines/CoroutineContext;Ljava/lang/Throwable;)V
147148
}
148149

149150
public final class kotlinx/coroutines/CoroutineName : kotlin/coroutines/AbstractCoroutineContextElement {

common/kotlinx-coroutines-core-common/src/CoroutineExceptionHandler.kt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ public fun handleCoroutineException(context: CoroutineContext, exception: Throwa
3131
handleExceptionViaHandler(context, exception)
3232
}
3333

34-
internal fun handleExceptionViaHandler(context: CoroutineContext, exception: Throwable) {
34+
/**
35+
* @suppress This is an internal API and it is subject to change.
36+
*/
37+
@InternalCoroutinesApi
38+
public fun handleExceptionViaHandler(context: CoroutineContext, exception: Throwable) {
3539
// Invoke exception handler from the context if present
3640
try {
3741
context[CoroutineExceptionHandler]?.let {

reactive/kotlinx-coroutines-reactive/src/Publish.kt

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ private class PublisherCoroutine<in T>(
5959
private val subscriber: Subscriber<T>
6060
) : AbstractCoroutine<Unit>(parentContext, true), ProducerScope<T>, Subscription, SelectClause2<T, SendChannel<T>> {
6161
override val channel: SendChannel<T> get() = this
62+
63+
// cancelsParent == true ensure that error is always reported to the parent, so that parent cannot complete
64+
// without receiving reported error.
6265
override val cancelsParent: Boolean get() = true
6366

6467
// Mutex is locked when either nRequested == 0 or while subscriber.onXXX is being invoked
@@ -69,6 +72,8 @@ private class PublisherCoroutine<in T>(
6972
@Volatile
7073
private var cancelled = false // true when Subscription.cancel() is invoked
7174

75+
private var handleException = false // when handleJobException is invoked
76+
7277
override val isClosedForSend: Boolean get() = isCompleted
7378
override val isFull: Boolean = mutex.isLocked
7479
override fun close(cause: Throwable?): Boolean = cancel(cause)
@@ -105,68 +110,79 @@ private class PublisherCoroutine<in T>(
105110
}
106111
}
107112

113+
/*
114+
This code is not trivial because of the two properties:
115+
1. It ensures conformance to the reactive specification that mandates that onXXX invocations should not
116+
be concurrent. It uses Mutex to protect all onXXX invocation and ensure conformance even when multiple
117+
coroutines are invoking `send` function.
118+
2. Normally, `onComplete/onError` notification is sent only when coroutine and all its children are complete.
119+
However, nothing prevents `publish` coroutine from leaking reference to it send channel to some
120+
globally-scoped coroutine that is invoking `send` outside of this context. Without extra precaution this may
121+
lead to `onNext` that is concurrent with `onComplete/onError`, so that is why signalling for
122+
`onComplete/onError` is also done under the same mutex.
123+
*/
124+
108125
// assert: mutex.isLocked()
109126
private fun doLockedNext(elem: T) {
110-
// check if already closed for send
127+
// check if already closed for send, note, that isActive become false as soon as cancel() is invoked,
128+
// because the job is cancelled, so this check also ensure conformance to the reactive specification's
129+
// requirement that after cancellation requested we don't call onXXX
111130
if (!isActive) {
112-
doLockedSignalCompleted()
131+
unlockAndCheckCompleted()
113132
throw getCancellationException()
114133
}
115134
// notify subscriber
116-
try {
117-
subscriber.onNext(elem)
118-
} catch (e: Throwable) {
119-
try {
120-
if (!cancel(e))
121-
handleCoroutineException(context, e, this)
122-
} finally {
123-
doLockedSignalCompleted()
124-
}
125-
throw getCancellationException()
126-
}
135+
subscriber.onNext(elem)
127136
// now update nRequested
128137
while (true) { // lock-free loop on nRequested
129138
val cur = _nRequested.value
130139
if (cur < 0) break // closed from inside onNext => unlock
131140
if (cur == Long.MAX_VALUE) break // no back-pressure => unlock
132141
val upd = cur - 1
133142
if (_nRequested.compareAndSet(cur, upd)) {
134-
if (upd == 0L) return // return to keep locked due to back-pressure
143+
if (upd == 0L) {
144+
// return to keep locked due to back-pressure
145+
return
146+
}
135147
break // unlock if upd > 0
136148
}
137149
}
138-
/*
139-
There is no sense to check for `isActive` before doing `unlock`, because cancellation/completion might
140-
happen after this check and before `unlock` (see `onCancellation` that does not do anything
141-
if it fails to acquire the lock that we are still holding).
142-
We have to recheck `isActive` after `unlock` anyway.
143-
*/
150+
unlockAndCheckCompleted()
151+
}
152+
153+
private fun unlockAndCheckCompleted() {
154+
/*
155+
There is no sense to check completion before doing `unlock`, because completion might
156+
happen after this check and before `unlock` (see `signalCompleted` that does not do anything
157+
if it fails to acquire the lock that we are still holding).
158+
We have to recheck `isCompleted` after `unlock` anyway.
159+
*/
144160
mutex.unlock()
145-
// recheck isActive
146-
if (!isActive && mutex.tryLock())
147-
doLockedSignalCompleted()
161+
// check isCompleted and and try to regain lock to signal completion
162+
if (isCompleted && mutex.tryLock()) doLockedSignalCompleted()
148163
}
149164

150-
// assert: mutex.isLocked()
165+
// assert: mutex.isLocked() & isCompleted
151166
private fun doLockedSignalCompleted() {
152167
try {
153168
if (_nRequested.value >= CLOSED) {
154169
_nRequested.value = SIGNALLED // we'll signal onError/onCompleted (that the final state -- no CAS needed)
155170
val cause = getCompletionCause()
156171
// Specification requires that after cancellation requested we don't call onXXX
157172
if (cancelled) {
158-
// but we cannot just ignore exception so we handle it
159-
if (cause != null) handleCoroutineException(context, cause, this)
160-
return
161-
}
162-
try {
163-
if (cause != null && cause !is CancellationException)
164-
subscriber.onError(cause)
165-
else
166-
subscriber.onComplete()
167-
} catch (e: Throwable) {
168-
handleCoroutineException(context, e, this)
169-
}
173+
// If the parent had failed to handle our exception (handleJobException was invoked), then
174+
// we must not loose this exception
175+
if (handleException && cause != null) handleExceptionViaHandler(parentContext, cause)
176+
} else {
177+
try {
178+
if (cause != null && cause !is CancellationException)
179+
subscriber.onError(cause)
180+
else
181+
subscriber.onComplete()
182+
} catch (e: Throwable) {
183+
handleExceptionViaHandler(parentContext, e)
184+
}
185+
}
170186
}
171187
} finally {
172188
mutex.unlock()
@@ -189,37 +205,48 @@ private class PublisherCoroutine<in T>(
189205
if (_nRequested.compareAndSet(cur, upd)) {
190206
// unlock the mutex when we don't have back-pressure anymore
191207
if (cur == 0L) {
192-
mutex.unlock()
193-
// recheck isActive
194-
if (!isActive && mutex.tryLock())
195-
doLockedSignalCompleted()
208+
unlockAndCheckCompleted()
196209
}
197210
return
198211
}
199212
}
200213
}
201214

202-
override fun onCompletedExceptionally(exception: Throwable) = onCompleted(Unit)
203-
204-
override fun onCompleted(value: Unit) {
215+
// assert: isCompleted
216+
private fun signalCompleted() {
205217
while (true) { // lock-free loop for nRequested
206218
val cur = _nRequested.value
207219
if (cur == SIGNALLED) return // some other thread holding lock already signalled cancellation/completion
208-
check(cur >= 0) // no other thread could have marked it as CLOSED, because onCancellation is invoked once
220+
check(cur >= 0) // no other thread could have marked it as CLOSED, because onCompleted[Exceptionally] is invoked once
209221
if (!_nRequested.compareAndSet(cur, CLOSED)) continue // retry on failed CAS
210222
// Ok -- marked as CLOSED, now can unlock the mutex if it was locked due to backpressure
211223
if (cur == 0L) {
212224
doLockedSignalCompleted()
213225
} else {
214226
// otherwise mutex was either not locked or locked in concurrent onNext... try lock it to signal completion
215-
if (mutex.tryLock())
216-
doLockedSignalCompleted()
227+
if (mutex.tryLock()) doLockedSignalCompleted()
217228
// Note: if failed `tryLock`, then `doLockedNext` will signal after performing `unlock`
218229
}
219230
return // done anyway
220231
}
221232
}
222233

234+
// Note: It is invoked when parent fails to handle an exception and strictly before onCompleted[Exception]
235+
// so here we just raise a flag (and it need NOT be volatile!) to handle this exception.
236+
// This way we defer decision to handle this exception based on our ability to send this exception
237+
// to the subscriber (see doLockedSignalCompleted)
238+
override fun handleJobException(exception: Throwable) {
239+
handleException = true
240+
}
241+
242+
override fun onCompletedExceptionally(exception: Throwable) {
243+
signalCompleted()
244+
}
245+
246+
override fun onCompleted(value: Unit) {
247+
signalCompleted()
248+
}
249+
223250
override fun cancel() {
224251
// Specification requires that after cancellation publisher stops signalling
225252
// This flag distinguishes subscription cancellation request from the job crash
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Copyright 2016-2018 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
3+
*/
4+
5+
package kotlinx.coroutines.reactive
6+
7+
import kotlinx.coroutines.*
8+
import org.junit.*
9+
import org.junit.Test
10+
import org.reactivestreams.*
11+
import java.util.concurrent.*
12+
import kotlin.test.*
13+
14+
public class PublishParentCancelStressTest : TestBase() {
15+
private val dispatcher = newFixedThreadPoolContext(3, "PublishParentCancelStressTest")
16+
private val N_TIMES = 5000 * stressTestMultiplier
17+
18+
@After
19+
fun tearDown() {
20+
dispatcher.close()
21+
}
22+
23+
@Test
24+
fun testStress() = runTest {
25+
var unhandled: Throwable? = null
26+
val handler = CoroutineExceptionHandler { _, ex -> unhandled = ex }
27+
repeat(N_TIMES) {
28+
val barrier = CyclicBarrier(4)
29+
// launch parent job for publisher
30+
val parent = GlobalScope.async<Unit>(dispatcher + handler) {
31+
val publisher = publish<Unit> {
32+
// BARRIER #1 - child publisher crashes
33+
barrier.await()
34+
throw TestException()
35+
}
36+
var sub: Subscription? = null
37+
publisher.subscribe(object : Subscriber<Unit> {
38+
override fun onComplete() { error("Cannot be reached") }
39+
override fun onSubscribe(s: Subscription?) { sub = s }
40+
override fun onNext(t: Unit?) { error("Cannot be reached" ) }
41+
override fun onError(t: Throwable?) {
42+
assertTrue(t is TestException)
43+
}
44+
})
45+
launch {
46+
// BARRIER #3 -- cancel subscription
47+
barrier.await()
48+
sub!!.cancel()
49+
}
50+
// BARRIER #2 -- parent completes
51+
barrier.await()
52+
Unit
53+
}
54+
// BARRIE #4 - go 1-3 together
55+
barrier.await()
56+
// Make sure exception is not lost, but incorporated into parent
57+
val result = kotlin.runCatching { parent.await() }
58+
assertTrue(result.exceptionOrNull() is TestException)
59+
// Make sure unhandled exception handler was not invoked
60+
assertNull(unhandled)
61+
}
62+
}
63+
64+
private class TestException : Exception()
65+
}

reactive/kotlinx-coroutines-reactive/test/PublishTest.kt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,43 @@ class PublishTest : TestBase() {
130130
sub!!.cancel()
131131
finish(6)
132132
}
133+
134+
@Test
135+
fun testPublishFailureCancelsParent() = runTest(
136+
expected = { it is TestException }
137+
) {
138+
expect(1)
139+
val publisher = publish<Unit> {
140+
expect(5)
141+
throw TestException()
142+
}
143+
expect(2)
144+
publisher.subscribe(object : Subscriber<Unit> {
145+
override fun onComplete() {
146+
expectUnreached()
147+
}
148+
149+
override fun onSubscribe(s: Subscription) {
150+
expect(3)
151+
}
152+
153+
override fun onNext(t: Unit?) {
154+
expectUnreached()
155+
}
156+
157+
override fun onError(t: Throwable?) {
158+
assertTrue(t is TestException)
159+
expect(6)
160+
}
161+
})
162+
expect(4)
163+
try {
164+
yield() // to coroutine, will crash because it is a cancelled parent coroutine
165+
} finally {
166+
finish(7)
167+
}
168+
expectUnreached()
169+
}
170+
171+
private class TestException : Exception()
133172
}

0 commit comments

Comments
 (0)