Kotlin
diff --git a/‎benchmarks/src/jmh/kotlin/benchmarks/actors/PingPongActorBenchmark.kt
Lines changed: 6 additions & 8 deletions b/‎benchmarks/src/jmh/kotlin/benchmarks/actors/PingPongActorBenchmark.kt
Lines changed: 6 additions & 8 deletions
diff --git a/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/CoroutineScheduler.kt
Lines changed: 590 additions & 114 deletions b/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/CoroutineScheduler.kt
Lines changed: 590 additions & 114 deletions
diff --git a/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/ExperimentalCoroutineDispatcher.kt
Lines changed: 128 additions & 10 deletions b/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/ExperimentalCoroutineDispatcher.kt
Lines changed: 128 additions & 10 deletions
diff --git a/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/Tasks.kt
Lines changed: 23 additions & 7 deletions b/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/Tasks.kt
Lines changed: 23 additions & 7 deletions
diff --git a/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/WorkQueue.kt
Lines changed: 39 additions & 30 deletions b/‎core/kotlinx-coroutines-core/src/main/kotlin/kotlinx/coroutines/experimental/scheduling/WorkQueue.kt
Lines changed: 39 additions & 30 deletions
@@ -1,13 +1,11 @@
 package benchmarks.actors
 
-import benchmarks.ParametrizedDispatcherBase
-import kotlinx.coroutines.experimental.channels.Channel
-import kotlinx.coroutines.experimental.channels.SendChannel
-import kotlinx.coroutines.experimental.channels.actor
-import kotlinx.coroutines.experimental.runBlocking
+import benchmarks.*
+import kotlinx.coroutines.experimental.*
+import kotlinx.coroutines.experimental.channels.*
 import org.openjdk.jmh.annotations.*
-import java.util.concurrent.TimeUnit
-import kotlin.coroutines.experimental.CoroutineContext
+import java.util.concurrent.*
+import kotlin.coroutines.experimental.*
 
 /*
  * Benchmark                                   (dispatcher)  Mode  Cnt    Score    Error  Units
@@ -32,7 +30,7 @@ import kotlin.coroutines.experimental.CoroutineContext
 open class PingPongActorBenchmark : ParametrizedDispatcherBase() {
     data class Letter(val message: Any?, val sender: SendChannel<Letter>)
 
-    @Param("experimental")
+    @Param("experimental", "fjp", "ftp_1", "ftp_8")
     override var dispatcher: String = "fjp"
 
     @Benchmark
 
@@ -1,25 +1,143 @@
 package kotlinx.coroutines.experimental.scheduling
 
+import kotlinx.atomicfu.*
 import kotlinx.coroutines.experimental.*
-import java.io.Closeable
-import java.util.concurrent.TimeUnit
-import kotlin.coroutines.experimental.CoroutineContext
+import java.io.*
+import java.util.concurrent.*
+import kotlin.coroutines.experimental.*
 
+class ExperimentalCoroutineDispatcher(corePoolSize: Int = Runtime.getRuntime().availableProcessors(), maxPoolSize: Int = MAX_POOL_SIZE) : CoroutineDispatcher(), Delay, Closeable {
 
-class ExperimentalCoroutineDispatcher(threads: Int = Runtime.getRuntime().availableProcessors()) : CoroutineDispatcher(), Delay, Closeable {
+    private val coroutineScheduler = CoroutineScheduler(corePoolSize, maxPoolSize)
 
-    private val coroutineScheduler = CoroutineScheduler(threads)
+    /**
+     * Submits [block] to the underlying [CoroutineScheduler]; [context] is not consulted.
+     *
+     * TODO: yield doesn't work as expected
+     */
+    override fun dispatch(context: CoroutineContext, block: Runnable): Unit = coroutineScheduler.dispatch(block)
 
-    override fun dispatch(context: CoroutineContext, block: Runnable) {
-        coroutineScheduler.dispatch(block)
-    }
-
-    override fun scheduleResumeAfterDelay(time: Long, unit: TimeUnit, continuation: CancellableContinuation<Unit>) =
+    override fun scheduleResumeAfterDelay(time: Long, unit: TimeUnit, continuation: CancellableContinuation<Unit>): Unit =
             DefaultExecutor.scheduleResumeAfterDelay(time, unit, continuation)
 
     override fun close() = coroutineScheduler.close()
+
     override fun toString(): String {
         return "${super.toString()}[scheduler = $coroutineScheduler]"
     }
 
+    /**
+     * Creates a new coroutine execution context with limited parallelism for tasks which may potentially block.
+     * The resulting [CoroutineDispatcher] doesn't own any resources (its threads) and piggybacks on this
+     * [ExperimentalCoroutineDispatcher]: tasks are executed by the original dispatcher and are submitted with
+     * [TaskMode.PROBABLY_BLOCKING], which gives the original dispatcher a hint to adjust its behaviour.
+     *
+     * @param parallelism parallelism level, indicating how many threads can execute tasks in the given context in parallel.
+     * @return a dispatcher view that delegates execution to this dispatcher.
+     * @throws IllegalArgumentException if [parallelism] is not positive.
+     */
+    fun blocking(parallelism: Int = BLOCKING_DEFAULT_PARALLELISM): CoroutineDispatcher {
+        require(parallelism > 0) { "Expected positive parallelism level, but have $parallelism" }
+        return LimitingBlockingDispatcher(parallelism, TaskMode.PROBABLY_BLOCKING, this)
+    }
+
+    internal fun dispatchBlocking(block: Runnable, context: TaskMode, fair: Boolean): Unit = coroutineScheduler.dispatch(block, context, fair) // forwards the task mode and fairness flag to the scheduler unchanged
+}
+
+private class LimitingBlockingDispatcher(val parallelism: Int, val taskContext: TaskMode, val dispatcher: ExperimentalCoroutineDispatcher) : CoroutineDispatcher(), Delay {
+
+    private val queue = ConcurrentLinkedQueue<Runnable>() // wrapped tasks waiting for a free in-flight slot
+    private val inFlightTasks = atomic(0) // number of committed slots; may transiently exceed parallelism
+
+    override fun dispatch(context: CoroutineContext, block: Runnable) = dispatch(block, false)
+
+    private fun dispatch(block: Runnable, fair: Boolean) {
+        var taskToSchedule = wrap(block)
+        while (true) {
+            // Commit an in-flight task slot
+            val inFlight = inFlightTasks.incrementAndGet()
+
+            // Fast path: if the parallelism limit is not reached, dispatch the task and return
+            if (inFlight <= parallelism) {
+                dispatcher.dispatchBlocking(taskToSchedule, taskContext, fair)
+                return
+            }
+
+            // Parallelism limit is reached, add the task to the queue
+            queue.add(taskToSchedule)
+
+            /*
+             * We haven't actually scheduled anything, so roll back the committed in-flight task slot:
+             * If the amount of in-flight tasks is still at or above the limit, do nothing.
+             * If the amount of in-flight tasks is less than parallelism, then
+             * it's a race with a thread which finished a task from the current context, and we should
+             * resubmit the first task from the queue to avoid starvation.
+             *
+             * Race example #1 (TN is N-th thread, R is current in-flight tasks number), execution is sequential:
+             *
+             * T1: submit task, start execution, R == 1
+             * T2: commit slot for next task, R == 2
+             * T1: finish its task, R == 1
+             * T2: submit next task to local queue, decrement R, R == 0
+             * Without retries, task from T2 will be stuck in the local queue
+             */
+            if (inFlightTasks.decrementAndGet() >= parallelism) {
+                return
+            }
+
+            taskToSchedule = queue.poll() ?: return // empty queue: the queued task was already taken, nothing to resubmit
+        }
+    }
+
+    override fun toString(): String {
+        return "${super.toString()}[dispatcher = $dispatcher]"
+    }
+
+    private fun wrap(block: Runnable): Runnable {
+        return block as? WrappedTask ?: WrappedTask(block) // tasks re-dispatched from the queue are already wrapped; don't wrap twice
+    }
+
+    /**
+     * Tries to dispatch tasks which were blocked due to reaching the parallelism limit, if there are any.
+     * Invoked from the `finally` block of [WrappedTask.run], i.e. after every task of this context completes.
+     *
+     * Implementation note: blocking tasks are scheduled in a fair manner (to local queue tail) to avoid
+     * non-blocking continuations starvation.
+     * E.g. for
+     * ```
+     * foo()
+     * blocking()
+     * bar()
+     * ```
+     * it's more profitable to execute bar at the end of `blocking` rather than pending blocking task
+     */
+    private fun afterTask() {
+        var next = queue.poll()
+        // If there are pending tasks in this blocking context, dispatch the first one; the finished task's slot is kept for it
+        if (next != null) {
+            dispatcher.dispatchBlocking(next, taskContext, true)
+            return
+        }
+        inFlightTasks.decrementAndGet()
+
+        /*
+         * Re-poll and try to submit a task if required, otherwise tasks may be stuck in the local queue.
+         * Race example #2 (TN is N-th thread, R is current in-flight tasks number), execution is sequential:
+         * T1: submit task, start execution, R == 1
+         * T2: commit slot for next task, R == 2
+         * T1: finish its task, poll queue (it's still empty), R == 2
+         * T2: submit next task to the local queue, decrement R, R == 1
+         * T1: decrement R, finish. R == 0
+         *
+         * The task from T2 is stuck in the local queue
+         */
+        next = queue.poll() ?: return
+        dispatch(next, true)
+    }
+
+    private inner class WrappedTask(val runnable: Runnable) : Runnable {
+        override fun run() {
+            try {
+                runnable.run()
+            } finally {
+                afterTask() // runs even if the task throws, so the slot is always released or handed over
+            }
+        }
+    }
+
+    override fun scheduleResumeAfterDelay(time: Long, unit: TimeUnit, continuation: CancellableContinuation<Unit>) = dispatcher.scheduleResumeAfterDelay(time, unit, continuation)
 }
@@ -5,16 +5,29 @@ import java.util.*
 internal typealias Task = TimedTask
 internal typealias GlobalQueue = Queue<Task>
 
-// 100us is default resolution
+// 100us as default
 internal val WORK_STEALING_TIME_RESOLUTION_NS = readFromSystemProperties(
-        "kotlinx.coroutines.scheduler.resolution.ns", 100000L, String::toLongOrNull)
+        "kotlinx.coroutines.scheduler.resolution.ns", 100000L)
 
 internal val QUEUE_SIZE_OFFLOAD_THRESHOLD = readFromSystemProperties(
-        "kotlinx.coroutines.scheduler.offload.threshold", 96L, String::toLongOrNull)
+        "kotlinx.coroutines.scheduler.offload.threshold", 96L)
+
+internal val BLOCKING_DEFAULT_PARALLELISM = readFromSystemProperties(
+        "kotlinx.coroutines.scheduler.blocking.parallelism", 16L).toInt()
+
+internal val MAX_POOL_SIZE = readFromSystemProperties(
+    "kotlinx.coroutines.scheduler.max.pool.size", Runtime.getRuntime().availableProcessors() * 128L).toInt()
 
 internal var schedulerTimeSource: TimeSource = NanoTimeSource
 
-internal data class TimedTask(val submissionTime: Long, val task: Runnable)
+internal enum class TaskMode {
+    // Marker indicating that the task is CPU-bound and will not block
+    NON_BLOCKING,
+    // Marker indicating that the task may potentially block, thus giving the scheduler a hint that an additional thread may be required
+    PROBABLY_BLOCKING,
+}
+
+internal data class TimedTask(val task: Runnable, val submissionTime: Long, val mode: TaskMode)
 
 internal abstract class TimeSource {
     abstract fun nanoTime(): Long
@@ -24,13 +37,16 @@ internal object NanoTimeSource : TimeSource() {
     override fun nanoTime() = System.nanoTime()
 }
 
-private fun <T> readFromSystemProperties(propertyName: String, defaultValue: T, parser: (String) -> T?): T {
+private fun readFromSystemProperties(propertyName: String, defaultValue: Long): Long {
     val value = try {
         System.getProperty(propertyName)
     } catch (e: SecurityException) {
         null // reading the property is not permitted: treat it as unset
     } ?: return defaultValue // property unset or unreadable: fall back to the default
 
-    val parsed = parser(value)
-    return parsed ?: error("System property '$propertyName' has unrecognized value '$value'")
+    val parsed = value.toLongOrNull() ?: error("System property '$propertyName' has unrecognized value '$value'") // a set but non-numeric value fails fast
+    if (parsed <= 0) { // only strictly positive values are accepted; the default itself is not validated
+        error("System property '$propertyName' should be positive, but is '$parsed'")
+    }
+    return parsed
 }
@@ -1,7 +1,7 @@
 package kotlinx.coroutines.experimental.scheduling
 
-import kotlinx.atomicfu.atomic
-import java.util.concurrent.atomic.AtomicReferenceArray
+import kotlinx.atomicfu.*
+import java.util.concurrent.atomic.*
 
 internal const val BUFFER_CAPACITY_BASE = 7
 internal const val BUFFER_CAPACITY = 1 shl BUFFER_CAPACITY_BASE
@@ -15,7 +15,7 @@ internal const val MASK = BUFFER_CAPACITY - 1 // 128 by default
  *
  * Fairness
  * [WorkQueue] provides semi-FIFO order, but with priority for most recently submitted task assuming
- * that these two (current and submitted) are communicating and sharing state thus making such communication extremely fast.
+ * that these two (current one and submitted) are communicating and sharing state thus making such communication extremely fast.
  * E.g. submitted jobs [1, 2, 3, 4] will be executed in [4, 1, 2, 3] order.
  *
  * Work offloading
@@ -27,8 +27,16 @@ internal const val MASK = BUFFER_CAPACITY - 1 // 128 by default
  */
 internal class WorkQueue {
 
+    // todo: There is non-atomicity in computing bufferSize (indices update separately).
+    // todo: It can lead to arbitrary values of resulting bufferSize.
+    // todo: Consider merging both indices into a single Long.
+    // todo: Alternatively, prove that a sporadic arbitrary result here is OK (does not seem to be the case now)
     internal val bufferSize: Int get() = producerIndex.value - consumerIndex.value
+
+    // todo: AtomicReferenceArray has an extra memory indirection.
+    // todo: In the future (long-term) atomicfu shall support efficient atomic arrays in a platform-specific way (Unsafe or VarHandles)
     private val buffer: AtomicReferenceArray<Task?> = AtomicReferenceArray(BUFFER_CAPACITY)
+
     private val lastScheduledTask = atomic<Task?>(null)
 
     private val producerIndex = atomic(0)
@@ -49,16 +57,25 @@ internal class WorkQueue {
      * @param globalQueue fallback queue which is used when the local queue is overflown
      * @return true if no offloading happened, false otherwise
      */
-    fun offer(task: Task, globalQueue: GlobalQueue): Boolean {
-        while (true) {
-            val previous = lastScheduledTask.value
-            if (lastScheduledTask.compareAndSet(previous, task)) {
-                if (previous != null) {
-                    return addLast(previous, globalQueue)
-                }
-                return true
-            }
+    fun add(task: Task, globalQueue: GlobalQueue): Boolean {
+        val previous = lastScheduledTask.getAndSet(task) ?: return true // new task takes the last-scheduled slot; nothing displaced means nothing was offloaded
+        return addLast(previous, globalQueue) // the displaced task is moved into the buffer
+    }
+
+    // Called only by the owner. Returns true if no offloading to globalQueue happened, false otherwise.
+    fun addLast(task: Task, globalQueue: GlobalQueue): Boolean {
+        var addedToGlobalQueue = false
+
+        /*
+         * We need the loop here because a race is possible not only on a full queue,
+         * but also on a queue with one element during stealing.
+         * Each failed attempt offloads work to the global queue before retrying.
+         */
+        while (!tryAddLast(task)) {
+            offloadWork(globalQueue)
+            addedToGlobalQueue = true
         }
+
+        return !addedToGlobalQueue
     }
 
     /**
@@ -74,7 +91,7 @@ internal class WorkQueue {
             }
 
             if (victim.lastScheduledTask.compareAndSet(lastScheduled, null)) {
-                offer(lastScheduled, globalQueue)
+                add(lastScheduled, globalQueue)
                 return true
             }
 
@@ -90,12 +107,20 @@ internal class WorkQueue {
             val task = victim.pollExternal { time - it.submissionTime >= WORK_STEALING_TIME_RESOLUTION_NS || victim.bufferSize > QUEUE_SIZE_OFFLOAD_THRESHOLD }
                     ?: return@repeat
             stolen = true
-            offer(task, globalQueue)
+            add(task, globalQueue)
         }
 
         return stolen
     }
 
+    internal fun size(): Int {
+        if (lastScheduledTask.value != null) {
+            return bufferSize + 1 // count the task held in lastScheduledTask in addition to the buffered ones
+        }
+
+        return bufferSize // two separate reads, so the result is only a snapshot under concurrent updates
+    }
+
     /**
      * Offloads half of the current buffer to [target]
      */
@@ -126,22 +151,6 @@ internal class WorkQueue {
         }
     }
 
-    // Called only by the owner
-    private fun addLast(task: Task, globalQueue: GlobalQueue): Boolean {
-        var addedToGlobalQueue = false
-
-        /*
-         * We need the loop here because race possible not only on full queue,
-         * but also on queue with one element during stealing
-         */
-        while (!tryAddLast(task)) {
-            offloadWork(globalQueue)
-            addedToGlobalQueue = true
-        }
-
-        return !addedToGlobalQueue
-    }
-
     // Called only by the owner
     private fun tryAddLast(task: Task): Boolean {
         if (bufferSize == BUFFER_CAPACITY - 1) return false