Revert "Set up the monitor only on await for tasks"

José Valim · José Valim · commit 0fe1e686b37f · 2014-05-30T15:59:17.000+02:00
This reverts commit 85a04fe.
diff --git a/lib/elixir/lib/task.ex b/lib/elixir/lib/task.ex
@@ -2,10 +2,10 @@ defmodule Task do
   @moduledoc """
   Conveniences for spawning and awaiting for tasks.
 
-  Tasks are processes meant to execute one particular
-  action throughout their life-cycle, often with little or no
-  communication with other processes. The most common use case
-  for tasks is to compute a value asynchronously:
+  Tasks are processes that are meant to execute one particular
+  action throughout their life-cycle, often with little
+  explicit communication with other processes. The most common
+  use case for tasks is to compute a value asynchronously:
 
       task = Task.async(fn -> do_some_work() end)
       res  = do_some_other_work()
@@ -16,21 +16,28 @@ defmodule Task do
   They are implemented by spawning a process that sends a message
   to the caller once the given computation is performed.
 
-  Besides `async/1` and `await/1`, tasks can also be used be
-  started as part of supervision trees and dynamically spawned
-  in remote nodes. We will explore all three scenarios next.
+  Besides `async/1` and `await/1`, tasks can also be used as part
+  of supervision trees and dynamically spawned in remote nodes.
+  We will explore all three scenarios next.
 
   ## async and await
 
   The most common way to spawn a task is with `Task.async/1`. A new
-  process will be created and linked to the caller. Once the task
-  action finishes, a message will be sent to the caller with its
-  result.
+  process will be created and this process is linked and monitored
+  by the caller. However, the processes are unlinked right before
+  the task finishes, allowing the proper error to be triggered only
+  on `await/1`.
 
-  `Task.await/1` is used to read the message sent by the task. On
-  await, Elixir will also setup a monitor to verify if the process
-  exited with any abnormal reason (or in case exits are being
-  trapped by the caller).
+  This implies three things:
+
+  1) In case the caller crashes, the task will be killed and its
+     computation will abort;
+
+  2) In case the task crashes due to an error, the parent will
+     crash only on `await/1`;
+
+  3) In case the task crashes because a linked process caused
+     it to crash, the parent will crash immediately;
 
   ## Supervised tasks
 
@@ -48,9 +55,14 @@ defmodule Task do
       ]
 
   Since such tasks are supervised and not directly linked to
-  the caller, they cannot be awaited on. Note `start_link/1`,
-  differently from `async/1`, returns `{:ok, pid}` (which is
-  the result expected by supervision trees).
+  the caller, they cannot be awaited on. For such reason,
+  differently from `async/1`, `start_link/1` returns `{:ok, pid}`
+  (which is the result expected by supervision trees).
+
+  Such tasks are useful as workers that run during your application
+  life-cycle and rarely communicate with other workers. For example,
+  a worker that pushes data to another server or a worker that consumes
+  events from an event manager and writes them to a log file.
 
   ## Supervision trees
 
@@ -66,7 +78,7 @@ defmodule Task do
       # In the remote node
       Task.Supervisor.start_link(name: :tasks_sup)
 
-      # In the client
+      # On the client
       Task.Supervisor.async({:tasks_sup, :remote@local}, fn -> do_work() end)
 
   `Task.Supervisor` is more often started in your supervision tree as:
@@ -106,7 +118,7 @@ defmodule Task do
   """
   @spec start_link(module, atom, [term]) :: {:ok, pid}
   def start_link(mod, fun, args) do
-    Task.Supervised.start_link({mod, fun, args})
+    Task.Supervised.start_link(:undefined, {mod, fun, args})
   end
 
   @doc """
@@ -135,8 +147,9 @@ defmodule Task do
   @spec async(module, atom, [term]) :: t
   def async(mod, fun, args) do
     mfa = {mod, fun, args}
-    ref = make_ref
-    pid = :proc_lib.spawn_link(Task.Supervised, :async, [mfa, self(), ref])
+    pid = :proc_lib.spawn_link(Task.Supervised, :async, [self(), mfa])
+    ref = Process.monitor(pid)
+    send(pid, {self(), ref})
     %Task{pid: pid, ref: ref}
   end
 
@@ -148,24 +161,67 @@ defmodule Task do
   exit with the same reason as the task.
   """
   @spec await(t, timeout) :: term | no_return
-  def await(%Task{pid: pid, ref: ref}=task, timeout \\ 5000) do
-    mon_ref = Process.monitor(pid)
-
+  def await(%Task{ref: ref}=task, timeout \\ 5000) do
     receive do
       {^ref, reply} ->
-        Process.demonitor(mon_ref, [:flush])
+        Process.demonitor(ref, [:flush])
         reply
-      {:DOWN, ^mon_ref, _, _, :noconnection} ->
-        exit({{:nodedown, get_node(task.pid)}, {__MODULE__, :await, [task, timeout]}})
-      {:DOWN, ^mon_ref, _, _, reason} ->
+      {:DOWN, ^ref, _, _, :noconnection} ->
+        mfa = {__MODULE__, :await, [task, timeout]}
+        exit({{:nodedown, get_node(task.pid)}, mfa})
+      {:DOWN, ^ref, _, _, reason} ->
         exit({reason, {__MODULE__, :await, [task, timeout]}})
     after
       timeout ->
-        Process.demonitor(mon_ref, [:flush])
+        Process.demonitor(ref, [:flush])
         exit({:timeout, {__MODULE__, :await, [task, timeout]}})
     end
   end
 
+  @doc """
+  Receives a group of tasks and a message and finds
+  a task that matches the given message.
+
+  This function returns a tuple with the returned value and
+  the task in case the message matches a task that exited
+  with success, it exits in case the found task failed, or
+  it returns nil if no task was found.
+
+  This function is useful in situations where multiple
+  tasks are spawned and their results are collected
+  later on. For example, a GenServer can spawn tasks,
+  store the tasks in a list and later use `Task.find/2`
+  to see if upcoming messages are from any of the tasks.
+  """
+  @spec find([t], any) :: {term, t} | nil | no_return
+  def find(tasks, msg)
+
+  def find(tasks, {ref, reply}) when is_reference(ref) do
+    Enum.find_value tasks, fn
+      %Task{ref: task_ref} = t when ref == task_ref ->
+        Process.demonitor(ref, [:flush])
+        {reply, t}
+      %Task{} ->
+        nil
+    end
+  end
+
+  def find(tasks, {:DOWN, ref, _, _, reason} = msg) when is_reference(ref) do
+    find = fn(%Task{ref: task_ref}) -> task_ref == ref end
+    case Enum.find(tasks, find) do
+      %Task{pid: pid} when reason == :noconnection ->
+        exit({{:nodedown, get_node(pid)}, {__MODULE__, :find, [tasks, msg]}})
+      %Task{} ->
+        exit({reason, {__MODULE__, :find, [tasks, msg]}})
+      nil ->
+        nil
+    end
+  end
+
+  def find(_tasks, _msg) do
+    nil
+  end
+
   defp get_node({_, n}) when is_atom(n), do: n
-  defp get_node(pid) when is_pid(pid),   do: pid
+  defp get_node(pid) when is_pid(pid),     do: pid
 end
diff --git a/lib/elixir/lib/task/supervised.ex b/lib/elixir/lib/task/supervised.ex
@@ -1,30 +1,61 @@
 defmodule Task.Supervised do
   @moduledoc false
 
-  def start_link(mfa) do
-    :proc_lib.start_link(__MODULE__, :noreply, [mfa])
+  def start_link(:undefined, fun) do
+    :proc_lib.start_link(__MODULE__, :noreply, [fun])
   end
 
-  def start_link(mfa, caller, ref) do
-    :proc_lib.start_link(__MODULE__, :reply, [mfa, caller, ref])
+  def start_link(caller, fun) do
+    :proc_lib.start_link(__MODULE__, :reply, [caller, fun])
   end
 
-  def async(mfa, caller, ref) do
-    send caller, {ref, apply(mfa)}
+  def async(caller, {module, fun, args}) do
+    ref =
+      # There is a race condition on this operation when working across
+      # nodes that manifests if a `Task.Supervisor.async/1` call is made
+      # while the supervisor is busy spawning previous tasks.
+      #
+      # Imagine the following workflow:
+      #
+      # 1. The nodes disconnect
+      # 2. The async call fails and is caught, the calling process does not exit
+      # 3. The task is spawned and links to the calling process, causing the nodes to reconnect
+      # 4. The calling process has not exited and so does not send its monitor reference
+      # 5. The spawned task waits forever for the monitor reference so it can begin
+      #
+      # We have solved this by specifying a timeout of 5000 milliseconds.
+      # Given no work is done in the client in between the task start and
+      # sending the reference, 5000 should be enough to not raise false
+      # negatives unless the nodes are indeed not available.
+      receive do
+        {^caller, ref} -> ref
+      after
+        5000 -> exit(:timeout)
+      end
+
+    try do
+      apply(module, fun, args)
+    else
+      result ->
+        send caller, {ref, result}
+    catch
+      :error, reason ->
+        exit({reason, System.stacktrace()})
+      :throw, value ->
+        exit({{:nocatch, value}, System.stacktrace()})
+    after
+      :erlang.unlink(caller)
+    end
   end
 
-  def reply(mfa, caller, ref) do
+  def reply(caller, mfa) do
     :erlang.link(caller)
     :proc_lib.init_ack({:ok, self()})
-    send caller, {ref, apply(mfa)}
+    async(caller, mfa)
   end
 
-  def noreply(mfa) do
+  def noreply({module, fun, args}) do
     :proc_lib.init_ack({:ok, self()})
-    apply(mfa)
-  end
-
-  defp apply({module, fun, args}) do
     try do
       apply(module, fun, args)
     catch
diff --git a/lib/elixir/lib/task/supervisor.ex b/lib/elixir/lib/task/supervisor.ex
@@ -55,8 +55,9 @@ defmodule Task.Supervisor do
   """
   @spec async(Supervisor.supervisor, module, atom, [term]) :: Task.t
   def async(supervisor, module, fun, args) do
-    ref = make_ref()
-    {:ok, pid} = Supervisor.start_child(supervisor, [{module, fun, args}, self(), ref])
+    {:ok, pid} = Supervisor.start_child(supervisor, [self(), {module, fun, args}])
+    ref = Process.monitor(pid)
+    send pid, {self(), ref}
     %Task{pid: pid, ref: ref}
   end
 
@@ -97,6 +98,6 @@ defmodule Task.Supervisor do
   """
   @spec start_child(Supervisor.supervisor, module, atom, [term]) :: {:ok, pid}
   def start_child(supervisor, module, fun, args) do
-    Supervisor.start_child(supervisor, [{module, fun, args}])
+    Supervisor.start_child(supervisor, [:undefined, {module, fun, args}])
   end
 end
diff --git a/lib/elixir/test/elixir/task/supervisor_test.exs b/lib/elixir/test/elixir/task/supervisor_test.exs
@@ -41,6 +41,7 @@ defmodule Task.SupervisorTest do
     # Assert response and monitoring messages
     ref = task.ref
     assert_receive {^ref, :done}
+    assert_receive {:DOWN, ^ref, _, _, :normal}
   end
 
   test "async/3", config do
@@ -83,32 +84,21 @@ defmodule Task.SupervisorTest do
     assert Task.Supervisor.terminate_child(config[:supervisor], pid) == :ok
   end
 
-  @wait 100
-
   test "await/1 exits on task throw", config do
-    Process.flag(:trap_exit, true)
-    task = Task.Supervisor.async(config[:supervisor], fn -> :timer.sleep(@wait); throw :unknown end)
+    task = Task.Supervisor.async(config[:supervisor], fn -> throw :unknown end)
     assert {{{:nocatch, :unknown}, _}, {Task, :await, [^task, 5000]}} =
            catch_exit(Task.await(task))
-  after
-    Process.flag(:trap_exit, false)
   end
 
   test "await/1 exits on task error", config do
-    Process.flag(:trap_exit, true)
-    task = Task.Supervisor.async(config[:supervisor], fn -> :timer.sleep(@wait); raise "oops" end)
+    task = Task.Supervisor.async(config[:supervisor], fn -> raise "oops" end)
     assert {{%RuntimeError{}, _}, {Task, :await, [^task, 5000]}} =
            catch_exit(Task.await(task))
-  after
-    Process.flag(:trap_exit, false)
   end
 
   test "await/1 exits on task exit", config do
-    Process.flag(:trap_exit, true)
-    task = Task.Supervisor.async(config[:supervisor], fn -> :timer.sleep(@wait); exit :unknown end)
+    task = Task.Supervisor.async(config[:supervisor], fn -> exit :unknown end)
     assert {:unknown, {Task, :await, [^task, 5000]}} =
            catch_exit(Task.await(task))
-  after
-    Process.flag(:trap_exit, false)
   end
 end
diff --git a/lib/elixir/test/elixir/task_test.exs b/lib/elixir/test/elixir/task_test.exs