Commit abc0f0e

Merge pull request #190 from python-adaptive/mpi4py_support
add support for mpi4py as an executor.
2 parents: 01fb120 + bb319d9

File tree: 2 files changed, +71 −3 lines

adaptive/runner.py

Lines changed: 12 additions & 3 deletions
@@ -26,6 +26,12 @@
 except ModuleNotFoundError:
     with_distributed = False
 
+try:
+    import mpi4py.futures
+    with_mpi4py = True
+except ModuleNotFoundError:
+    with_mpi4py = False
+
 with suppress(ModuleNotFoundError):
     import uvloop
     asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@@ -66,7 +72,7 @@ class BaseRunner(metaclass=abc.ABCMeta):
         the learner as its sole argument, and return True when we should
         stop requesting more points.
     executor : `concurrent.futures.Executor`, `distributed.Client`,\
-        or `ipyparallel.Client`, optional
+        `mpi4py.futures.MPIPoolExecutor`, or `ipyparallel.Client`, optional
         The executor in which to evaluate the function to be learned.
         If not provided, a new `~concurrent.futures.ProcessPoolExecutor`
         is used on Unix systems while on Windows a `distributed.Client`
@@ -281,7 +287,7 @@ class BlockingRunner(BaseRunner):
         the learner as its sole argument, and return True when we should
         stop requesting more points.
     executor : `concurrent.futures.Executor`, `distributed.Client`,\
-        or `ipyparallel.Client`, optional
+        `mpi4py.futures.MPIPoolExecutor`, or `ipyparallel.Client`, optional
         The executor in which to evaluate the function to be learned.
         If not provided, a new `~concurrent.futures.ProcessPoolExecutor`
         is used on Unix systems while on Windows a `distributed.Client`
@@ -386,7 +392,7 @@ class AsyncRunner(BaseRunner):
         stop requesting more points. If not provided, the runner will run
         forever, or until ``self.task.cancel()`` is called.
     executor : `concurrent.futures.Executor`, `distributed.Client`,\
-        or `ipyparallel.Client`, optional
+        `mpi4py.futures.MPIPoolExecutor`, or `ipyparallel.Client`, optional
        The executor in which to evaluate the function to be learned.
        If not provided, a new `~concurrent.futures.ProcessPoolExecutor`
        is used on Unix systems while on Windows a `distributed.Client`
@@ -693,6 +699,9 @@ def _get_ncores(ex):
         return 1
     elif with_distributed and isinstance(ex, distributed.cfexecutor.ClientExecutor):
         return sum(n for n in ex._client.ncores().values())
+    elif with_mpi4py and isinstance(ex, mpi4py.futures.MPIPoolExecutor):
+        ex.bootup()  # wait until all workers are up and running
+        return ex._pool.size  # not public API!
     else:
         raise TypeError('Cannot get number of cores for {}'
                         .format(ex.__class__))
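Taken together, the guarded import and the new ``_get_ncores`` branch let adaptive detect an MPI pool without making mpi4py a hard dependency. Here is a minimal standalone sketch of the same pattern; the ``count_cores`` helper is illustrative and not part of adaptive's API, and ``ex._pool.size`` is private mpi4py API, as the diff itself warns:

.. code:: python

    try:
        import mpi4py.futures
        with_mpi4py = True
    except ModuleNotFoundError:
        with_mpi4py = False


    def count_cores(ex):
        # dispatch on executor type, mirroring adaptive's _get_ncores
        if with_mpi4py and isinstance(ex, mpi4py.futures.MPIPoolExecutor):
            ex.bootup(wait=True)  # block until all MPI workers have started
            return ex._pool.size  # private attribute; may change between releases
        raise TypeError('Cannot get number of cores for {}'.format(ex.__class__))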

docs/source/tutorial/tutorial.parallelism.rst

Lines changed: 59 additions & 0 deletions
@@ -53,3 +53,62 @@ On Windows by default `adaptive.Runner` uses a `distributed.Client`.
     runner = adaptive.Runner(learner, executor=client, goal=lambda l: l.loss() < 0.01)
     runner.live_info()
     runner.live_plot(update_interval=0.1)
+
+`mpi4py.futures.MPIPoolExecutor`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This makes sense if you want to run a ``Learner`` on a cluster non-interactively using a job script.
+
+For example, create the following file called ``run_learner.py``:
+
+.. code:: python
+
+    import mpi4py.futures
+
+    learner = adaptive.Learner1D(f, bounds=(-1, 1))
+
+    # load the data
+    learner.load(fname)
+
+    # run until `goal` is reached with an `MPIPoolExecutor`
+    runner = adaptive.Runner(
+        learner,
+        executor=mpi4py.futures.MPIPoolExecutor(),
+        shutdown_executor=True,
+        goal=lambda l: l.loss() < 0.01,
+    )
+
+    # periodically save the data (in case the job dies)
+    runner.start_periodic_saving(dict(fname=fname), interval=600)
+
+    # block until the runner goal is reached
+    runner.ioloop.run_until_complete(runner.task)
+
+
+On your laptop/desktop you can run this script like:
+
+.. code:: bash
+
+    export MPI4PY_MAX_WORKERS=15
+    mpiexec -n 1 python run_learner.py
+
+Or you can pass ``max_workers=15`` programmatically when creating the executor instance.
+
+Inside a job script for a job queuing system, use:
+
+.. code:: bash
+
+    export MPI4PY_MAX_WORKERS=15
+    mpiexec -n 16 python -m mpi4py.futures run_learner.py
+
+How you call MPI might depend on your specific queuing system; with SLURM, for example, it's:
+
+.. code:: bash
+
+    #!/bin/bash
+    #SBATCH --job-name adaptive-example
+    #SBATCH --ntasks 100
+
+    export MPI4PY_MAX_WORKERS=$SLURM_NTASKS
+    srun -n $SLURM_NTASKS --mpi=pmi2 ~/miniconda3/envs/py37_min/bin/python -m mpi4py.futures run_learner.py
+
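The ``run_learner.py`` in the diff relies on ``f`` and ``fname`` being defined earlier in the tutorial. For reference, a self-contained variant might look like the sketch below; the function ``f`` and the filename are illustrative placeholders, and the try/except around ``load`` is one way to let a fresh run start before any save file exists:

.. code:: python

    import adaptive
    import mpi4py.futures


    def f(x):
        return x ** 2  # toy function to learn; replace with your own


    fname = 'learner1d.pickle'  # placeholder save file

    learner = adaptive.Learner1D(f, bounds=(-1, 1))

    # resume from a previous run, if there is one
    try:
        learner.load(fname)
    except FileNotFoundError:
        pass

    runner = adaptive.Runner(
        learner,
        executor=mpi4py.futures.MPIPoolExecutor(),
        shutdown_executor=True,
        goal=lambda l: l.loss() < 0.01,
    )

    # periodically save the data (in case the job dies)
    runner.start_periodic_saving(dict(fname=fname), interval=600)

    # block until the runner goal is reached
    runner.ioloop.run_until_complete(runner.task)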
