 import sqlite3
 import sys
 import traceback
+from collections import defaultdict
 from functools import partial
 from pathlib import Path
 
 import pandas as pd
 from tqdm import tqdm
 
+from bluepyparallel.parallel import init_parallel_factory
+
 logger = logging.getLogger(__name__)
 
 
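Note: the new `defaultdict` import supports the reworked no-SQL results collection in the last hunk below. A dict-of-dicts keyed as `_results[column][task_id]` converts directly into a dataframe whose columns are the outer keys and whose index is the inner keys. A minimal sketch of that conversion (hypothetical values):

    from collections import defaultdict
    import pandas as pd

    _results = defaultdict(dict)
    _results["result"][0] = 1.0  # _results[column][task_id] = value
    _results["result"][1] = 4.0
    pd.DataFrame(_results)       # index [0, 1], single column "result"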
@@ -18,35 +21,34 @@ def _try_evaluation(task, evaluation_function=None):
     try:
         result = evaluation_function(task_args)
         exception = ""
-
     except Exception:  # pylint: disable=broad-except
         result = None
         exception = "".join(traceback.format_exception(*sys.exc_info()))
-        logger.exception("Exception for combo %s", exception)
+        logger.exception("Exception for ID=%s: %s", task_id, exception)
     return task_id, result, exception
 
 
 def _create_database(df, new_columns, db_filename="db.sql"):
     """Create a sqlite database from dataframe."""
-    df.loc[:, "exception"] = None
+    df["exception"] = None
     for new_column in new_columns:
-        df.loc[:, new_column[0]] = new_column[1]
-        df.loc[:, "to_run_" + new_column[0]] = 1
-    with sqlite3.connect(db_filename) as db:
+        df[new_column[0]] = new_column[1]
+        df["to_run_" + new_column[0]] = 1
+    with sqlite3.connect(str(db_filename)) as db:
         df.to_sql("df", db, if_exists="replace", index_label="index")
     return df
 
 
 def _load_database_to_dataframe(db_filename="db.sql"):
-    """Load an sql database and construct the dataframe."""
-    with sqlite3.connect(db_filename) as db:
+    """Load an SQL database and construct the dataframe."""
+    with sqlite3.connect(str(db_filename)) as db:
         out = pd.read_sql("SELECT * FROM df", db, index_col="index")
     return out
 
 
 def _write_to_sql(db_filename, task_id, results, new_columns, exception):
-    """Write row data to sql."""
-    with sqlite3.connect(db_filename) as db:
+    """Write row data to SQL."""
+    with sqlite3.connect(str(db_filename)) as db:
         for new_column in new_columns:
             res = results[new_column[0]] if results is not None else None
             db.execute(
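These helpers now wrap `db_filename` in `str()`, so `pathlib.Path` values work as well (`sqlite3.connect` accepts path-like objects natively only from Python 3.7 on). A minimal sketch of the round trip they implement, with a hypothetical input column `x` and result column `result`:

    import sqlite3
    import pandas as pd

    df = pd.DataFrame({"x": [1.0, 2.0]})
    df["result"] = 0.0       # empty value for the new column
    df["to_run_result"] = 1  # 1 flags rows still to compute
    with sqlite3.connect("db.sql") as db:
        df.to_sql("df", db, if_exists="replace", index_label="index")
    with sqlite3.connect("db.sql") as db:
        out = pd.read_sql("SELECT * FROM df", db, index_col="index")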
@@ -60,11 +62,9 @@ def evaluate(
     df,
     evaluation_function,
     new_columns=None,
-    task_ids=None,
-    continu=False,
+    resume=False,
     parallel_factory=None,
-    db_filename="db.sql",
-    no_sql=False,
+    db_filename=None,
 ):
     """Evaluate and save results in a sqlite database on the fly and return dataframe.
 
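The `task_ids`, `continu`, and `no_sql` keywords are gone: `continu` is renamed `resume`, the SQL backend is now toggled by whether `db_filename` is given, and callers that used `task_ids` can subset the dataframe before calling. A hypothetical call under the old signature such as

    evaluate(df, func, task_ids=ids, continu=True, no_sql=False, db_filename="db.sql")

would now read

    evaluate(df.loc[ids], func, resume=True, db_filename="db.sql")

and omitting `db_filename` replaces `no_sql=True`.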
@@ -75,68 +75,61 @@ def evaluate(
             and return a dict with keys corresponding to the names in new_columns
         new_columns (list): list of names of new columns and empty values to save evaluation results,
             i.e.: [['result', 0.0], ['valid', False]]
-        task_ids (int): index of dataframe to compute, if None, all will be computed
-        continu (bool): if True, it will use only compute the empty rows of the database,
+        resume (bool): if True, it will only compute the empty rows of the database,
             if False, it will overwrite or generate the database
         parallel_factory (ParallelFactory): parallel factory instance
-        db_filename (str): filename for the sqlite database
-        no_sql (bool): is True, sql backend will be disabled. To use when evaluations are numerous
-            and fast, to avoid the overhead of communication with sql database.
+        db_filename (str): if a file path is given, the SQL backend will be enabled and will use
+            this path for the SQLite database. Should not be used when evaluations are numerous
+            and fast, in order to avoid the overhead of communication with the SQL database.
+
     Return:
         pandas.DataFrame: dataframe with new columns containing computed results
     """
-    if task_ids is None:
-        task_ids = df.index
-    else:
-        df = df.loc[task_ids]
+    if isinstance(parallel_factory, str) or parallel_factory is None:
+        parallel_factory = init_parallel_factory(parallel_factory)
+
+    task_ids = df.index
+
     if new_columns is None:
         new_columns = [["data", ""]]
 
-    if no_sql:
-        logger.info("Not using sql backend to save iterations")
+    if db_filename is None:
+        logger.info("Not using SQL backend to save iterations")
         to_evaluate = df
-    elif continu:
-        logger.info("Load data from sql database")
+    elif resume:
+        logger.info("Load data from SQL database")
         if Path(db_filename).exists():
             to_evaluate = _load_database_to_dataframe(db_filename=db_filename)
+            task_ids = task_ids.intersection(to_evaluate.index)
         else:
             to_evaluate = _create_database(df, new_columns, db_filename=db_filename)
-        for new_column in new_columns:
-            task_ids = task_ids[
-                to_evaluate.loc[task_ids, "to_run_" + new_column[0]].to_numpy() == 1
-            ]
+
+        # Find tasks to run
+        should_run = (
+            to_evaluate.loc[task_ids, ["to_run_" + col[0] for col in new_columns]] == 1
+        ).any(axis=1)
+        task_ids = should_run.loc[should_run].index
     else:
-        logger.info("Create sql database")
+        logger.info("Create SQL database")
         to_evaluate = _create_database(df, new_columns, db_filename=db_filename)
 
-    # this is a hack to make it work, otherwise it does not update the entries correctly
-    to_evaluate = _load_database_to_dataframe(db_filename)
-    to_evaluate = _create_database(to_evaluate, new_columns, db_filename=db_filename)
-
     if len(task_ids) > 0:
         logger.info("%s rows to compute.", str(len(task_ids)))
     else:
-        logger.warning("WARNING: No rows to compute, something may be wrong")
+        logger.warning("WARNING: No row to compute, something may be wrong")
         return _load_database_to_dataframe(db_filename)
 
-    if parallel_factory is None:
-        mapper = map
-    else:
-        mapper = parallel_factory.get_mapper()
+    mapper = parallel_factory.get_mapper()
 
     eval_func = partial(_try_evaluation, evaluation_function=evaluation_function)
-    arg_list = enumerate(
-        dict(zip(to_evaluate.columns, row)) for row in to_evaluate.loc[task_ids].values
-    )
+    arg_list = to_evaluate.loc[task_ids].to_dict("index").items()
 
-    if no_sql:
-        _results = {}
-        for new_column, new_column_empty in new_columns:
-            _results[new_column] = len(task_ids) * [new_column_empty]
+    if db_filename is None:
+        _results = defaultdict(dict)
 
     try:
         for task_id, results, exception in tqdm(mapper(eval_func, arg_list), total=len(task_ids)):
-            if no_sql:
+            if db_filename is None:
                 for new_column, _ in new_columns:
                     _results[new_column][task_id] = (
                         results[new_column] if results is not None else None
@@ -149,15 +142,11 @@ def evaluate(
                     new_columns,
                     exception,
                 )
-
-        if no_sql:
-            for new_column, data in _results.items():
-                to_evaluate.loc[:, new_column] = data
-
-    # to save dataframe even if program is killed
     except (KeyboardInterrupt, SystemExit) as ex:
+        # To save dataframe even if program is killed
        logger.warning("Stopping mapper loop. Reason: %r", ex)
 
-    if no_sql:
+    if db_filename is None:
+        to_evaluate = pd.concat([to_evaluate, pd.DataFrame(_results)], axis=1)
         return to_evaluate
     return _load_database_to_dataframe(db_filename)
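A minimal end-to-end sketch of the new interface (the function `eval_one` and the column `x` are hypothetical; `parallel_factory` may be None, a factory name string, or a ready instance, per the `isinstance` check above, with None assumed to fall back to a serial factory via `init_parallel_factory`):

    import pandas as pd

    def eval_one(row):
        # `row` is one dataframe row as a dict, per arg_list above
        return {"result": row["x"] ** 2, "valid": row["x"] > 0}

    df = pd.DataFrame({"x": [1.0, -2.0, 3.0]})
    result_df = evaluate(
        df,
        eval_one,
        new_columns=[["result", 0.0], ["valid", False]],
        resume=True,            # only recompute rows still flagged to_run_*
        parallel_factory=None,  # assumed serial fallback via init_parallel_factory
        db_filename="db.sql",   # enables the SQLite backend
    )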