1- """Module to evaluate generic functions on rows of combos dataframe (similar to BluePyMMM) ."""
1+ """Module to evaluate generic functions on rows of dataframe."""
22import logging
33import sqlite3
44import sys
@@ -26,104 +26,98 @@ def _try_evaluation(task, evaluation_function=None):
     return task_id, result, exception


-def _create_database(combos, new_columns, combos_db_filename="combos_db.sql"):
-    """Create a sqlite database from combos dataframe."""
-    combos.loc[:, "exception"] = None
+def _create_database(df, new_columns, db_filename="db.sql"):
+    """Create a sqlite database from dataframe."""
+    df.loc[:, "exception"] = None
     for new_column in new_columns:
-        combos.loc[:, new_column[0]] = new_column[1]
-        combos.loc[:, "to_run_" + new_column[0]] = 1
-    with sqlite3.connect(combos_db_filename) as combos_db:
-        combos.to_sql("combos", combos_db, if_exists="replace", index_label="index")
-    return combos
+        df.loc[:, new_column[0]] = new_column[1]
+        df.loc[:, "to_run_" + new_column[0]] = 1
+    with sqlite3.connect(db_filename) as db:
+        df.to_sql("df", db, if_exists="replace", index_label="index")
+    return df


-def _load_database_to_dataframe(combos_db_filename="combos_db.sql"):
+def _load_database_to_dataframe(db_filename="db.sql"):
     """Load an sql database and construct the dataframe."""
-    with sqlite3.connect(combos_db_filename) as combos_db:
-        out = pd.read_sql("SELECT * FROM combos", combos_db, index_col="index")
+    with sqlite3.connect(db_filename) as db:
+        out = pd.read_sql("SELECT * FROM df", db, index_col="index")
     return out


-def _write_to_sql(combos_db_filename, task_id, results, new_columns, exception):
+def _write_to_sql(db_filename, task_id, results, new_columns, exception):
     """Write row data to sql."""
-    with sqlite3.connect(combos_db_filename) as combos_db:
+    with sqlite3.connect(db_filename) as db:
         for new_column in new_columns:
             res = results[new_column[0]] if results is not None else None
-            combos_db.execute(
-                "UPDATE combos SET " + new_column[0] + "=?, "
+            db.execute(
+                "UPDATE df SET " + new_column[0] + "=?, "
                 "exception=?, to_run_" + new_column[0] + "=? WHERE `index`=?",
                 (res, exception, 0, task_id),
             )


-def evaluate_combos(  # pylint:disable=too-many-branches
-    combos_df,
+def evaluate(
+    df,
     evaluation_function,
     new_columns=None,
     task_ids=None,
     continu=False,
     parallel_factory=None,
-    combos_db_filename="combos_db.sql",
+    db_filename="db.sql",
     no_sql=False,
 ):
-    """Evaluate combos and save results in a sqlite database on the fly and return combos dataframe.
+    """Evaluate and save results in a sqlite database on the fly and return dataframe.

     Args:
-        combos_df (DataFrame): each row contains information for the computation
+        df (DataFrame): each row contains information for the computation
         evaluation_function (function): function used to evaluate each row,
             should take a single list-like argument containing the values of a row of df,
             and return a dict with keys corresponding to the names in new_columns
         new_columns (list): list of new column names and empty values used to save evaluation
             results, e.g.: [['result', 0.0], ['valid', False]]
-        task_ids (int): index of combos_original to compute, if None, all will be computed
+        task_ids (int): index of dataframe to compute, if None, all will be computed
         continu (bool): if True, only the rows of the database that have not been computed yet
             will be evaluated, if False, the database will be overwritten or created
         parallel_factory (ParallelFactory): parallel factory instance
-        combos_db_filename (str): filename for the combos sqlite database
+        db_filename (str): filename for the sqlite database
         no_sql (bool): if True, the sql backend will be disabled. Use it when evaluations are
             numerous and fast, to avoid the overhead of communicating with the sql database.
     Return:
-        pandas.DataFrame: combos_df dataframe with new columns containing computed results
+        pandas.DataFrame: dataframe with new columns containing computed results
     """
     if task_ids is None:
-        task_ids = combos_df.index
+        task_ids = df.index
     else:
-        combos_df = combos_df.loc[task_ids]
+        df = df.loc[task_ids]
     if new_columns is None:
         new_columns = [["data", ""]]

     if no_sql:
         logger.info("Not using sql backend to save iterations")
-        combos_to_evaluate = combos_df
+        to_evaluate = df
     elif continu:
         logger.info("Load data from sql database")
-        if Path(combos_db_filename).exists():
-            combos_to_evaluate = _load_database_to_dataframe(combos_db_filename=combos_db_filename)
+        if Path(db_filename).exists():
+            to_evaluate = _load_database_to_dataframe(db_filename=db_filename)
         else:
-            combos_to_evaluate = _create_database(
-                combos_df, new_columns, combos_db_filename=combos_db_filename
-            )
+            to_evaluate = _create_database(df, new_columns, db_filename=db_filename)
         for new_column in new_columns:
             task_ids = task_ids[
-                combos_to_evaluate.loc[task_ids, "to_run_" + new_column[0]].to_numpy() == 1
+                to_evaluate.loc[task_ids, "to_run_" + new_column[0]].to_numpy() == 1
             ]
     else:
         logger.info("Create sql database")
-        combos_to_evaluate = _create_database(
-            combos_df, new_columns, combos_db_filename=combos_db_filename
-        )
+        to_evaluate = _create_database(df, new_columns, db_filename=db_filename)

         # this is a hack to make it work, otherwise it does not update the entries correctly
-        combos_to_evaluate = _load_database_to_dataframe(combos_db_filename)
-        combos_to_evaluate = _create_database(
-            combos_to_evaluate, new_columns, combos_db_filename=combos_db_filename
-        )
+        to_evaluate = _load_database_to_dataframe(db_filename)
+        to_evaluate = _create_database(to_evaluate, new_columns, db_filename=db_filename)

     if len(task_ids) > 0:
-        logger.info("%s combos to compute.", str(len(task_ids)))
+        logger.info("%s rows to compute.", str(len(task_ids)))
     else:
-        logger.warning("WARNING: No combos to compute, something may be wrong")
-        return _load_database_to_dataframe(combos_db_filename)
+        logger.warning("WARNING: No rows to compute, something may be wrong")
+        return _load_database_to_dataframe(db_filename)

     if parallel_factory is None:
         mapper = map
@@ -132,8 +126,7 @@ def evaluate_combos( # pylint:disable=too-many-branches

     eval_func = partial(_try_evaluation, evaluation_function=evaluation_function)
     arg_list = enumerate(
-        dict(zip(combos_to_evaluate.columns, row))
-        for row in combos_to_evaluate.loc[task_ids].values
+        dict(zip(to_evaluate.columns, row)) for row in to_evaluate.loc[task_ids].values
     )

     if no_sql:
@@ -150,7 +143,7 @@ def evaluate_combos( # pylint:disable=too-many-branches
                 )
             else:
                 _write_to_sql(
-                    combos_db_filename,
+                    db_filename,
                     task_id,
                     results,
                     new_columns,
@@ -159,12 +152,12 @@ def evaluate_combos( # pylint:disable=too-many-branches

         if no_sql:
             for new_column, data in _results.items():
-                combos_to_evaluate.loc[:, new_column] = data
+                to_evaluate.loc[:, new_column] = data

     # to save dataframe even if program is killed
     except (KeyboardInterrupt, SystemExit) as ex:
         logger.warning("Stopping mapper loop. Reason: %r", ex)

     if no_sql:
-        return combos_to_evaluate
-    return _load_database_to_dataframe(combos_db_filename)
+        return to_evaluate
+    return _load_database_to_dataframe(db_filename)
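
For orientation, a minimal usage sketch of the renamed `evaluate` entry point follows. It is not part of the change above: the import path (`evaluator`) and the squaring function are hypothetical, and the sketch only assumes what the docstring states, namely that the evaluation function receives the values of one row (keyed by column name, as built in `arg_list`) and returns a dict whose keys match the names given in `new_columns`.

import pandas as pd

from evaluator import evaluate  # hypothetical import path for the module shown in this diff


def square_x(row):
    # `row` maps column names to the values of one dataframe row.
    # The returned keys must match the names passed in `new_columns`.
    return {"result": row["x"] ** 2}


df = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
result_df = evaluate(
    df,
    square_x,
    new_columns=[["result", 0.0]],
    db_filename="example_db.sql",  # each finished row is written here on the fly
)
print(result_df[["x", "result"]])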