1- """Module to evaluate generic functions on rows of combos dataframe (similar to BluePyMMM) ."""
1+ """Module to evaluate generic functions on rows of dataframe."""
22import logging
33import sqlite3
44import sys
@@ -26,104 +26,98 @@ def _try_evaluation(task, evaluation_function=None):
     return task_id, result, exception


-def _create_database(combos, new_columns, combos_db_filename="combos_db.sql"):
-    """Create a sqlite database from combos dataframe."""
-    combos.loc[:, "exception"] = None
+def _create_database(df, new_columns, db_filename="db.sql"):
+    """Create a sqlite database from dataframe."""
+    df.loc[:, "exception"] = None
     for new_column in new_columns:
-        combos.loc[:, new_column[0]] = new_column[1]
-        combos.loc[:, "to_run_" + new_column[0]] = 1
-    with sqlite3.connect(combos_db_filename) as combos_db:
-        combos.to_sql("combos", combos_db, if_exists="replace", index_label="index")
-    return combos
+        df.loc[:, new_column[0]] = new_column[1]
+        df.loc[:, "to_run_" + new_column[0]] = 1
+    with sqlite3.connect(db_filename) as db:
+        df.to_sql("df", db, if_exists="replace", index_label="index")
+    return df


-def _load_database_to_dataframe(combos_db_filename="combos_db.sql"):
+def _load_database_to_dataframe(db_filename="db.sql"):
     """Load an sql database and construct the dataframe."""
-    with sqlite3.connect(combos_db_filename) as combos_db:
-        out = pd.read_sql("SELECT * FROM combos", combos_db, index_col="index")
+    with sqlite3.connect(db_filename) as db:
+        out = pd.read_sql("SELECT * FROM df", db, index_col="index")
     return out


-def _write_to_sql(combos_db_filename, task_id, results, new_columns, exception):
+def _write_to_sql(db_filename, task_id, results, new_columns, exception):
     """Write row data to sql."""
-    with sqlite3.connect(combos_db_filename) as combos_db:
+    with sqlite3.connect(db_filename) as db:
         for new_column in new_columns:
             res = results[new_column[0]] if results is not None else None
-            combos_db.execute(
-                "UPDATE combos SET " + new_column[0] + "=?, "
+            db.execute(
+                "UPDATE df SET " + new_column[0] + "=?, "
                 "exception=?, to_run_" + new_column[0] + "=? WHERE `index`=?",
                 (res, exception, 0, task_id),
             )


-def evaluate_combos(  # pylint:disable=too-many-branches
-    combos_df,
+def evaluate(
+    df,
     evaluation_function,
     new_columns=None,
     task_ids=None,
     continu=False,
     parallel_factory=None,
-    combos_db_filename="combos_db.sql",
+    db_filename="db.sql",
     no_sql=False,
 ):
-    """Evaluate combos and save results in a sqlite database on the fly and return combos dataframe.
+    """Evaluate and save results in a sqlite database on the fly and return dataframe.

     Args:
-        combos_df (DataFrame): each row contains information for the computation
+        df (DataFrame): each row contains information for the computation
         evaluation_function (function): function used to evaluate each row,
             should take a single list-like argument containing the values of a row of df,
             and return a dict with keys corresponding to the names in new_columns
         new_columns (list): list of new column names and empty values used to save evaluation
             results, e.g.: [['result', 0.0], ['valid', False]]
-        task_ids (int): index of combos_original to compute, if None, all will be computed
+        task_ids (int): index of dataframe to compute, if None, all will be computed
         continu (bool): if True, only the rows of the database that have not been computed yet
             will be evaluated, if False, the database will be overwritten or created
         parallel_factory (ParallelFactory): parallel factory instance
-        combos_db_filename (str): filename for the combos sqlite database
+        db_filename (str): filename for the sqlite database
         no_sql (bool): if True, the sql backend will be disabled. Use it when evaluations are
             numerous and fast, to avoid the overhead of communicating with the sql database.
     Return:
-        pandas.DataFrame: combos_df dataframe with new columns containing computed results
+        pandas.DataFrame: dataframe with new columns containing computed results
     """
     if task_ids is None:
-        task_ids = combos_df.index
+        task_ids = df.index
     else:
-        combos_df = combos_df.loc[task_ids]
+        df = df.loc[task_ids]
     if new_columns is None:
         new_columns = [["data", ""]]

     if no_sql:
         logger.info("Not using sql backend to save iterations")
-        combos_to_evaluate = combos_df
+        to_evaluate = df
     elif continu:
         logger.info("Load data from sql database")
-        if Path(combos_db_filename).exists():
-            combos_to_evaluate = _load_database_to_dataframe(combos_db_filename=combos_db_filename)
+        if Path(db_filename).exists():
+            to_evaluate = _load_database_to_dataframe(db_filename=db_filename)
         else:
-            combos_to_evaluate = _create_database(
-                combos_df, new_columns, combos_db_filename=combos_db_filename
-            )
+            to_evaluate = _create_database(df, new_columns, db_filename=db_filename)
         for new_column in new_columns:
             task_ids = task_ids[
-                combos_to_evaluate.loc[task_ids, "to_run_" + new_column[0]].to_numpy() == 1
+                to_evaluate.loc[task_ids, "to_run_" + new_column[0]].to_numpy() == 1
             ]
     else:
         logger.info("Create sql database")
-        combos_to_evaluate = _create_database(
-            combos_df, new_columns, combos_db_filename=combos_db_filename
-        )
+        to_evaluate = _create_database(df, new_columns, db_filename=db_filename)

         # this is a hack to make it work, otherwise it does not update the entries correctly
-        combos_to_evaluate = _load_database_to_dataframe(combos_db_filename)
-        combos_to_evaluate = _create_database(
-            combos_to_evaluate, new_columns, combos_db_filename=combos_db_filename
-        )
+        to_evaluate = _load_database_to_dataframe(db_filename)
+        to_evaluate = _create_database(to_evaluate, new_columns, db_filename=db_filename)

     if len(task_ids) > 0:
-        logger.info("%s combos to compute.", str(len(task_ids)))
+        logger.info("%s rows to compute.", str(len(task_ids)))
     else:
-        logger.warning("WARNING: No combos to compute, something may be wrong")
-        return _load_database_to_dataframe(combos_db_filename)
+        logger.warning("WARNING: No rows to compute, something may be wrong")
+        return _load_database_to_dataframe(db_filename)

     if parallel_factory is None:
         mapper = map
@@ -132,8 +126,7 @@ def evaluate_combos( # pylint:disable=too-many-branches

     eval_func = partial(_try_evaluation, evaluation_function=evaluation_function)
     arg_list = enumerate(
-        dict(zip(combos_to_evaluate.columns, row))
-        for row in combos_to_evaluate.loc[task_ids].values
+        dict(zip(to_evaluate.columns, row)) for row in to_evaluate.loc[task_ids].values
     )

     if no_sql:
@@ -150,7 +143,7 @@ def evaluate_combos( # pylint:disable=too-many-branches
                 )
             else:
                 _write_to_sql(
-                    combos_db_filename,
+                    db_filename,
                     task_id,
                     results,
                     new_columns,
@@ -159,12 +152,12 @@ def evaluate_combos( # pylint:disable=too-many-branches

         if no_sql:
             for new_column, data in _results.items():
-                combos_to_evaluate.loc[:, new_column] = data
+                to_evaluate.loc[:, new_column] = data

     # to save dataframe even if program is killed
     except (KeyboardInterrupt, SystemExit) as ex:
         logger.warning("Stopping mapper loop. Reason: %r", ex)

     if no_sql:
-        return combos_to_evaluate
-    return _load_database_to_dataframe(combos_db_filename)
+        return to_evaluate
+    return _load_database_to_dataframe(db_filename)
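
For orientation, a minimal usage sketch of the renamed `evaluate` entry point follows. It is not part of the change above: the import path (`evaluator`) and the squaring function are hypothetical, and the sketch only assumes what the docstring states, namely that the evaluation function receives the values of one row (keyed by column name, as built in `arg_list`) and returns a dict whose keys match the names given in `new_columns`.

import pandas as pd

from evaluator import evaluate  # hypothetical import path for the module shown in this diff


def square_x(row):
    # `row` maps column names to the values of one dataframe row.
    # The returned keys must match the names passed in `new_columns`.
    return {"result": row["x"] ** 2}


df = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
result_df = evaluate(
    df,
    square_x,
    new_columns=[["result", 0.0]],
    db_filename="example_db.sql",  # each finished row is written here on the fly
)
print(result_df[["x", "result"]])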