@@ -42,7 +42,15 @@ def _try_evaluation_df(task, evaluation_function, func_args, func_kwargs):
4242
4343
4444def _evaluate_dataframe (
45- to_evaluate , df , evaluation_function , func_args , func_kwargs , new_columns , mapper , task_ids , db
45+ to_evaluate ,
46+ input_cols ,
47+ evaluation_function ,
48+ func_args ,
49+ func_kwargs ,
50+ new_columns ,
51+ mapper ,
52+ task_ids ,
53+ db ,
4654):
4755 """Internal evaluation function for dask.dataframe."""
4856 # Setup the function to apply to the data
@@ -57,25 +65,21 @@ def _evaluate_dataframe(
5765 res = []
5866 try :
5967 # Compute and collect the results
60- for batch in mapper (eval_func , to_evaluate .loc [task_ids , df . columns ], meta = meta ):
68+ for batch in mapper (eval_func , to_evaluate .loc [task_ids , input_cols ], meta = meta ):
6169 res .append (batch )
6270
6371 if db is not None :
64- # pylint: disable=cell-var-from-loop
65- batch_complete = to_evaluate [df .columns ].join (batch , how = "right" )
66- batch_cols = [col for col in batch_complete .columns if col != "exception" ]
67- batch_complete .apply (
68- lambda row : db .write (row .name , row [batch_cols ].to_dict (), row ["exception" ]),
69- axis = 1 ,
70- )
72+ batch_complete = to_evaluate [input_cols ].join (batch , how = "right" )
73+ data = batch_complete .to_records ().tolist ()
74+ db .write_batch (batch_complete .columns .tolist (), data )
7175 except (KeyboardInterrupt , SystemExit ) as ex : # pragma: no cover
7276 # To save dataframe even if program is killed
7377 logger .warning ("Stopping mapper loop. Reason: %r" , ex )
7478 return pd .concat (res )
7579
7680
7781def _evaluate_basic (
78- to_evaluate , df , evaluation_function , func_args , func_kwargs , mapper , task_ids , db
82+ to_evaluate , input_cols , evaluation_function , func_args , func_kwargs , mapper , task_ids , db
7983):
8084
8185 res = []
@@ -88,7 +92,7 @@ def _evaluate_basic(
8892 )
8993
9094 # Split the data into rows
91- arg_list = list (to_evaluate .loc [task_ids , df . columns ].to_dict ("index" ).items ())
95+ arg_list = list (to_evaluate .loc [task_ids , input_cols ].to_dict ("index" ).items ())
9296
9397 try :
9498 # Compute and collect the results
@@ -98,7 +102,7 @@ def _evaluate_basic(
98102 # Save the results into the DB
99103 if db is not None :
100104 db .write (
101- task_id , result , exception , ** to_evaluate .loc [task_id , df . columns ].to_dict ()
105+ task_id , result , exception , ** to_evaluate .loc [task_id , input_cols ].to_dict ()
102106 )
103107 except (KeyboardInterrupt , SystemExit ) as ex :
104108 # To save dataframe even if program is killed
@@ -132,7 +136,7 @@ def _prepare_db(db_url, to_evaluate, df, resume, task_ids):
132136 logger .info ("Create SQL database" )
133137 db .create (to_evaluate )
134138
135- return db , db .get_url ()
139+ return db , db .get_url (), task_ids
136140
137141
138142def evaluate (
@@ -209,7 +213,7 @@ def evaluate(
209213 logger .info ("Not using SQL backend to save iterations" )
210214 db = None
211215 else :
212- db , db_url = _prepare_db (db_url , to_evaluate , df , resume , task_ids )
216+ db , db_url , task_ids = _prepare_db (db_url , to_evaluate , df , resume , task_ids )
213217
214218 # Log the number of tasks to run
215219 if len (task_ids ) > 0 :
@@ -224,7 +228,7 @@ def evaluate(
224228 if isinstance (parallel_factory , DaskDataFrameFactory ):
225229 res_df = _evaluate_dataframe (
226230 to_evaluate ,
227- df ,
231+ df . columns ,
228232 evaluation_function ,
229233 func_args ,
230234 func_kwargs ,
@@ -236,7 +240,7 @@ def evaluate(
236240 else :
237241 res_df = _evaluate_basic (
238242 to_evaluate ,
239- df ,
243+ df . columns ,
240244 evaluation_function ,
241245 func_args ,
242246 func_kwargs ,
0 commit comments