1
1
import contextlib
2
2
3
+ from datetime import datetime
3
4
import pandas as pd
4
5
import numpy as np
5
6
from scipy import sparse
6
7
from sklearn .base import BaseEstimator , TransformerMixin
7
8
8
9
from .cross_validation import DataWrapper
9
10
from .pipeline import make_transformer_pipeline , _call_fit , TransformerPipeline
11
+ from . import logger
10
12
11
13
# Py2/Py3 compatibility shim: legacy six-style aliases, both bound to built-in str.
string_types = text_type = str
12
14
@@ -31,6 +33,10 @@ def _build_feature(columns, transformers, options={}):
31
33
return (columns , _build_transformer (transformers ), options )
32
34
33
35
36
+ def _elapsed_secs (t1 ):
37
+ return (datetime .now ()- t1 ).total_seconds ()
38
+
39
+
34
40
def _get_feature_names (estimator ):
35
41
"""
36
42
Attempt to extract feature names based on a given estimator
@@ -107,7 +113,6 @@ def __init__(self, features, default=False, sparse=False, df_out=False,
107
113
self .input_df = input_df
108
114
self .drop_cols = [] if drop_cols is None else drop_cols
109
115
self .transformed_names_ = []
110
-
111
116
if (df_out and (sparse or default )):
112
117
raise ValueError ("Can not use df_out with sparse or default" )
113
118
@@ -208,13 +213,16 @@ def fit(self, X, y=None):
208
213
209
214
"""
210
215
self ._build ()
216
+
211
217
for columns , transformers , options in self .built_features :
218
+ t1 = datetime .now ()
212
219
input_df = options .get ('input_df' , self .input_df )
213
220
214
221
if transformers is not None :
215
222
with add_column_names_to_exception (columns ):
216
223
Xt = self ._get_col_subset (X , columns , input_df )
217
224
_call_fit (transformers .fit , Xt , y )
225
+ logger .info (f"[FIT] { columns } : { _elapsed_secs (t1 )} secs" )
218
226
219
227
# handle features not explicitly selected
220
228
if self .built_default : # not False and not None
@@ -304,14 +312,24 @@ def _transform(self, X, y=None, do_fit=False):
304
312
# strings; we don't care because pandas
305
313
# will handle either.
306
314
Xt = self ._get_col_subset (X , columns , input_df )
315
+
307
316
if transformers is not None :
308
317
with add_column_names_to_exception (columns ):
309
318
if do_fit and hasattr (transformers , 'fit_transform' ):
319
+ t1 = datetime .now ()
310
320
Xt = _call_fit (transformers .fit_transform , Xt , y )
321
+ logger .info (f"[FIT_TRANSFORM] { columns } : { _elapsed_secs (t1 )} secs" ) # NOQA
311
322
else :
312
323
if do_fit :
324
+ t1 = datetime .now ()
313
325
_call_fit (transformers .fit , Xt , y )
326
+ logger .info (
327
+ f"[FIT] { columns } : { _elapsed_secs (t1 )} secs" )
328
+
329
+ t1 = datetime .now ()
314
330
Xt = transformers .transform (Xt )
331
+ logger .info (f"[TRANSFORM] { columns } : { _elapsed_secs (t1 )} secs" ) # NOQA
332
+
315
333
extracted .append (_handle_feature (Xt ))
316
334
317
335
alias = options .get ('alias' )
@@ -339,6 +357,7 @@ def _transform(self, X, y=None, do_fit=False):
339
357
# if not applying a default transformer,
340
358
# keep column names unmodified
341
359
self .transformed_names_ += unsel_cols
360
+
342
361
extracted .append (_handle_feature (Xt ))
343
362
344
363
# combine the feature outputs into one array.
0 commit comments