Skip to content

Commit 8e13e6a

Browse files
ragrawal
and authored
closing 209 : added info message for computation time (#222)
* added info message for computation time * fixed lint issues Co-authored-by: ragrawal <[email protected]>
1 parent e85877a commit 8e13e6a

File tree

4 files changed

+54
-2
lines changed

4 files changed

+54
-2
lines changed

README.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,26 @@ can be easily serialized.
441441
[1.60943791],
442442
[1.38629436]])
443443

444+
Changing Logging level
445+
***********************************
446+
447+
You can change the log level to ``INFO`` to print the time taken to fit/transform features. Setting it to a higher level will stop printing the elapsed time.
448+
The example below shows how to change the logging level.
449+
450+
451+
>>> import logging
452+
>>> logging.getLogger('sklearn_pandas').setLevel(logging.INFO)
444453

445454

446455
Changelog
447456
---------
457+
458+
2.0.3 (2020-11-06)
459+
******************
460+
461+
* Added elapsed time information for each feature
462+
463+
448464
2.0.2 (2020-10-01)
449465
******************
450466

sklearn_pandas/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
__version__ = '2.0.2'
1+
__version__ = '2.0.3'
2+
3+
import logging
4+
logger = logging.getLogger(__name__)
25

36
from .dataframe_mapper import DataFrameMapper # NOQA
47
from .features_generator import gen_features # NOQA

sklearn_pandas/dataframe_mapper.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import contextlib
22

3+
from datetime import datetime
34
import pandas as pd
45
import numpy as np
56
from scipy import sparse
67
from sklearn.base import BaseEstimator, TransformerMixin
78

89
from .cross_validation import DataWrapper
910
from .pipeline import make_transformer_pipeline, _call_fit, TransformerPipeline
11+
from . import logger
1012

1113
string_types = text_type = str
1214

@@ -31,6 +33,10 @@ def _build_feature(columns, transformers, options={}):
3133
return (columns, _build_transformer(transformers), options)
3234

3335

36+
def _elapsed_secs(t1):
37+
return (datetime.now()-t1).total_seconds()
38+
39+
3440
def _get_feature_names(estimator):
3541
"""
3642
Attempt to extract feature names based on a given estimator
@@ -107,7 +113,6 @@ def __init__(self, features, default=False, sparse=False, df_out=False,
107113
self.input_df = input_df
108114
self.drop_cols = [] if drop_cols is None else drop_cols
109115
self.transformed_names_ = []
110-
111116
if (df_out and (sparse or default)):
112117
raise ValueError("Can not use df_out with sparse or default")
113118

@@ -208,13 +213,16 @@ def fit(self, X, y=None):
208213
209214
"""
210215
self._build()
216+
211217
for columns, transformers, options in self.built_features:
218+
t1 = datetime.now()
212219
input_df = options.get('input_df', self.input_df)
213220

214221
if transformers is not None:
215222
with add_column_names_to_exception(columns):
216223
Xt = self._get_col_subset(X, columns, input_df)
217224
_call_fit(transformers.fit, Xt, y)
225+
logger.info(f"[FIT] {columns}: {_elapsed_secs(t1)} secs")
218226

219227
# handle features not explicitly selected
220228
if self.built_default: # not False and not None
@@ -304,14 +312,24 @@ def _transform(self, X, y=None, do_fit=False):
304312
# strings; we don't care because pandas
305313
# will handle either.
306314
Xt = self._get_col_subset(X, columns, input_df)
315+
307316
if transformers is not None:
308317
with add_column_names_to_exception(columns):
309318
if do_fit and hasattr(transformers, 'fit_transform'):
319+
t1 = datetime.now()
310320
Xt = _call_fit(transformers.fit_transform, Xt, y)
321+
logger.info(f"[FIT_TRANSFORM] {columns}: {_elapsed_secs(t1)} secs") # NOQA
311322
else:
312323
if do_fit:
324+
t1 = datetime.now()
313325
_call_fit(transformers.fit, Xt, y)
326+
logger.info(
327+
f"[FIT] {columns}: {_elapsed_secs(t1)} secs")
328+
329+
t1 = datetime.now()
314330
Xt = transformers.transform(Xt)
331+
logger.info(f"[TRANSFORM] {columns}: {_elapsed_secs(t1)} secs") # NOQA
332+
315333
extracted.append(_handle_feature(Xt))
316334

317335
alias = options.get('alias')
@@ -339,6 +357,7 @@ def _transform(self, X, y=None, do_fit=False):
339357
# if not applying a default transformer,
340358
# keep column names unmodified
341359
self.transformed_names_ += unsel_cols
360+
342361
extracted.append(_handle_feature(Xt))
343362

344363
# combine the feature outputs into one array.

tests/test_dataframe_mapper.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,20 @@ def test_transformed_names_binarizer(complex_dataframe):
152152
assert mapper.transformed_names_ == ['target_a', 'target_b', 'target_c']
153153

154154

155+
def test_logging(caplog, complex_dataframe):
156+
"""
157+
Check that an INFO-level log message with the elapsed time
158+
is emitted when fit_transform is applied to a mapped feature
159+
"""
160+
import logging
161+
logger = logging.getLogger('sklearn_pandas')
162+
logger.setLevel(logging.INFO)
163+
df = complex_dataframe
164+
mapper = DataFrameMapper([('target', LabelBinarizer())])
165+
mapper.fit_transform(df)
166+
assert '[FIT_TRANSFORM] target:' in caplog.text
167+
168+
155169
def test_transformed_names_binarizer_unicode():
156170
df = pd.DataFrame({'target': [u'ñ', u'á', u'é']})
157171
mapper = DataFrameMapper([('target', LabelBinarizer())])

0 commit comments

Comments
 (0)