Skip to content

Commit 117ea30

Browse files
solegalli and NicoGalli authored
add inverse_transform to transformers from transformation module (#283)
* New inverse methods in transformers
* Log inverse test case
* More unit tests + reciprocal error fixed in return
* #248 changes requested after initial PR
* polish PR on inverse_transform
* reorders import in outlier init
* adds inverse_transform to jupyter demos
* make minor changes to log docstrings

Co-authored-by: NicoGalli <[email protected]>
1 parent adc52db commit 117ea30

File tree

9 files changed

+704
-59
lines changed

9 files changed

+704
-59
lines changed

examples/transformation/LogTransformer.ipynb

Lines changed: 179 additions & 19 deletions
Large diffs are not rendered by default.

examples/transformation/PowerTransformer.ipynb

Lines changed: 165 additions & 12 deletions
Large diffs are not rendered by default.

examples/transformation/ReciprocalTransformer.ipynb

Lines changed: 173 additions & 20 deletions
Large diffs are not rendered by default.

feature_engine/transformation/log.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
class LogTransformer(BaseNumericalTransformer):
1515
"""
1616
The LogTransformer() applies the natural logarithm or the base 10 logarithm to
17-
numerical variables. The natural logarithm is logarithm in base e.
17+
numerical variables. The natural logarithm is the logarithm in base e.
1818
19-
The LogTransformer() only works with numerical non-negative values. If the variable
20-
contains a zero or a negative value, the transformer will return an error.
19+
The LogTransformer() only works with positive values. If the variable
20+
contains a zero or a negative value the transformer will return an error.
2121
2222
A list of variables can be passed as an argument. Alternatively, the transformer
2323
will automatically select and transform all variables of type numeric.
@@ -45,9 +45,11 @@ class LogTransformer(BaseNumericalTransformer):
4545
fit:
4646
This transformer does not learn parameters.
4747
transform:
48-
Transforms the variables using log transformation.
48+
Transform the variables using the logarithm.
4949
fit_transform:
5050
Fit to data, then transform it.
51+
inverse_transform:
52+
Convert the data back to the original representation.
5153
"""
5254

5355
def __init__(
@@ -66,9 +68,9 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
6668
"""
6769
This transformer does not learn parameters.
6870
69-
Select the numerical variables and determines whether the logarithm
70-
can be applied on the selected variables (it checks if the variables
71-
are all positive).
71+
Selects the numerical variables and determines whether the logarithm
72+
can be applied on the selected variables, i.e., it checks that the variables
73+
are positive.
7274
7375
Parameters
7476
----------
@@ -109,7 +111,7 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
109111

110112
def transform(self, X: pd.DataFrame) -> pd.DataFrame:
111113
"""
112-
Transforms the variables using log transformation.
114+
Transform the variables using log transformation.
113115
114116
Parameters
115117
----------
@@ -148,6 +150,41 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
148150

149151
return X
150152

153+
def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
154+
"""
155+
Convert the data back to the original representation.
156+
157+
Parameters
158+
----------
159+
X: Pandas DataFrame of shape = [n_samples, n_features]
160+
The data to be transformed.
161+
162+
Raises
163+
------
164+
TypeError
165+
If the input is not a Pandas DataFrame
166+
ValueError
167+
- If the variable(s) contain null values
168+
- If the df has different number of features than the df used in fit()
169+
- If some variables contain zero or negative values
170+
171+
Returns
172+
-------
173+
X: pandas dataframe
174+
The dataframe with the transformed variables.
175+
"""
176+
177+
# check input dataframe and if class was fitted
178+
X = super().transform(X)
179+
180+
# inverse_transform
181+
if self.base == "e":
182+
X.loc[:, self.variables_] = np.exp(X.loc[:, self.variables_])
183+
elif self.base == "10":
184+
X.loc[:, self.variables_] = np.array(10 ** X.loc[:, self.variables_])
185+
186+
return X
187+
151188
def _more_tags(self):
152189
tags_dict = _return_tags()
153190
# ======= these tests fail because the transformers throw an error

feature_engine/transformation/power.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ class PowerTransformer(BaseNumericalTransformer):
4646
Apply the power transformation to the variables.
4747
fit_transform:
4848
Fit to data, then transform it.
49+
inverse_transform:
50+
Convert the data back to the original representation.
4951
"""
5052

5153
def __init__(
@@ -124,3 +126,34 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
124126
X.loc[:, self.variables_] = np.power(X.loc[:, self.variables_], self.exp)
125127

126128
return X
129+
130+
def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
131+
"""
132+
Convert the data back to the original representation.
133+
134+
Parameters
135+
----------
136+
X: Pandas DataFrame of shape = [n_samples, n_features]
137+
The data to be transformed.
138+
139+
Raises
140+
------
141+
TypeError
142+
If the input is not a Pandas DataFrame
143+
ValueError
144+
- If the variable(s) contain null values
145+
- If the df has different number of features than the df used in fit()
146+
147+
Returns
148+
-------
149+
X: pandas Dataframe
150+
The dataframe with the variables converted back to their original representation.
151+
"""
152+
153+
# check input dataframe and if class was fitted
154+
X = super().transform(X)
155+
156+
# inverse_transform
157+
X.loc[:, self.variables_] = np.power(X.loc[:, self.variables_], 1 / self.exp)
158+
159+
return X

feature_engine/transformation/reciprocal.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ class ReciprocalTransformer(BaseNumericalTransformer):
4545
Apply the reciprocal 1 / x transformation.
4646
fit_transform:
4747
Fit to data, then transform it.
48+
inverse_transform:
49+
Convert the data back to the original representation.
4850
"""
4951

5052
def __init__(
@@ -136,6 +138,32 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
136138

137139
return X
138140

141+
def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
142+
"""
143+
Convert the data back to the original representation.
144+
145+
Parameters
146+
----------
147+
X: Pandas DataFrame of shape = [n_samples, n_features]
148+
The data to be transformed.
149+
150+
Raises
151+
------
152+
TypeError
153+
If the input is not a Pandas DataFrame
154+
ValueError
155+
- If the variable(s) contain null values
156+
- If the df has different number of features than the df used in fit()
157+
- If some variables contain zero values
158+
159+
Returns
160+
-------
161+
X: pandas dataframe
162+
The dataframe with the transformed variables.
163+
"""
164+
# inverse_transform
165+
return self.transform(X)
166+
139167
def _more_tags(self):
140168
tags_dict = _return_tags()
141169
# ======= these tests fail because the transformers throw an error

tests/test_transformation/test_log_transformer.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ def test_log_base_e_plus_automatically_find_variables(df_vartypes):
2424
# test transform output
2525
pd.testing.assert_frame_equal(X, transf_df)
2626

27+
# test inverse_transform
28+
Xit = transformer.inverse_transform(X)
29+
30+
# convert numbers to original format.
31+
Xit["Age"] = Xit["Age"].round().astype("int64")
32+
Xit["Marks"] = Xit["Marks"].round(1)
33+
34+
# test
35+
pd.testing.assert_frame_equal(Xit, df_vartypes)
36+
2737

2838
def test_log_base_10_plus_user_passes_var_list(df_vartypes):
2939
# test case 2: log base 10, user passes variables
@@ -43,6 +53,15 @@ def test_log_base_10_plus_user_passes_var_list(df_vartypes):
4353
# test transform output
4454
pd.testing.assert_frame_equal(X, transf_df)
4555

56+
# test inverse_transform
57+
Xit = transformer.inverse_transform(X)
58+
59+
# convert numbers to original format.
60+
Xit["Age"] = Xit["Age"].round().astype("int64")
61+
62+
# test
63+
pd.testing.assert_frame_equal(Xit, df_vartypes)
64+
4665

4766
def test_error_if_base_value_not_allowed():
4867
with pytest.raises(ValueError):
@@ -85,3 +104,22 @@ def test_non_fitted_error(df_vartypes):
85104
with pytest.raises(NotFittedError):
86105
transformer = LogTransformer()
87106
transformer.transform(df_vartypes)
107+
108+
109+
def test_inverse_e_plus_user_passes_var_list(df_vartypes):
110+
# test case 7: inverse log, user passes variables
111+
transformer = LogTransformer(variables="Age")
112+
Xt = transformer.fit_transform(df_vartypes)
113+
X = transformer.inverse_transform(Xt)
114+
115+
# convert floats to int
116+
X["Age"] = X["Age"].round().astype("int64")
117+
118+
# test init params
119+
assert transformer.base == "e"
120+
assert transformer.variables == "Age"
121+
# test fit attr
122+
assert transformer.variables_ == ["Age"]
123+
assert transformer.n_features_in_ == 5
124+
# test transform output
125+
pd.testing.assert_frame_equal(X, df_vartypes)

tests/test_transformation/test_power_transformer.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ def test_defo_params_plus_automatically_find_variables(df_vartypes):
2424
# test transform output
2525
pd.testing.assert_frame_equal(X, transf_df)
2626

27+
# inverse transform
28+
Xit = transformer.inverse_transform(X)
29+
30+
# convert numbers to original format.
31+
Xit["Age"] = Xit["Age"].round().astype("int64")
32+
Xit["Marks"] = Xit["Marks"].round(1)
33+
34+
# test
35+
pd.testing.assert_frame_equal(Xit, df_vartypes)
36+
2737

2838
def test_error_if_exp_value_not_allowed():
2939
with pytest.raises(ValueError):
@@ -49,3 +59,26 @@ def test_non_fitted_error(df_vartypes):
4959
with pytest.raises(NotFittedError):
5060
transformer = PowerTransformer()
5161
transformer.transform(df_vartypes)
62+
63+
64+
_exp_ls = [0.001, 0.1, 2, 3, 4, 10]
65+
66+
67+
@pytest.mark.parametrize("exp_base", _exp_ls)
68+
def test_inverse_transform_exp_no_default(exp_base, df_vartypes):
69+
transformer = PowerTransformer(exp=exp_base)
70+
Xt = transformer.fit_transform(df_vartypes)
71+
X = transformer.inverse_transform(Xt)
72+
73+
# convert numbers to original format.
74+
X["Age"] = X["Age"].round().astype("int64")
75+
X["Marks"] = X["Marks"].round(1)
76+
77+
# test init params
78+
# assert transformer.exp == 100
79+
assert transformer.variables is None
80+
# test fit attr
81+
assert transformer.variables_ == ["Age", "Marks"]
82+
assert transformer.n_features_in_ == 5
83+
# test transform output
84+
pd.testing.assert_frame_equal(X, df_vartypes)

tests/test_transformation/test_reciprocal_transformer.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@ def test_automatically_find_variables(df_vartypes):
2323
# test transform output
2424
pd.testing.assert_frame_equal(X, transf_df)
2525

26+
# test inverse_transform
27+
Xit = transformer.inverse_transform(X)
28+
29+
# convert numbers to original format.
30+
Xit["Age"] = Xit["Age"].round().astype("int64")
31+
Xit["Marks"] = Xit["Marks"].round(1)
32+
33+
# test
34+
pd.testing.assert_frame_equal(Xit, df_vartypes)
35+
2636

2737
def test_fit_raises_error_if_na_in_df(df_na):
2838
# test case 2: when dataset contains na, fit method

0 commit comments

Comments
 (0)