From 02a695d82073816aa7a62320c45608555a093d11 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 28 Apr 2023 10:52:44 -0400 Subject: [PATCH 01/10] initial commit --- feature_engine/encoding/one_hot.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index 68a219790..8d730a402 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -1,7 +1,7 @@ # Authors: Soledad Galli # License: BSD 3 clause -from typing import List, Optional, Union +from typing import List, Optional, Union, Dict import numpy as np import pandas as pd @@ -160,6 +160,7 @@ class OneHotEncoder(CategoricalInitMixin, CategoricalMethodsMixin): def __init__( self, top_categories: Optional[int] = None, + custom_categories: Optional[Dict] = None, drop_last: bool = False, drop_last_binary: bool = False, variables: Union[None, int, str, List[Union[str, int]]] = None, @@ -173,7 +174,13 @@ def __init__( "top_categories takes only positive integers. " f"Got {top_categories} instead" ) - + if top_categories is not None and custom_categories is not None: + raise ValueError( + "Both top_cagetories and custom_categories have values. " + "Only one of the two parameters may be used at a time. " + f"Got {top_categories} for top_categories. " + f"Got {custom_categories} for custom_categoriers." + ) if not isinstance(drop_last, bool): raise ValueError( f"drop_last takes only True or False. Got {drop_last} instead." 
From ed3adf8f1069e326ee22ab75878fbecb98a9dc36 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 28 Apr 2023 11:39:53 -0400 Subject: [PATCH 02/10] create custom_categories param and init checks --- feature_engine/encoding/one_hot.py | 22 +++++++++++++++- tests/test_encoding/test_onehot_encoder.py | 30 ++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index 8d730a402..1678c3da6 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -174,13 +174,33 @@ def __init__( "top_categories takes only positive integers. " f"Got {top_categories} instead" ) + if top_categories is not None and custom_categories is not None: raise ValueError( "Both top_cagetories and custom_categories have values. " "Only one of the two parameters may be used at a time. " f"Got {top_categories} for top_categories. " - f"Got {custom_categories} for custom_categoriers." + f"Got {custom_categories} for custom_categories." + ) + + if custom_categories and not isinstance(custom_categories, dict): + raise ValueError( + "custom_categories must be a dictionary. " + f"Got {custom_categories} instead." ) + + # check that all values of custom_categories key-value pairs are lists + if custom_categories: + non_lists_custom_categories = [ + val for val in custom_categories.values() + if not isinstance(val, list) + ] + if len(non_lists_custom_categories) > 0: + raise ValueError( + "custom_categories must be a dictionary that has lists as " + f"its values. Got {custom_categories} instead." + ) + if not isinstance(drop_last, bool): raise ValueError( f"drop_last takes only True or False. Got {drop_last} instead." 
diff --git a/tests/test_encoding/test_onehot_encoder.py b/tests/test_encoding/test_onehot_encoder.py index 42448be12..afc2d9014 100644 --- a/tests/test_encoding/test_onehot_encoder.py +++ b/tests/test_encoding/test_onehot_encoder.py @@ -202,6 +202,36 @@ def test_raises_error_if_df_contains_na(df_enc_big, df_enc_big_na): assert str(record.value) == msg +def test_raises_error_using_top_and_custom_categories(df_enc): + with pytest.raises(ValueError): + OneHotEncoder( + top_categories=1, + custom_categories={"var_A": ["C"]}, + ) + + +@pytest.mark.parametrize("_custom_cat", + [3, "hamberguesa", True, [3, 5, 7]] + ) +def test_raises_error_not_permitted_custom_categories(_custom_cat): + with pytest.raises(ValueError): + OneHotEncoder( + custom_categories=_custom_cat, + ) + + +@pytest.mark.parametrize("_custom_cat", [ + {"var_A": ["ZZ", "YY"], "var_B": 3}, + {"var_M": "test", "var_S": ["T", "U"]}, + ] + ) +def test_raises_error_non_permitted_custom_category_pair_values(_custom_cat): + with pytest.raises(ValueError): + OneHotEncoder( + custom_categories=_custom_cat + ) + + def test_encode_numerical_variables(df_enc_numeric): encoder = OneHotEncoder( top_categories=None, From 781b312fac318d08df88cd4258675d74ad3d7087 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 28 Apr 2023 15:40:42 -0400 Subject: [PATCH 03/10] add init checks --- feature_engine/encoding/one_hot.py | 73 +++++++++++++++------- tests/test_encoding/test_onehot_encoder.py | 20 +++++- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index 1678c3da6..64b30ed3f 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -189,8 +189,9 @@ def __init__( f"Got {custom_categories} instead." 
) - # check that all values of custom_categories key-value pairs are lists + if custom_categories: + # check that all values of custom_categories key-value pairs are lists non_lists_custom_categories = [ val for val in custom_categories.values() if not isinstance(val, list) @@ -201,6 +202,15 @@ def __init__( f"its values. Got {custom_categories} instead." ) + # check that custom_categories variable match variables + cust_cat_vars = sorted(list(custom_categories.keys())) + if cust_cat_vars != sorted(variables): + raise ValueError( + "Variables listed in custom_categories must match features " + f"listed in the variables param. Got {cust_cat_vars} for " + f"custom_categories and {sorted(variables)} for variables." + ) + if not isinstance(drop_last, bool): raise ValueError( f"drop_last takes only True or False. Got {drop_last} instead." @@ -214,6 +224,7 @@ def __init__( super().__init__(variables, ignore_format) self.top_categories = top_categories + self.custom_categories = custom_categories self.drop_last = drop_last self.drop_last_binary = drop_last_binary @@ -241,29 +252,35 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None): self.encoder_dict_ = {} - for var in variables_: - - # make dummies only for the most popular categories - if self.top_categories: - self.encoder_dict_[var] = [ - x - for x in X[var] - .value_counts() - .sort_values(ascending=False) - .head(self.top_categories) - .index - ] + # make dummies only for the selected variables and categories + if self.custom_categories: + for var in self.custom_categories.keys(): + unique_values = set(X[var].unique()) + + else: + for var in variables_: + + # make dummies only for the most popular categories + if self.top_categories: + self.encoder_dict_[var] = [ + x + for x in X[var] + .value_counts() + .sort_values(ascending=False) + .head(self.top_categories) + .index + ] - else: - category_ls = list(X[var].unique()) + else: + category_ls = list(X[var].unique()) - # return k-1 dummies - if 
self.drop_last: - self.encoder_dict_[var] = category_ls[:-1] + # return k-1 dummies + if self.drop_last: + self.encoder_dict_[var] = category_ls[:-1] - # return k dummies - else: - self.encoder_dict_[var] = category_ls + # return k dummies + else: + self.encoder_dict_[var] = category_ls self.variables_binary_ = [var for var in variables_ if X[var].nunique() == 2] @@ -329,3 +346,17 @@ def _add_new_feature_names(self, feature_names) -> List: feature_names = [f for f in feature_names if f not in self.variables_] return feature_names + + def _check_custom_categories_in_dataset(self, X: pd.DataFrame): + """ + Raise an error if user entered categories in custom_categories that do + not exist within dataset. + + """ + for var, categories in self.custom_categories.items(): + unique_values = set(X[var].unique()) + if not set(categories).issubset(unique_values): + raise ValueError( + f"All categorical values provided in {var} of custom_categories " + "do not exist within the dataset." + ) \ No newline at end of file diff --git a/tests/test_encoding/test_onehot_encoder.py b/tests/test_encoding/test_onehot_encoder.py index afc2d9014..b07a63871 100644 --- a/tests/test_encoding/test_onehot_encoder.py +++ b/tests/test_encoding/test_onehot_encoder.py @@ -228,7 +228,8 @@ def test_raises_error_not_permitted_custom_categories(_custom_cat): def test_raises_error_non_permitted_custom_category_pair_values(_custom_cat): with pytest.raises(ValueError): OneHotEncoder( - custom_categories=_custom_cat + custom_categories=_custom_cat, + variables=list(_custom_cat.keys()), ) @@ -546,3 +547,20 @@ def test_inverse_transform_raises_not_implemented_error(df_enc_binary): enc = OneHotEncoder().fit(df_enc_binary) with pytest.raises(NotImplementedError): enc.inverse_transform(df_enc_binary) + + +def test_error_when_custom_categories_values_do_not_exist(df_enc): + encoder = OneHotEncoder( + top_categories=None, + custom_categories={"var_A": ["A", "C"], "var_B": ["B", "X"]}, + variables=["var_A", 
"var_B"], + ) + + +def test_error_when_custom_categories_does_not_match_variables(): + with pytest.raises(ValueError): + OneHotEncoder( + top_categories=None, + custom_categories={"var_Q": ["A"], "var_Y": ["G", "H"]}, + variables=["var_Y", "var_B"], + ) \ No newline at end of file From 21b3b57d5e504e1f038b2eee6c43c23373f01715 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 28 Apr 2023 15:53:59 -0400 Subject: [PATCH 04/10] expand fit --- feature_engine/encoding/one_hot.py | 56 +++++++++++----------- tests/test_encoding/test_onehot_encoder.py | 6 ++- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index 64b30ed3f..b554209dd 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -250,37 +250,37 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None): variables_ = self._check_or_select_variables(X) _check_contains_na(X, variables_) - self.encoder_dict_ = {} - - # make dummies only for the selected variables and categories if self.custom_categories: - for var in self.custom_categories.keys(): - unique_values = set(X[var].unique()) - - else: - for var in variables_: - - # make dummies only for the most popular categories - if self.top_categories: - self.encoder_dict_[var] = [ - x - for x in X[var] - .value_counts() - .sort_values(ascending=False) - .head(self.top_categories) - .index - ] - - else: - category_ls = list(X[var].unique()) + self._check_custom_categories_in_dataset(X) - # return k-1 dummies - if self.drop_last: - self.encoder_dict_[var] = category_ls[:-1] + self.encoder_dict_ = {} - # return k dummies - else: - self.encoder_dict_[var] = category_ls + for var in variables_: + + # make dummies only for the most popular categories + if self.top_categories: + self.encoder_dict_[var] = [ + x + for x in X[var] + .value_counts() + .sort_values(ascending=False) + .head(self.top_categories) + .index + ] + # assign 
custom_categories to encoder_dict_ + elif self.custom_categories: + self.encoder_dict_ = self.custom_categories + + else: + category_ls = list(X[var].unique()) + + # return k-1 dummies + if self.drop_last: + self.encoder_dict_[var] = category_ls[:-1] + + # return k dummies + else: + self.encoder_dict_[var] = category_ls self.variables_binary_ = [var for var in variables_ if X[var].nunique() == 2] diff --git a/tests/test_encoding/test_onehot_encoder.py b/tests/test_encoding/test_onehot_encoder.py index b07a63871..423a40753 100644 --- a/tests/test_encoding/test_onehot_encoder.py +++ b/tests/test_encoding/test_onehot_encoder.py @@ -555,12 +555,14 @@ def test_error_when_custom_categories_values_do_not_exist(df_enc): custom_categories={"var_A": ["A", "C"], "var_B": ["B", "X"]}, variables=["var_A", "var_B"], ) + with pytest.raises(ValueError): + encoder._check_custom_categories_in_dataset(df_enc) def test_error_when_custom_categories_does_not_match_variables(): with pytest.raises(ValueError): OneHotEncoder( - top_categories=None, custom_categories={"var_Q": ["A"], "var_Y": ["G", "H"]}, variables=["var_Y", "var_B"], - ) \ No newline at end of file + ) + From c6b478b6752e3b44dd46a7f4fa9e58efea9652b0 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 28 Apr 2023 18:07:08 -0400 Subject: [PATCH 05/10] create test_encode_custom_categories(). pass all tests. 
--- feature_engine/encoding/one_hot.py | 2 +- tests/test_encoding/test_onehot_encoder.py | 41 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index b554209dd..9d6a8c116 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -270,7 +270,7 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None): # assign custom_categories to encoder_dict_ elif self.custom_categories: self.encoder_dict_ = self.custom_categories - + else: category_ls = list(X[var].unique()) diff --git a/tests/test_encoding/test_onehot_encoder.py b/tests/test_encoding/test_onehot_encoder.py index 423a40753..d1b115a50 100644 --- a/tests/test_encoding/test_onehot_encoder.py +++ b/tests/test_encoding/test_onehot_encoder.py @@ -566,3 +566,44 @@ def test_error_when_custom_categories_does_not_match_variables(): variables=["var_Y", "var_B"], ) + +def test_encode_custom_categories(df_enc_big): + encoder = OneHotEncoder( + custom_categories={ + "var_A": ["A", "F", "G"], + "var_C": ["B", "F", "E"], + }, + variables=["var_A", "var_C"], + ) + X = encoder.fit_transform(df_enc_big).reset_index() + X = X.drop("index", axis=1) + + + expected_results_head = { + "var_B": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], + "var_A_A": [1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + "var_A_F": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "var_A_G": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "var_C_B": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + "var_C_F": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "var_C_E": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + } + expected_results_head_df = pd.DataFrame(expected_results_head) + + expected_results_tail = { + "var_B": ["E", "E", "F", "F", "G", "G", "G", "G", "G", "G"], + "var_A_A": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "var_A_F": [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], + "var_A_G": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + "var_C_B": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "var_C_F": [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], + 
"var_C_E": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + } + expected_results_tail_df = pd.DataFrame( + data=expected_results_tail, + index=range(30, 40), + ) + + # test transform outputs + pd.testing.assert_frame_equal(X.head(10), expected_results_head_df) + pd.testing.assert_frame_equal(X.tail(10), expected_results_tail_df) \ No newline at end of file From 353c6f372b84a51c1d3edd7976e4170de0663327 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Sat, 29 Apr 2023 09:47:26 -0400 Subject: [PATCH 06/10] fix formatting errors --- feature_engine/encoding/one_hot.py | 8 +++----- tests/test_encoding/test_onehot_encoder.py | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index 9d6a8c116..ec356ca63 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -1,7 +1,7 @@ # Authors: Soledad Galli # License: BSD 3 clause -from typing import List, Optional, Union, Dict +from typing import Dict, List, Optional, Union import numpy as np import pandas as pd @@ -189,12 +189,10 @@ def __init__( f"Got {custom_categories} instead." ) - if custom_categories: # check that all values of custom_categories key-value pairs are lists non_lists_custom_categories = [ - val for val in custom_categories.values() - if not isinstance(val, list) + val for val in custom_categories.values() if not isinstance(val, list) ] if len(non_lists_custom_categories) > 0: raise ValueError( @@ -359,4 +357,4 @@ def _check_custom_categories_in_dataset(self, X: pd.DataFrame): raise ValueError( f"All categorical values provided in {var} of custom_categories " "do not exist within the dataset." 
- ) \ No newline at end of file + ) diff --git a/tests/test_encoding/test_onehot_encoder.py b/tests/test_encoding/test_onehot_encoder.py index d1b115a50..1e5d2dfae 100644 --- a/tests/test_encoding/test_onehot_encoder.py +++ b/tests/test_encoding/test_onehot_encoder.py @@ -210,9 +210,7 @@ def test_raises_error_using_top_and_custom_categories(df_enc): ) -@pytest.mark.parametrize("_custom_cat", - [3, "hamberguesa", True, [3, 5, 7]] - ) +@pytest.mark.parametrize("_custom_cat", [3, "hamberguesa", True, [3, 5, 7]]) def test_raises_error_not_permitted_custom_categories(_custom_cat): with pytest.raises(ValueError): OneHotEncoder( @@ -220,11 +218,13 @@ def test_raises_error_not_permitted_custom_categories(_custom_cat): ) -@pytest.mark.parametrize("_custom_cat", [ - {"var_A": ["ZZ", "YY"], "var_B": 3}, - {"var_M": "test", "var_S": ["T", "U"]}, - ] - ) +@pytest.mark.parametrize( + "_custom_cat", + [ + {"var_A": ["ZZ", "YY"], "var_B": 3}, + {"var_M": "test", "var_S": ["T", "U"]}, + ], +) def test_raises_error_non_permitted_custom_category_pair_values(_custom_cat): with pytest.raises(ValueError): OneHotEncoder( @@ -572,13 +572,12 @@ def test_encode_custom_categories(df_enc_big): custom_categories={ "var_A": ["A", "F", "G"], "var_C": ["B", "F", "E"], - }, + }, variables=["var_A", "var_C"], ) X = encoder.fit_transform(df_enc_big).reset_index() X = X.drop("index", axis=1) - expected_results_head = { "var_B": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A"], "var_A_A": [1, 1, 1, 1, 1, 1, 0, 0, 0, 0], @@ -606,4 +605,4 @@ def test_encode_custom_categories(df_enc_big): # test transform outputs pd.testing.assert_frame_equal(X.head(10), expected_results_head_df) - pd.testing.assert_frame_equal(X.tail(10), expected_results_tail_df) \ No newline at end of file + pd.testing.assert_frame_equal(X.tail(10), expected_results_tail_df) From 00b421626d4fc380124f32362971538ade311f42 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Sat, 29 Apr 2023 09:58:46 -0400 Subject: [PATCH 07/10] update 
docstring --- feature_engine/encoding/one_hot.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index ec356ca63..df634f339 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -45,6 +45,10 @@ class OneHotEncoder(CategoricalInitMixin, CategoricalMethodsMixin): majority of the observations in the dataset. This behaviour can be specified with the parameter `top_categories`. + The encoder also has the functionality to one-hot encode user-defined categories + for a subset or all variables. This behavior can be specified with the parameter + `custom_categories`. + The encoder will encode only categorical variables by default (type 'object' or 'categorical'). You can pass a list of variables to encode. Alternatively, the encoder will find and encode all categorical variables (type 'object' or @@ -82,6 +86,17 @@ class OneHotEncoder(CategoricalInitMixin, CategoricalMethodsMixin): value 0 in all the binary variables. Note that if `top_categories` is not None, the parameter `drop_last` is ignored. + If `top_categories` is being used, `custom_categories` must equal None. + + custom_categories: dict, default=None + Accepts a dictionary in which the keys are the variables that the user would like + to transform. The keys must match the values of `variables` param. + + The dictionary values are lists of the categories for each selected variable + that are to be one-hot encoded. + + If `custom_categories` is being used, `top_categories` must equal None. + drop_last: boolean, default=False Only used if `top_categories = None`. 
It indicates whether to create dummy variables for all the categories (k dummies), or if set to `True`, it will From 43bc63d2ca2fff7957ba83012e8d7b033a076b7b Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Sat, 29 Apr 2023 10:15:46 -0400 Subject: [PATCH 08/10] fix typecheck errors --- feature_engine/encoding/one_hot.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index df634f339..e097ed6aa 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -1,7 +1,7 @@ # Authors: Soledad Galli # License: BSD 3 clause -from typing import Dict, List, Optional, Union +from typing import Dict, Iterable, List, Optional, Union import numpy as np import pandas as pd @@ -178,7 +178,9 @@ def __init__( custom_categories: Optional[Dict] = None, drop_last: bool = False, drop_last_binary: bool = False, - variables: Union[None, int, str, List[Union[str, int]]] = None, + variables: Union[ + None, int, str, List[Union[str, int]], Iterable[Union[str, int]] + ] = None, ignore_format: bool = False, ) -> None: @@ -360,7 +362,7 @@ def _add_new_feature_names(self, feature_names) -> List: return feature_names - def _check_custom_categories_in_dataset(self, X: pd.DataFrame): + def _check_custom_categories_in_dataset(self, X: pd.DataFrame) -> None: """ Raise an error if user entered categories in custom_categories that do not exist within dataset. 
From cebc04aabe887ec7b1b23de970469975b6e7eed8 Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 26 May 2023 19:46:23 -0400 Subject: [PATCH 09/10] revise docstring --- feature_engine/encoding/one_hot.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index e097ed6aa..51e9e930d 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -45,9 +45,8 @@ class OneHotEncoder(CategoricalInitMixin, CategoricalMethodsMixin): majority of the observations in the dataset. This behaviour can be specified with the parameter `top_categories`. - The encoder also has the functionality to one-hot encode user-defined categories - for a subset or all variables. This behavior can be specified with the parameter - `custom_categories`. + OneHotEncoder can also encode a user defined subset of categories for each variable. + See parameter `custom_categories`. The encoder will encode only categorical variables by default (type 'object' or 'categorical'). You can pass a list of variables to encode. Alternatively, the From 9d98b36d4a05d346e2516565e0406b1d50d605bd Mon Sep 17 00:00:00 2001 From: Morgan-Sell Date: Fri, 26 May 2023 19:52:56 -0400 Subject: [PATCH 10/10] refactor code --- feature_engine/encoding/one_hot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py index 51e9e930d..49149698f 100644 --- a/feature_engine/encoding/one_hot.py +++ b/feature_engine/encoding/one_hot.py @@ -199,7 +199,7 @@ def __init__( f"Got {custom_categories} for custom_categories." ) - if custom_categories and not isinstance(custom_categories, dict): + if custom_categories is not None and not isinstance(custom_categories, dict): raise ValueError( "custom_categories must be a dictionary. " f"Got {custom_categories} instead."