44
55import pandas as pd
66
7+ from pandas .api .types import is_numeric_dtype as is_numeric
8+ from pandas .api .types import is_categorical_dtype as is_categorical
9+ from pandas .api .types import is_object_dtype as is_object
10+
711Variables = Union [None , int , str , List [Union [str , int ]]]
812
913
@@ -44,40 +48,47 @@ def _find_or_check_numerical_variables(
4448
4549 Parameters
4650 ----------
47- X : Pandas DataFrame
51+ X : Pandas DataFrame.
4852 variables : variable or list of variables. Defaults to None.
4953
5054 Raises
5155 ------
5256 ValueError
53- If there are no numerical variables in the df or the df is empty
57+ If there are no numerical variables in the df or the df is empty.
5458 TypeError
55- If any of the user provided variables are not numerical
59+ If any of the user provided variables are not numerical.
5660
5761 Returns
5862 -------
59- variables: List of numerical variables
63+ variables: List of numerical variables.
6064 """
6165
62- if isinstance (variables , (str , int )):
63- variables = [variables ]
64-
65- elif not variables :
66+ if variables is None :
6667 # find numerical variables in dataset
6768 variables = list (X .select_dtypes (include = "number" ).columns )
6869 if len (variables ) == 0 :
6970 raise ValueError (
70- "No numerical variables in this dataframe. Please check variable "
71- "format with pandas dtypes"
71+ "No numerical variables found in this dataframe. Please check "
72+ "variable format with pandas dtypes. "
7273 )
7374
75+ elif isinstance (variables , (str , int )):
76+ if is_numeric (X [variables ]):
77+ variables = [variables ]
78+ else :
79+ raise TypeError ("The variable entered is not numeric." )
80+
7481 else :
82+ if len (variables ) == 0 :
83+ raise ValueError ("The list of variables is empty." )
84+
7585 # check that user entered variables are of type numerical
76- if any (X [variables ].select_dtypes (exclude = "number" ).columns ):
77- raise TypeError (
78- "Some of the variables are not numerical. Please cast them as "
79- "numerical before using this transformer"
80- )
86+ else :
87+ if len (X [variables ].select_dtypes (exclude = "number" ).columns ) > 0 :
88+ raise TypeError (
89+ "Some of the variables are not numerical. Please cast them as "
90+ "numerical before using this transformer."
91+ )
8192
8293 return variables
8394
@@ -91,38 +102,47 @@ def _find_or_check_categorical_variables(
91102
92103 Parameters
93104 ----------
94- X : pandas DataFrame
105+ X : pandas DataFrame.
95106 variables : variable or list of variables. Defaults to None.
96107
97108 Raises
98109 ------
99110 ValueError
100- If there are no categorical variables in df or df is empty
111+ If there are no categorical variables in df or df is empty.
101112 TypeError
102- If any of the user provided variables are not categorical
113+ If any of the user provided variables are not categorical.
103114
104115 Returns
105116 -------
106- variables : List of categorical variables
117+ variables : List of categorical variables.
107118 """
108119
109- if isinstance (variables , (str , int )):
110- variables = [variables ]
111-
112- elif not variables :
120+ if variables is None :
121+ # find categorical variables in dataset
113122 variables = list (X .select_dtypes (include = ["O" , "category" ]).columns )
114123 if len (variables ) == 0 :
115124 raise ValueError (
116- "No categorical variables in this dataframe. Please check the "
117- "variables format with pandas dtypes"
125+ "No categorical variables found in this dataframe. Please check "
126+ "variable format with pandas dtypes. "
118127 )
119128
129+ elif isinstance (variables , (str , int )):
130+ if is_categorical (X [variables ]) or is_object (X [variables ]):
131+ variables = [variables ]
132+ else :
133+ raise TypeError ("The variable entered is not categorical." )
134+
120135 else :
121- if any (X [variables ].select_dtypes (exclude = ["O" , "category" ]).columns ):
122- raise TypeError (
123- "Some of the variables are not categorical. Please cast them as object "
124- "or category before calling this transformer"
125- )
136+ if len (variables ) == 0 :
137+ raise ValueError ("The list of variables is empty." )
138+
139+ # check that user entered variables are of type categorical
140+ else :
141+ if len (X [variables ].select_dtypes (exclude = ["O" , "category" ]).columns ) > 0 :
142+ raise TypeError (
143+ "Some of the variables are not categorical. Please cast them as "
144+ "categorical or object before using this transformer."
145+ )
126146
127147 return variables
128148
0 commit comments