
Commit eaf35b6

Merge pull request #426 from s-banach/pandas-lints
Pandas lints
2 parents 5cfe457 + 8b82514 commit eaf35b6

33 files changed (+189, -189 lines)
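The changes are mechanical and fall into three recurring substitutions: isnull()/notnull() become isna()/notna(), bare .values becomes an explicit .to_numpy() (or .array where the pandas ExtensionArray is wanted), and inplace=True calls are replaced by reassignment. A minimal illustrative sketch of the three patterns (not code from the repository):

import numpy as np
import pandas as pd

s = pd.Series(['a', None, 'b'])

# 1. isnull/notnull are exact aliases of isna/notna; the lint standardises on the short form
assert s.isna().equals(s.isnull())

# 2. .values is ambiguous (ndarray or ExtensionArray depending on dtype); .to_numpy() is explicit
arr = s.to_numpy()            # instead of s.values

# 3. inplace=True is discouraged; keep the returned object instead
s = s.fillna('missing')       # instead of s.fillna('missing', inplace=True)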

category_encoders/base_contrast_encoder.py

Lines changed: 4 additions & 4 deletions
@@ -86,14 +86,14 @@ def _transform(self, X) -> pd.DataFrame:
         return X

     @abstractmethod
-    def get_contrast_matrix(self, values_to_encode: np.array) -> ContrastMatrix:
+    def get_contrast_matrix(self, values_to_encode: np.ndarray) -> ContrastMatrix:
         raise NotImplementedError

     def fit_contrast_coding(self, col, values, handle_missing, handle_unknown):
         if handle_missing == 'value':
             values = values[values > 0]

-        values_to_encode = values.values
+        values_to_encode = values.to_numpy()

         if len(values) < 2:
             return pd.DataFrame(index=values_to_encode)
@@ -119,7 +119,7 @@ def fit_contrast_coding(self, col, values, handle_missing, handle_unknown):

     @staticmethod
     def transform_contrast_coding(X, mapping):
-        cols = X.columns.values.tolist()
+        cols = X.columns.tolist()

         # See issue 370 if it is necessary to add an intercept or not.
         X['intercept'] = pd.Series([1] * X.shape[0], index=X.index)
@@ -132,7 +132,7 @@ def transform_contrast_coding(X, mapping):

             # reindex actually applies the mapping
             base_df = mod.reindex(X[col])
-            base_df.set_index(X.index, inplace=True)
+            base_df = base_df.set_index(X.index)
             X = pd.concat([base_df, X], axis=1)

             old_column_index = cols.index(col)
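One change here is a type annotation rather than a lint: np.array is a constructor function, not a type, so np.ndarray is what static type checkers expect in a signature. A hypothetical illustration (the function name is made up):

import numpy as np

def first_row(matrix: np.ndarray) -> np.ndarray:
    # np.ndarray is the array type; np.array is only the factory that builds one
    return matrix[0]

first_row(np.array([[1, 2], [3, 4]]))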

category_encoders/basen.py

Lines changed: 11 additions & 11 deletions
@@ -188,20 +188,20 @@ def inverse_transform(self, X_in):
             raise ValueError(f'Unexpected input dimension {X.shape[1]}, expected {self._dim}')

         if not list(self.cols):
-            return X if self.return_df else X.values
+            return X if self.return_df else X.to_numpy()

         for switch in self.ordinal_encoder.mapping:
             column_mapping = switch.get('mapping')
-            inverse = pd.Series(data=column_mapping.index, index=column_mapping.values)
+            inverse = pd.Series(data=column_mapping.index, index=column_mapping.array)
             X[switch.get('col')] = X[switch.get('col')].map(inverse).astype(switch.get('data_type'))

         if self.handle_unknown == 'return_nan' and self.handle_missing == 'return_nan':
             for col in self.cols:
-                if X[switch.get('col')].isnull().any():
+                if X[switch.get('col')].isna().any():
                     warnings.warn("inverse_transform is not supported because transform impute "
                                   f"the unknown category nan when encode {col}")

-        return X if self.return_df else X.values
+        return X if self.return_df else X.to_numpy()

     def calc_required_digits(self, values):
         # figure out how many digits we need to represent the classes present
@@ -212,7 +212,7 @@ def calc_required_digits(self, values):

         return digits

-    def basen_encode(self, X_in, cols=None):
+    def basen_encode(self, X_in: pd.DataFrame, cols=None):
         """
         Basen encoding encodes the integers as basen code with one column per digit.

@@ -230,22 +230,22 @@ def basen_encode(self, X_in, cols=None):

         X = X_in.copy(deep=True)

-        cols = X.columns.values.tolist()
+        cols = X.columns.tolist()

         for switch in self.mapping:
             col = switch.get('col')
             mod = switch.get('mapping')

             base_df = mod.reindex(X[col])
-            base_df.set_index(X.index, inplace=True)
+            base_df = base_df.set_index(X.index)
             X = pd.concat([base_df, X], axis=1)

             old_column_index = cols.index(col)
             cols[old_column_index: old_column_index + 1] = mod.columns

         return X.reindex(columns=cols)

-    def basen_to_integer(self, X, cols, base):
+    def basen_to_integer(self, X: pd.DataFrame, cols, base):
         """
         Convert basen code as integers.

@@ -263,7 +263,7 @@ def basen_to_integer(self, X, cols, base):
         numerical: DataFrame

         """
-        out_cols = X.columns.values.tolist()
+        out_cols = X.columns.tolist()

         for col in cols:
             col_list = [col0 for col0 in out_cols if re.match(re.escape(str(col))+'_\\d+', str(col0))]
@@ -275,8 +275,8 @@ def basen_to_integer(self, X, cols, base):
             len0 = len(col_list)
             value_array = np.array([base ** (len0 - 1 - i) for i in range(len0)])
             X.insert(insert_at, col, np.dot(X[col_list].values, value_array.T))
-            X.drop(col_list, axis=1, inplace=True)
-            out_cols = X.columns.values.tolist()
+            X = X.drop(col_list, axis=1)
+            out_cols = X.columns.tolist()

         return X
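In inverse_transform the mapping's backing data is now taken from column_mapping.array instead of .values. On a Series, .values returns a NumPy array for NumPy-backed dtypes but an ExtensionArray for extension dtypes, so its return type is ambiguous; .array always returns the ExtensionArray, and .to_numpy() always returns an ndarray. A small standalone sketch of the distinction (not encoder code):

import pandas as pd

plain = pd.Series([1, 2, 3])                       # NumPy-backed int64
nullable = pd.Series([1, None, 3], dtype='Int64')  # extension (nullable) dtype

print(type(plain.values))         # numpy.ndarray
print(type(nullable.values))      # pandas IntegerArray -- same accessor, different kind of object
print(type(plain.array))          # always an ExtensionArray (a NumPy-backed wrapper here)
print(type(nullable.to_numpy()))  # always numpy.ndarray, conversion made explicit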

category_encoders/cat_boost.py

Lines changed: 2 additions & 2 deletions
@@ -121,7 +121,7 @@ def _transform(self, X, y=None):
             unique_train = colmap.index
             unseen_values = pd.Series([x for x in X[col].unique() if x not in unique_train], dtype=unique_train.dtype)

-            is_nan = X[col].isnull()
+            is_nan = X[col].isna()
             is_unknown_value = X[col].isin(unseen_values.dropna().astype(object))

             if self.handle_unknown == 'error' and is_unknown_value.any():
@@ -153,7 +153,7 @@ def _transform(self, X, y=None):
             if self.handle_missing == 'value':
                 # only set value if there are actually missing values.
                 # In case of pd.Categorical columns setting values that are not seen in pd.Categorical gives an error.
-                nan_cond = is_nan & unseen_values.isnull().any()
+                nan_cond = is_nan & unseen_values.isna().any()
                 if nan_cond.any():
                     X.loc[nan_cond, col] = self._mean
             elif self.handle_missing == 'return_nan':

category_encoders/count.py

Lines changed: 6 additions & 6 deletions
@@ -154,11 +154,11 @@ def _transform(self, X):
                 and X[col].isna().any()
                 and self._handle_missing[col] != 'return_nan'
             ):
-                X[col].replace(np.nan, 0, inplace=True)
+                X[col] = X[col].replace(np.nan, 0)

             elif (
                 self._handle_unknown[col] == 'error'
-                and X[col].isnull().any()
+                and X[col].isna().any()
             ):
                 raise ValueError(f'Missing data found in column {col} at transform time.')
         return X
@@ -168,7 +168,7 @@ def _fit_count_encode(self, X_in, y):
         X = X_in.copy(deep=True)

         if self.cols is None:
-            self.cols = X.columns.values
+            self.cols = X.columns

         self.mapping = {}

@@ -202,20 +202,20 @@ def combine_min_categories(self, X):
            elif self._combine_min_nan_groups[col] == 'force':
                min_groups_idx = (
                    (mapper < self._min_group_size[col])
-                    | (mapper.index.isnull())
+                    | (mapper.index.isna())
                )
            else:
                min_groups_idx = (
                    (mapper < self._min_group_size[col])
-                    & (~mapper.index.isnull())
+                    & (~mapper.index.isna())
                )

            min_groups_sum = mapper.loc[min_groups_idx].sum()

            if (
                min_groups_sum > 0
                and min_groups_idx.sum() > 1
-                and not min_groups_idx.loc[~min_groups_idx.index.isnull()].all()
+                and not min_groups_idx.loc[~min_groups_idx.index.isna()].all()
            ):
                if isinstance(self._min_group_name[col], str):
                    min_group_mapper_name = self._min_group_name[col]
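Here self.cols = X.columns replaces X.columns.values: a pandas Index already supports iteration, membership tests and .tolist(), so there is no need to drop down to a raw ndarray just to hold column names. For example (illustrative only):

import pandas as pd

df = pd.DataFrame({'colour': ['red', 'blue'], 'size': [1, 2]})

cols = df.columns          # an Index; no .values needed
print('size' in cols)      # membership works directly
print(list(cols))          # iteration works directly
print(cols.tolist())       # .tolist() exists on the Index itself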

category_encoders/gray.py

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ def _fit(self, X, y=None, **kwargs):
             col = col_to_encode["col"]
             bin_mapping = col_to_encode["mapping"]
             n_cols_out = bin_mapping.shape[1]
-            null_cond = (bin_mapping.index < 0) | (bin_mapping.isnull().all(1))
+            null_cond = (bin_mapping.index < 0) | (bin_mapping.isna().all(1))
             map_null = bin_mapping[null_cond]
             map_non_null = bin_mapping[~null_cond].copy()
             ordinal_mapping = [m for m in self.ordinal_encoder.mapping if m.get("col") == col]

category_encoders/hashing.py

Lines changed: 4 additions & 4 deletions
@@ -246,7 +246,7 @@ def _transform_single_cpu(self, X, override_return_df=False):
         if self.return_df or override_return_df:
             return X
         else:
-            return X.values
+            return X.to_numpy()

     @staticmethod
     def hashing_trick(X_in, hashing_method='md5', N=2, cols=None, make_copy=False):
@@ -294,11 +294,11 @@ def hashing_trick(X_in, hashing_method='md5', N=2, cols=None, make_copy=False):
            X = X_in

        if cols is None:
-            cols = X.columns.values
+            cols = X.columns

        def hash_fn(x):
            tmp = [0 for _ in range(N)]
-            for val in x.values:
+            for val in x.array:
                if val is not None:
                    hasher = hashlib.new(hashing_method)
                    if sys.version_info[0] == 2:
@@ -311,7 +311,7 @@ def hash_fn(x):
        new_cols = [f'col_{d}' for d in range(N)]

        X_cat = X.loc[:, cols]
-        X_num = X.loc[:, [x for x in X.columns.values if x not in cols]]
+        X_num = X.loc[:, [x for x in X.columns if x not in cols]]

        X_cat = X_cat.apply(hash_fn, axis=1, result_type='expand')
        X_cat.columns = new_cols

category_encoders/james_stein.py

Lines changed: 2 additions & 2 deletions
@@ -169,7 +169,7 @@ def _fit(self, X, y, **kwargs):
             unique = y.unique()
             if len(unique) != 2:
                 raise ValueError("The target column y must be binary. But the target contains " + str(len(unique)) + " unique value(s).")
-            if y.isnull().any():
+            if y.isna().any():
                 raise ValueError("The target column y must not contain missing values.")
             if np.max(unique) < 1:
                 raise ValueError("The target column y must be binary with values {0, 1}. Value 1 was not found in the target.")
@@ -357,7 +357,7 @@ def get_best_sigma(sigma, mu_k, sigma_k, K):
        crosstable['E-A+'] = stats['count'] - stats['sum']
        crosstable['E+A-'] = global_sum - stats['sum']
        crosstable['E+A+'] = stats['sum']
-        index = crosstable.index.values
+        index = crosstable.index
        crosstable = np.array(crosstable, dtype=np.float32)  # The argument unites the types into float

        # Count of contingency tables.

category_encoders/leave_one_out.py

Lines changed: 4 additions & 4 deletions
@@ -110,7 +110,7 @@ def fit_leave_one_out(self, X_in, y, cols=None):
        X = X_in.copy(deep=True)

        if cols is None:
-            cols = X.columns.values
+            cols = X.columns

        self._mean = y.mean()

@@ -143,7 +143,7 @@ def transform_leave_one_out(self, X, y, mapping=None):
            unique_train = colmap.index
            unseen_values = pd.Series([x for x in X[col].unique() if x not in unique_train], dtype=unique_train.dtype)

-            is_nan = X[col].isnull()
+            is_nan = X[col].isna()
            is_unknown_value = X[col].isin(unseen_values.dropna().astype(object))

            if X[col].dtype.name == 'category':  # Pandas 0.24 tries hard to preserve categorical data type
@@ -161,15 +161,15 @@ def transform_leave_one_out(self, X, y, mapping=None):
                # excluding this row's y, it's (sum - y) / (count - 1)
                level_means = (X[col].map(colmap['sum']) - y) / (X[col].map(colmap['count']) - 1)
                # The 'where' fills in singleton levels (count = 1 -> div by 0) with the global mean
-                X[col] = level_means.where(X[col].map(colmap['count'][level_notunique]).notnull(), self._mean)
+                X[col] = level_means.where(X[col].map(colmap['count'][level_notunique]).notna(), self._mean)

            if self.handle_unknown == 'value':
                X.loc[is_unknown_value, col] = self._mean
            elif self.handle_unknown == 'return_nan':
                X.loc[is_unknown_value, col] = np.nan

            if self.handle_missing == 'value':
-                X.loc[is_nan & unseen_values.isnull().any(), col] = self._mean
+                X.loc[is_nan & unseen_values.isna().any(), col] = self._mean
            elif self.handle_missing == 'return_nan':
                X.loc[is_nan, col] = np.nan


category_encoders/one_hot.py

Lines changed: 7 additions & 7 deletions
@@ -227,7 +227,7 @@ def inverse_transform(self, X_in):
            raise ValueError(f'Unexpected input dimension {X.shape[1]}, expected {self._dim}')

        if not list(self.cols):
-            return X if self.return_df else X.values
+            return X if self.return_df else X.to_numpy()

        for switch in self.ordinal_encoder.mapping:
            column_mapping = switch.get('mapping')
@@ -236,11 +236,11 @@ def inverse_transform(self, X_in):

        if self.handle_unknown == 'return_nan' and self.handle_missing == 'return_nan':
            for col in self.cols:
-                if X[switch.get('col')].isnull().any():
+                if X[switch.get('col')].isna().any():
                    warnings.warn("inverse_transform is not supported because transform impute "
                                  f"the unknown category nan when encode {col}")

-        return X if self.return_df else X.values
+        return X if self.return_df else X.to_numpy()

    def get_dummies(self, X_in):
        """
@@ -258,7 +258,7 @@ def get_dummies(self, X_in):

        X = X_in.copy(deep=True)

-        cols = X.columns.values.tolist()
+        cols = X.columns.tolist()

        for switch in self.mapping:
            col = switch.get('col')
@@ -290,7 +290,7 @@ def reverse_dummies(self, X, mapping):
        numerical: DataFrame

        """
-        out_cols = X.columns.values.tolist()
+        out_cols = X.columns.tolist()
        mapped_columns = []
        for switch in mapping:
            col = switch.get('col')
@@ -304,7 +304,7 @@ def reverse_dummies(self, X, mapping):
                val = positive_indexes[i]
                X.loc[X[existing_col] == 1, col] = val
                mapped_columns.append(existing_col)
-            X.drop(mod.columns, axis=1, inplace=True)
-            out_cols = X.columns.values.tolist()
+            X = X.drop(mod.columns, axis=1)
+            out_cols = X.columns.tolist()

        return X
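The reverse_dummies change shows the pattern used throughout the PR for removing inplace=True: methods such as drop, fillna, replace and set_index return a new object, and reassigning that result is clearer and keeps method chaining available. A brief sketch, not taken from the library:

import pandas as pd

df = pd.DataFrame({'a_0': [1, 0], 'a_1': [0, 1], 'b': [5, 6]})

# discouraged: df.drop(['a_0', 'a_1'], axis=1, inplace=True)
df = df.drop(['a_0', 'a_1'], axis=1)

# reassignment also allows chaining several steps
df = df.rename(columns={'b': 'value'}).reset_index(drop=True)
print(df.columns.tolist())   # ['value']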

category_encoders/ordinal.py

Lines changed: 8 additions & 8 deletions
@@ -151,7 +151,7 @@ def inverse_transform(self, X_in):
            raise ValueError(f'Unexpected input dimension {X.shape[1]}, expected {self._dim}')

        if not list(self.cols):
-            return X if self.return_df else X.values
+            return X if self.return_df else X.to_numpy()

        if self.handle_unknown == 'value':
            for col in self.cols:
@@ -161,7 +161,7 @@ def inverse_transform(self, X_in):

        if self.handle_unknown == 'return_nan' and self.handle_missing == 'return_nan':
            for col in self.cols:
-                if X[col].isnull().any():
+                if X[col].isna().any():
                    warnings.warn("inverse_transform is not supported because transform impute "
                                  f"the unknown category nan when encode {col}")

@@ -170,7 +170,7 @@ def inverse_transform(self, X_in):
            inverse = pd.Series(data=column_mapping.index, index=column_mapping.values)
            X[switch.get('col')] = X[switch.get('col')].map(inverse).astype(switch.get('data_type'))

-        return X if self.return_df else X.values
+        return X if self.return_df else X.to_numpy()

    @staticmethod
    def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', handle_missing='value'):
@@ -185,7 +185,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', handle_missing='value'):
        X = X_in.copy(deep=True)

        if cols is None:
-            cols = X.columns.values
+            cols = X.columns

        if mapping is not None:
            mapping_out = mapping
@@ -197,7 +197,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', handle_missing='value'):
                X[column] = pd.Series([el if el is not None else np.NaN for el in X[column]], index=X[column].index)
                X[column] = X[column].map(col_mapping)
                if util.is_category(X[column].dtype):
-                    nan_identity = col_mapping.loc[col_mapping.index.isna()].values[0]
+                    nan_identity = col_mapping.loc[col_mapping.index.isna()].array[0]
                    X[column] = X[column].cat.add_categories(nan_identity)
                    X[column] = X[column].fillna(nan_identity)
                try:
@@ -206,9 +206,9 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', handle_missing='value'):
                    X[column] = X[column].astype(float)

                if handle_unknown == 'value':
-                    X[column].fillna(-1, inplace=True)
+                    X[column] = X[column].fillna(-1)
                elif handle_unknown == 'error':
-                    missing = X[column].isnull()
+                    missing = X[column].isna()
                    if any(missing):
                        raise ValueError(f'Unexpected categories found in column {column}')

@@ -237,7 +237,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', handle_missing='value'):

                data = pd.Series(index=index, data=range(1, len(index) + 1))

-                if handle_missing == 'value' and ~data.index.isnull().any():
+                if handle_missing == 'value' and ~data.index.isna().any():
                    data.loc[nan_identity] = -2
                elif handle_missing == 'return_nan':
                    data.loc[nan_identity] = -2
