Skip to content

Commit 639793a

Browse files
committed
update dev tooling and switch from mypy to ty
1 parent 3b99bbb commit 639793a

File tree

5 files changed

+6167
-39
lines changed

5 files changed

+6167
-39
lines changed

pyproject.toml

Lines changed: 8 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -24,35 +24,34 @@ dependencies = [
2424

2525
[dependency-groups]
2626
dev = [
27-
"bandit>=1.7.7",
2827
"ipython>=8.0.0",
2928
"matplotlib>=3.7.2",
3029
"mkdocs-material>=9.5.28",
31-
"mypy>=1.7.1",
30+
"ty>=0.0.9",
3231
"notebook>=6.5.0,<7.0",
3332
"poethepoet>=0.24.4",
3433
"pytest>=7.4.0",
3534
"pyupgrade>=3.9.0",
36-
"ruff>=0.2.1",
35+
"ruff>=0.14.0",
3736
]
3837

3938
[project.urls]
4039
Repository = "https://github.com/cod3licious/autofeat"
4140
Documentation = "https://cod3licious.github.io/autofeat"
4241

4342
[tool.uv]
44-
required-version = ">=0.5.31"
43+
required-version = ">=0.7.13"
4544

4645

4746
[tool.poe.tasks]
4847
# run with `uv run poe format`
4948
format = "bash -c 'pyupgrade --py38-plus $(find **/*.py) && ruff check --fix . && ruff format .'"
50-
check = "bash -c 'ruff check . && mypy src/autofeat && bandit -c pyproject.toml -r .'"
49+
check = "bash -c 'ruff check . && ty check src'"
5150
test = "bash -c 'pytest tests'"
5251

5352

5453
[tool.ruff]
55-
target-version = "py38"
54+
target-version = "py310"
5655
line-length = 128
5756

5857
# Exclude a variety of commonly ignored directories.
@@ -83,7 +82,6 @@ unfixable = ["B", "F841"]
8382
# Ignore a few rules that we consider too strict.
8483
ignore = ["E501", # Line too long
8584
"E741", # Ambiguous variable name: `l`
86-
"PD901", # 'df' is a bad variable name
8785
"N999", # Invalid module name: '🏠_Home'
8886
"N802", "N803", "N806", # names should be lowercase
8987
"D1", # D100 - D107: Missing docstrings
@@ -115,32 +113,9 @@ section-order = ["future", "standard-library", "third-party", "first-party", "te
115113

116114
[tool.ruff.lint.flake8-import-conventions]
117115

118-
[tool.mypy]
119-
plugins = ["numpy.typing.mypy_plugin"]
120-
121-
[[tool.mypy.overrides]]
122-
module = [
123-
"pandas.*",
124-
"sklearn.*",
125-
"joblib.*",
126-
"scipy.*",
127-
"numpy",
128-
"numba",
129-
"pandas.*",
130-
"streamlit.*",
131-
"matplotlib.*",
132-
"IPython.*",
133-
"plotly.*",
134-
"seaborn.*",
135-
"requests.*",
136-
"sqlalchemy.*"
137-
]
138-
ignore_missing_imports = true
139-
140-
[tool.bandit]
141-
targets = ["src/autofeat/"]
142-
recursive = true
143-
skips = ["B101"]
116+
[tool.ty.rules]
117+
no-matching-overload = "ignore"
118+
invalid-argument-type = "ignore" # doesn't properly recognize pandas df columns argument
144119

145120
[tool.pytest.ini_options]
146121
minversion = "6.0"

src/autofeat/autofeat.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def _generate_features(self, df: pd.DataFrame, new_feat_cols: list) -> pd.DataFr
241241
logging.info(f"[AutoFeat] {len(new_feat_cols):5}/{len(new_feat_cols):5} new features ...done.")
242242
return df.join(pd.DataFrame(feat_array, columns=new_feat_cols, index=df.index))
243243

244-
def _X2df(self, X: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFrame:
244+
def _X2df(self, X: np.ndarray | pd.DataFrame) -> pd.DataFrame:
245245
"""
246246
Helper function that ensures correctness of the input data for classification tasks.
247247
Inputs:
@@ -264,6 +264,7 @@ def _X2df(self, X: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFrame:
264264
self.always_return_numpy = False
265265
df = self.transform(df)
266266
self.always_return_numpy = temp
267+
assert isinstance(df, pd.DataFrame)
267268
return df
268269

269270
def fit_transform(self, X: np.ndarray | pd.DataFrame, y: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFrame:
@@ -408,11 +409,12 @@ def fit_transform(self, X: np.ndarray | pd.DataFrame, y: np.ndarray | pd.DataFra
408409
self.prediction_model_ = model
409410
# sklearn requires a "classes_" attribute
410411
if self.problem_type == "classification":
412+
assert hasattr(model, "classes_")
411413
self.classes_ = model.classes_
412414
if self.verbose:
413415
# for classification, model.coef_ is n_classes x n_features, but we need a single weight per feature (n_features)
414416
coefs = model.coef_ if self.problem_type == "regression" else np.max(np.abs(model.coef_), axis=0)
415-
weights = dict(zip(self.good_cols_, coefs))
417+
weights = dict(zip(self.good_cols_, coefs, strict=True))
416418
logging.info("[AutoFeat] Trained model: largest coefficients:")
417419
logging.info(model.intercept_)
418420
for c in sorted(weights, key=lambda x: abs(weights[x]), reverse=True):
@@ -498,6 +500,7 @@ def score(self, X: np.ndarray | pd.DataFrame, y: np.ndarray | pd.DataFrame) -> n
498500
temp = self.always_return_numpy
499501
self.always_return_numpy = False
500502
df = self.transform(df)
503+
assert isinstance(df, pd.DataFrame)
501504
self.always_return_numpy = temp
502505
return self.prediction_model_.score(df[self.good_cols_].to_numpy(), target)
503506

@@ -580,5 +583,6 @@ def predict_proba(self, X: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFra
580583
- y_pred: predicted targets probabilities returned by prediction_model.predict_proba()
581584
"""
582585
check_is_fitted(self, ["prediction_model_"])
586+
assert hasattr(self.prediction_model_, "predict_proba") and callable(self.prediction_model_.predict_proba)
583587
df = self._X2df(X)
584-
return self.prediction_model_.predict_proba(df[self.good_cols_].to_numpy())
588+
return self.prediction_model_.predict_proba(df[self.good_cols_].to_numpy()) # type: ignore

src/autofeat/feateng.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import logging
77
import operator as op
88
import re
9+
from collections.abc import Callable
910
from functools import reduce
1011
from itertools import combinations, product
11-
from typing import Callable
1212

1313
import numba as nb
1414
import numpy as np
@@ -406,6 +406,7 @@ def get_feature_combinations(feature_tuples: list) -> tuple[list, set]:
406406
),
407407
axis=1,
408408
),
409+
strict=True,
409410
),
410411
)
411412
cols = [c for c in cols if corrs[c] < 0.9]

src/autofeat/featsel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def _noise_filtering(
8282
# model.fit(X, target)
8383
# for classification, model.coef_ is n_classes x n_features, but we need a single weight per feature (n_features)
8484
coefs = np.abs(model.coef_) if problem_type == "regression" else np.max(np.abs(model.coef_), axis=0)
85-
weights = dict(zip(good_cols, coefs[: len(good_cols)]))
85+
weights = dict(zip(good_cols, coefs[: len(good_cols)], strict=True))
8686
# only include features that are more important than our known noise features
8787
noise_w_thr = np.max(coefs[n_feat:])
8888
good_cols = [c for c in good_cols if weights[c] > noise_w_thr]
@@ -159,7 +159,7 @@ def _select_features_1run(df: pd.DataFrame, target: np.ndarray, problem_type: st
159159
current_cols.extend(initial_cols)
160160
# for classification, model.coef_ is n_classes x n_features, but we need a single weight per feature (n_features)
161161
coefs = np.abs(model.coef_) if problem_type == "regression" else np.max(np.abs(model.coef_), axis=0)
162-
weights = dict(zip(current_cols, coefs[: len(current_cols)]))
162+
weights = dict(zip(current_cols, coefs[: len(current_cols)], strict=True))
163163
# only include features that are more important than our known noise features
164164
noise_w_thr = np.max(coefs[len(current_cols) :])
165165
good_cols_set.update([c for c in weights if abs(weights[c]) > noise_w_thr])

0 commit comments

Comments
 (0)