Update dev tooling: switch from mypy to ty, drop bandit, and raise the ruff target to Python 3.10

cod3licious · cod3licious · commit 639793a6e620 · 2026-01-06T23:55:52.000+01:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,35 +24,34 @@ dependencies = [
 
 [dependency-groups]
 dev = [
-    "bandit>=1.7.7",
     "ipython>=8.0.0",
     "matplotlib>=3.7.2",
     "mkdocs-material>=9.5.28",
-    "mypy>=1.7.1",
+    "ty>=0.0.9",
     "notebook>=6.5.0,<7.0",
     "poethepoet>=0.24.4",
     "pytest>=7.4.0",
     "pyupgrade>=3.9.0",
-    "ruff>=0.2.1",
+    "ruff>=0.14.0",
 ]
 
 [project.urls]
 Repository = "https://github.com/cod3licious/autofeat"
 Documentation = "https://cod3licious.github.io/autofeat"
 
 [tool.uv]
-required-version = ">=0.5.31"
+required-version = ">=0.7.13"
 
 
 [tool.poe.tasks]
 # run with `uv run poe format`
 format = "bash -c 'pyupgrade --py38-plus $(find **/*.py) && ruff check --fix . && ruff format .'"
-check = "bash -c 'ruff check . && mypy src/autofeat && bandit -c pyproject.toml -r .'"
+check = "bash -c 'ruff check . && ty check src'"
 test = "bash -c 'pytest tests'"
 
 
 [tool.ruff]
-target-version = "py38"
+target-version = "py310"
 line-length = 128
 
 # Exclude a variety of commonly ignored directories.
@@ -83,7 +82,6 @@ unfixable = ["B", "F841"]
 # Ignore a few rules that we consider too strict.
 ignore = ["E501", # Line too long
     "E741", # Ambiguous variable name: `l`
-    "PD901", # 'df' is a bad variable name
     "N999", # Invalid module name: '🏠_Home'
     "N802", "N803", "N806", # names should be lowercase
     "D1",  # D100 - D107: Missing docstrings
@@ -115,32 +113,9 @@ section-order = ["future", "standard-library", "third-party", "first-party", "te
 
 [tool.ruff.lint.flake8-import-conventions]
 
-[tool.mypy]
-plugins = ["numpy.typing.mypy_plugin"]
-
-[[tool.mypy.overrides]]
-module = [
-    "pandas.*",
-    "sklearn.*",
-    "joblib.*",
-    "scipy.*",
-    "numpy",
-    "numba",
-    "pandas.*",
-    "streamlit.*",
-    "matplotlib.*",
-    "IPython.*",
-    "plotly.*",
-    "seaborn.*",
-    "requests.*",
-    "sqlalchemy.*"
-]
-ignore_missing_imports = true
-
-[tool.bandit]
-targets = ["src/autofeat/"]
-recursive = true
-skips = ["B101"]
+[tool.ty.rules]
+no-matching-overload = "ignore"
+invalid-argument-type = "ignore"  # ty doesn't properly recognize the pandas DataFrame `columns` argument
 
 [tool.pytest.ini_options]
 minversion = "6.0"
diff --git a/src/autofeat/autofeat.py b/src/autofeat/autofeat.py
@@ -241,7 +241,7 @@ def _generate_features(self, df: pd.DataFrame, new_feat_cols: list) -> pd.DataFr
             logging.info(f"[AutoFeat] {len(new_feat_cols):5}/{len(new_feat_cols):5} new features ...done.")
         return df.join(pd.DataFrame(feat_array, columns=new_feat_cols, index=df.index))
 
-    def _X2df(self, X: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFrame:
+    def _X2df(self, X: np.ndarray | pd.DataFrame) -> pd.DataFrame:
         """
         Helper function that ensures correctness of the input data for classification tasks.
         Inputs:
@@ -264,6 +264,7 @@ def _X2df(self, X: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFrame:
             self.always_return_numpy = False
             df = self.transform(df)
             self.always_return_numpy = temp
+        assert isinstance(df, pd.DataFrame)
         return df
 
     def fit_transform(self, X: np.ndarray | pd.DataFrame, y: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFrame:
@@ -408,11 +409,12 @@ def fit_transform(self, X: np.ndarray | pd.DataFrame, y: np.ndarray | pd.DataFra
             self.prediction_model_ = model
             # sklearn requires a "classes_" attribute
             if self.problem_type == "classification":
+                assert hasattr(model, "classes_")
                 self.classes_ = model.classes_
             if self.verbose:
                 # for classification, model.coefs_ is n_classes x n_features, but we need n_features
                 coefs = model.coef_ if self.problem_type == "regression" else np.max(np.abs(model.coef_), axis=0)
-                weights = dict(zip(self.good_cols_, coefs))
+                weights = dict(zip(self.good_cols_, coefs, strict=True))
                 logging.info("[AutoFeat] Trained model: largest coefficients:")
                 logging.info(model.intercept_)
                 for c in sorted(weights, key=lambda x: abs(weights[x]), reverse=True):
@@ -498,6 +500,7 @@ def score(self, X: np.ndarray | pd.DataFrame, y: np.ndarray | pd.DataFrame) -> n
             temp = self.always_return_numpy
             self.always_return_numpy = False
             df = self.transform(df)
+            assert isinstance(df, pd.DataFrame)
             self.always_return_numpy = temp
         return self.prediction_model_.score(df[self.good_cols_].to_numpy(), target)
 
@@ -580,5 +583,6 @@ def predict_proba(self, X: np.ndarray | pd.DataFrame) -> np.ndarray | pd.DataFra
             - y_pred: predicted targets probabilities returned by prediction_model.predict_proba()
         """
         check_is_fitted(self, ["prediction_model_"])
+        assert hasattr(self.prediction_model_, "predict_proba") and callable(self.prediction_model_.predict_proba)
         df = self._X2df(X)
-        return self.prediction_model_.predict_proba(df[self.good_cols_].to_numpy())
+        return self.prediction_model_.predict_proba(df[self.good_cols_].to_numpy())  # type: ignore
diff --git a/src/autofeat/feateng.py b/src/autofeat/feateng.py
@@ -6,9 +6,9 @@
 import logging
 import operator as op
 import re
+from collections.abc import Callable
 from functools import reduce
 from itertools import combinations, product
-from typing import Callable
 
 import numba as nb
 import numpy as np
@@ -406,6 +406,7 @@ def get_feature_combinations(feature_tuples: list) -> tuple[list, set]:
                     ),
                     axis=1,
                 ),
+                strict=True,
             ),
         )
         cols = [c for c in cols if corrs[c] < 0.9]
diff --git a/src/autofeat/featsel.py b/src/autofeat/featsel.py
@@ -82,7 +82,7 @@ def _noise_filtering(
             # model.fit(X, target)
         # for classification, model.coefs_ is n_classes x n_features, but we need n_features
         coefs = np.abs(model.coef_) if problem_type == "regression" else np.max(np.abs(model.coef_), axis=0)
-        weights = dict(zip(good_cols, coefs[: len(good_cols)]))
+        weights = dict(zip(good_cols, coefs[: len(good_cols)], strict=True))
         # only include features that are more important than our known noise features
         noise_w_thr = np.max(coefs[n_feat:])
         good_cols = [c for c in good_cols if weights[c] > noise_w_thr]
@@ -159,7 +159,7 @@ def _select_features_1run(df: pd.DataFrame, target: np.ndarray, problem_type: st
             current_cols.extend(initial_cols)
             # for classification, model.coefs_ is n_classes x n_features, but we need n_features
             coefs = np.abs(model.coef_) if problem_type == "regression" else np.max(np.abs(model.coef_), axis=0)
-            weights = dict(zip(current_cols, coefs[: len(current_cols)]))
+            weights = dict(zip(current_cols, coefs[: len(current_cols)], strict=True))
             # only include features that are more important than our known noise features
             noise_w_thr = np.max(coefs[len(current_cols) :])
             good_cols_set.update([c for c in weights if abs(weights[c]) > noise_w_thr])
diff --git a/uv.lock b/uv.lock