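Fix typing issues in tests/valuation

Besides adding annotations, casts and explicit generic parameters, this
change also removes the xfail mark from the non-normalized case of
test_classwise_shapley and makes the TestBatchSampler stubs
(sample_limit, log_weight) return concrete values instead of `...`.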

mdbenito · mdbenito · commit 010a9827fbe2 · 2025-09-06T15:10:19.000+02:00
diff --git a/tests/valuation/conftest.py b/tests/valuation/conftest.py
@@ -12,9 +12,9 @@
 from sklearn.utils import Bunch
 
 from pydvl.parallel import JoblibParallelBackend
-from pydvl.utils import SupervisedModel
 from pydvl.utils.caching import InMemoryCacheBackend
 from pydvl.utils.status import Status
+from pydvl.utils.types import SupervisedModel
 from pydvl.valuation.dataset import Dataset
 from pydvl.valuation.games import (
     AsymmetricVotingGame,
@@ -59,8 +59,8 @@ def polynomial_dataset(coefficients: np.ndarray):
     y = np.random.normal(loc=locs, scale=0.3)
     db = Bunch()
     db.data, db.target = x.reshape(-1, 1), y
-    poly = [f"{c} x^{i}" for i, c in enumerate(coefficients)]
-    poly = " + ".join(poly)
+    monomials = [f"{c} x^{i}" for i, c in enumerate(coefficients)]
+    poly = " + ".join(monomials)
     db.DESCR = f"$y \\sim N({poly}, 1)$"
     db.feature_names = ["x"]
     db.target_names = ["y"]
@@ -135,7 +135,7 @@ def score(self, x: NDArray, y: NDArray | None = None) -> float:
     model = DummyModel(data)
 
     x, _ = data.data()
-    scorer = SupervisedScorer(
+    scorer = SupervisedScorer[SupervisedModel, NDArray](
         model, test_data=test_data, default=0, range=(0, x.sum() / x.max())
     )
 
@@ -211,7 +211,9 @@ def linear_shapley(
     train, test = linear_dataset
     if utility is None:
         scorer = compose_score(
-            SupervisedScorer("r2", test, default=-np.inf), sigmoid, name=scorer_name
+            SupervisedScorer[SupervisedModel, NDArray]("r2", test, default=-np.inf),
+            sigmoid,
+            name=scorer_name,
         )
         utility = ModelUtility(LinearRegression(), scorer=scorer).with_dataset(train)
     if exact_result is None:
diff --git a/tests/valuation/methods/conftest.py b/tests/valuation/methods/conftest.py
@@ -1,18 +1,22 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 import pytest
+from numpy.typing import NDArray
 from typing_extensions import Self
 
 from pydvl.utils import Seed, try_torch_import
 from pydvl.valuation.dataset import Dataset
-from pydvl.valuation.games import DummyGameDataset, MinerGame, ShoesGame
+from pydvl.valuation.games import MinerGame, ShoesGame
 from pydvl.valuation.scorers import ClasswiseSupervisedScorer
 from pydvl.valuation.utility.classwise import ClasswiseModelUtility
 
-torch = try_torch_import()
-
-if torch is None:
-    pytest.skip("PyTorch not available", allow_module_level=True)
+if TYPE_CHECKING:
+    import torch
+else:
+    if (torch := try_torch_import()) is None:
+        pytest.skip("PyTorch not available", allow_module_level=True)
 
 
 class TorchLinearClassifier:
@@ -48,8 +52,8 @@ def __init__(self, n_estimators: int, max_samples: float, random_state: Seed):
         self.n_estimators = n_estimators
         self.max_samples = max_samples
         self.random_state = random_state
-        self.estimators_ = []
-        self.estimators_samples_ = []
+        self.estimators_: list[TorchLinearClassifier] = []
+        self.estimators_samples_: list[NDArray] = []
 
     def fit(self, X, y):
         n_samples = X.shape[0]
@@ -129,21 +133,18 @@ def tensor_classwise_utility(tensor_test_dataset):
     )
 
 
-class TensorDummyGameDataset(DummyGameDataset):
+class TensorDummyGameDataset(Dataset[torch.Tensor]):
-    """Extends DummyGameDataset to use PyTorch tensors instead of NumPy arrays."""
+    """Counterpart of DummyGameDataset that uses PyTorch tensors instead of NumPy arrays."""
 
     def __init__(self, n_players: int, description: str = ""):
         x = torch.arange(0, n_players, 1).reshape(-1, 1).float()
         nil = torch.zeros_like(x)
-        (
-            Dataset.__init__(
-                self,
-                x,
-                nil.clone(),
-                feature_names=["x"],
-                target_names=["y"],
-                description=description,
-            ),
+        super().__init__(
+            x,
+            nil.clone(),
+            feature_names=["x"],
+            target_names=["y"],
+            description=description,
         )
 
 
@@ -152,12 +153,12 @@ class TensorMinerGame(MinerGame):
 
     def __init__(self, n_players: int):
         super().__init__(n_players)
-        self.data = TensorDummyGameDataset(self.n_players, "Tensor Miner Game dataset")
+        self.data = TensorDummyGameDataset(self.n_players, "Tensor Miner Game dataset")  # type: ignore[assignment]
 
 
 class TensorShoesGame(ShoesGame):
     """Extends ShoesGame to use PyTorch tensors."""
 
     def __init__(self, left: int, right: int):
         super().__init__(left, right)
-        self.data = TensorDummyGameDataset(self.n_players, "Tensor Shoes Game dataset")
+        self.data = TensorDummyGameDataset(self.n_players, "Tensor Shoes Game dataset")  # type: ignore[assignment]
diff --git a/tests/valuation/methods/test_classwise_shapley.py b/tests/valuation/methods/test_classwise_shapley.py
@@ -19,6 +19,7 @@
 from __future__ import annotations
 
 import logging
+from typing import cast
 
 import numpy as np
 import pytest
@@ -111,7 +112,7 @@ def predict(self, x: NDArray) -> NDArray:
             raise AttributeError("Model not fitted")
 
         probs = self._beta * x
-        return np.clip(np.round(probs + 1e-10), 0, 1).astype(int)
+        return cast(NDArray, np.clip(np.round(probs + 1e-10), 0, 1).astype(int))
 
     def score(self, x: NDArray, y: NDArray | None) -> float:
         assert y is not None
@@ -154,10 +155,7 @@ def test_dataset_manual_derivation(train_dataset_manual_derivation) -> Dataset:
 @pytest.mark.parametrize("n_samples", [100], ids=lambda x: f"n_samples={x}")
 @pytest.mark.parametrize(
     "exact_solution",
-    [
-        pytest.param("classwise_shapley_exact_solution", marks=[pytest.mark.xfail]),
-        "classwise_shapley_exact_solution_normalized",
-    ],
+    ["classwise_shapley_exact_solution", "classwise_shapley_exact_solution_normalized"],
 )
 def test_classwise_shapley(
     classwise_shapley_utility: ClasswiseModelUtility,
@@ -167,9 +165,7 @@ def test_classwise_shapley(
     batch_size: int,
     request,
 ):
-    method_kwargs, exact_solution, check_kwargs = request.getfixturevalue(
-        exact_solution
-    )
+    method_kwargs, exact_result, check_kwargs = request.getfixturevalue(exact_solution)
     in_class_sampler = DeterministicPermutationSampler()
     out_of_class_sampler = DeterministicUniformSampler(
         index_iteration=FiniteNoIndexIteration
@@ -187,4 +183,4 @@ def test_classwise_shapley(
         **method_kwargs,
     )
     valuation.fit(train_dataset_manual_derivation)
-    check_values(valuation.result, exact_solution, **check_kwargs)
+    check_values(valuation.result, exact_result, **check_kwargs)
diff --git a/tests/valuation/methods/test_montecarlo_shapley.py b/tests/valuation/methods/test_montecarlo_shapley.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 from joblib import parallel_config
+from numpy.typing import NDArray
 from sklearn.linear_model import LinearRegression
 
 from pydvl.utils import SupervisedModel
@@ -379,7 +380,7 @@ def test_grouped_linear_montecarlo_shapley(
     data_train, data_test = linear_dataset
 
     scorer = compose_score(
-        SupervisedScorer("r2", data_test, default=-np.inf),
+        SupervisedScorer[SupervisedModel, NDArray]("r2", data_test, default=-np.inf),
         sigmoid,
         name="squashed r2",
     )
diff --git a/tests/valuation/samplers/test_sampler.py b/tests/valuation/samplers/test_sampler.py
@@ -213,7 +213,7 @@ def stratified_samplers(n_samples_per_index: int = 32):
         FiniteSequentialIndexIteration,
     ]
 
-    ret = []
+    ret: list[tuple[type[IndexSampler], dict]] = []
     for ss in sample_size_strategies:
         ret.append(
             (
@@ -741,14 +741,16 @@ class TestBatchSampler(IndexSampler):
     def __init__(self, batch_size):
         super().__init__(batch_size)
 
-    def sample_limit(self, indices: IndexSetT) -> int | None: ...
+    def sample_limit(self, indices: IndexSetT) -> int | None:
+        return len(indices)
 
     def generate(self, indices: IndexSetT) -> SampleGenerator:
         yield from (Sample(idx, np.empty_like(indices)) for idx in indices)
 
-    def log_weight(self, n: int, subset_len: int) -> float: ...
+    def log_weight(self, n: int, subset_len: int) -> float:
+        return 0.0
 
-    def make_strategy(
+    def make_strategy(  # type: ignore[empty-body]
         self,
         utility: UtilityBase,
         log_coefficient: Callable[[int, int], float] | None = None,
diff --git a/tests/valuation/samplers/test_stratified.py b/tests/valuation/samplers/test_stratified.py
@@ -3,6 +3,8 @@
 
 from __future__ import annotations
 
+from typing import cast
+
 import numpy as np
 import pytest
 from numpy.typing import NDArray
@@ -26,9 +28,9 @@ def __init__(self, sample_sizes: list[int]):
         super().__init__(n_samples=sum(sample_sizes))
         self._sample_sizes = np.array(sample_sizes, dtype=int)
 
-    def sample_sizes(self, n_indices: int, probs: bool = True) -> NDArray[np.int64]:
+    def sample_sizes(self, n_indices: int, probs: bool = True) -> NDArray[np.int64]:  # type: ignore[override]
         if probs:
-            return self._sample_sizes / np.sum(self._sample_sizes)
+            return cast(NDArray, self._sample_sizes / np.sum(self._sample_sizes))
         return self._sample_sizes
 
     def fun(self, n_indices: int, subset_len: int) -> float:
diff --git a/tests/valuation/scorers/test_classwise.py b/tests/valuation/scorers/test_classwise.py
@@ -60,7 +60,7 @@ def model() -> SupervisedModel:
 def test_classwise_scorer(
     model: SupervisedModel, test_data: Dataset, expected_scores: dict[int, float]
 ):
-    scorer = ClasswiseSupervisedScorer("accuracy", test_data)
+    scorer = ClasswiseSupervisedScorer[SupervisedModel, NDArray]("accuracy", test_data)
 
     for label, expected_score in expected_scores.items():
         scorer.label = label
diff --git a/tests/valuation/test_tensor_support.py b/tests/valuation/test_tensor_support.py
@@ -1,11 +1,18 @@
+from types import ModuleType
+from typing import TYPE_CHECKING
+
 import numpy as np
 import pytest
 from sklearn.datasets import make_classification
 
 from pydvl.utils.array import is_numpy, try_torch_import
 from pydvl.valuation.dataset import Dataset, GroupedDataset, RawData
 
-torch = try_torch_import()
+if TYPE_CHECKING:
+    import torch
+else:
+    torch = try_torch_import()
+
 pytestmark = pytest.mark.skipif(torch is None, reason="PyTorch not installed")
 
 
diff --git a/tests/valuation/utility/test_learning.py b/tests/valuation/utility/test_learning.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import Sequence
 
 import numpy as np
@@ -12,7 +14,7 @@
 from pydvl.valuation.utility.learning import IndicatorUtilityModel
 
 
-class LinearUtility(UtilityBase):
+class LinearUtility(UtilityBase[Sample]):
     """A utility function that returns the sum of the weights corresponding to the
     indices in the subset.
 
@@ -25,9 +27,9 @@ def __init__(self, weights: Sequence, training_data: Sequence):
         self.weights = np.array(weights)
 
         # FIXME this doesn't make sense
-        self._training_data = training_data
+        self._training_data = training_data  # type: ignore
 
-    def __call__(self, sample: Sample):
+    def __call__(self, sample: Sample | None) -> float:
         # Compute the sum of the weights corresponding to the indices in the subset.
         if sample is None or len(sample.subset) == 0:
             return 0.0
@@ -134,6 +136,9 @@ def predict(self, X):
         self.last_predict_X = X
         return np.sum(X, axis=1, keepdims=True)
 
+    def score(self, x: NDArray, y: NDArray | None) -> float:
+        return 1.0  # dummy, not used in tests
+
 
 @pytest.mark.parametrize(
     "utility_samples, encoding",