Skip to content

Commit 010a982

Browse files
committed
Fix typing issues in tests/valuation
1 parent 452790c commit 010a982

File tree

9 files changed, with 61 additions (+61) and 45 deletions (-45).

tests/valuation/conftest.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@
1212
from sklearn.utils import Bunch
1313

1414
from pydvl.parallel import JoblibParallelBackend
15-
from pydvl.utils import SupervisedModel
1615
from pydvl.utils.caching import InMemoryCacheBackend
1716
from pydvl.utils.status import Status
17+
from pydvl.utils.types import SupervisedModel
1818
from pydvl.valuation.dataset import Dataset
1919
from pydvl.valuation.games import (
2020
AsymmetricVotingGame,
@@ -59,8 +59,8 @@ def polynomial_dataset(coefficients: np.ndarray):
5959
y = np.random.normal(loc=locs, scale=0.3)
6060
db = Bunch()
6161
db.data, db.target = x.reshape(-1, 1), y
62-
poly = [f"{c} x^{i}" for i, c in enumerate(coefficients)]
63-
poly = " + ".join(poly)
62+
monomials = [f"{c} x^{i}" for i, c in enumerate(coefficients)]
63+
poly = " + ".join(monomials)
6464
db.DESCR = f"$y \\sim N({poly}, 1)$"
6565
db.feature_names = ["x"]
6666
db.target_names = ["y"]
@@ -135,7 +135,7 @@ def score(self, x: NDArray, y: NDArray | None = None) -> float:
135135
model = DummyModel(data)
136136

137137
x, _ = data.data()
138-
scorer = SupervisedScorer(
138+
scorer = SupervisedScorer[SupervisedModel, NDArray](
139139
model, test_data=test_data, default=0, range=(0, x.sum() / x.max())
140140
)
141141

@@ -211,7 +211,9 @@ def linear_shapley(
211211
train, test = linear_dataset
212212
if utility is None:
213213
scorer = compose_score(
214-
SupervisedScorer("r2", test, default=-np.inf), sigmoid, name=scorer_name
214+
SupervisedScorer[SupervisedModel, NDArray]("r2", test, default=-np.inf),
215+
sigmoid,
216+
name=scorer_name,
215217
)
216218
utility = ModelUtility(LinearRegression(), scorer=scorer).with_dataset(train)
217219
if exact_result is None:

tests/valuation/methods/conftest.py

Lines changed: 20 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,22 @@
11
from __future__ import annotations
22

3+
from typing import TYPE_CHECKING
4+
35
import pytest
6+
from numpy.typing import NDArray
47
from typing_extensions import Self
58

69
from pydvl.utils import Seed, try_torch_import
710
from pydvl.valuation.dataset import Dataset
8-
from pydvl.valuation.games import DummyGameDataset, MinerGame, ShoesGame
11+
from pydvl.valuation.games import MinerGame, ShoesGame
912
from pydvl.valuation.scorers import ClasswiseSupervisedScorer
1013
from pydvl.valuation.utility.classwise import ClasswiseModelUtility
1114

12-
torch = try_torch_import()
13-
14-
if torch is None:
15-
pytest.skip("PyTorch not available", allow_module_level=True)
15+
if TYPE_CHECKING:
16+
import torch
17+
else:
18+
if (torch := try_torch_import()) is None:
19+
pytest.skip("PyTorch not available", allow_module_level=True)
1620

1721

1822
class TorchLinearClassifier:
@@ -48,8 +52,8 @@ def __init__(self, n_estimators: int, max_samples: float, random_state: Seed):
4852
self.n_estimators = n_estimators
4953
self.max_samples = max_samples
5054
self.random_state = random_state
51-
self.estimators_ = []
52-
self.estimators_samples_ = []
55+
self.estimators_: list[TorchLinearClassifier] = []
56+
self.estimators_samples_: list[NDArray] = []
5357

5458
def fit(self, X, y):
5559
n_samples = X.shape[0]
@@ -129,21 +133,18 @@ def tensor_classwise_utility(tensor_test_dataset):
129133
)
130134

131135

132-
class TensorDummyGameDataset(DummyGameDataset):
136+
class TensorDummyGameDataset(Dataset[torch.Tensor]):
133137
"""Extends DummyGameDataset to use PyTorch tensors instead of NumPy arrays."""
134138

135139
def __init__(self, n_players: int, description: str = ""):
136140
x = torch.arange(0, n_players, 1).reshape(-1, 1).float()
137141
nil = torch.zeros_like(x)
138-
(
139-
Dataset.__init__(
140-
self,
141-
x,
142-
nil.clone(),
143-
feature_names=["x"],
144-
target_names=["y"],
145-
description=description,
146-
),
142+
super().__init__(
143+
x,
144+
nil.clone(),
145+
feature_names=["x"],
146+
target_names=["y"],
147+
description=description,
147148
)
148149

149150

@@ -152,12 +153,12 @@ class TensorMinerGame(MinerGame):
152153

153154
def __init__(self, n_players: int):
154155
super().__init__(n_players)
155-
self.data = TensorDummyGameDataset(self.n_players, "Tensor Miner Game dataset")
156+
self.data = TensorDummyGameDataset(self.n_players, "Tensor Miner Game dataset") # type: ignore[assignment]
156157

157158

158159
class TensorShoesGame(ShoesGame):
159160
"""Extends ShoesGame to use PyTorch tensors."""
160161

161162
def __init__(self, left: int, right: int):
162163
super().__init__(left, right)
163-
self.data = TensorDummyGameDataset(self.n_players, "Tensor Shoes Game dataset")
164+
self.data = TensorDummyGameDataset(self.n_players, "Tensor Shoes Game dataset") # type: ignore[assignment]

tests/valuation/methods/test_classwise_shapley.py

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
from __future__ import annotations
2020

2121
import logging
22+
from typing import cast
2223

2324
import numpy as np
2425
import pytest
@@ -111,7 +112,7 @@ def predict(self, x: NDArray) -> NDArray:
111112
raise AttributeError("Model not fitted")
112113

113114
probs = self._beta * x
114-
return np.clip(np.round(probs + 1e-10), 0, 1).astype(int)
115+
return cast(NDArray, np.clip(np.round(probs + 1e-10), 0, 1).astype(int))
115116

116117
def score(self, x: NDArray, y: NDArray | None) -> float:
117118
assert y is not None
@@ -154,10 +155,7 @@ def test_dataset_manual_derivation(train_dataset_manual_derivation) -> Dataset:
154155
@pytest.mark.parametrize("n_samples", [100], ids=lambda x: f"n_samples={x}")
155156
@pytest.mark.parametrize(
156157
"exact_solution",
157-
[
158-
pytest.param("classwise_shapley_exact_solution", marks=[pytest.mark.xfail]),
159-
"classwise_shapley_exact_solution_normalized",
160-
],
158+
["classwise_shapley_exact_solution", "classwise_shapley_exact_solution_normalized"],
161159
)
162160
def test_classwise_shapley(
163161
classwise_shapley_utility: ClasswiseModelUtility,
@@ -167,9 +165,7 @@ def test_classwise_shapley(
167165
batch_size: int,
168166
request,
169167
):
170-
method_kwargs, exact_solution, check_kwargs = request.getfixturevalue(
171-
exact_solution
172-
)
168+
method_kwargs, exact_result, check_kwargs = request.getfixturevalue(exact_solution)
173169
in_class_sampler = DeterministicPermutationSampler()
174170
out_of_class_sampler = DeterministicUniformSampler(
175171
index_iteration=FiniteNoIndexIteration
@@ -187,4 +183,4 @@ def test_classwise_shapley(
187183
**method_kwargs,
188184
)
189185
valuation.fit(train_dataset_manual_derivation)
190-
check_values(valuation.result, exact_solution, **check_kwargs)
186+
check_values(valuation.result, exact_result, **check_kwargs)

tests/valuation/methods/test_montecarlo_shapley.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import numpy as np
55
import pytest
66
from joblib import parallel_config
7+
from numpy.typing import NDArray
78
from sklearn.linear_model import LinearRegression
89

910
from pydvl.utils import SupervisedModel
@@ -379,7 +380,7 @@ def test_grouped_linear_montecarlo_shapley(
379380
data_train, data_test = linear_dataset
380381

381382
scorer = compose_score(
382-
SupervisedScorer("r2", data_test, default=-np.inf),
383+
SupervisedScorer[SupervisedModel, NDArray]("r2", data_test, default=-np.inf),
383384
sigmoid,
384385
name="squashed r2",
385386
)

tests/valuation/samplers/test_sampler.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,7 @@ def stratified_samplers(n_samples_per_index: int = 32):
213213
FiniteSequentialIndexIteration,
214214
]
215215

216-
ret = []
216+
ret: list[tuple[type[IndexSampler], dict]] = []
217217
for ss in sample_size_strategies:
218218
ret.append(
219219
(
@@ -741,14 +741,16 @@ class TestBatchSampler(IndexSampler):
741741
def __init__(self, batch_size):
742742
super().__init__(batch_size)
743743

744-
def sample_limit(self, indices: IndexSetT) -> int | None: ...
744+
def sample_limit(self, indices: IndexSetT) -> int | None:
745+
return len(indices)
745746

746747
def generate(self, indices: IndexSetT) -> SampleGenerator:
747748
yield from (Sample(idx, np.empty_like(indices)) for idx in indices)
748749

749-
def log_weight(self, n: int, subset_len: int) -> float: ...
750+
def log_weight(self, n: int, subset_len: int) -> float:
751+
return 0.0
750752

751-
def make_strategy(
753+
def make_strategy( # type: ignore[empty-body]
752754
self,
753755
utility: UtilityBase,
754756
log_coefficient: Callable[[int, int], float] | None = None,

tests/valuation/samplers/test_stratified.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33

44
from __future__ import annotations
55

6+
from typing import cast
7+
68
import numpy as np
79
import pytest
810
from numpy.typing import NDArray
@@ -26,9 +28,9 @@ def __init__(self, sample_sizes: list[int]):
2628
super().__init__(n_samples=sum(sample_sizes))
2729
self._sample_sizes = np.array(sample_sizes, dtype=int)
2830

29-
def sample_sizes(self, n_indices: int, probs: bool = True) -> NDArray[np.int64]:
31+
def sample_sizes(self, n_indices: int, probs: bool = True) -> NDArray[np.int64]: # type: ignore[override]
3032
if probs:
31-
return self._sample_sizes / np.sum(self._sample_sizes)
33+
return cast(NDArray, self._sample_sizes / np.sum(self._sample_sizes))
3234
return self._sample_sizes
3335

3436
def fun(self, n_indices: int, subset_len: int) -> float:

tests/valuation/scorers/test_classwise.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ def model() -> SupervisedModel:
6060
def test_classwise_scorer(
6161
model: SupervisedModel, test_data: Dataset, expected_scores: dict[int, float]
6262
):
63-
scorer = ClasswiseSupervisedScorer("accuracy", test_data)
63+
scorer = ClasswiseSupervisedScorer[SupervisedModel, NDArray]("accuracy", test_data)
6464

6565
for label, expected_score in expected_scores.items():
6666
scorer.label = label

tests/valuation/test_tensor_support.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,18 @@
1+
from types import ModuleType
2+
from typing import TYPE_CHECKING
3+
14
import numpy as np
25
import pytest
36
from sklearn.datasets import make_classification
47

58
from pydvl.utils.array import is_numpy, try_torch_import
69
from pydvl.valuation.dataset import Dataset, GroupedDataset, RawData
710

8-
torch = try_torch_import()
11+
if TYPE_CHECKING:
12+
import torch
13+
else:
14+
torch = try_torch_import()
15+
916
pytestmark = pytest.mark.skipif(torch is None, reason="PyTorch not installed")
1017

1118

tests/valuation/utility/test_learning.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from typing import Sequence
24

35
import numpy as np
@@ -12,7 +14,7 @@
1214
from pydvl.valuation.utility.learning import IndicatorUtilityModel
1315

1416

15-
class LinearUtility(UtilityBase):
17+
class LinearUtility(UtilityBase[Sample]):
1618
"""A utility function that returns the sum of the weights corresponding to the
1719
indices in the subset.
1820
@@ -25,9 +27,9 @@ def __init__(self, weights: Sequence, training_data: Sequence):
2527
self.weights = np.array(weights)
2628

2729
# FIXME this doesn't make sense
28-
self._training_data = training_data
30+
self._training_data = training_data # type: ignore
2931

30-
def __call__(self, sample: Sample):
32+
def __call__(self, sample: Sample | None) -> float:
3133
# Compute the sum of the weights corresponding to the indices in the subset.
3234
if sample is None or len(sample.subset) == 0:
3335
return 0.0
@@ -134,6 +136,9 @@ def predict(self, X):
134136
self.last_predict_X = X
135137
return np.sum(X, axis=1, keepdims=True)
136138

139+
def score(self, x: NDArray, y: NDArray | None) -> float:
140+
return 1.0 # dummy, not used in tests
141+
137142

138143
@pytest.mark.parametrize(
139144
"utility_samples, encoding",

0 commit comments

Comments
 (0)