|
| 1 | +""" |
| 2 | +This module provides a :class:`Scorer` class that wraps scoring functions with |
| 3 | +additional information. |
| 4 | +
|
| 5 | +Scorers can be constructed in the same way as in scikit-learn: either from |
| 6 | +known strings or from a callable. Greater values must be better. If they are not, |
| 7 | +a negated version can be used, see scikit-learn's `make_scorer() |
| 8 | +<https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html>`_. |
| 9 | +
|
| 10 | +:class:`Scorer` provides additional information about the scoring function, like |
| 11 | +its range and default values. |
| 12 | +""" |
| 13 | +from typing import Callable, Optional, Protocol, Tuple, Union |
| 14 | + |
| 15 | +import numpy as np |
| 16 | +from numpy.typing import NDArray |
| 17 | +from scipy.special import expit |
| 18 | +from sklearn.metrics import get_scorer |
| 19 | + |
| 20 | +from pydvl.utils.types import SupervisedModel |
| 21 | + |
# Names exported by ``from <this module> import *``.
__all__ = [
    "Scorer",
    "compose_score",
    "squashed_r2",
    "squashed_variance",
]
| 23 | + |
| 24 | + |
class ScorerCallable(Protocol):
    """Structural type for scoring callables.

    Any object that can be called with a model, an array ``X`` and an array
    ``y`` and that returns a ``float`` satisfies this protocol.
    """

    def __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
        """Return the score of ``model`` evaluated on ``X`` and ``y``."""
        ...
| 30 | + |
| 31 | + |
class Scorer:
    """A scoring callable that takes a model, data, and labels and returns a
    scalar.

    :param scoring: Either a string or callable that can be passed to
        `get_scorer
        <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.get_scorer.html>`_.
    :param default: score to be used when a model cannot be fit, e.g. when too
        little data is passed, or errors arise. This class only stores the
        value in :attr:`default`; it is up to the caller to use it.
    :param range: numerical range of the score function. Some Monte Carlo
        methods can use this to estimate the number of samples required for a
        certain quality of approximation. It is stored as a NumPy array and is
        **not** inferred from ``scoring``: pass it explicitly (as
        :func:`compose_score` does) or the unbounded default
        ``(-inf, inf)`` is used.
    :param name: The name of the scorer, returned by ``str()``. If not
        provided, the ``scoring`` string is used when ``scoring`` is a string,
        otherwise the ``__name__`` of the resolved scorer if it has one, and
        finally the literal ``"scorer"``.

    .. versionadded:: 0.5.0

    """

    _name: str
    # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the same type
    # and exists in every NumPy version.
    range: NDArray[np.float64]

    def __init__(
        self,
        scoring: Union[str, ScorerCallable],
        default: float = np.nan,
        range: Tuple = (-np.inf, np.inf),
        name: Optional[str] = None,
    ):
        self._scorer = get_scorer(scoring)
        self.default = default
        # TODO: auto-fill from known scorers ?
        self.range = np.array(range)
        # An explicitly given name always takes precedence. Only when it is
        # missing do we fall back to something derived from ``scoring``.
        if name:
            self._name = name
        elif isinstance(scoring, str):
            self._name = scoring
        else:
            self._name = getattr(self._scorer, "__name__", "scorer")

    def __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
        """Evaluate the wrapped scorer on ``model``, ``X`` and ``y``."""
        return self._scorer(model, X, y)  # type: ignore

    def __str__(self):
        """Return the scorer's name."""
        return self._name

    def __repr__(self):
        """Return the name in CamelCase followed by the wrapped scorer."""
        # "squashed r2" -> "SquashedR2": capitalize each space-separated word.
        capitalized_name = "".join(s.capitalize() for s in self._name.split(" "))
        return f"{capitalized_name} (scorer={self._scorer})"
| 78 | + |
| 79 | + |
def compose_score(
    scorer: Scorer,
    transformation: Callable[[float], float],
    range: Tuple[float, float],
    name: str,
) -> Scorer:
    """Build a scorer whose output is ``transformation(scorer(...))``.

    This is handy for squashing unbounded scores into a bounded interval that
    data valuation methods can work with.

    .. code-block:: python
       :caption: Example usage

       sigmoid = lambda x: 1/(1+np.exp(-x))
       compose_score(Scorer("r2"), sigmoid, range=(0,1), name="squashed r2")

    .. note::
       Only ``range`` and ``name`` are handed to the new scorer. The
       ``default`` of ``scorer`` is not forwarded, so the composite gets the
       default value of the :class:`Scorer` constructor.

    :param scorer: The scorer whose output is to be transformed.
    :param transformation: A scalar function applied to every score.
    :param range: The range of the transformation, i.e. of the composite
        scorer. This will be used e.g. by
        :class:`~pydvl.utils.utility.Utility` as the range of the composed
        scorer.
    :param name: A string representation for the composition, for `str()`.
    :return: The composite :class:`Scorer`.
    """

    class NewScorer(Scorer):
        def __call__(self, model: SupervisedModel, X: NDArray, y: NDArray) -> float:
            # ``self._scorer`` is what Scorer.__init__ resolved from the
            # ``scorer`` argument; its result is fed to the transformation.
            return transformation(self._scorer(model=model, X=X, y=y))

    composite = NewScorer(scorer, range=range, name=name)
    return composite
| 111 | + |
| 112 | + |
def _sigmoid(x: float) -> float:
    """Return the logistic sigmoid of ``x`` as a built-in ``float``."""
    squashed = expit(x)
    # ``.item()` unwraps the NumPy value returned by ``expit`` into a plain
    # Python scalar.
    return squashed.item()
| 116 | + |
| 117 | + |
squashed_r2 = compose_score(
    scorer=Scorer("r2"),
    transformation=_sigmoid,
    range=(0, 1),
    name="squashed r2",
)
""" A scorer that squashes the R² score into the range [0, 1] using a sigmoid."""


squashed_variance = compose_score(
    scorer=Scorer("explained_variance"),
    transformation=_sigmoid,
    range=(0, 1),
    name="squashed explained variance",
)
""" A scorer that squashes the explained variance score into the range [0, 1] using
    a sigmoid."""
0 commit comments