Skip to content
55 changes: 53 additions & 2 deletions keras/src/wrappers/sklearn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def patched_more_tags(self):
return parametrize_with_checks(estimators)


def dynamic_model(X, y, loss, layers=[10]):
def dynamic_model(X, y, loss, out_activation_function="softmax", layers=[10]):
"""Creates a basic MLP classifier dynamically choosing binary/multiclass
classification loss and ouput activations.
"""
Expand All @@ -69,7 +69,7 @@ def dynamic_model(X, y, loss, layers=[10]):
hidden = Dense(layer_size, activation="relu")(hidden)

n_outputs = y.shape[1] if len(y.shape) > 1 else 1
out = [Dense(n_outputs, activation="softmax")(hidden)]
out = [Dense(n_outputs, activation=out_activation_function)(hidden)]
model = Model(inp, out)
model.compile(loss=loss, optimizer="rmsprop")

Expand Down Expand Up @@ -107,6 +107,9 @@ def use_floatx(x):
),
"check_supervised_y_2d": "This test assumes reproducibility in fit.",
"check_fit_idempotent": "This test assumes reproducibility in fit.",
"check_classifiers_train": (
"decision_function can return both probabilities and logits"
),
Comment on lines +110 to +112
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if this is a good solution for this. 3 tests are failing without it

FAILED keras/src/wrappers/sklearn_test.py::test_sklearn_estimator_checks[SKLearnClassifier(fit_kwargs={'epochs':5},model=<functiondynamic_modelat0x7fae46b3a680>,model_kwargs={'layers':[20,20,20],'loss':'categorical_crossentropy'})-check_classifiers_train] - AssertionError
FAILED keras/src/wrappers/sklearn_test.py::test_sklearn_estimator_checks[SKLearnClassifier(fit_kwargs={'epochs':5},model=<functiondynamic_modelat0x7fae46b3a680>,model_kwargs={'layers':[20,20,20],'loss':'categorical_crossentropy'})-check_classifiers_train(readonly_memmap=True)] - AssertionError
FAILED keras/src/wrappers/sklearn_test.py::test_sklearn_estimator_checks[SKLearnClassifier(fit_kwargs={'epochs':5},model=<functiondynamic_modelat0x7fae46b3a680>,model_kwargs={'layers':[20,20,20],'loss':'categorical_crossentropy'})-check_classifiers_train(readonly_memmap=True,X_dtype=float32)] - AssertionError

From here

            if hasattr(classifier, "decision_function"):
                try:
                    # decision_function agrees with predict
                    decision = classifier.decision_function(X)
                    if n_classes == 2:
                        if tags.target_tags.single_output:
>                           assert decision.shape == (n_samples,)
E                           AssertionError

looking at ways to avoid adding this expected failed check ~

Copy link
Author

@divakaivan divakaivan Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So if I make the decision_function to match the shape in case of binary classification

    def decision_function(self, X):
        """Get raw model outputs."""
        from sklearn.utils.validation import check_is_fitted

        check_is_fitted(self)

        X = _validate_data(self, X, reset=False)
        scores = self.model_.predict(X)
        if len(self.classes_) == 2:
            return scores[:, 1] # caveat that since this returns a single number, now argmax will be 0 so not good 
        return scores

Then the 3 errors are like this:

FAILED keras/src/wrappers/sklearn_test.py::test_sklearn_estimator_checks[SKLearnClassifier(fit_kwargs={'epochs':5},model=<functiondynamic_modelat0x16a8b2a20>,model_kwargs={'layers':[20,20,20],'loss':'categorical_crossentropy'})-check_classifiers_train(readonly_memmap=True)] - AssertionError:
Arrays are not equal

Mismatched elements: 91 / 200 (45.5%)
Max absolute difference among violations: 1
Max relative difference among violations: inf
 ACTUAL: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
 DESIRED: array([0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,...

This comes from scores[:, 1] being probabilities. And sklearn/utils/estimator_checks.py does this

                    dec_pred = (decision.ravel() > 0).astype(int)

So all our outputs are 1s.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Is it a good compromise is to return the shape from self.model_.predict(X) and ignore those test cases?
  • Also is it acceptable practice in the scikit-learn community if decision_function is used for both both logits and probas?

},
"SKLearnRegressor": {
"check_parameters_default_constructible": (
Expand Down Expand Up @@ -158,3 +161,51 @@ def test_sklearn_estimator_checks(estimator, check):
pytest.xfail("Backend not implemented")
else:
raise


@pytest.mark.parametrize(
"estimator",
[
SKLearnClassifier(
model=dynamic_model,
model_kwargs={
"out_activation_function": "softmax",
"loss": "binary_crossentropy",
},
fit_kwargs={"epochs": 1},
),
SKLearnClassifier(
model=dynamic_model,
model_kwargs={
"out_activation_function": "linear",
"loss": "binary_crossentropy",
},
fit_kwargs={"epochs": 1},
),
],
)
def test_sklearn_estimator_decision_function(estimator):
"""Checks that the argmax of ``decision_function`` is the same as
``predict`` for classifiers.
"""
try:
X, y = sklearn.datasets.make_classification(
n_samples=10,
n_features=10,
n_informative=4,
n_classes=2,
random_state=42,
)
estimator.fit(X, y)
assert (
estimator.decision_function(X[:1]).argmax(axis=-1)
== estimator.predict(X[:1]).flatten()
)
except Exception as exc:
if keras.config.backend() in ["numpy", "openvino"] and (
isinstance(exc, NotImplementedError)
or "NotImplementedError" in str(exc)
):
pytest.xfail("Backend not implemented")
else:
raise
10 changes: 10 additions & 0 deletions keras/src/wrappers/sklearn_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sklearn.base import ClassifierMixin
from sklearn.base import RegressorMixin
from sklearn.base import TransformerMixin
from sklearn.utils._array_api import get_namespace
except ImportError:
sklearn = None

Expand Down Expand Up @@ -278,6 +279,15 @@ def dynamic_model(X, y, loss, layers=[10]):
```
"""

def decision_function(self, X):
"""Get raw model outputs."""
from sklearn.utils.validation import check_is_fitted

check_is_fitted(self)

X = _validate_data(self, X, reset=False)
return self.model_.predict(X)

def _process_target(self, y, reset=False):
"""Classifiers do OHE."""
target_type = type_of_target(y, raise_unknown=True)
Expand Down
2 changes: 1 addition & 1 deletion keras/src/wrappers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def inverse_transform(self, y):
If the transformer was fit to a 1D numpy array,
and a 2D numpy array with a singleton second dimension
is passed, it will be squeezed back to 1D. Otherwise, it
will eb left untouched.
will be left untouched.
"""
from sklearn.utils.validation import check_is_fitted

Expand Down
Loading