Skip to content
55 changes: 53 additions & 2 deletions keras/src/wrappers/sklearn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def patched_more_tags(self):
return parametrize_with_checks(estimators)


def dynamic_model(X, y, loss, layers=[10]):
def dynamic_model(X, y, loss, out_activation_function="softmax", layers=[10]):
"""Creates a basic MLP classifier dynamically choosing binary/multiclass
classification loss and output activations.
"""
Expand All @@ -69,7 +69,7 @@ def dynamic_model(X, y, loss, layers=[10]):
hidden = Dense(layer_size, activation="relu")(hidden)

n_outputs = y.shape[1] if len(y.shape) > 1 else 1
out = [Dense(n_outputs, activation="softmax")(hidden)]
out = [Dense(n_outputs, activation=out_activation_function)(hidden)]
model = Model(inp, out)
model.compile(loss=loss, optimizer="rmsprop")

Expand Down Expand Up @@ -158,3 +158,54 @@ def test_sklearn_estimator_checks(estimator, check):
pytest.xfail("Backend not implemented")
else:
raise


@pytest.mark.parametrize(
    "estimator, has_predict_proba",
    [
        (
            SKLearnClassifier(
                model=dynamic_model,
                model_kwargs={
                    "out_activation_function": activation,
                    "loss": "categorical_crossentropy",
                },
                fit_kwargs={"epochs": 1},
            ),
            expected,
        )
        # One case per output activation: "softmax" is expected to expose
        # ``predict_proba``, "linear" is expected not to.
        for activation, expected in (("softmax", True), ("linear", False))
    ],
)
def test_sklearn_estimator_predict_proba(estimator, has_predict_proba):
    """Verify when a fitted ``SKLearnClassifier`` exposes ``predict_proba``.

    The estimator is fitted on a small synthetic 4-class dataset and the
    presence of the ``predict_proba`` attribute is compared against
    ``has_predict_proba``.

    On the "numpy" and "openvino" backends, a failure that is (or mentions)
    ``NotImplementedError`` is reported as an expected failure; any other
    exception, including a failed assertion, is re-raised unchanged.
    """
    try:
        features, labels = sklearn.datasets.make_classification(
            n_samples=100,
            n_features=10,
            n_informative=4,
            n_classes=4,
            random_state=42,
        )
        estimator.fit(features, labels)
        assert hasattr(estimator, "predict_proba") == has_predict_proba
    except Exception as exc:
        # Only backends known to lack training support may xfail.
        if keras.config.backend() not in ["numpy", "openvino"]:
            raise
        # The error may be wrapped, so also look for the name in the message.
        if not isinstance(
            exc, NotImplementedError
        ) and "NotImplementedError" not in str(exc):
            raise
        pytest.xfail("Backend not implemented")
10 changes: 10 additions & 0 deletions keras/src/wrappers/sklearn_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from keras.src.wrappers.fixes import type_of_target
from keras.src.wrappers.utils import TargetReshaper
from keras.src.wrappers.utils import _check_model
from keras.src.wrappers.utils import _estimator_has
from keras.src.wrappers.utils import assert_sklearn_installed

try:
Expand Down Expand Up @@ -278,6 +279,15 @@ def dynamic_model(X, y, loss, layers=[10]):
```
"""

@sklearn.utils.metaestimators.available_if(_estimator_has("predict_proba"))
def predict_proba(self, X):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem here is that since the model is configurable, we have no way to know whether the model outputs probabilities or not. This method serves no additional purpose over just predict().

Copy link
Author

@divakaivan divakaivan Aug 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fchollet Could you elaborate, please? I'm not sure I understand your comment. In the case when the user expects probas they will get probas. The only difference between this predict_proba and predict is that the target is not transformed back.

If the user expects probabilities, then they will get them. Although predict_proba might not always return proper probabilities, its inclusion allows users to interoperate with sklearn workflows that expect it. Some examples are in the original issue request.

Copy link
Author

@divakaivan divakaivan Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a chat with Adrin Jalali and @ glemaitre, and they suggested we put predict_proba under an available_if decorator. Maybe something like:

from sklearn.utils._available_if import available_if

def _returns_probas(estimator):
    """Return True if the fitted model's last layer uses sigmoid/softmax.

    Args:
        estimator: Object with a fitted ``model_`` attribute whose
            ``layers`` sequence is non-empty.

    Returns:
        bool: True when the last layer has an ``activation`` whose
        ``__name__`` is ``"sigmoid"`` or ``"softmax"``; False otherwise,
        including when the last layer has no ``activation`` attribute or
        the activation carries no ``__name__``.
    """
    # Not every layer defines `activation`, and an activation object need
    # not expose `__name__`; treat both as "does not return probabilities"
    # instead of raising AttributeError.
    activation = getattr(estimator.model_.layers[-1], "activation", None)
    return getattr(activation, "__name__", None) in ("sigmoid", "softmax")

class SKLearnClassifier:
    # Sketch only: the rest of the class is elided in this suggestion.
    @available_if(_returns_probas)
    def predict_proba(self, X):
        """Return the fitted model's predictions for X.

        NOTE(review): the decorator is meant to expose this method only
        when `_returns_probas(self)` holds — confirm against sklearn's
        `available_if` documentation.
        """
        ...
        return self.model_.predict(X)

Also pinging @adrinjalali for his thoughts on the issue/PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"""Predict class probabilities of the input samples X."""
from sklearn.utils.validation import check_is_fitted

check_is_fitted(self)
X = _validate_data(self, X, reset=False)
return self.model_.predict(X)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these probabilities? Or is this more of a decision_function implementation? 🤔

Copy link
Author

@divakaivan divakaivan Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

model_.predict returns probabilities when the last layer's activation function is softmax or sigmoid. If the last layer has no activation, I believe it returns logits.

Reproducible example in google colab
# Demo: compare the predictions of a model that emits raw logits with
# (a) a model that appends a softmax layer to the same logits tensor and
# (b) softmax applied manually to the logits model's predictions.
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.losses import SparseCategoricalCrossentropy

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

import random
import numpy as np
import tensorflow as tf

# Seeds Python's and NumPy's RNGs only; no TensorFlow seed is set here.
random.seed(42)
np.random.seed(42)

# Synthetic 4-class dataset with 10 features, split into train/test parts.
X, y = make_classification(n_samples=1000, n_features=10, n_informative=4, n_classes=4, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Three hidden ReLU layers; the final Dense layer has no activation.
inp = Input(shape=(10,))
x = Dense(20, activation="relu")(inp)
x = Dense(20, activation="relu")(x)
x = Dense(20, activation="relu")(x)
logits_output = Dense(4, activation=None)(x)

# The loss is told the model outputs logits (from_logits=True).
model_logits = Model(inp, logits_output)
model_logits.compile(loss=SparseCategoricalCrossentropy(from_logits=True), optimizer="adam")

model_logits.fit(X_train, y_train, epochs=10, verbose=0)

# Second model built on the same input and logits tensors, with a softmax
# activation appended; it is not compiled or fitted separately.
softmax_output = tf.keras.layers.Activation('softmax')(logits_output)
model_softmax = Model(inp, softmax_output)

# Use a single test row for the comparison.
test_sample = X_test[:1]

print("LOGITS OUTPUT:")
pred_logits = model_logits.predict(test_sample, verbose=0)
print(pred_logits)

print("SOFTMAX MODEL OUTPUT:")
pred_softmax = model_softmax.predict(test_sample, verbose=0)
print(pred_softmax)

print("MANUAL SOFTMAX APPLIED TO LOGITS:")
pred_manual_softmax = tf.nn.softmax(pred_logits).numpy()
print(pred_manual_softmax)

# Element-wise absolute difference between the two softmax results.
print("DIFFERENCE")
print(np.abs(pred_softmax - pred_manual_softmax))
LOGITS OUTPUT:
[[ 0.60939574  4.029889   -1.224225    1.267421  ]]
SOFTMAX MODEL OUTPUT:
[[0.02969535 0.9082174  0.00474632 0.05734099]]
MANUAL SOFTMAX APPLIED TO LOGITS:
[[0.02969535 0.9082174  0.00474632 0.05734099]]
DIFFERENCE
[[0. 0. 0. 0.]]


def _process_target(self, y, reset=False):
"""Classifiers do OHE."""
target_type = type_of_target(y, raise_unknown=True)
Expand Down
12 changes: 12 additions & 0 deletions keras/src/wrappers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ def _check_model(model):
)


def _estimator_has(attr):
Copy link
Author

@divakaivan divakaivan Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be named _check_proba or _estimator_has_proba? There is no reason to take an attr parameter, since the check is quite specific to what we want to verify (something like the example linked here).

def check(self):
from sklearn.utils.validation import check_is_fitted

check_is_fitted(self)
return (
self.model_.layers[-1].activation.__name__ in ("sigmoid", "softmax")
)

return check


class TargetReshaper(TransformerMixin, BaseEstimator):
"""Convert 1D targets to 2D and back.

Expand Down
Loading