diff --git a/Orange/classification/__init__.py b/Orange/classification/__init__.py
index f0489b4cb74..842518fca31 100644
--- a/Orange/classification/__init__.py
+++ b/Orange/classification/__init__.py
@@ -19,3 +19,4 @@
from .rules import *
from .sgd import *
from .neural_network import *
+from .calibration import *
diff --git a/Orange/classification/calibration.py b/Orange/classification/calibration.py
new file mode 100644
index 00000000000..46bf2e8f242
--- /dev/null
+++ b/Orange/classification/calibration.py
@@ -0,0 +1,176 @@
+import numpy as np
+from sklearn.isotonic import IsotonicRegression
+from sklearn.calibration import _SigmoidCalibration
+
+from Orange.classification import Model, Learner
+from Orange.evaluation import TestOnTrainingData
+from Orange.evaluation.performance_curves import Curves
+
+__all__ = ["ThresholdClassifier", "ThresholdLearner",
+ "CalibratedLearner", "CalibratedClassifier"]
+
+
+class ThresholdClassifier(Model):
+ """
+ A model that wraps a binary model and sets a different threshold.
+
+    The target class is the class with index 1. A data instance is classified
+    to class 1 if the probability of this class equals or exceeds the
+    threshold.
+
+ Attributes:
+        base_model (Orange.classification.Model): base model
+ threshold (float): decision threshold
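+
+    A minimal usage sketch (hypothetical learner and data; any Orange model
+    trained on a binary class works the same way):
+
+        model = LogisticRegressionLearner()(data)
+        clf = ThresholdClassifier(model, threshold=0.7)
+        values = clf(data)  # class 1 iff P(class 1) >= 0.7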
+ """
+ def __init__(self, base_model, threshold):
+ if not base_model.domain.class_var.is_discrete \
+ or len(base_model.domain.class_var.values) != 2:
+ raise ValueError("ThresholdClassifier requires a binary class")
+
+ super().__init__(base_model.domain, base_model.original_domain)
+ self.name = f"{base_model.name}, thresh={threshold:.2f}"
+ self.base_model = base_model
+ self.threshold = threshold
+
+ def __call__(self, data, ret=Model.Value):
+ probs = self.base_model(data, ret=Model.Probs)
+ if ret == Model.Probs:
+ return probs
+ class_probs = probs[:, 1].ravel()
+        with np.errstate(invalid="ignore"):  # we fix nans below
+ vals = (class_probs >= self.threshold).astype(float)
+ vals[np.isnan(class_probs)] = np.nan
+ if ret == Model.Value:
+ return vals
+ else:
+ return vals, probs
+
+
+class ThresholdLearner(Learner):
+ """
+ A learner that runs another learner and then finds the optimal threshold
+ for CA or F1 on the training data.
+
+ Attributes:
+        base_learner (Learner): base learner
+ threshold_criterion (int):
+ `ThresholdLearner.OptimizeCA` or `ThresholdLearner.OptimizeF1`
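+
+    A usage sketch (hypothetical base learner; training and threshold search
+    happen in a single call):
+
+        learner = ThresholdLearner(LogisticRegressionLearner(),
+                                   ThresholdLearner.OptimizeF1)
+        model = learner(data)   # a ThresholdClassifier
+        model.threshold         # the threshold that maximized F1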
+ """
+ __returns__ = ThresholdClassifier
+
+ OptimizeCA, OptimizeF1 = range(2)
+
+ def __init__(self, base_learner, threshold_criterion=OptimizeCA):
+ super().__init__()
+ self.base_learner = base_learner
+ self.threshold_criterion = threshold_criterion
+
+ def fit_storage(self, data):
+ """
+        Induce a model using the provided `base_learner`, compute probabilities
+        on the training data and then find the optimal decision threshold. In
+        case of ties, select the threshold that is closest to 0.5.
+ """
+ if not data.domain.class_var.is_discrete \
+ or len(data.domain.class_var.values) != 2:
+ raise ValueError("ThresholdLearner requires a binary class")
+
+ res = TestOnTrainingData(data, [self.base_learner], store_models=True)
+ model = res.models[0, 0]
+ curves = Curves.from_results(res)
+ curve = [curves.ca, curves.f1][self.threshold_criterion]()
+ # In case of ties, we want the optimal threshold that is closest to 0.5
+ best_threshs = curves.probs[curve == np.max(curve)]
+ threshold = best_threshs[min(np.searchsorted(best_threshs, 0.5),
+ len(best_threshs) - 1)]
+ return ThresholdClassifier(model, threshold)
+
+
+class CalibratedClassifier(Model):
+ """
+ A model that wraps another model and recalibrates probabilities
+
+ Attributes:
+        base_model (Model): base model
+        calibrators (list of callable):
+            list of objects with a `predict` method (e.g. fitted isotonic or
+            sigmoid calibrators) that maps a vector of probabilities to
+            calibrated probabilities
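+
+    For instance, with a hypothetical identity calibrator built from
+    sklearn's `IsotonicRegression`, one per class:
+
+        iso = IsotonicRegression(out_of_bounds="clip").fit([0, 1], [0, 1])
+        calibrated = CalibratedClassifier(base_model, [iso, iso])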
+ """
+ def __init__(self, base_model, calibrators):
+ if not base_model.domain.class_var.is_discrete:
+ raise ValueError("CalibratedClassifier requires a discrete target")
+
+ super().__init__(base_model.domain, base_model.original_domain)
+ self.base_model = base_model
+ self.calibrators = calibrators
+ self.name = f"{base_model.name}, calibrated"
+
+ def __call__(self, data, ret=Model.Value):
+ probs = self.base_model(data, Model.Probs)
+ cal_probs = self.calibrated_probs(probs)
+ if ret == Model.Probs:
+ return cal_probs
+ vals = np.argmax(cal_probs, axis=1)
+ if ret == Model.Value:
+ return vals
+ else:
+ return vals, cal_probs
+
+ def calibrated_probs(self, probs):
+ if self.calibrators:
+ ps = np.hstack(
+ tuple(
+ calibr.predict(cls_probs).reshape(-1, 1)
+ for calibr, cls_probs in zip(self.calibrators, probs.T)))
+ else:
+ ps = probs.copy()
+ sums = np.sum(ps, axis=1)
+ zero_sums = sums == 0
+ with np.errstate(invalid="ignore"): # handled below
+ ps /= sums[:, None]
+ if zero_sums.any():
+ ps[zero_sums] = 1 / ps.shape[1]
+ return ps
+
+
+class CalibratedLearner(Learner):
+ """
+ Probability calibration for learning algorithms
+
+    This learner wraps another learner so that, after training, it predicts
+    the probabilities on the training data and calibrates them using sigmoid
+    or isotonic calibration. It then returns a :obj:`CalibratedClassifier`.
+
+ Attributes:
+ base_learner (Learner): base learner
+ calibration_method (int):
+ `CalibratedLearner.Sigmoid` or `CalibratedLearner.Isotonic`
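+
+    A usage sketch (hypothetical base learner):
+
+        learner = CalibratedLearner(NaiveBayesLearner(),
+                                    CalibratedLearner.Isotonic)
+        model = learner(data)                 # a CalibratedClassifier
+        probs = model(data, ret=Model.Probs)  # calibrated probabilities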
+ """
+ __returns__ = CalibratedClassifier
+
+ Sigmoid, Isotonic = range(2)
+
+ def __init__(self, base_learner, calibration_method=Sigmoid):
+ super().__init__()
+ self.base_learner = base_learner
+ self.calibration_method = calibration_method
+
+ def fit_storage(self, data):
+ """
+        Induce a model using the provided `base_learner`, compute probabilities
+        on the training data and use sklearn's `_SigmoidCalibration` or
+        `IsotonicRegression` to prepare calibrators.
+ """
+ res = TestOnTrainingData(data, [self.base_learner], store_models=True)
+ model = res.models[0, 0]
+ probabilities = res.probabilities[0]
+ return self.get_model(model, res.actual, probabilities)
+
+ def get_model(self, model, ytrue, probabilities):
+ if self.calibration_method == CalibratedLearner.Sigmoid:
+ fitter = _SigmoidCalibration()
+ else:
+ fitter = IsotonicRegression(out_of_bounds='clip')
+ probabilities[np.isinf(probabilities)] = 1
+ calibrators = [fitter.fit(cls_probs, ytrue)
+ for cls_idx, cls_probs in enumerate(probabilities.T)]
+ return CalibratedClassifier(model, calibrators)
diff --git a/Orange/classification/tests/test_calibration.py b/Orange/classification/tests/test_calibration.py
new file mode 100644
index 00000000000..a538a3b1870
--- /dev/null
+++ b/Orange/classification/tests/test_calibration.py
@@ -0,0 +1,203 @@
+import unittest
+from unittest.mock import Mock, patch
+
+import numpy as np
+
+from Orange.base import Model
+from Orange.classification.calibration import \
+ ThresholdLearner, ThresholdClassifier, \
+ CalibratedLearner, CalibratedClassifier
+from Orange.data import Table
+
+
+class TestThresholdClassifier(unittest.TestCase):
+ def setUp(self):
+ probs1 = np.array([0.3, 0.5, 0.2, 0.8, 0.9, 0]).reshape(-1, 1)
+ self.probs = np.hstack((1 - probs1, probs1))
+ base_model = Mock(return_value=self.probs)
+ base_model.domain.class_var.is_discrete = True
+ base_model.domain.class_var.values = ["a", "b"]
+ self.model = ThresholdClassifier(base_model, 0.5)
+ self.data = Mock()
+
+ def test_threshold(self):
+ vals = self.model(self.data)
+ np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])
+
+ self.model.threshold = 0.8
+ vals = self.model(self.data)
+ np.testing.assert_equal(vals, [0, 0, 0, 1, 1, 0])
+
+ self.model.threshold = 0
+ vals = self.model(self.data)
+ np.testing.assert_equal(vals, [1] * 6)
+
+ def test_return_types(self):
+ vals = self.model(self.data, ret=Model.Value)
+ np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])
+
+ vals = self.model(self.data)
+ np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])
+
+ probs = self.model(self.data, ret=Model.Probs)
+ np.testing.assert_equal(probs, self.probs)
+
+ vals, probs = self.model(self.data, ret=Model.ValueProbs)
+ np.testing.assert_equal(vals, [0, 1, 0, 1, 1, 0])
+ np.testing.assert_equal(probs, self.probs)
+
+ def test_nans(self):
+ self.probs[1, :] = np.nan
+ vals, probs = self.model(self.data, ret=Model.ValueProbs)
+ np.testing.assert_equal(vals, [0, np.nan, 0, 1, 1, 0])
+ np.testing.assert_equal(probs, self.probs)
+
+ def test_non_binary_base(self):
+ base_model = Mock()
+ base_model.domain.class_var.is_discrete = True
+ base_model.domain.class_var.values = ["a"]
+ self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5)
+
+ base_model.domain.class_var.values = ["a", "b", "c"]
+ self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5)
+
+ base_model.domain.class_var = Mock()
+ base_model.domain.class_var.is_discrete = False
+ self.assertRaises(ValueError, ThresholdClassifier, base_model, 0.5)
+
+
+class TestThresholdLearner(unittest.TestCase):
+ @patch("Orange.evaluation.performance_curves.Curves.from_results")
+ @patch("Orange.classification.calibration.TestOnTrainingData")
+ def test_fit_storage(self, test_on_training, curves_from_results):
+ curves_from_results.return_value = curves = Mock()
+ curves.probs = np.array([0.1, 0.15, 0.3, 0.45, 0.6, 0.8])
+ curves.ca = lambda: np.array([0.1, 0.7, 0.4, 0.4, 0.3, 0.1])
+ curves.f1 = lambda: np.array([0.1, 0.2, 0.4, 0.4, 0.3, 0.1])
+ model = Mock()
+ model.domain.class_var.is_discrete = True
+ model.domain.class_var.values = ("a", "b")
+ data = Table("heart_disease")
+ learner = Mock()
+ test_on_training.return_value = res = Mock()
+ res.models = np.array([[model]])
+ test_on_training.return_value = res
+
+ thresh_learner = ThresholdLearner(
+ base_learner=learner,
+ threshold_criterion=ThresholdLearner.OptimizeCA)
+ thresh_model = thresh_learner(data)
+ self.assertEqual(thresh_model.threshold, 0.15)
+ args, kwargs = test_on_training.call_args
+ self.assertEqual(len(args), 2)
+ self.assertIs(args[0], data)
+ self.assertIs(args[1][0], learner)
+ self.assertEqual(len(args[1]), 1)
+ self.assertEqual(kwargs, {"store_models": 1})
+
+ thresh_learner = ThresholdLearner(
+ base_learner=learner,
+ threshold_criterion=ThresholdLearner.OptimizeF1)
+ thresh_model = thresh_learner(data)
+ self.assertEqual(thresh_model.threshold, 0.45)
+
+ def test_non_binary_class(self):
+ thresh_learner = ThresholdLearner(
+ base_learner=Mock(),
+ threshold_criterion=ThresholdLearner.OptimizeF1)
+
+ data = Mock()
+ data.domain.class_var.is_discrete = True
+ data.domain.class_var.values = ["a"]
+ self.assertRaises(ValueError, thresh_learner.fit_storage, data)
+
+ data.domain.class_var.values = ["a", "b", "c"]
+ self.assertRaises(ValueError, thresh_learner.fit_storage, data)
+
+ data.domain.class_var = Mock()
+ data.domain.class_var.is_discrete = False
+ self.assertRaises(ValueError, thresh_learner.fit_storage, data)
+
+
+class TestCalibratedClassifier(unittest.TestCase):
+ def setUp(self):
+ probs1 = np.array([0.3, 0.5, 0.2, 0.8, 0.9, 0]).reshape(-1, 1)
+ self.probs = np.hstack((1 - probs1, probs1))
+ base_model = Mock(return_value=self.probs)
+ base_model.domain.class_var.is_discrete = True
+ base_model.domain.class_var.values = ["a", "b"]
+ self.model = CalibratedClassifier(base_model, None)
+ self.data = Mock()
+
+ def test_call(self):
+ calprobs = np.arange(self.probs.size).reshape(self.probs.shape)
+ calprobs = calprobs / np.sum(calprobs, axis=1)[:, None]
+ calprobs[-1] = [0.7, 0.3]
+ self.model.calibrated_probs = Mock(return_value=calprobs)
+
+ probs = self.model(self.data, ret=Model.Probs)
+ self.model.calibrated_probs.assert_called_with(self.probs)
+ np.testing.assert_almost_equal(probs, calprobs)
+
+ vals = self.model(self.data, ret=Model.Value)
+ np.testing.assert_almost_equal(vals, [1, 1, 1, 1, 1, 0])
+
+ vals, probs = self.model(self.data, ret=Model.ValueProbs)
+ np.testing.assert_almost_equal(probs, calprobs)
+ np.testing.assert_almost_equal(vals, [1, 1, 1, 1, 1, 0])
+
+ def test_calibrated_probs(self):
+ self.model.calibrators = None
+ calprobs = self.model.calibrated_probs(self.probs)
+ np.testing.assert_equal(calprobs, self.probs)
+ self.assertIsNot(calprobs, self.probs)
+
+ calibrator = Mock()
+ calibrator.predict = lambda x: x**2
+ self.model.calibrators = [calibrator] * 2
+ calprobs = self.model.calibrated_probs(self.probs)
+ expprobs = self.probs ** 2 / np.sum(self.probs ** 2, axis=1)[:, None]
+ np.testing.assert_almost_equal(calprobs, expprobs)
+
+ self.probs[1] = 0
+ self.probs[2] = np.nan
+ expprobs[1] = 0.5
+ expprobs[2] = np.nan
+ calprobs = self.model.calibrated_probs(self.probs)
+ np.testing.assert_almost_equal(calprobs, expprobs)
+
+
+class TestCalibratedLearner(unittest.TestCase):
+ @patch("Orange.classification.calibration._SigmoidCalibration.fit")
+ @patch("Orange.classification.calibration.TestOnTrainingData")
+ def test_fit_storage(self, test_on_training, sigmoid_fit):
+ data = Table("heart_disease")
+ learner = Mock()
+
+ model = Mock()
+ model.domain.class_var.is_discrete = True
+ model.domain.class_var.values = ("a", "b")
+
+ test_on_training.return_value = res = Mock()
+ res.models = np.array([[model]])
+ res.probabilities = np.arange(20, dtype=float).reshape(1, 5, 4)
+ test_on_training.return_value = res
+
+ sigmoid_fit.return_value = Mock()
+
+ cal_learner = CalibratedLearner(
+ base_learner=learner, calibration_method=CalibratedLearner.Sigmoid)
+ cal_model = cal_learner(data)
+
+ self.assertIs(cal_model.base_model, model)
+ self.assertEqual(cal_model.calibrators, [sigmoid_fit.return_value] * 4)
+ args, kwargs = test_on_training.call_args
+ self.assertEqual(len(args), 2)
+ self.assertIs(args[0], data)
+ self.assertIs(args[1][0], learner)
+ self.assertEqual(len(args[1]), 1)
+ self.assertEqual(kwargs, {"store_models": 1})
+
+ for call, cls_probs in zip(sigmoid_fit.call_args_list,
+ res.probabilities[0].T):
+ np.testing.assert_equal(call[0][0], cls_probs)
diff --git a/Orange/evaluation/performance_curves.py b/Orange/evaluation/performance_curves.py
new file mode 100644
index 00000000000..c7dee568e53
--- /dev/null
+++ b/Orange/evaluation/performance_curves.py
@@ -0,0 +1,150 @@
+import numpy as np
+
+
+class Curves:
+ # names of scores are standard acronyms, pylint: disable=invalid-name
+ """
+ Computation of performance curves (ca, f1, precision, recall and the rest
+ of the zoo) from test results.
+
+    The class works with binary classes. Attribute `probs` contains ordered
+    probabilities, and each curve gives the performance statistics of a
+    classifier that labels an instance as positive when its predicted
+    probability equals or exceeds a threshold in `probs`; that is,
+    `sensitivity[i]` is the sensitivity of the classifier that classifies an
+    instance as positive if the probability of being positive is at least
+    `probs[i]`.
+
+    The class can be constructed by giving `probs` and `ytrue`, or from test
+    results (see :obj:`Curves.from_results`). The latter removes instances
+    with missing class values or predicted probabilities.
+
+    The class treats all results as if they came from a single run, rather
+    than computing separate curves per fold and averaging them.
+
+ Arguments:
+ probs (np.ndarray): vector of predicted probabilities
+ ytrue (np.ndarray): corresponding true classes
+
+ Attributes:
+ probs (np.ndarray): ordered vector of predicted probabilities
+ ytrue (np.ndarray): corresponding true classes
+ tot (int): total number of data instances
+ p (int): number of real positive instances
+ n (int): number of real negative instances
+        tp (np.ndarray): number of true positives (property computed from `fn`)
+        fp (np.ndarray): number of false positives (property computed from `tn`)
+        tn (np.ndarray): number of true negatives (property computed from `fn`)
+        fn (np.ndarray): number of false negatives (precomputed, not a property)
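+
+    A small worked example (hypothetical values; labels are coded as 0/1):
+
+        curves = Curves(np.array([1., 0., 1.]), np.array([.2, .6, .9]))
+        curves.probs  # [0.2, 0.6, 0.9, 1.0]; a sentinel 1 is appended
+        curves.ca()   # [2/3, 1/3, 2/3, 1/3]: at threshold 0.2 everything
+                      # is classified as positive, at 1.0 nothing is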
+ """
+ def __init__(self, ytrue, probs):
+ sortind = np.argsort(probs)
+ self.probs = np.hstack((probs[sortind], [1]))
+ self.ytrue = ytrue[sortind]
+ self.fn = np.hstack(([0], np.cumsum(self.ytrue)))
+ self.tot = len(probs)
+ self.p = self.fn[-1]
+ self.n = self.tot - self.p
+
+ @classmethod
+ def from_results(cls, results, target_class=None, model_index=None):
+ """
+ Construct an instance of `Curves` from test results.
+
+ Args:
+ results (:obj:`Orange.evaluation.testing.Results`): test results
+ target_class (int): target class index; if the class is binary,
+ this defaults to `1`, otherwise it must be given
+ model_index (int): model index; if there is only one model, this
+ argument can be omitted
+
+ Returns:
+ curves (:obj:`Curves`)
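+
+        A sketch of typical use (assuming binary-class results obtained
+        with `TestOnTrainingData` from Orange.evaluation):
+
+            res = TestOnTrainingData(store_models=True)(data, [learner])
+            curves = Curves.from_results(res)
+            best_thresh = curves.probs[np.argmax(curves.ca())]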
+ """
+ if model_index is None:
+ if results.probabilities.shape[0] != 1:
+ raise ValueError("Argument 'model_index' is required when "
+ "there are multiple models")
+ model_index = 0
+ if target_class is None:
+ if results.probabilities.shape[2] != 2:
+ raise ValueError("Argument 'target_class' is required when the "
+ "class is not binary")
+ target_class = 1
+ actual = results.actual
+ probs = results.probabilities[model_index, :, target_class]
+ nans = np.isnan(actual) + np.isnan(probs)
+ if nans.any():
+ actual = actual[~nans]
+ probs = probs[~nans]
+ return cls(actual == target_class, probs)
+
+ @property
+ def tn(self):
+ return np.arange(self.tot + 1) - self.fn
+
+ @property
+ def tp(self):
+ return self.p - self.fn
+
+ @property
+ def fp(self):
+ return self.n - self.tn
+
+ def ca(self):
+ """Classification accuracy curve"""
+ return (self.tp + self.tn) / self.tot
+
+ def f1(self):
+ """F1 curve"""
+ return 2 * self.tp / (2 * self.tp + self.fp + self.fn)
+
+ def sensitivity(self):
+ """Sensitivity curve"""
+ return self.tp / self.p
+
+ def specificity(self):
+ """Specificity curve"""
+ return self.tn / self.n
+
+ def precision(self):
+ """
+ Precision curve
+
+        The last element represents precision at threshold 1. Unless the
+        probability 1 actually appears in the data, no instance is classified
+        as positive at this threshold, so precision is undefined. To avoid
+        this, we copy the previous value into the last.
+ """
+ tp_fp = np.arange(self.tot, -1, -1)
+ tp_fp[-1] = 1 # avoid division by zero
+ prec = self.tp / tp_fp
+ prec[-1] = prec[-2]
+ return prec
+
+ def recall(self):
+ """Recall curve"""
+ return self.sensitivity()
+
+ def ppv(self):
+ """PPV curve; see the comment at :obj:`precision`"""
+ return self.precision()
+
+ def npv(self):
+ """
+ NPV curve
+
+        The first value is undefined (nothing is classified as negative).
+        To avoid this, we copy the second value into the first.
+ """
+ tn_fn = np.arange(self.tot + 1)
+ tn_fn[0] = 1 # avoid division by zero
+ npv = self.tn / tn_fn
+ npv[0] = npv[1]
+ return npv
+
+ def fpr(self):
+ """FPR curve"""
+ return self.fp / self.n
+
+ def tpr(self):
+ """TPR curve"""
+ return self.sensitivity()
diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py
index 92c68d1c13f..93c0d563238 100644
--- a/Orange/evaluation/testing.py
+++ b/Orange/evaluation/testing.py
@@ -171,7 +171,7 @@ def set_or_raise(value, exp_values, msg):
"mismatching number of class values")
nmethods = set_or_raise(
nmethods, [learners is not None and len(learners),
- models is not None and len(models),
+ models is not None and models.shape[1],
failed is not None and len(failed),
predicted is not None and predicted.shape[0],
probabilities is not None and probabilities.shape[0]],
@@ -317,7 +317,7 @@ def split_by_model(self):
res.probabilities = self.probabilities[(i,), :, :]
if self.models is not None:
- res.models = self.models[:, i]
+ res.models = self.models[:, i:i + 1]
res.failed = [self.failed[i]]
yield res
@@ -365,7 +365,7 @@ def __new__(cls,
"and train_data are omitted")
return self
- warn("calling Validation's constructor with data and learners"
+ warn("calling Validation's constructor with data and learners "
"is deprecated;\nconstruct an instance and call it",
DeprecationWarning, stacklevel=2)
diff --git a/Orange/evaluation/tests/__init__.py b/Orange/evaluation/tests/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/Orange/evaluation/tests/test_performance_curves.py b/Orange/evaluation/tests/test_performance_curves.py
new file mode 100644
index 00000000000..a73d7165557
--- /dev/null
+++ b/Orange/evaluation/tests/test_performance_curves.py
@@ -0,0 +1,125 @@
+import unittest
+from unittest.mock import patch
+
+import numpy as np
+
+from Orange.evaluation.testing import Results
+from Orange.evaluation.performance_curves import Curves
+
+
+# Test data and sensitivity/specificity are taken from
+# Tom Fawcett: An introduction to ROC analysis, with one true positive instance
+# removed, so that the numbers of positive and negative instances do not match
+
+class TestCurves(unittest.TestCase):
+ def setUp(self):
+ n, p = (0, 1)
+ self.data = np.array([
+ (p, .8), (n, .7), (p, .6), (p, .55), (p, .54), (n, .53),
+ (n, .52), (p, .51), (n, .505), (p, .4), (n, .39), (p, .38),
+ (n, .37), (n, .36), (n, .35), (p, .34), (n, .33), (p, .30), (n, .1)
+ ])
+
+ def test_curves(self):
+ np.random.shuffle(self.data)
+ ytrue, probs = self.data.T
+ curves = Curves(ytrue, probs)
+
+ tn = np.array(
+ [0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 9, 9, 10, 10])
+ np.testing.assert_equal(curves.tn, tn)
+ np.testing.assert_equal(curves.fp, 10 - tn)
+ np.testing.assert_almost_equal(curves.specificity(), tn / 10)
+
+ tp = np.array(
+ [9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 5, 5, 4, 4, 4, 3, 2, 1, 1, 0])
+ np.testing.assert_equal(curves.tp, tp)
+ np.testing.assert_equal(curves.fn, 9 - tp)
+ np.testing.assert_almost_equal(curves.sensitivity(), tp / 9)
+
+ np.testing.assert_almost_equal(
+ curves.ca(),
+ np.array([9, 10, 9, 10, 9, 10, 11, 12, 11, 12, 11, 12, 11, 12,
+ 13, 12, 11, 10, 11, 10]) / 19)
+
+ precision = np.array(
+ [9 / 19, 9 / 18, 8 / 17, 8 / 16, 7 / 15, 7 / 14, 7 / 13,
+ 7 / 12, 6 / 11, 6 / 10, 5 / 9, 5 / 8, 4 / 7, 4 / 6,
+ 4 / 5, 3 / 4, 2 / 3, 1 / 2, 1 / 1, 1])
+ np.testing.assert_almost_equal(curves.precision(), precision)
+ np.testing.assert_almost_equal(curves.recall(), tp / 9)
+
+ np.testing.assert_almost_equal(curves.ppv(), precision)
+ np.testing.assert_almost_equal(
+ curves.npv(),
+ np.array([1, 1 / 1, 1 / 2, 2 / 3, 2 / 4, 3 / 5, 4 / 6, 5 / 7,
+ 5 / 8, 6 / 9, 6 / 10, 7 / 11, 7 / 12, 8 / 13, 9 / 14,
+ 9 / 15, 9 / 16, 9 / 17, 10 / 18, 10 / 19]))
+
+ np.testing.assert_almost_equal(curves.tpr(), tp / 9)
+ np.testing.assert_almost_equal(curves.fpr(), (10 - tn) / 10)
+
+ @patch("Orange.evaluation.performance_curves.Curves.__init__",
+ return_value=None)
+ def test_curves_from_results(self, init):
+ res = Results()
+ ytrue, probs = self.data.T
+ res.actual = ytrue.astype(float)
+ res.probabilities = np.vstack((1 - probs, probs)).T.reshape(1, -1, 2)
+ Curves.from_results(res)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, ytrue)
+ np.testing.assert_equal(cprobs, probs)
+
+ Curves.from_results(res, target_class=0)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, 1 - ytrue)
+ np.testing.assert_equal(cprobs, 1 - probs)
+
+ res.actual = ytrue.astype(float)
+ res.probabilities = np.random.random((2, 19, 2))
+ res.probabilities[1] = np.vstack((1 - probs, probs)).T
+
+ Curves.from_results(res, model_index=1)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, ytrue)
+ np.testing.assert_equal(cprobs, probs)
+
+ self.assertRaises(ValueError, Curves.from_results, res)
+
+ ytrue[ytrue == 0] = 2 * (np.arange(10) % 2)
+ res.actual = ytrue.astype(float)
+ res.probabilities = np.random.random((2, 19, 3))
+ res.probabilities[1] = np.vstack(
+ ((1 - probs) / 3, probs, (1 - probs) * 2 / 3)).T
+
+ Curves.from_results(res, model_index=1, target_class=1)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, ytrue == 1)
+ np.testing.assert_equal(cprobs, probs)
+
+ Curves.from_results(res, model_index=1, target_class=0)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, ytrue == 0)
+ np.testing.assert_equal(cprobs, (1 - probs) / 3)
+
+ Curves.from_results(res, model_index=1, target_class=2)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, ytrue == 2)
+ np.testing.assert_equal(cprobs, (1 - probs) * 2 / 3)
+
+ self.assertRaises(ValueError, Curves.from_results, res, model_index=1)
+
+ @patch("Orange.evaluation.performance_curves.Curves.__init__",
+ return_value=None)
+ def test_curves_from_results_nans(self, init):
+ res = Results()
+ ytrue, probs = self.data.T
+ ytrue[0] = np.nan
+ probs[-1] = np.nan
+ res.actual = ytrue.astype(float)
+ res.probabilities = np.vstack((1 - probs, probs)).T.reshape(1, -1, 2)
+ Curves.from_results(res)
+ cytrue, cprobs = init.call_args[0]
+ np.testing.assert_equal(cytrue, ytrue[1:-1])
+ np.testing.assert_equal(cprobs, probs[1:-1])
diff --git a/Orange/tests/test_evaluation_testing.py b/Orange/tests/test_evaluation_testing.py
index a57910eb971..a5f78cb2972 100644
--- a/Orange/tests/test_evaluation_testing.py
+++ b/Orange/tests/test_evaluation_testing.py
@@ -233,7 +233,7 @@ def test_split_by_model(self):
self.assertTrue((result.predicted == res.predicted[i]).all())
self.assertTrue((result.probabilities == res.probabilities[i]).all())
self.assertEqual(len(result.models), 5)
- for model in result.models:
+ for model in result.models[0]:
self.assertIsInstance(model, learners[i].__returns__)
self.assertSequenceEqual(result.learners, [res.learners[i]])
@@ -756,7 +756,7 @@ def setUp(self):
self.row_indices = np.arange(100)
self.folds = (range(50), range(10, 60)), (range(50, 100), range(50))
self.learners = [MajorityLearner(), MajorityLearner()]
- self.models = [Mock(), Mock()]
+ self.models = np.array([[Mock(), Mock()]])
self.predicted = np.zeros((2, 100))
self.probabilities = np.zeros((2, 100, 3))
self.failed = [False, True]
diff --git a/Orange/widgets/evaluate/contexthandlers.py b/Orange/widgets/evaluate/contexthandlers.py
index d79def2ca60..3ad2796698d 100644
--- a/Orange/widgets/evaluate/contexthandlers.py
+++ b/Orange/widgets/evaluate/contexthandlers.py
@@ -1,47 +1,30 @@
+from Orange.data import Variable
from Orange.widgets import settings
-from Orange.widgets.utils import getdeepattr
class EvaluationResultsContextHandler(settings.ContextHandler):
- def __init__(self, targetAttr, selectedAttr):
- super().__init__()
- self.targetAttr, self.selectedAttr = targetAttr, selectedAttr
+ """Context handler for evaluation results"""
- #noinspection PyMethodOverriding
- def match(self, context, cnames, cvalues):
- return (cnames, cvalues) == (
- context.classifierNames, context.classValues) and 2
+ def open_context(self, widget, classes, classifier_names):
+ if isinstance(classes, Variable):
+ if classes.is_discrete:
+ classes = classes.values
+ else:
+ classes = None
+ super().open_context(widget, classes, classifier_names)
- def fast_save(self, widget, name, value):
- context = widget.current_context
- if name == self.targetAttr:
- context.targetClass = value
- elif name == self.selectedAttr:
- context.selectedClassifiers = list(value)
+ def new_context(self, classes, classifier_names):
+ context = super().new_context()
+ context.classes = classes
+ context.classifier_names = classifier_names
+ return context
- def settings_from_widget(self, widget, *args):
- super().settings_from_widget(widget, *args)
- context = widget.current_context
- context.targetClass = getdeepattr(widget, self.targetAttr)
- context.selectedClassifiers = list(getdeepattr(self.selectedAttr))
-
- def settings_to_widget(self, widget, *args):
- super().settings_to_widget(widget, *args)
- context = widget.current_context
- if context.targetClass is not None:
- setattr(widget, self.targetAttr, context.targetClass)
- if context.selectedClassifiers is not None:
- setattr(widget, self.selectedAttr, context.selectedClassifiers)
-
- #noinspection PyMethodOverriding
- def find_or_create_context(self, widget, results):
- cnames = [c.name for c in results.classifiers]
- cvalues = results.classValues
- context, isNew = super().find_or_create_context(
- widget, results.classifierNames, results.classValues)
- if isNew:
- context.classifierNames = results.classifierNames
- context.classValues = results.classValues
- context.selectedClassifiers = None
- context.targetClass = None
- return context, isNew
+ def match(self, context, classes, classifier_names):
+ if classifier_names != context.classifier_names:
+ return self.NO_MATCH
+ elif isinstance(classes, Variable) and classes.is_continuous:
+ return (self.PERFECT_MATCH if context.classes is None
+ else self.NO_MATCH)
+ else:
+ return (self.PERFECT_MATCH if context.classes == classes
+ else self.NO_MATCH)
diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py
index c757932adea..562c3d5aa01 100644
--- a/Orange/widgets/evaluate/owcalibrationplot.py
+++ b/Orange/widgets/evaluate/owcalibrationplot.py
@@ -1,37 +1,61 @@
-"""
-Calibration Plot Widget
------------------------
-
-"""
from collections import namedtuple
import numpy as np
-from AnyQt.QtWidgets import QListWidget
+from AnyQt.QtCore import Qt, QSize
+from AnyQt.QtWidgets import QListWidget, QSizePolicy
import pyqtgraph as pg
-import Orange
+from Orange.base import Model
+from Orange.classification import ThresholdClassifier, CalibratedLearner
+from Orange.evaluation import Results
+from Orange.evaluation.performance_curves import Curves
from Orange.widgets import widget, gui, settings
-from Orange.widgets.evaluate.utils import \
- check_results_adequacy, results_for_preview
+from Orange.widgets.evaluate.contexthandlers import \
+ EvaluationResultsContextHandler
+from Orange.widgets.evaluate.utils import results_for_preview
from Orange.widgets.utils import colorpalette, colorbrewer
from Orange.widgets.utils.widgetpreview import WidgetPreview
-from Orange.widgets.widget import Input
+from Orange.widgets.widget import Input, Output, Msg
from Orange.widgets import report
-Curve = namedtuple(
- "Curve",
- ["x", "y"]
-)
-
-PlotCurve = namedtuple(
- "PlotCurve",
- ["curve",
- "curve_item",
- "rug_item"]
-)
+MetricDefinition = namedtuple(
+ "metric_definition",
+ ("name", "functions", "short_names", "explanation"))
+
+Metrics = [MetricDefinition(*args) for args in (
+ ("Calibration curve", None, (), ""),
+ ("Classification accuracy", (Curves.ca, ), (), ""),
+ ("F1", (Curves.f1, ), (), ""),
+ ("Sensitivity and specificity",
+ (Curves.sensitivity, Curves.specificity),
+ ("sens", "spec"),
+ "
Sensitivity (falling) is the proportion of correctly "
+ "detected positive instances (TP / P).
"
+ "Specificity (rising) is the proportion of detected "
+ "negative instances (TP / N).
"),
+ ("Precision and recall",
+ (Curves.precision, Curves.recall),
+ ("prec", "recall"),
+ "Precision (rising) is the fraction of retrieved instances "
+ "that are relevant, TP / (TP + FP).
"
+ "Recall (falling) is the proportion of discovered relevant "
+ "instances, TP / P.
"),
+ ("Pos and neg predictive value",
+ (Curves.ppv, Curves.npv),
+ ("PPV", "TPV"),
+ "Positive predictive value (rising) is the proportion of "
+ "correct positives, TP / (TP + FP).
"
+ "Negative predictive value is the proportion of correct "
+ "negatives, TN / (TN + FN).
"),
+ ("True and false positive rate",
+ (Curves.tpr, Curves.fpr),
+ ("TPR", "FPR"),
+ "True and false positive rate are proportions of detected "
+ "and omitted positive instances
"),
+)]
class OWCalibrationPlot(widget.OWWidget):
@@ -42,15 +66,41 @@ class OWCalibrationPlot(widget.OWWidget):
keywords = []
class Inputs:
- evaluation_results = Input("Evaluation Results", Orange.evaluation.Results)
+ evaluation_results = Input("Evaluation Results", Results)
- class Warning(widget.OWWidget.Warning):
- empty_input = widget.Msg(
- "Empty result on input. Nothing to display.")
+ class Outputs:
+ calibrated_model = Output("Calibrated Model", Model)
+
+ class Error(widget.OWWidget.Error):
+ non_discrete_target = Msg("Calibration plot requires a discrete target")
+ empty_input = widget.Msg("Empty result on input. Nothing to display.")
+ nan_classes = \
+ widget.Msg("Remove test data instances with unknown classes")
+ all_target_class = widget.Msg(
+ "All data instances belong to target class")
+ no_target_class = widget.Msg(
+ "No data instances belong to target class")
- target_index = settings.Setting(0)
- selected_classifiers = settings.Setting([])
+ class Warning(widget.OWWidget.Warning):
+ omitted_folds = widget.Msg(
+ "Test folds where all data belongs to (non)-target are not shown")
+ omitted_nan_prob_points = widget.Msg(
+ "Instance for which the model couldn't compute probabilities are"
+ "skipped")
+ no_valid_data = widget.Msg("No valid data for model(s) {}")
+
+ class Information(widget.OWWidget.Information):
+ no_output = Msg("Can't output a model: {}")
+
+ settingsHandler = EvaluationResultsContextHandler()
+ target_index = settings.ContextSetting(0)
+ selected_classifiers = settings.ContextSetting([])
+ score = settings.Setting(0)
+ output_calibration = settings.Setting(0)
+ fold_curves = settings.Setting(False)
display_rug = settings.Setting(True)
+ threshold = settings.Setting(0.5)
+ auto_commit = settings.Setting(True)
graph_name = "plot"
@@ -58,56 +108,100 @@ def __init__(self):
super().__init__()
self.results = None
+ self.scores = None
self.classifier_names = []
self.colors = []
- self._curve_data = {}
+ self.line = None
- box = gui.vBox(self.controlArea, "Plot")
- tbox = gui.vBox(box, "Target Class")
- tbox.setFlat(True)
+ self._last_score_value = -1
+ box = gui.vBox(self.controlArea, box="Settings")
self.target_cb = gui.comboBox(
- tbox, self, "target_index", callback=self._replot,
+ box, self, "target_index", label="Target:",
+ orientation=Qt.Horizontal, callback=self.target_index_changed,
contentsLength=8)
-
- cbox = gui.vBox(box, "Classifier")
- cbox.setFlat(True)
-
- self.classifiers_list_box = gui.listBox(
- box, self, "selected_classifiers", "classifier_names",
- selectionMode=QListWidget.MultiSelection,
+ gui.checkBox(
+ box, self, "display_rug", "Show rug",
+ callback=self._on_display_rug_changed)
+ gui.checkBox(
+ box, self, "fold_curves", "Curves for individual folds",
callback=self._replot)
- gui.checkBox(box, self, "display_rug", "Show rug",
- callback=self._on_display_rug_changed)
+ self.classifiers_list_box = gui.listBox(
+ self.controlArea, self, "selected_classifiers", "classifier_names",
+ box="Classifier", selectionMode=QListWidget.ExtendedSelection,
+ sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred),
+ sizeHint=QSize(150, 40),
+ callback=self._on_selection_changed)
+
+ box = gui.vBox(self.controlArea, "Metrics")
+ combo = gui.comboBox(
+ box, self, "score", items=(metric.name for metric in Metrics),
+ callback=self.score_changed)
+
+ self.explanation = gui.widgetLabel(
+ box, wordWrap=True, fixedWidth=combo.sizeHint().width())
+ self.explanation.setContentsMargins(8, 8, 0, 0)
+ font = self.explanation.font()
+ font.setPointSizeF(0.85 * font.pointSizeF())
+ self.explanation.setFont(font)
+
+ gui.radioButtons(
+ box, self, value="output_calibration",
+ btnLabels=("Sigmoid calibration", "Isotonic calibration"),
+ label="Output model calibration", callback=self.apply)
+
+ self.info_box = gui.widgetBox(self.controlArea, "Info")
+ self.info_label = gui.widgetLabel(self.info_box)
+
+ gui.auto_commit(
+ self.controlArea, self, "auto_commit", "Apply", commit=self.apply)
self.plotview = pg.GraphicsView(background="w")
self.plot = pg.PlotItem(enableMenu=False)
self.plot.setMouseEnabled(False, False)
self.plot.hideButtons()
- axis = self.plot.getAxis("bottom")
- axis.setLabel("Predicted Probability")
-
- axis = self.plot.getAxis("left")
- axis.setLabel("Observed Average")
+ for axis_name in ("bottom", "left"):
+ axis = self.plot.getAxis(axis_name)
+ axis.setPen(pg.mkPen(color=0.0))
+ # Remove the condition (that is, allow setting this for bottom
+ # axis) when pyqtgraph is fixed
+ # Issue: https://github.com/pyqtgraph/pyqtgraph/issues/930
+ # Pull request: https://github.com/pyqtgraph/pyqtgraph/pull/932
+ if axis_name != "bottom": # remove if when pyqtgraph is fixed
+ axis.setStyle(stopAxisAtTick=(True, True))
self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0), padding=0.05)
self.plotview.setCentralItem(self.plot)
+
self.mainArea.layout().addWidget(self.plotview)
+ self._set_explanation()
@Inputs.evaluation_results
def set_results(self, results):
+ self.closeContext()
self.clear()
- results = check_results_adequacy(results, self.Error)
- if results is not None and not results.actual.size:
- self.Warning.empty_input()
- else:
- self.Warning.empty_input.clear()
- self.results = results
- if self.results is not None:
- self._initialize(results)
- self._replot()
+ self.Error.clear()
+ self.Information.clear()
+
+ self.results = None
+ if results is not None:
+ if not results.domain.has_discrete_class:
+ self.Error.non_discrete_target()
+ elif not results.actual.size:
+ self.Error.empty_input()
+ elif np.any(np.isnan(results.actual)):
+ self.Error.nan_classes()
+ else:
+ self.results = results
+ self._initialize(results)
+ class_var = self.results.domain.class_var
+ self.target_index = int(len(class_var.values) == 2)
+ self.openContext(class_var, self.classifier_names)
+ self._replot()
+
+ self.apply()
def clear(self):
self.plot.clear()
@@ -117,106 +211,296 @@ def clear(self):
self.target_cb.clear()
self.target_index = 0
self.colors = []
- self._curve_data = {}
+
+ def target_index_changed(self):
+ if len(self.results.domain.class_var.values) == 2:
+ self.threshold = 1 - self.threshold
+ self._set_explanation()
+ self._replot()
+ self.apply()
+
+ def score_changed(self):
+ self._set_explanation()
+ self._replot()
+ if self._last_score_value != self.score:
+ self.apply()
+ self._last_score_value = self.score
+
+ def _set_explanation(self):
+ explanation = Metrics[self.score].explanation
+ if explanation:
+ self.explanation.setText(explanation)
+ self.explanation.show()
+ else:
+ self.explanation.hide()
+
+ if self.score == 0:
+ self.controls.output_calibration.show()
+ self.info_box.hide()
+ else:
+ self.controls.output_calibration.hide()
+ self.info_box.show()
+
+ axis = self.plot.getAxis("bottom")
+ axis.setLabel("Predicted probability" if self.score == 0
+ else "Threshold probability to classify as positive")
+
+ axis = self.plot.getAxis("left")
+ axis.setLabel(Metrics[self.score].name)
def _initialize(self, results):
- N = len(results.predicted)
+ n = len(results.predicted)
names = getattr(results, "learner_names", None)
if names is None:
- names = ["#{}".format(i + 1) for i in range(N)]
+ names = ["#{}".format(i + 1) for i in range(n)]
self.classifier_names = names
scheme = colorbrewer.colorSchemes["qualitative"]["Dark2"]
- if N > len(scheme):
+ if n > len(scheme):
scheme = colorpalette.DefaultRGBColors
- self.colors = colorpalette.ColorPaletteGenerator(N, scheme)
+ self.colors = colorpalette.ColorPaletteGenerator(n, scheme)
- for i in range(N):
+ for i in range(n):
item = self.classifiers_list_box.item(i)
item.setIcon(colorpalette.ColorPixmap(self.colors[i]))
- self.selected_classifiers = list(range(N))
- self.target_cb.addItems(results.data.domain.class_var.values)
-
- def plot_curve(self, clf_idx, target):
- if (clf_idx, target) in self._curve_data:
- return self._curve_data[clf_idx, target]
-
- ytrue = self.results.actual == target
- probs = self.results.probabilities[clf_idx, :, target]
- sortind = np.argsort(probs)
- probs = probs[sortind]
- ytrue = ytrue[sortind]
- if probs.size:
- xmin, xmax = probs.min(), probs.max()
- x = np.linspace(xmin, xmax, 100)
- if xmax != xmin:
- f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin))
- observed = f(x)
- else:
- observed = np.full(100, xmax)
- else:
- x = np.array([])
- observed = np.array([])
-
- curve = Curve(x, observed)
- curve_item = pg.PlotDataItem(
- x, observed, pen=pg.mkPen(self.colors[clf_idx], width=1),
- shadowPen=pg.mkPen(self.colors[clf_idx].lighter(160), width=2),
- symbol="+", symbolSize=4,
- antialias=True
- )
+ self.selected_classifiers = list(range(n))
+ self.target_cb.addItems(results.domain.class_var.values)
+ def _rug(self, data, pen_args):
+ color = pen_args["pen"].color()
rh = 0.025
- rug_x = np.c_[probs, probs]
- rug_x_true = rug_x[ytrue].ravel()
- rug_x_false = rug_x[~ytrue].ravel()
+ rug_x = np.c_[data.probs[:-1], data.probs[:-1]]
+ rug_x_true = rug_x[data.ytrue].ravel()
+ rug_x_false = rug_x[~data.ytrue].ravel()
rug_y_true = np.ones_like(rug_x_true)
rug_y_true[1::2] = 1 - rh
rug_y_false = np.zeros_like(rug_x_false)
rug_y_false[1::2] = rh
- rug1 = pg.PlotDataItem(
- rug_x_false, rug_y_false, pen=self.colors[clf_idx],
- connect="pairs", antialias=True
- )
- rug2 = pg.PlotDataItem(
- rug_x_true, rug_y_true, pen=self.colors[clf_idx],
- connect="pairs", antialias=True
- )
- self._curve_data[clf_idx, target] = PlotCurve(curve, curve_item, (rug1, rug2))
- return self._curve_data[clf_idx, target]
+ self.plot.plot(
+ rug_x_false, rug_y_false,
+ pen=color, connect="pairs", antialias=True)
+ self.plot.plot(
+ rug_x_true, rug_y_true,
+ pen=color, connect="pairs", antialias=True)
+
+ def plot_metrics(self, data, metrics, pen_args):
+ if metrics is None:
+ return self._prob_curve(data.ytrue, data.probs[:-1], pen_args)
+ ys = [metric(data) for metric in metrics]
+ for y in ys:
+ self.plot.plot(data.probs, y, **pen_args)
+ return data.probs, ys
+
+ def _prob_curve(self, ytrue, probs, pen_args):
+ xmin, xmax = probs.min(), probs.max()
+ x = np.linspace(xmin, xmax, 100)
+ if xmax != xmin:
+ f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin))
+ y = f(x)
+ else:
+ y = np.full(100, xmax)
+
+ self.plot.plot(x, y, symbol="+", symbolSize=4, **pen_args)
+ return x, (y, )
def _setup_plot(self):
target = self.target_index
- selected = self.selected_classifiers
- curves = [self.plot_curve(i, target) for i in selected]
+ results = self.results
+ metrics = Metrics[self.score].functions
+ plot_folds = self.fold_curves and results.folds is not None
+ self.scores = []
- for curve in curves:
- self.plot.addItem(curve.curve_item)
- if self.display_rug:
- self.plot.addItem(curve.rug_item[0])
- self.plot.addItem(curve.rug_item[1])
+ if not self._check_class_presence(results.actual == target):
+ return
- self.plot.plot([0, 1], [0, 1], antialias=True)
+ self.Warning.omitted_folds.clear()
+ self.Warning.omitted_nan_prob_points.clear()
+ no_valid_models = []
+ shadow_width = 4 + 4 * plot_folds
+ for clsf in self.selected_classifiers:
+ data = Curves.from_results(results, target, clsf)
+ if data.tot == 0: # all probabilities are nan
+ no_valid_models.append(clsf)
+ continue
+ if data.tot != results.probabilities.shape[1]: # some are nan
+ self.Warning.omitted_nan_prob_points()
+
+ color = self.colors[clsf]
+ pen_args = dict(
+ pen=pg.mkPen(color, width=1), antiAlias=True,
+ shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
+ self.scores.append(
+ (self.classifier_names[clsf],
+ self.plot_metrics(data, metrics, pen_args)))
+
+ if self.display_rug:
+ self._rug(data, pen_args)
+
+ if plot_folds:
+ pen_args = dict(
+ pen=pg.mkPen(color, width=1, style=Qt.DashLine),
+ antiAlias=True)
+ for fold in range(len(results.folds)):
+ fold_results = results.get_fold(fold)
+ fold_curve = Curves.from_results(fold_results, target, clsf)
+ # Can't check this before: p and n can be 0 because of
+ # nan probabilities
+ if fold_curve.p * fold_curve.n == 0:
+ self.Warning.omitted_folds()
+ self.plot_metrics(fold_curve, metrics, pen_args)
+
+ if no_valid_models:
+ self.Warning.no_valid_data(
+ ", ".join(self.classifier_names[i] for i in no_valid_models))
+
+ if self.score == 0:
+ self.plot.plot([0, 1], [0, 1], antialias=True)
+ else:
+ self.line = pg.InfiniteLine(
+ pos=self.threshold, movable=True,
+ pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
+ hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3),
+ bounds=(0, 1),
+ )
+ self.line.sigPositionChanged.connect(self.threshold_change)
+ self.line.sigPositionChangeFinished.connect(
+ self.threshold_change_done)
+ self.plot.addItem(self.line)
+
+ def _check_class_presence(self, ytrue):
+ self.Error.all_target_class.clear()
+ self.Error.no_target_class.clear()
+ if np.max(ytrue) == 0:
+ self.Error.no_target_class()
+ return False
+ if np.min(ytrue) == 1:
+ self.Error.all_target_class()
+ return False
+ return True
def _replot(self):
self.plot.clear()
if self.results is not None:
self._setup_plot()
+ self._update_info()
def _on_display_rug_changed(self):
self._replot()
+ def _on_selection_changed(self):
+ self._replot()
+ self.apply()
+
+ def threshold_change(self):
+ self.threshold = round(self.line.pos().x(), 2)
+ self.line.setPos(self.threshold)
+ self._update_info()
+
+ def get_info_text(self, short):
+ if short:
+ def elided(s):
+ return s[:17] + "..." if len(s) > 20 else s
+
+ text = f"""
+
+ | Threshold: p= |
+ {self.threshold:.2f}
|
+
"""
+
+ else:
+ def elided(s):
+ return s
+
+ text = f"""
+
+ | Threshold: |
+ p = {self.threshold:.2f}
+ |
+
+ """
+
+ if self.scores is not None:
+ short_names = Metrics[self.score].short_names
+ if short_names:
+ text += f"""
+ |
+ {" | ".join(f"{n} | "
+ for n in short_names)}
+
"""
+ for name, (probs, curves) in self.scores:
+ ind = min(np.searchsorted(probs, self.threshold),
+ len(probs) - 1)
+ text += f"| {elided(name)}: | "
+ text += "/ | ".join(f'{curve[ind]:.3f} | '
+ for curve in curves)
+ text += "
"
+ text += ""
+ return text
+
+ def _update_info(self):
+ self.info_label.setText(self.get_info_text(short=True))
+
+ def threshold_change_done(self):
+ self.apply()
+
+ def apply(self):
+ self.Information.no_output.clear()
+ wrapped = None
+ results = self.results
+ if results is not None:
+ problems = [
+ msg for condition, msg in (
+ (len(results.folds) > 1,
+ "each training data sample produces a different model"),
+ (results.models is None,
+ "test results do not contain stored models - try testing "
+ "on separate data or on training data"),
+ (len(self.selected_classifiers) != 1,
+ "select a single model - the widget can output only one"),
+ (self.score != 0 and len(results.domain.class_var.values) != 2,
+ "cannot calibrate non-binary classes"))
+ if condition]
+ if len(problems) == 1:
+ self.Information.no_output(problems[0])
+ elif problems:
+ self.Information.no_output(
+ "".join(f"\n - {problem}" for problem in problems))
+ else:
+ clsf_idx = self.selected_classifiers[0]
+ model = results.models[0, clsf_idx]
+ if self.score == 0:
+ cal_learner = CalibratedLearner(
+ None, self.output_calibration)
+ wrapped = cal_learner.get_model(
+ model, results.actual, results.probabilities[clsf_idx])
+ else:
+ threshold = [1 - self.threshold,
+ self.threshold][self.target_index]
+ wrapped = ThresholdClassifier(model, threshold)
+
+ self.Outputs.calibrated_model.send(wrapped)
+
def send_report(self):
if self.results is None:
return
+ self.report_items((
+ ("Target class", self.target_cb.currentText()),
+ ("Output model calibration",
+ self.score == 0
+ and ("Sigmoid calibration",
+ "Isotonic calibration")[self.output_calibration])
+ ))
caption = report.list_legend(self.classifiers_list_box,
self.selected_classifiers)
- self.report_items((("Target class", self.target_cb.currentText()),))
self.report_plot()
self.report_caption(caption)
+ self.report_caption(self.controls.score.currentText())
+
+ if self.score != 0:
+ self.report_raw(self.get_info_text(short=False))
def gaussian_smoother(x, y, sigma=1.0):
diff --git a/Orange/widgets/evaluate/owtestlearners.py b/Orange/widgets/evaluate/owtestlearners.py
index 0577b448950..d534bbe6a32 100644
--- a/Orange/widgets/evaluate/owtestlearners.py
+++ b/Orange/widgets/evaluate/owtestlearners.py
@@ -315,7 +315,7 @@ def set_learner(self, learner, key):
# Removed
self._invalidate([key])
del self.learners[key]
- else:
+ elif learner is not None:
self.learners[key] = InputLearner(learner, None, None)
self._invalidate([key])
@@ -735,7 +735,8 @@ def __update(self):
if self.resampling == OWTestLearners.TestOnTest:
test_f = partial(
- Orange.evaluation.TestOnTestData(store_data=True),
+ Orange.evaluation.TestOnTestData(
+ store_data=True, store_models=True),
self.data, self.test_data, learners_c, self.preprocessor
)
else:
@@ -756,7 +757,8 @@ def __update(self):
stratified=self.shuffle_stratified,
random_state=rstate)
elif self.resampling == OWTestLearners.TestOnTrain:
- sampler = Orange.evaluation.TestOnTrainingData()
+ sampler = Orange.evaluation.TestOnTrainingData(
+ store_models=True)
else:
assert False, "self.resampling %s" % self.resampling
@@ -916,7 +918,7 @@ def is_empty(res):
res.probabilities = np.vstack((x.probabilities, y.probabilities))
if x.models is not None:
- res.models = [xm + ym for xm, ym in zip(x.models, y.models)]
+ res.models = np.hstack((x.models, y.models))
return res
diff --git a/Orange/widgets/evaluate/tests/base.py b/Orange/widgets/evaluate/tests/base.py
index 3100f1e1905..93fafea1e51 100644
--- a/Orange/widgets/evaluate/tests/base.py
+++ b/Orange/widgets/evaluate/tests/base.py
@@ -17,6 +17,6 @@ def test_many_evaluation_results(self):
classification.NaiveBayesLearner(),
classification.SGDClassificationLearner()
]
- res = evaluation.CrossValidation(data, learners, k=2, store_data=True)
+ res = evaluation.CrossValidation(k=2, store_data=True)(data, learners)
# this is a mixin; pylint: disable=no-member
self.send_signal("Evaluation Results", res)
diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
index 0575e03e8d1..e4f18231686 100644
--- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
+++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
@@ -1,12 +1,18 @@
import copy
import warnings
+from unittest.mock import Mock, patch
import numpy as np
+from AnyQt.QtCore import QItemSelection
+from pyqtgraph import InfiniteLine
+
from sklearn.exceptions import ConvergenceWarning
-from Orange.data import Table
+from Orange.data import Table, DiscreteVariable, Domain, ContinuousVariable
import Orange.evaluation
import Orange.classification
+from Orange.evaluation import Results
+from Orange.evaluation.performance_curves import Curves
from Orange.widgets.evaluate.tests.base import EvaluateTest
from Orange.widgets.evaluate.owcalibrationplot import OWCalibrationPlot
@@ -15,42 +21,620 @@
class TestOWCalibrationPlot(WidgetTest, EvaluateTest):
- @classmethod
- def setUpClass(cls):
- super().setUpClass()
- cls.lenses = data = Table(test_filename("datasets/lenses.tab"))
- cls.res = Orange.evaluation.TestOnTestData(
- train_data=data[::2], test_data=data[1::2],
- learners=[Orange.classification.MajorityLearner(),
- Orange.classification.KNNLearner()],
- store_data=True,
- )
-
def setUp(self):
super().setUp()
+
+ n, p = (0, 1)
+ actual, probs = np.array([
+ (p, .8), (n, .7), (p, .6), (p, .55), (p, .54), (n, .53), (n, .52),
+ (p, .51), (n, .505), (p, .4), (n, .39), (p, .38), (n, .37),
+ (n, .36), (n, .35), (p, .34), (n, .33), (p, .30), (n, .1)]).T
+ self.curves = Curves(actual, probs)
+ probs2 = (probs + 0.5) / 2 + 1
+ self.curves2 = Curves(actual, probs2)
+ pred = probs > 0.5
+ pred2 = probs2 > 0.5
+ probs = np.vstack((1 - probs, probs)).T
+ probs2 = np.vstack((1 - probs2, probs2)).T
+ domain = Domain([], DiscreteVariable("y", values=("a", "b")))
+ self.results = Results(
+ domain=domain,
+ actual=actual,
+ folds=np.array([Ellipsis]),
+ models=np.array([[Mock(), Mock()]]),
+ row_indices=np.arange(19),
+ predicted=np.array((pred, pred2)),
+ probabilities=np.array([probs, probs2]))
+
+ self.lenses = data = Table(test_filename("datasets/lenses.tab"))
+ majority = Orange.classification.MajorityLearner()
+ majority.name = "majority"
+ knn3 = Orange.classification.KNNLearner(n_neighbors=3)
+ knn3.name = "knn-3"
+ knn1 = Orange.classification.KNNLearner(n_neighbors=1)
+ knn1.name = "knn-1"
+ self.lenses_results = Orange.evaluation.TestOnTestData(
+ store_data=True, store_models=True)(
+ data=data[::2], test_data=data[1::2],
+ learners=[majority, knn3, knn1])
+ self.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
+
self.widget = self.create_widget(OWCalibrationPlot) # type: OWCalibrationPlot
warnings.filterwarnings("ignore", ".*", ConvergenceWarning)
- def test_basic(self):
- self.send_signal(self.widget.Inputs.evaluation_results, self.res)
- self.widget.controls.display_rug.click()
+ def test_initialization(self):
+ """Test initialization of lists and combos"""
+ def check_clsfr_names(names):
+ self.assertEqual(widget.classifier_names, names)
+ clsf_list = widget.controls.selected_classifiers
+ self.assertEqual(
+ [clsf_list.item(i).text() for i in range(clsf_list.count())],
+ names)
+
+ widget = self.widget
+ tcomb = widget.controls.target_index
+
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ check_clsfr_names(["majority", "knn-3", "knn-1"])
+ self.assertEqual(widget.selected_classifiers, [0, 1, 2])
+ self.assertEqual(
+ [tcomb.itemText(i) for i in range(tcomb.count())],
+ self.lenses.domain.class_var.values)
+ self.assertEqual(widget.target_index, 0)
+
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_clsfr_names(["#1", "#2"])
+ self.assertEqual(widget.selected_classifiers, [0, 1])
+ self.assertEqual(
+ [tcomb.itemText(i) for i in range(tcomb.count())], ["a", "b"])
+ self.assertEqual(widget.target_index, 1)
+
+ self.send_signal(widget.Inputs.evaluation_results, None)
+ check_clsfr_names([])
+ self.assertEqual(widget.selected_classifiers, [])
+ self.assertEqual(widget.controls.target_index.count(), 0)
+
+ def test_empty_input_error(self):
+ """Show an error when data is present but empty"""
+ widget = self.widget
+
+ res = copy.copy(self.results)
+ res.row_indices = res.row_indices[:0]
+ res.actual = res.actual[:0]
+ res.predicted = res.predicted[:, 0]
+ res.probabilities = res.probabilities[:, :0, :]
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertFalse(widget.Error.empty_input.is_shown())
+ self.assertTrue(bool(widget.plot.items))
+
+ self.send_signal(widget.Inputs.evaluation_results, res)
+ self.assertTrue(widget.Error.empty_input.is_shown())
+ self.assertIsNone(widget.results)
+ self.assertFalse(bool(widget.plot.items))
+
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertFalse(widget.Error.empty_input.is_shown())
+ self.assertTrue(bool(widget.plot.items))
+
+ def test_regression_input_error(self):
+ """Show an error for regression data"""
+ widget = self.widget
- def test_empty(self):
- res = copy.copy(self.res)
+ res = copy.copy(self.results)
+ res.domain = Domain([], ContinuousVariable("y"))
res.row_indices = res.row_indices[:0]
res.actual = res.actual[:0]
res.predicted = res.predicted[:, 0]
res.probabilities = res.probabilities[:, :0, :]
- self.send_signal(self.widget.Inputs.evaluation_results, res)
-
- def test_nan_input(self):
- res = copy.copy(self.res)
- res.actual = res.actual.copy()
- res.probabilities = res.probabilities.copy()
-
- res.actual[0] = np.nan
- res.probabilities[:, [0, 3], :] = np.nan
- self.send_signal(self.widget.Inputs.evaluation_results, res)
- self.assertTrue(self.widget.Error.invalid_results.is_shown())
- self.send_signal(self.widget.Inputs.evaluation_results, None)
- self.assertFalse(self.widget.Error.invalid_results.is_shown())
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertFalse(widget.Error.non_discrete_target.is_shown())
+ self.assertTrue(bool(widget.plot.items))
+
+ self.send_signal(widget.Inputs.evaluation_results, res)
+ self.assertTrue(widget.Error.non_discrete_target.is_shown())
+ self.assertIsNone(widget.results)
+ self.assertFalse(bool(widget.plot.items))
+
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertFalse(widget.Error.non_discrete_target.is_shown())
+ self.assertTrue(bool(widget.plot.items))
+
+ @staticmethod
+ def _set_combo(combo, val):
+ combo.setCurrentIndex(val)
+ combo.activated[int].emit(val)
+ combo.activated[str].emit(combo.currentText())
+
+ @staticmethod
+ def _set_radio_buttons(radios, val):
+ radios.buttons[val].click()
+
+ @staticmethod
+ def _set_list_selection(listview, selection):
+ model = listview.model()
+ selectionmodel = listview.selectionModel()
+ itemselection = QItemSelection()
+ for item in selection:
+ itemselection.select(model.index(item, 0), model.index(item, 0))
+ selectionmodel.select(itemselection, selectionmodel.ClearAndSelect)
+
+ def _set_threshold(self, pos, done):
+ _, line = self._get_curves()
+ line.setPos(pos)
+ if done:
+ line.sigPositionChangeFinished.emit(line)
+ else:
+ line.sigPositionChanged.emit(line)
+
+ def _get_curves(self):
+ plot_items = self.widget.plot.items[:]
+ for i, item in enumerate(plot_items):
+ if isinstance(item, InfiniteLine):
+ del plot_items[i]
+ return plot_items, item
+ return plot_items, None
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_plotting_curves(self, *_):
+ """Curve coordinates match those computed by `Curves`"""
+ widget = self.widget
+ widget.display_rug = False
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ widget.selected_classifiers = [0]
+ combo = widget.controls.score
+
+ c = self.curves
+ combinations = ([c.ca()],
+ [c.f1()],
+ [c.sensitivity(), c.specificity()],
+ [c.precision(), c.recall()],
+ [c.ppv(), c.npv()],
+ [c.tpr(), c.fpr()])
+ for idx, curves_data in enumerate(combinations, start=1):
+ self._set_combo(combo, idx)
+ curves, line = self._get_curves()
+ self.assertEqual(len(curves), len(curves_data))
+ self.assertIsNotNone(line)
+ for curve in curves:
+ x, y = curve.getData()
+ np.testing.assert_almost_equal(x, self.curves.probs)
+ for i, curve_data in enumerate(curves_data):
+ if np.max(np.abs(curve_data - y)) < 1e-6:
+ del curves_data[i]
+ break
+ else:
+ self.fail(f"invalid curve for {combo.currentText()}")
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_multiple_fold_curves(self, *_):
+ widget = self.widget
+ widget.display_rug = False
+ widget.fold_curves = False
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_list_selection(widget.controls.selected_classifiers, [0])
+ self._set_combo(widget.controls.score, 1) # CA
+
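+ # Pretend the results contain two folds with stored models
+ # so that per-fold curves can be drawn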
+ self.results.folds = [slice(1, 5), slice(5, 19)]
+ self.results.models = np.array([[Mock(), Mock()]] * 2)
+ curves, _ = self._get_curves()
+ self.assertEqual(len(curves), 1)
+
+ widget.controls.fold_curves.click()
+ curves, _ = self._get_curves()
+ self.assertEqual(len(curves), 3)
+
+ widget.controls.fold_curves.click()
+ curves, _ = self._get_curves()
+ self.assertEqual(len(curves), 1)
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_change_target_class(self, *_):
+ """Changing target combo changes the curves"""
+ widget = self.widget
+ widget.display_rug = False
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ widget.selected_classifiers = [0]
+ score_combo = widget.controls.score
+ target_combo = widget.controls.target_index
+
+ self._set_combo(score_combo, 1) # ca
+ self._set_combo(target_combo, 1)
+ (ca, ), _ = self._get_curves()
+ np.testing.assert_almost_equal(ca.getData()[1], self.curves.ca())
+
+ self._set_combo(target_combo, 0)
+ (ca, ), _ = self._get_curves()
+ curves = Curves(1 - self.curves.ytrue, 1 - self.curves.probs[:-1])
+ np.testing.assert_almost_equal(ca.getData()[1], curves.ca())
+
+ def test_changing_score_explanation(self):
+ """Changing score hides/shows explanation and options for calibration"""
+ widget = self.widget
+ score_combo = widget.controls.score
+ explanation = widget.explanation
+ calibrations = widget.controls.output_calibration
+
+ self._set_combo(score_combo, 1) # ca
+ self.assertTrue(explanation.isHidden())
+ self.assertTrue(calibrations.isHidden())
+
+ self._set_combo(score_combo, 0) # calibration
+ self.assertTrue(explanation.isHidden())
+ self.assertFalse(calibrations.isHidden())
+
+ self._set_combo(score_combo, 3) # sens/spec
+ self.assertFalse(explanation.isHidden())
+ self.assertTrue(calibrations.isHidden())
+
+ def test_rug(self):
+ """Test rug appearance and positions"""
+ def get_rugs():
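+ # Rug items are drawn with connect="pairs"; y distinguishes the
+ # bottom (class 0) rug from the top (class 1) one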
+ rugs = [None, None]
+ for item in widget.plot.items:
+ if item.curve.opts.get("connect", "") == "pairs":
+ x, y = item.getData()
+ np.testing.assert_almost_equal(x[::2], x[1::2])
+ rugs[int(y[0] == 1)] = x[::2]
+ return rugs
+
+ widget = self.widget
+ widget.display_rug = True
+ model_list = widget.controls.selected_classifiers
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+
+ self._set_list_selection(model_list, [0])
+ probs = self.curves.probs[:-1]
+ truex = probs[self.curves.ytrue == 1]
+ falsex = probs[self.curves.ytrue == 0]
+ bottom, top = get_rugs()
+ np.testing.assert_almost_equal(bottom, falsex)
+ np.testing.assert_almost_equal(top, truex)
+
+ # Switching targets should swap the rugs and use the complementary probabilities
+ self._set_combo(widget.controls.target_index, 0)
+ bottom, top = get_rugs()
+ np.testing.assert_almost_equal(bottom, (1 - truex)[::-1])
+ np.testing.assert_almost_equal(top, (1 - falsex)[::-1])
+ self._set_combo(widget.controls.target_index, 1)
+
+ # Changing models gives a different rug
+ self._set_list_selection(model_list, [1])
+ probs2 = self.curves2.probs[:-1]
+ truex2 = probs2[self.curves2.ytrue == 1]
+ falsex2 = probs2[self.curves2.ytrue == 0]
+ bottom, top = get_rugs()
+ np.testing.assert_almost_equal(bottom, falsex2)
+ np.testing.assert_almost_equal(top, truex2)
+
+ # Two models - two rugs - four rug items
+ self._set_list_selection(model_list, [0, 1])
+ self.assertEqual(sum(item.curve.opts.get("connect", "") == "pairs"
+ for item in widget.plot.items), 4)
+
+ # No models - no rugs
+ self._set_list_selection(model_list, [])
+ self.assertEqual(get_rugs(), [None, None])
+
+ # Bring the rug back
+ self._set_list_selection(model_list, [1])
+ self.assertIsNotNone(get_rugs()[0])
+
+ # Disable it with checkbox
+ widget.controls.display_rug.click()
+ self.assertEqual(get_rugs(), [None, None])
+
+ def test_calibration_curve(self):
+ """Test the correct number of calibration curves"""
+ widget = self.widget
+ model_list = widget.controls.selected_classifiers
+ widget.display_rug = False
+
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertEqual(len(widget.plot.items), 3) # 2 + diagonal
+
+ self._set_list_selection(model_list, [1])
+ self.assertEqual(len(widget.plot.items), 2)
+
+ self._set_list_selection(model_list, [])
+ self.assertEqual(len(widget.plot.items), 1)
+
+ def test_threshold_change_updates_info(self):
+ """Changing the threshold updates info label"""
+ widget = self.widget
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_combo(widget.controls.score, 1)
+
+ original_text = widget.info_label.text()
+ self._set_threshold(0.3, False)
+ self.assertNotEqual(widget.info_label.text(), original_text)
+
+ def test_threshold_rounding(self):
+ """Threshold is rounded to two decimals"""
+ widget = self.widget
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_combo(widget.controls.score, 1)
+ self._set_threshold(0.367, False)
+ self.assertAlmostEqual(widget.threshold, 0.37)
+
+ def test_threshold_flips_on_two_classes(self):
+ """Threshold changes to 1 - threshold if *binary* class is switched"""
+ widget = self.widget
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_combo(widget.controls.target_index, 0)
+ self._set_combo(widget.controls.score, 1) # CA
+ self._set_threshold(0.25, False)
+ self.assertEqual(widget.threshold, 0.25)
+ self._set_combo(widget.controls.target_index, 1)
+ self.assertEqual(widget.threshold, 0.75)
+
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ self._set_combo(widget.controls.target_index, 0)
+ self._set_combo(widget.controls.score, 1) # CA
+ self._set_threshold(0.25, False)
+ self.assertEqual(widget.threshold, 0.25)
+ self._set_combo(widget.controls.target_index, 1)
+ self.assertEqual(widget.threshold, 0.25)
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_apply_no_output(self, *_):
+ """Test no output warnings"""
+ widget = self.widget
+ model_list = widget.controls.selected_classifiers
+
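+ # Single-character ids keep the expected-message sets below compact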
+ multiple_folds, multiple_selected, no_models, non_binary_class = "abcd"
+ messages = {
+ multiple_folds:
+ "each training data sample produces a different model",
+ no_models:
+ "test results do not contain stored models - try testing on "
+ "separate data or on training data",
+ multiple_selected:
+ "select a single model - the widget can output only one",
+ non_binary_class:
+ "cannot calibrate non-binary classes"}
+
+ def test_shown(shown):
+ widget_msg = widget.Information.no_output
+ output = self.get_output(widget.Outputs.calibrated_model)
+ if not shown:
+ self.assertFalse(widget_msg.is_shown())
+ self.assertIsNotNone(output)
+ else:
+ self.assertTrue(widget_msg.is_shown())
+ self.assertIsNone(output)
+ for msg_id in shown:
+ msg = messages[msg_id]
+ self.assertIn(msg, widget_msg.formatted,
+ f"{msg} not included in the message")
+
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_combo(widget.controls.score, 1) # CA
+ test_shown({multiple_selected})
+
+ self._set_list_selection(model_list, [0])
+ test_shown(())
+ self._set_list_selection(model_list, [0, 1])
+
+ self.results.models = None
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ test_shown({multiple_selected, no_models})
+
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ test_shown({multiple_selected, non_binary_class})
+
+ self._set_list_selection(model_list, [0])
+ test_shown({non_binary_class})
+
+ self.results.folds = [slice(0, 5), slice(5, 10), slice(10, 19)]
+ self.results.models = np.array([[Mock(), Mock()]] * 3)
+
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ test_shown({multiple_selected, multiple_folds})
+
+ self._set_list_selection(model_list, [0])
+ test_shown({multiple_folds})
+
+ self._set_combo(widget.controls.score, 0) # calibration
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ self._set_list_selection(model_list, [0, 1])
+ test_shown({multiple_selected})
+ self._set_list_selection(model_list, [0])
+ test_shown(())
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ def test_output_threshold_classifier(self, threshold_classifier):
+ """Test threshold classifier on output"""
+ widget = self.widget
+ model_list = widget.controls.selected_classifiers
+ models = self.results.models.ravel()
+ target_combo = widget.controls.target_index
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_list_selection(model_list, [0])
+ widget.target_index = 1
+
+ widget.threshold = 0.3
+ self._set_combo(widget.controls.score, 1) # CA
+ model = self.get_output(widget.Outputs.calibrated_model)
+ threshold_classifier.assert_called_with(models[0], 0.3)
+ self.assertIs(model, threshold_classifier.return_value)
+ threshold_classifier.reset_mock()
+
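+ # Dragging the line without releasing must not commit,
+ # even with auto commit on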
+ widget.auto_commit = True
+ self._set_threshold(0.4, False)
+ threshold_classifier.assert_not_called()
+
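+ # Releasing the line must not commit while auto commit is off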
+ widget.auto_commit = False
+ self._set_threshold(0.35, True)
+ threshold_classifier.assert_not_called()
+
+ widget.auto_commit = True
+ self._set_threshold(0.4, True)
+ threshold_classifier.assert_called_with(models[0], 0.4)
+ self.assertIs(model, threshold_classifier.return_value)
+ threshold_classifier.reset_mock()
+
+ self._set_combo(target_combo, 0)
+ threshold_classifier.assert_called_with(models[0], 0.4)
+ self.assertIs(model, threshold_classifier.return_value)
+ threshold_classifier.reset_mock()
+
+ self._set_combo(target_combo, 1)
+ threshold_classifier.assert_called_with(models[0], 0.4)
+ self.assertIs(model, threshold_classifier.return_value)
+ threshold_classifier.reset_mock()
+
+ self._set_list_selection(model_list, [1])
+ threshold_classifier.assert_called_with(models[1], 0.4)
+ self.assertIs(model, threshold_classifier.return_value)
+ threshold_classifier.reset_mock()
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_output_calibrated_classifier(self, calibrated_learner):
+ """Test calibrated classifier on output"""
+ calibrated_instance = calibrated_learner.return_value
+ get_model = calibrated_instance.get_model
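+ # The widget is expected to construct CalibratedLearner(None, method)
+ # and call its get_model with the model, actual classes and probabilities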
+
+ widget = self.widget
+ model_list = widget.controls.selected_classifiers
+ models = self.lenses_results.models.ravel()
+ results = self.lenses_results
+ self.send_signal(widget.Inputs.evaluation_results, results)
+ self._set_combo(widget.controls.score, 0)
+
+ self._set_list_selection(model_list, [1])
+
+ self._set_radio_buttons(widget.controls.output_calibration, 0)
+ calibrated_learner.assert_called_with(None, 0)
+ model, actual, probabilities = get_model.call_args[0]
+ self.assertIs(model, models[1])
+ np.testing.assert_equal(actual, results.actual)
+ np.testing.assert_equal(probabilities, results.probabilities[1])
+ self.assertIs(self.get_output(widget.Outputs.calibrated_model),
+ get_model.return_value)
+ calibrated_learner.reset_mock()
+ get_model.reset_mock()
+
+ self._set_radio_buttons(widget.controls.output_calibration, 1)
+ calibrated_learner.assert_called_with(None, 1)
+ model, actual, probabilities = get_model.call_args[0]
+ self.assertIs(model, models[1])
+ np.testing.assert_equal(actual, results.actual)
+ np.testing.assert_equal(probabilities, results.probabilities[1])
+ self.assertIs(self.get_output(widget.Outputs.calibrated_model),
+ get_model.return_value)
+ calibrated_learner.reset_mock()
+ get_model.reset_mock()
+
+ self._set_list_selection(model_list, [0])
+ self._set_radio_buttons(widget.controls.output_calibration, 1)
+ calibrated_learner.assert_called_with(None, 1)
+ model, actual, probabilities = get_model.call_args[0]
+ self.assertIs(model, models[0])
+ np.testing.assert_equal(actual, results.actual)
+ np.testing.assert_equal(probabilities, results.probabilities[0])
+ self.assertIs(self.get_output(widget.Outputs.calibrated_model),
+ get_model.return_value)
+ calibrated_learner.reset_mock()
+ get_model.reset_mock()
+
+ def test_contexts(self):
+ """Test storing and retrieving context settings"""
+ widget = self.widget
+ model_list = widget.controls.selected_classifiers
+ target_combo = widget.controls.target_index
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ self._set_list_selection(model_list, [0, 2])
+ self._set_combo(target_combo, 2)
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self._set_list_selection(model_list, [0])
+ self._set_combo(target_combo, 0)
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ self.assertEqual(widget.selected_classifiers, [0, 2])
+ self.assertEqual(widget.target_index, 2)
+
+ def test_report(self):
+ """Test that report does not crash"""
+ widget = self.widget
+ self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
+ widget.send_report()
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_single_class(self, *_):
+ """Curves are not plotted if all data belongs to (non)-target"""
+ def check_error(shown):
+ for error in (errors.no_target_class, errors.all_target_class,
+ errors.nan_classes):
+ self.assertEqual(error.is_shown(), error is shown,
+ f"{error} is unexpectedly"
+ f"{'' if error.is_shown() else ' not'} shown")
+ if shown is not None:
+ self.assertEqual(len(widget.plot.items), 0)
+ else:
+ self.assertGreater(len(widget.plot.items), 0)
+
+ widget = self.widget
+ errors = widget.Error
+ widget.display_rug = True
+ combo = widget.controls.score
+
+ original_actual = self.results.actual.copy()
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ widget.selected_classifiers = [0]
+ for idx in range(combo.count()):
+ self._set_combo(combo, idx)
+ self.results.actual[:] = 0
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(errors.no_target_class)
+
+ self.results.actual[:] = 1
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(errors.all_target_class)
+
+ self.results.actual[:] = original_actual
+ self.results.actual[3] = np.nan
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(errors.nan_classes)
+
+ self.results.actual[:] = original_actual
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ check_error(None)
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_single_class_folds(self, *_):
+ """Curves for single-class folds are not plotted"""
+ widget = self.widget
+ widget.display_rug = False
+ widget.fold_curves = False
+
+ results = self.lenses_results
+ results.folds = [slice(0, 5), slice(5, 19)]
+ results.models = results.models.repeat(2, axis=0)
+ results.actual[:3] = 0
+ results.probabilities[1, 3:5] = np.nan
+ # after this, model 1 sees only negative instances in fold 0
+ self.send_signal(widget.Inputs.evaluation_results, results)
+ self._set_combo(widget.controls.score, 1) # CA
+ self.assertFalse(widget.Warning.omitted_folds.is_shown())
+ widget.controls.fold_curves.click()
+ self.assertTrue(widget.Warning.omitted_folds.is_shown())
+
+ @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+ @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+ def test_warn_nan_probabilities(self, *_):
+ """Warn about omitted points with nan probabiities"""
+ widget = self.widget
+ widget.display_rug = False
+ widget.fold_curves = False
+
+ self.results.probabilities[1, 3] = np.nan
+ self.send_signal(widget.Inputs.evaluation_results, self.results)
+ self.assertTrue(widget.Warning.omitted_nan_prob_points.is_shown())
+ self._set_list_selection(widget.controls.selected_classifiers, [0, 2])
+ self.assertFalse(widget.Warning.omitted_nan_prob_points.is_shown())
diff --git a/Orange/widgets/evaluate/utils.py b/Orange/widgets/evaluate/utils.py
index 9e2f579dfae..ebe06032777 100644
--- a/Orange/widgets/evaluate/utils.py
+++ b/Orange/widgets/evaluate/utils.py
@@ -47,7 +47,7 @@ def results_for_preview(data_name=""):
from Orange.classification import \
LogisticRegressionLearner, SVMLearner, NuSVMLearner
- data = Table(data_name or "ionosphere")
+ data = Table(data_name or "heart_disease")
results = CrossValidation(
data,
[LogisticRegressionLearner(penalty="l2"),
diff --git a/Orange/widgets/gui.py b/Orange/widgets/gui.py
index 683b8be2f73..b6a8d84552b 100644
--- a/Orange/widgets/gui.py
+++ b/Orange/widgets/gui.py
@@ -1783,6 +1783,9 @@ def __init__(self, master, enableDragDrop=False, dragDropCallback=None,
def sizeHint(self):
return self.size_hint
+ def minimumSizeHint(self):
+ return self.size_hint
+
def dragEnterEvent(self, event):
super().dragEnterEvent(event)
if self.valid_data_callback:
diff --git a/Orange/widgets/model/icons/CalibratedLearner.svg b/Orange/widgets/model/icons/CalibratedLearner.svg
new file mode 100644
index 00000000000..360a0d188ba
--- /dev/null
+++ b/Orange/widgets/model/icons/CalibratedLearner.svg
@@ -0,0 +1,26 @@
+<!-- CalibratedLearner icon: 26 lines of SVG markup not reproduced here -->
diff --git a/Orange/widgets/model/owcalibratedlearner.py b/Orange/widgets/model/owcalibratedlearner.py
new file mode 100644
index 00000000000..0edf3184797
--- /dev/null
+++ b/Orange/widgets/model/owcalibratedlearner.py
@@ -0,0 +1,111 @@
+from Orange.classification import CalibratedLearner, ThresholdLearner, \
+ NaiveBayesLearner
+from Orange.data import Table
+from Orange.modelling import Learner
+from Orange.widgets import gui
+from Orange.widgets.widget import Input
+from Orange.widgets.settings import Setting
+from Orange.widgets.utils.owlearnerwidget import OWBaseLearner
+from Orange.widgets.utils.widgetpreview import WidgetPreview
+
+
+class OWCalibratedLearner(OWBaseLearner):
+ name = "Calibrated Learner"
+ description = "Wraps another learner with probability calibration and " \
+ "decision threshold optimization"
+ icon = "icons/CalibratedLearner.svg"
+ priority = 20
+ keywords = ["calibration", "threshold"]
+
+ LEARNER = CalibratedLearner
+
+ SigmoidCalibration, IsotonicCalibration, NoCalibration = range(3)
+ CalibrationOptions = ("Sigmoid calibration",
+ "Isotonic calibration",
+ "No calibration")
+ CalibrationShort = ("Sigmoid", "Isotonic", "")
+ CalibrationMap = {
+ SigmoidCalibration: CalibratedLearner.Sigmoid,
+ IsotonicCalibration: CalibratedLearner.Isotonic}
+
+ OptimizeCA, OptimizeF1, NoThresholdOptimization = range(3)
+ ThresholdOptions = ("Optimize classification accuracy",
+ "Optimize F1 score",
+ "No threshold optimization")
+ ThresholdShort = ("CA", "F1", "")
+ ThresholdMap = {
+ OptimizeCA: ThresholdLearner.OptimizeCA,
+ OptimizeF1: ThresholdLearner.OptimizeF1}
+
+ learner_name = Setting("", schema_only=True)
+ calibration = Setting(SigmoidCalibration)
+ threshold = Setting(OptimizeCA)
+
+ class Inputs(OWBaseLearner.Inputs):
+ base_learner = Input("Base Learner", Learner)
+
+ def __init__(self):
+ super().__init__()
+ self.base_learner = None
+
+ def add_main_layout(self):
+ gui.radioButtons(
+ self.controlArea, self, "calibration", self.CalibrationOptions,
+ box="Probability calibration",
+ callback=self.calibration_options_changed)
+ gui.radioButtons(
+ self.controlArea, self, "threshold", self.ThresholdOptions,
+ box="Decision threshold optimization",
+ callback=self.calibration_options_changed)
+
+ @Inputs.base_learner
+ def set_learner(self, learner):
+ self.base_learner = learner
+ self._set_default_name()
+ self.unconditional_apply()
+
+ def _set_default_name(self):
+ if self.base_learner is None:
+ self.name = "Calibrated learner"
+ else:
+ self.name = " + ".join(part for part in (
+ self.base_learner.name.title(),
+ self.CalibrationShort[self.calibration],
+ self.ThresholdShort[self.threshold]) if part)
+ self.controls.learner_name.setPlaceholderText(self.name)
+
+ def calibration_options_changed(self):
+ self._set_default_name()
+ self.apply()
+
+ def create_learner(self):
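+ # The pass-through wrapper lets preprocessors be attached without
+ # mutating the (possibly shared) base learner instance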
+ class IdentityWrapper(Learner):
+ def fit_storage(self, data):
+ return self.base_learner.fit_storage(data)
+
+ if self.base_learner is None:
+ return None
+ learner = self.base_learner
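+ # Calibration wraps the base learner first, so that threshold
+ # optimization operates on calibrated probabilities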
+ if self.calibration != self.NoCalibration:
+ learner = CalibratedLearner(learner,
+ self.CalibrationMap[self.calibration])
+ if self.threshold != self.NoThresholdOptimization:
+ learner = ThresholdLearner(learner,
+ self.ThresholdMap[self.threshold])
+ if self.preprocessors:
+ if learner is self.base_learner:
+ learner = IdentityWrapper()
+ learner.preprocessors = (self.preprocessors, )
+ return learner
+
+ def get_learner_parameters(self):
+ return (("Calibrate probabilities",
+ self.CalibrationOptions[self.calibration]),
+ ("Threshold optimization",
+ self.ThresholdOptions[self.threshold]))
+
+
+if __name__ == "__main__": # pragma: no cover
+ WidgetPreview(OWCalibratedLearner).run(
+ Table("heart_disease"),
+ set_learner=NaiveBayesLearner())
diff --git a/Orange/widgets/model/tests/test_owcalibratedlearner.py b/Orange/widgets/model/tests/test_owcalibratedlearner.py
new file mode 100644
index 00000000000..400d483a592
--- /dev/null
+++ b/Orange/widgets/model/tests/test_owcalibratedlearner.py
@@ -0,0 +1,158 @@
+from unittest.mock import Mock
+
+from Orange.classification import ThresholdLearner, CalibratedLearner, \
+ NaiveBayesLearner, ThresholdClassifier, CalibratedClassifier
+from Orange.classification.base_classification import ModelClassification, \
+ LearnerClassification
+from Orange.classification.naive_bayes import NaiveBayesModel
+from Orange.data import Table
+from Orange.widgets.model.owcalibratedlearner import OWCalibratedLearner
+from Orange.widgets.tests.base import WidgetTest, WidgetLearnerTestMixin, \
+ datasets
+
+
+class TestOWCalibratedLearner(WidgetTest, WidgetLearnerTestMixin):
+ def setUp(self):
+ self.widget = self.create_widget(
+ OWCalibratedLearner, stored_settings={"auto_apply": False})
+ self.send_signal(self.widget.Inputs.base_learner, NaiveBayesLearner())
+
+ self.data = Table("heart_disease")
+ self.valid_datasets = (self.data,)
+ self.inadequate_dataset = (Table(datasets.path("testing_dataset_reg")),)
+ self.learner_class = LearnerClassification
+ self.model_class = ModelClassification
+ self.model_name = 'Calibrated classifier'
+ self.parameters = []
+
+ def test_output_learner(self):
+ """Check if learner is on output after apply"""
+ # Overridden to change the output type in the last test
+ initial = self.get_output("Learner")
+ self.assertIsNotNone(initial, "Does not initialize the learner output")
+ self.widget.apply_button.button.click()
+ newlearner = self.get_output("Learner")
+ self.assertIsNot(initial, newlearner,
+ "Does not send a new learner instance on `Apply`.")
+ self.assertIsNotNone(newlearner)
+ self.assertIsInstance(
+ newlearner,
+ (CalibratedLearner, ThresholdLearner, NaiveBayesLearner))
+
+ def test_output_model(self):
+ """Check if model is on output after sending data and apply"""
+ # Overridden to change the output type in the last two test
+ self.assertIsNone(self.get_output(self.widget.Outputs.model))
+ self.widget.apply_button.button.click()
+ self.assertIsNone(self.get_output(self.widget.Outputs.model))
+ self.send_signal('Data', self.data)
+ self.widget.apply_button.button.click()
+ self.wait_until_stop_blocking()
+ model = self.get_output(self.widget.Outputs.model)
+ self.assertIsNotNone(model)
+ self.assertIsInstance(
+ model, (CalibratedClassifier, ThresholdClassifier, NaiveBayesModel))
+
+ def test_create_learner(self):
+ widget = self.widget  # type: OWCalibratedLearner
+ self.widget.base_learner = Mock()
+
+ widget.calibration = widget.SigmoidCalibration
+ widget.threshold = widget.OptimizeF1
+ learner = self.widget.create_learner()
+ self.assertIsInstance(learner, ThresholdLearner)
+ self.assertEqual(learner.threshold_criterion, learner.OptimizeF1)
+ cal_learner = learner.base_learner
+ self.assertIsInstance(cal_learner, CalibratedLearner)
+ self.assertEqual(cal_learner.calibration_method, cal_learner.Sigmoid)
+ self.assertIs(cal_learner.base_learner, self.widget.base_learner)
+
+ widget.calibration = widget.IsotonicCalibration
+ widget.threshold = widget.OptimizeCA
+ learner = self.widget.create_learner()
+ self.assertIsInstance(learner, ThresholdLearner)
+ self.assertEqual(learner.threshold_criterion, learner.OptimizeCA)
+ cal_learner = learner.base_learner
+ self.assertIsInstance(cal_learner, CalibratedLearner)
+ self.assertEqual(cal_learner.calibration_method, cal_learner.Isotonic)
+ self.assertIs(cal_learner.base_learner, self.widget.base_learner)
+
+ widget.calibration = widget.NoCalibration
+ widget.threshold = widget.OptimizeCA
+ learner = self.widget.create_learner()
+ self.assertIsInstance(learner, ThresholdLearner)
+ self.assertEqual(learner.threshold_criterion, learner.OptimizeCA)
+ self.assertIs(learner.base_learner, self.widget.base_learner)
+
+ widget.calibration = widget.IsotonicCalibration
+ widget.threshold = widget.NoThresholdOptimization
+ learner = self.widget.create_learner()
+ self.assertIsInstance(learner, CalibratedLearner)
+ self.assertEqual(learner.calibration_method, cal_learner.Isotonic)
+ self.assertIs(learner.base_learner, self.widget.base_learner)
+
+ widget.calibration = widget.NoCalibration
+ widget.threshold = widget.NoThresholdOptimization
+ learner = self.widget.create_learner()
+ self.assertIs(learner, self.widget.base_learner)
+
+ widget.calibration = widget.SigmoidCalibration
+ widget.threshold = widget.OptimizeF1
+ widget.base_learner = None
+ learner = self.widget.create_learner()
+ self.assertIsNone(learner)
+
+ def test_preprocessors(self):
+ widget = self.widget  # type: OWCalibratedLearner
+ self.widget.base_learner = Mock()
+ self.widget.base_learner.preprocessors = ()
+
+ widget.calibration = widget.SigmoidCalibration
+ widget.threshold = widget.OptimizeF1
+ widget.preprocessors = Mock()
+ learner = self.widget.create_learner()
+ self.assertEqual(learner.preprocessors, (widget.preprocessors, ))
+ self.assertEqual(learner.base_learner.preprocessors, ())
+ self.assertEqual(learner.base_learner.base_learner.preprocessors, ())
+
+ widget.calibration = widget.NoCalibration
+ widget.threshold = widget.NoThresholdOptimization
+ learner = self.widget.create_learner()
+ self.assertIsNot(learner, self.widget.base_learner)
+ self.assertFalse(
+ isinstance(learner, (CalibratedLearner, ThresholdLearner)))
+ self.assertEqual(learner.preprocessors, (widget.preprocessors, ))
+
+ def test_set_learner_calls_unconditional_apply(self):
+ widget = self.widget
+ self.assertIsNotNone(self.get_output(widget.Outputs.learner))
+
+ widget.auto_apply = False
+ self.send_signal(widget.Inputs.base_learner, None)
+ self.assertIsNone(self.get_output(widget.Outputs.learner))
+
+ def test_name_changes(self):
+ widget = self.widget
+ widget.auto_apply = True
+ learner = NaiveBayesLearner()
+ learner.name = "foo"
+ self.send_signal(widget.Inputs.base_learner, learner)
+
+ widget.calibration = widget.IsotonicCalibration
+ widget.threshold = widget.OptimizeCA
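+ # Changing the setting alone does not invoke the callback;
+ # emit buttonClicked as a real click would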
+ widget.controls.calibration.group.buttonClicked[int].emit(
+ widget.IsotonicCalibration)
+
+ learner = self.get_output(widget.Outputs.learner)
+ self.assertEqual(learner.name, "Foo + Isotonic + CA")
+
+ widget.calibration = widget.NoCalibration
+ widget.threshold = widget.OptimizeCA
+ widget.controls.calibration.group.buttonClicked[int].emit(
+ widget.NoCalibration)
+ learner = self.get_output(widget.Outputs.learner)
+ self.assertEqual(learner.name, "Foo + CA")
+
+ self.send_signal(widget.Inputs.base_learner, None)
+ self.assertEqual(widget.controls.learner_name.placeholderText(),
+ "Calibrated learner")
diff --git a/Orange/widgets/tests/base.py b/Orange/widgets/tests/base.py
index 635dd2e5fd8..1204e1c6ed5 100644
--- a/Orange/widgets/tests/base.py
+++ b/Orange/widgets/tests/base.py
@@ -672,7 +672,8 @@ def test_output_learner_name(self):
new_name = "Learner Name"
self.widget.apply_button.button.click()
self.assertEqual(self.widget.learner.name,
- self.widget.name_line_edit.text())
+ self.widget.name_line_edit.text()
+ or self.widget.name_line_edit.placeholderText())
self.widget.name_line_edit.setText(new_name)
self.widget.apply_button.button.click()
self.wait_until_stop_blocking()
diff --git a/Orange/widgets/utils/owlearnerwidget.py b/Orange/widgets/utils/owlearnerwidget.py
index 3c6ee6ea65f..63b2795c78e 100644
--- a/Orange/widgets/utils/owlearnerwidget.py
+++ b/Orange/widgets/utils/owlearnerwidget.py
@@ -65,7 +65,7 @@ class OWBaseLearner(OWWidget, metaclass=OWBaseLearnerMeta, openclass=True):
LEARNER = None
supports_sparse = True
- learner_name = Setting(None, schema_only=True)
+ learner_name = Setting("", schema_only=True)
want_main_area = False
resizing_enabled = False
auto_apply = Setting(True)
@@ -95,8 +95,6 @@ def __init__(self):
self.data = None
self.valid_data = False
self.learner = None
- if self.learner_name is None:
- self.learner_name = self.name
self.model = None
self.preprocessors = None
self.outdated_settings = False
@@ -149,7 +147,7 @@ def update_learner(self):
if self.learner and issubclass(self.LEARNER, Fitter):
self.learner.use_default_preprocessors = True
if self.learner is not None:
- self.learner.name = self.learner_name
+ self.learner.name = self.learner_name or self.name
self.Outputs.learner.send(self.learner)
self.outdated_settings = False
self.Warning.outdated_learner.clear()
@@ -168,7 +166,7 @@ def update_model(self):
except BaseException as exc:
self.show_fitting_failed(exc)
else:
- self.model.name = self.learner_name
+ self.model.name = self.learner_name or self.name
self.model.instances = self.data
self.Outputs.model.send(self.model)
@@ -198,7 +196,7 @@ def settings_changed(self, *args, **kwargs):
def _change_name(self, instance, output):
if instance:
- instance.name = self.learner_name
+ instance.name = self.learner_name or self.name
if self.auto_apply:
output.send(instance)
@@ -207,7 +205,7 @@ def learner_name_changed(self):
self._change_name(self.model, self.Outputs.model)
def send_report(self):
- self.report_items((("Name", self.learner_name),))
+ self.report_items((("Name", self.learner_name or self.name),))
model_parameters = self.get_learner_parameters()
if model_parameters:
@@ -264,6 +262,7 @@ def add_regression_layout(self, box):
def add_learner_name_widget(self):
self.name_line_edit = gui.lineEdit(
self.controlArea, self, 'learner_name', box='Name',
+ placeholderText=self.name,
tooltip='The name will identify this model in other widgets',
orientation=Qt.Horizontal, callback=self.learner_name_changed)
diff --git a/Orange/widgets/utils/tests/test_owlearnerwidget.py b/Orange/widgets/utils/tests/test_owlearnerwidget.py
index 99f792196b6..9a43365a473 100644
--- a/Orange/widgets/utils/tests/test_owlearnerwidget.py
+++ b/Orange/widgets/utils/tests/test_owlearnerwidget.py
@@ -105,7 +105,6 @@ class WidgetA(OWBaseLearner):
LEARNER = KNNLearner
w1 = self.create_widget(WidgetA)
- self.assertEqual(w1.learner_name, "A")
w1.learner_name = "MyWidget"
settings = w1.settingsHandler.pack_data(w1)
diff --git a/doc/data-mining-library/source/reference/classification.rst b/doc/data-mining-library/source/reference/classification.rst
index 5095e147f2a..55792fa340f 100644
--- a/doc/data-mining-library/source/reference/classification.rst
+++ b/doc/data-mining-library/source/reference/classification.rst
@@ -196,3 +196,21 @@ CN2 Rule Induction
.. autoclass:: CN2SDUnorderedLearner
:members:
+
+
+Calibration and threshold optimization
+--------------------------------------
+
+.. automodule:: Orange.classification.calibration
+
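+A minimal usage sketch, assuming the ``heart_disease`` data set and a
+logistic regression as the base learner::
+
+    from Orange.data import Table
+    from Orange.classification import (
+        LogisticRegressionLearner, CalibratedLearner, ThresholdLearner)
+
+    data = Table("heart_disease")
+    # calibrate probabilities first, then optimize the threshold for CA
+    learner = ThresholdLearner(
+        CalibratedLearner(LogisticRegressionLearner(),
+                          CalibratedLearner.Sigmoid),
+        ThresholdLearner.OptimizeCA)
+    model = learner(data)
+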
+.. autoclass:: ThresholdClassifier
+ :members:
+
+.. autoclass:: ThresholdLearner
+ :members:
+
+.. autoclass:: CalibratedClassifier
+ :members:
+
+.. autoclass:: CalibratedLearner
+ :members:
diff --git a/doc/data-mining-library/source/reference/evaluation.performance_curves.rst b/doc/data-mining-library/source/reference/evaluation.performance_curves.rst
new file mode 100644
index 00000000000..d9eaa515c0f
--- /dev/null
+++ b/doc/data-mining-library/source/reference/evaluation.performance_curves.rst
@@ -0,0 +1,8 @@
+.. py:currentmodule:: Orange.evaluation.performance_curves
+
+##################
+Performance curves
+##################
+
+.. autoclass:: Orange.evaluation.performance_curves.Curves
+ :members:
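+
+A minimal sketch of computing curves from test results, assuming a
+binary data set such as ``heart_disease``::
+
+    from Orange.data import Table
+    from Orange.classification import LogisticRegressionLearner
+    from Orange.evaluation import TestOnTrainingData
+    from Orange.evaluation.performance_curves import Curves
+
+    data = Table("heart_disease")
+    res = TestOnTrainingData(data, [LogisticRegressionLearner()])
+    curves = Curves.from_results(res)
+    ca = curves.ca()  # classification accuracy at each threshold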
diff --git a/doc/data-mining-library/source/reference/evaluation.rst b/doc/data-mining-library/source/reference/evaluation.rst
index 422371a41eb..a07c99ae44f 100644
--- a/doc/data-mining-library/source/reference/evaluation.rst
+++ b/doc/data-mining-library/source/reference/evaluation.rst
@@ -9,3 +9,4 @@ Evaluation (``evaluation``)
evaluation.testing
evaluation.cd
+ evaluation.performance_curves