Calibration plot: Test missing probabilities and single classes

janezd · janezd · commit 47308ca09a8f · 2019-06-19T13:19:03.000+02:00
diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py
@@ -74,18 +74,23 @@ class Outputs:
     class Error(widget.OWWidget.Error):
         non_discrete_target = Msg("Calibration plot requires a discrete target")
         empty_input = widget.Msg("Empty result on input. Nothing to display.")
+        nan_classes = \
+            widget.Msg("Remove test data instances with unknown classes")
+        all_target_class = widget.Msg(
+            "All data instances belong to target class")
+        no_target_class = widget.Msg(
+            "No data instances belong to target class")
+
+    class Warning(widget.OWWidget.Warning):
+        omitted_folds = widget.Msg(
+            "Test folds where all data belongs to (non)-target are not shown")
+        omitted_nan_prob_points = widget.Msg(
+            "Instances for which the model couldn't compute probabilities are"
+            " skipped")  # leading space keeps the joined literals separated
+        no_valid_data = widget.Msg("No valid data for model(s) {}")
 
     class Information(widget.OWWidget.Information):
-        no_out = "Can't output a model: "
-        no_output_multiple_folds = Msg(
-            no_out + "each training data sample produces a different model")
-        no_output_no_models = Msg(
-            no_out + "test results do not contain stored models;\n"
-            "try testing on separate data or on training data")
-        no_output_multiple_selected = Msg(
-            no_out + "select a single model - the widget can output only one")
-        no_output_non_binary_class = Msg(
-            no_out + "cannot calibrate non-binary classes")
+        no_output = Msg("Can't output a model: {}")
 
     settingsHandler = EvaluationResultsContextHandler()
     target_index = settings.ContextSetting(0)
@@ -179,19 +184,23 @@ def set_results(self, results):
         self.clear()
         self.Error.clear()
         self.Information.clear()
-        if results is not None and not results.domain.has_discrete_class:
-            self.Error.non_discrete_target()
-            results = None
-        if results is not None and not results.actual.size:
-            self.Error.empty_input()
-            results = None
-        self.results = results
-        if self.results is not None:
-            self._initialize(results)
-            class_var = self.results.domain.class_var
-            self.target_index = int(len(class_var.values) == 2)
-            self.openContext(class_var, self.classifier_names)
-            self._replot()
+
+        self.results = None
+        if results is not None:
+            if not results.domain.has_discrete_class:
+                self.Error.non_discrete_target()
+            elif not results.actual.size:
+                self.Error.empty_input()
+            elif np.any(np.isnan(results.actual)):
+                self.Error.nan_classes()
+            else:
+                self.results = results
+                self._initialize(results)
+                class_var = self.results.domain.class_var
+                self.target_index = int(len(class_var.values) == 2)
+                self.openContext(class_var, self.classifier_names)
+                self._replot()
+
         self.apply()
 
     def clear(self):
@@ -286,9 +295,6 @@ def plot_metrics(self, data, metrics, pen_args):
         return data.probs, ys
 
     def _prob_curve(self, ytrue, probs, pen_args):
-        if not probs.size:
-            return None
-
         xmin, xmax = probs.min(), probs.max()
         x = np.linspace(xmin, xmax, 100)
         if xmax != xmin:
@@ -307,16 +313,25 @@ def _setup_plot(self):
         plot_folds = self.fold_curves and results.folds is not None
         self.scores = []
 
-        ytrue = results.actual == target
+        if not self._check_class_presence(results.actual == target):
+            return
+
+        self.Warning.omitted_folds.clear()
+        self.Warning.omitted_nan_prob_points.clear()
+        no_valid_models = []
+        shadow_width = 4 + 4 * plot_folds
         for clsf in self.selected_classifiers:
-            probs = results.probabilities[clsf, :, target]
+            data = Curves.from_results(results, target, clsf)
+            if data.tot == 0:  # all probabilities are nan
+                no_valid_models.append(clsf)
+                continue
+            if data.tot != results.probabilities.shape[1]:  # some are nan
+                self.Warning.omitted_nan_prob_points()
+
             color = self.colors[clsf]
             pen_args = dict(
-                pen=pg.mkPen(color, width=1),
-                shadowPen=pg.mkPen(color.lighter(160),
-                                   width=4 + 4 * plot_folds),
-                antiAlias=True)
-            data = Curves(ytrue, probs)
+                pen=pg.mkPen(color, width=1), antiAlias=True,
+                shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
             self.scores.append(
                 (self.classifier_names[clsf],
                  self.plot_metrics(data, metrics, pen_args)))
@@ -330,28 +345,46 @@ def _setup_plot(self):
                     antiAlias=True)
                 for fold in range(len(results.folds)):
                     fold_results = results.get_fold(fold)
-                    fold_ytrue = fold_results.actual == target
-                    fold_probs = fold_results.probabilities[clsf, :, target]
-                    self.plot_metrics(Curves(fold_ytrue, fold_probs),
-                                      metrics, pen_args)
+                    fold_curve = Curves.from_results(fold_results, target, clsf)
+                    # Can't check this before: p and n can be 0 because of
+                    # nan probabilities
+                    if fold_curve.p * fold_curve.n == 0:
+                        self.Warning.omitted_folds()
+                    self.plot_metrics(fold_curve, metrics, pen_args)
+
+        if no_valid_models:
+            self.Warning.no_valid_data(
+                ", ".join(self.classifier_names[i] for i in no_valid_models))
 
         if self.score == 0:
             self.plot.plot([0, 1], [0, 1], antialias=True)
-
-    def _replot(self):
-        self.plot.clear()
-        if self.results is not None:
-            self._setup_plot()
-        if self.score != 0:
+        else:
             self.line = pg.InfiniteLine(
                 pos=self.threshold, movable=True,
                 pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
                 hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3),
                 bounds=(0, 1),
             )
             self.line.sigPositionChanged.connect(self.threshold_change)
-            self.line.sigPositionChangeFinished.connect(self.threshold_change_done)
+            self.line.sigPositionChangeFinished.connect(
+                self.threshold_change_done)
             self.plot.addItem(self.line)
+
+    def _check_class_presence(self, ytrue):
+        self.Error.all_target_class.clear()
+        self.Error.no_target_class.clear()
+        if np.max(ytrue) == 0:
+            self.Error.no_target_class()
+            return False
+        if np.min(ytrue) == 1:
+            self.Error.all_target_class()
+            return False
+        return True
+
+    def _replot(self):
+        self.plot.clear()
+        if self.results is not None:
+            self._setup_plot()
         self._update_info()
 
     def _on_display_rug_changed(self):
@@ -397,20 +430,28 @@ def threshold_change_done(self):
         self.apply()
 
     def apply(self):
-        info = self.Information
+        self.Information.no_output.clear()
         wrapped = None
-        problems = {}
         results = self.results
         if results is not None:
-            problems = {
-                info.no_output_multiple_folds: len(results.folds) > 1,
-                info.no_output_no_models: results.models is None,
-                info.no_output_multiple_selected:
-                    len(self.selected_classifiers) != 1,
-                info.no_output_non_binary_class:
-                    self.score != 0
-                    and len(results.domain.class_var.values) != 2}
-            if not any(problems.values()):
+            problems = [
+                msg for condition, msg in (
+                    (len(results.folds) > 1,
+                     "each training data sample produces a different model"),
+                    (results.models is None,
+                     "test results do not contain stored models - try testing on"
+                     "separate data or on training data"),
+                    (len(self.selected_classifiers) != 1,
+                     "select a single model - the widget can output only one"),
+                    (self.score != 0 and len(results.domain.class_var.values) != 2,
+                     "cannot calibrate non-binary classes"))
+                if condition]
+            if len(problems) == 1:
+                self.Information.no_output(problems[0])
+            elif problems:
+                self.Information.no_output(
+                    "".join(f"\n - {problem}" for problem in problems))
+            else:
                 clsf_idx = self.selected_classifiers[0]
                 model = results.models[0, clsf_idx]
                 if self.score == 0:
@@ -424,9 +465,6 @@ def apply(self):
                     wrapped = ThresholdClassifier(model, threshold)
 
         self.Outputs.calibrated_model.send(wrapped)
-        for info, shown in problems.items():
-            if info.is_shown() != shown:
-                info(shown=shown)
 
     def send_report(self):
         if self.results is None:
diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py
@@ -21,22 +21,6 @@
 
 
 class TestOWCalibrationPlot(WidgetTest, EvaluateTest):
-    @classmethod
-    def setUpClass(cls):
-        super().setUpClass()
-        cls.lenses = data = Table(test_filename("datasets/lenses.tab"))
-        majority = Orange.classification.MajorityLearner()
-        majority.name = "majority"
-        knn3 = Orange.classification.KNNLearner(n_neighbors=3)
-        knn3.name = "knn-3"
-        knn1 = Orange.classification.KNNLearner(n_neighbors=1)
-        knn1.name = "knn-1"
-        cls.lenses_results = Orange.evaluation.TestOnTestData(
-            store_data=True, store_models=True)(
-                data=data[::2], test_data=data[1::2],
-                learners=[majority, knn3, knn1])
-        cls.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
-
     def setUp(self):
         super().setUp()
 
@@ -56,12 +40,25 @@ def setUp(self):
         self.results = Results(
             domain=domain,
             actual=actual,
-            folds=(Ellipsis, ),
+            folds=np.array([Ellipsis]),
             models=np.array([[Mock(), Mock()]]),
             row_indices=np.arange(19),
             predicted=np.array((pred, pred2)),
             probabilities=np.array([probs, probs2]))
 
+        self.lenses = data = Table(test_filename("datasets/lenses.tab"))
+        majority = Orange.classification.MajorityLearner()
+        majority.name = "majority"
+        knn3 = Orange.classification.KNNLearner(n_neighbors=3)
+        knn3.name = "knn-3"
+        knn1 = Orange.classification.KNNLearner(n_neighbors=1)
+        knn1.name = "knn-1"
+        self.lenses_results = Orange.evaluation.TestOnTestData(
+            store_data=True, store_models=True)(
+                data=data[::2], test_data=data[1::2],
+                learners=[majority, knn3, knn1])
+        self.lenses_results.learner_names = ["majority", "knn-3", "knn-1"]
+
         self.widget = self.create_widget(OWCalibrationPlot)  # type: OWCalibrationPlot
         warnings.filterwarnings("ignore", ".*", ConvergenceWarning)
 
@@ -389,24 +386,31 @@ def test_apply_no_output(self, *_):
         widget = self.widget
         model_list = widget.controls.selected_classifiers
 
-        info = widget.Information
-        infos = (info.no_output_multiple_folds,
-                 info.no_output_no_models,
-                 info.no_output_multiple_selected,
-                 info.no_output_non_binary_class)
-        multiple_folds, no_models, multiple_selected, non_binary_class = infos
+        multiple_folds, multiple_selected, no_models, non_binary_class = "abcd"
+        messages = {
+            multiple_folds:
+                "each training data sample produces a different model",
+            no_models:
+                "test results do not contain stored models - try testing on"
+                "separate data or on training data",
+            multiple_selected:
+                "select a single model - the widget can output only one",
+            non_binary_class:
+                "cannot calibrate non-binary classes"}
 
         def test_shown(shown):
-            for info in infos:
-                self.assertEqual(
-                    info.is_shown(), info in shown,
-                    f"{info} is unexpectedly "
-                    f"{'' if info.is_shown() else 'not'} shown")
+            widget_msg = widget.Information.no_output
             output = self.get_output(widget.Outputs.calibrated_model)
-            if shown:
-                self.assertIsNone(output)
-            else:
+            if not shown:
+                self.assertFalse(widget_msg.is_shown())
                 self.assertIsNotNone(output)
+            else:
+                self.assertTrue(widget_msg.is_shown())
+                self.assertIsNone(output)
+                for msg_id in shown:
+                    msg = messages[msg_id]
+                    self.assertIn(msg, widget_msg.formatted,
+                                  f"{msg} not included in the message")
 
         self.send_signal(widget.Inputs.evaluation_results, self.results)
         self._set_combo(widget.controls.score, 1)  # CA
@@ -558,3 +562,79 @@ def test_report(self):
         widget = self.widget
         self.send_signal(widget.Inputs.evaluation_results, self.lenses_results)
         widget.send_report()
+
+    @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+    @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+    def test_single_class(self, *_):
+        """Curves are not plotted if all data belongs to (non)-target"""
+        def check_error(shown):
+            for error in (errors.no_target_class, errors.all_target_class,
+                          errors.nan_classes):
+                self.assertEqual(error.is_shown(), error is shown,
+                                 f"{error} is unexpectedly"
+                                 f"{'' if error.is_shown() else ' not'} shown")
+            if shown is not None:
+                self.assertEqual(len(widget.plot.items), 0)
+            else:
+                self.assertGreater(len(widget.plot.items), 0)
+
+        widget = self.widget
+        errors = widget.Error
+        widget.display_rug = True
+        combo = widget.controls.score
+
+        original_actual = self.results.actual.copy()
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        widget.selected_classifiers = [0]
+        for idx in range(combo.count()):
+            self._set_combo(combo, idx)
+            self.results.actual[:] = 0
+            self.send_signal(widget.Inputs.evaluation_results, self.results)
+            check_error(errors.no_target_class)
+
+            self.results.actual[:] = 1
+            self.send_signal(widget.Inputs.evaluation_results, self.results)
+            check_error(errors.all_target_class)
+
+            self.results.actual[:] = original_actual
+            self.results.actual[3] = np.nan
+            self.send_signal(widget.Inputs.evaluation_results, self.results)
+            check_error(errors.nan_classes)
+
+            self.results.actual[:] = original_actual
+            self.send_signal(widget.Inputs.evaluation_results, self.results)
+            check_error(None)
+
+    @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+    @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+    def test_single_class_folds(self, *_):
+        """Curves for single-class folds are not plotted"""
+        widget = self.widget
+        widget.display_rug = False
+        widget.fold_curves = False
+
+        results = self.lenses_results
+        results.folds = [slice(0, 5), slice(5, 19)]
+        results.models = results.models.repeat(2, axis=0)
+        results.actual[:3] = 0
+        results.probabilities[1, 3:5] = np.nan
+        # after this, model 1 has just negative instances in fold 0
+        self.send_signal(widget.Inputs.evaluation_results, results)
+        self._set_combo(widget.controls.score, 1)  # CA
+        self.assertFalse(widget.Warning.omitted_folds.is_shown())
+        widget.controls.fold_curves.click()
+        self.assertTrue(widget.Warning.omitted_folds.is_shown())
+
+    @patch("Orange.widgets.evaluate.owcalibrationplot.ThresholdClassifier")
+    @patch("Orange.widgets.evaluate.owcalibrationplot.CalibratedLearner")
+    def test_warn_nan_probabilities(self, *_):
+        """Warn about omitted points with nan probabilities"""
+        widget = self.widget
+        widget.display_rug = False
+        widget.fold_curves = False
+
+        self.results.probabilities[1, 3] = np.nan
+        self.send_signal(widget.Inputs.evaluation_results, self.results)
+        self.assertTrue(widget.Warning.omitted_nan_prob_points.is_shown())
+        self._set_list_selection(widget.controls.selected_classifiers, [0, 2])
+        self.assertFalse(widget.Warning.omitted_folds.is_shown())