Skip to content

Commit 1657e29

Browse files
janezdastaric
authored and committed
Merge pull request #1954 from ales-erjavec/fixes/evaluate-input-validation
[FIX] Evaluation Results input validation (cherry picked from commit 5cbac0e)
1 parent b34cbcb commit 1657e29

File tree

12 files changed

+364
-24
lines changed

12 files changed

+364
-24
lines changed

Orange/evaluation/testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def __init__(self, data=None, nmethods=0, *, learners=None, train_data=None,
144144
if nmethods is not None:
145145
self.failed = [False] * nmethods
146146

147-
if data:
147+
if data is not None:
148148
self.data = data if self.store_data else None
149149
self.domain = data.domain
150150
self.dtype = getattr(data.Y, 'dtype', self.dtype)

Orange/widgets/evaluate/owcalibrationplot.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ class OWCalibrationPlot(widget.OWWidget):
3737
priority = 1030
3838
inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
3939

40+
class Warning(widget.OWWidget.Warning):
41+
empty_input = widget.Msg(
42+
"Empty result on input. Nothing to display.")
43+
4044
target_index = settings.Setting(0)
4145
selected_classifiers = settings.Setting([])
4246
display_rug = settings.Setting(True)
@@ -84,7 +88,12 @@ def __init__(self):
8488

8589
def set_results(self, results):
8690
self.clear()
87-
self.results = check_results_adequacy(results, self.Error)
91+
results = check_results_adequacy(results, self.Error)
92+
if results is not None and not results.actual.size:
93+
self.Warning.empty_input()
94+
else:
95+
self.Warning.empty_input.clear()
96+
self.results = results
8897
if self.results is not None:
8998
self._initialize(results)
9099
self._replot()
@@ -125,11 +134,15 @@ def plot_curve(self, clf_idx, target):
125134
sortind = numpy.argsort(probs)
126135
probs = probs[sortind]
127136
ytrue = ytrue[sortind]
128-
# x = numpy.unique(probs)
129-
xmin, xmax = probs.min(), probs.max()
130-
x = numpy.linspace(xmin, xmax, 100)
131-
f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin))
132-
observed = f(x)
137+
if probs.size:
138+
xmin, xmax = probs.min(), probs.max()
139+
x = numpy.linspace(xmin, xmax, 100)
140+
f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin))
141+
observed = f(x)
142+
else:
143+
x = numpy.array([])
144+
observed = numpy.array([])
145+
133146
curve = Curve(x, observed)
134147
curve_item = pg.PlotDataItem(
135148
x, observed, pen=pg.mkPen(self.colors[clf_idx], width=1),

Orange/widgets/evaluate/owconfusionmatrix.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import sklearn.metrics as skl_metrics
1717

1818
import Orange
19+
import Orange.evaluation
1920
from Orange.widgets import widget, settings, gui
2021
from Orange.widgets.utils.annotated_data import (create_annotated_table,
2122
ANNOTATED_DATA_SIGNAL_NAME)
@@ -32,8 +33,13 @@ def confusion_matrix(res, index):
3233
3334
Returns: Confusion matrix
3435
"""
35-
return skl_metrics.confusion_matrix(
36-
res.actual, res.predicted[index])
36+
labels = numpy.arange(len(res.domain.class_var.values))
37+
if not res.actual.size:
38+
# scikit-learn will not return an zero matrix
39+
return numpy.zeros((len(labels), len(labels)))
40+
else:
41+
return skl_metrics.confusion_matrix(
42+
res.actual, res.predicted[index], labels=labels)
3743

3844

3945
BorderRole = next(gui.OrangeUserRole)
@@ -109,6 +115,7 @@ class OWConfusionMatrix(widget.OWWidget):
109115

110116
class Error(widget.OWWidget.Error):
111117
no_regression = Msg("Confusion Matrix cannot show regression results.")
118+
invalid_values = Msg("Evaluation Results input contains invalid values")
112119

113120
def __init__(self):
114121
super().__init__()
@@ -238,6 +245,21 @@ def set_results(self, results):
238245
else:
239246
self.Error.no_regression.clear()
240247

248+
nan_values = False
249+
if results is not None:
250+
assert isinstance(results, Orange.evaluation.Results)
251+
if numpy.any(numpy.isnan(results.actual)) or \
252+
numpy.any(numpy.isnan(results.predicted)):
253+
# Error out here (could filter them out with a warning
254+
# instead).
255+
nan_values = True
256+
results = data = None
257+
258+
if nan_values:
259+
self.Error.invalid_values()
260+
else:
261+
self.Error.invalid_values.clear()
262+
241263
self.results = results
242264
self.data = data
243265

@@ -487,7 +509,6 @@ def send_report(self):
487509

488510
@classmethod
489511
def migrate_settings(cls, settings, version):
490-
super().migrate_settings(settings, version)
491512
if not version:
492513
# For some period of time the 'selected_learner' property was
493514
# changed from List[int] -> int

Orange/widgets/evaluate/owliftcurve.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,15 @@ def _setup_plot(self):
200200
pen.setCosmetic(True)
201201
self.plot.plot([0, 1], [0, 1], pen=pen, antialias=True)
202202

203+
warning = ""
204+
if not all(c.curve.is_valid for c in curves):
205+
if any(c.curve.is_valid for c in curves):
206+
warning = "Some lift curves are undefined"
207+
else:
208+
warning = "All lift curves are undefined"
209+
210+
self.warning(warning)
211+
203212
def _replot(self):
204213
self.plot.clear()
205214
if self.results is not None:
@@ -231,6 +240,11 @@ def lift_curve_from_results(results, target, clf_idx, subset=slice(0, -1)):
231240
def lift_curve(ytrue, ypred, target=1):
232241
P = numpy.sum(ytrue == target)
233242
N = ytrue.size - P
243+
244+
if P == 0 or N == 0:
245+
# Undefined TP and FP rate
246+
return numpy.array([]), numpy.array([]), numpy.array([])
247+
234248
fpr, tpr, thresholds = skl_metrics.roc_curve(ytrue, ypred, target)
235249
rpp = fpr * (N / (P + N)) + tpr * (P / (P + N))
236250
return rpp, tpr, thresholds

Orange/widgets/evaluate/owpredictions.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -463,17 +463,22 @@ def commit(self):
463463
predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
464464

465465
results = None
466+
# if the input data set contains the true target values, output a
467+
# simple evaluation.Results instance
466468
if self.data.domain.class_var == class_var:
467-
N = len(self.data)
468-
results = Orange.evaluation.Results(self.data, store_data=True)
469+
# omit rows with unknonw target values
470+
nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
471+
data = self.data[~nanmask]
472+
N = len(data)
473+
results = Orange.evaluation.Results(data, store_data=True)
469474
results.folds = None
470475
results.row_indices = numpy.arange(N)
471-
results.actual = self.data.Y.ravel()
476+
results.actual = data.Y.ravel()
472477
results.predicted = numpy.vstack(
473-
tuple(p.results[0] for p in slots))
478+
tuple(p.results[0][~nanmask] for p in slots))
474479
if classification:
475480
results.probabilities = numpy.array(
476-
[p.results[1] for p in slots])
481+
[p.results[1][~nanmask] for p in slots])
477482
results.learner_names = [p.name for p in slots]
478483

479484
self.send("Predictions", predictions)

Orange/widgets/evaluate/owrocanalysis.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,10 @@ class OWROCAnalysis(widget.OWWidget):
298298
priority = 1010
299299
inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
300300

301+
class Warning(widget.OWWidget.Warning):
302+
empty_results = widget.Msg(
303+
"Empty results on input. There is nothing to display.")
304+
301305
target_index = settings.Setting(0)
302306
selected_classifiers = []
303307

@@ -418,8 +422,10 @@ def set_results(self, results):
418422
self.clear()
419423
self.results = check_results_adequacy(results, self.Error)
420424
if self.results is not None:
421-
self._initialize(results)
425+
self._initialize(self.results)
422426
self._setup_plot()
427+
else:
428+
self.warning()
423429

424430
def clear(self):
425431
"""Clear the widget state."""
@@ -517,7 +523,7 @@ def _setup_plot(self):
517523
if self.display_convex_curve:
518524
self.plot.addItem(graphics.hull_item)
519525

520-
if self.display_def_threshold:
526+
if self.display_def_threshold and curve.is_valid:
521527
points = curve.points
522528
ind = numpy.argmin(numpy.abs(points.thresholds - 0.5))
523529
item = pg.TextItem(
@@ -559,6 +565,8 @@ def _setup_plot(self):
559565
if self.display_convex_curve:
560566
self.plot.addItem(fold.hull_item)
561567
hull_curves = [fold.hull for curve in selected for fold in curve.folds]
568+
else:
569+
assert False
562570

563571
if self.display_convex_hull and hull_curves:
564572
hull = convex_hull(hull_curves)
@@ -578,6 +586,14 @@ def _setup_plot(self):
578586
if self.roc_averaging == OWROCAnalysis.Merge:
579587
self._update_perf_line()
580588

589+
warning = ""
590+
if not all(c.is_valid for c in hull_curves):
591+
if any(c.is_valid for c in hull_curves):
592+
warning = "Some ROC curves are undefined"
593+
else:
594+
warning = "All ROC curves are undefined"
595+
self.warning(warning)
596+
581597
def _on_target_changed(self):
582598
self.plot.clear()
583599
self._setup_plot()
@@ -612,10 +628,13 @@ def _update_perf_line(self):
612628
self.fp_cost, self.fn_cost, self.target_prior / 100.0)
613629

614630
hull = self._rocch
615-
ind = roc_iso_performance_line(m, hull)
616-
angle = numpy.arctan2(m, 1) # in radians
617-
self._perf_line.setAngle(angle * 180 / numpy.pi)
618-
self._perf_line.setPos((hull.fpr[ind[0]], hull.tpr[ind[0]]))
631+
if hull.is_valid:
632+
ind = roc_iso_performance_line(m, hull)
633+
angle = numpy.arctan2(m, 1) # in radians
634+
self._perf_line.setAngle(angle * 180 / numpy.pi)
635+
self._perf_line.setPos((hull.fpr[ind[0]], hull.tpr[ind[0]]))
636+
else:
637+
self._perf_line.setVisible(False)
619638

620639
def onDeleteWidget(self):
621640
self.clear()
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import copy
2+
3+
import numpy as np
4+
5+
import Orange.data
6+
import Orange.evaluation
7+
import Orange.classification
8+
9+
from Orange.widgets.tests.base import WidgetTest
10+
from Orange.widgets.evaluate.owcalibrationplot import OWCalibrationPlot
11+
12+
13+
class TestOWCalibrationPlot(WidgetTest):
14+
@classmethod
15+
def setUpClass(cls):
16+
super().setUpClass()
17+
cls.lenses = data = Orange.data.Table("lenses")
18+
cls.res = Orange.evaluation.TestOnTestData(
19+
train_data=data[::2], test_data=data[1::2],
20+
learners=[Orange.classification.MajorityLearner(),
21+
Orange.classification.KNNLearner()],
22+
store_data=True,
23+
)
24+
25+
def setUp(self):
26+
super().setUp()
27+
self.widget = self.create_widget(OWCalibrationPlot) # type: OWCalibrationPlot
28+
29+
def test_basic(self):
30+
self.send_signal("Evaluation Results", self.res)
31+
self.widget.controls.display_rug.click()
32+
33+
def test_empty(self):
34+
res = copy.copy(self.res)
35+
res.row_indices = res.row_indices[:0]
36+
res.actual = res.actual[:0]
37+
res.predicted = res.predicted[:, 0]
38+
res.probabilities = res.probabilities[:, :0, :]
39+
self.send_signal("Evaluation Results", res)
40+
41+
def test_nan_input(self):
42+
res = copy.copy(self.res)
43+
res.actual = res.actual.copy()
44+
res.probabilities = res.probabilities.copy()
45+
46+
res.actual[0] = np.nan
47+
res.probabilities[:, [0, 3], :] = np.nan
48+
self.send_signal("Evaluation Results", res)
49+
self.assertTrue(self.widget.Error.invalid_results.is_shown())
50+
self.send_signal("Evaluation Results", None)
51+
self.assertFalse(self.widget.Error.invalid_results.is_shown())

Orange/widgets/evaluate/tests/test_owconfusionmatrix.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from Orange.classification import NaiveBayesLearner, TreeLearner
66
from Orange.regression import MeanLearner
77
from Orange.evaluation.testing import CrossValidation, TestOnTrainingData, \
8-
ShuffleSplit
8+
ShuffleSplit, Results
99
from Orange.widgets.evaluate.owconfusionmatrix import OWConfusionMatrix
1010
from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
1111

@@ -82,3 +82,27 @@ def test_row_indices(self):
8282
correct_indices = results.row_indices[correct]
8383
self.assertSetEqual(set(self.iris[correct_indices].ids),
8484
set(selected.ids))
85+
86+
def test_empty_results(self):
87+
"""Test on empty results."""
88+
res = Results(data=self.iris[:0], store_data=True)
89+
res.row_indices = np.array([], dtype=int)
90+
res.actual = np.array([])
91+
res.predicted = np.array([[]])
92+
res.probabilities = np.zeros((1, 0, 3))
93+
self.send_signal("Evaluation Results", res)
94+
self.widget.select_correct()
95+
self.widget.select_wrong()
96+
97+
def test_nan_results(self):
98+
"""Test on results with nan values in actual/predicted"""
99+
res = Results(data=self.iris, nmethods=2, store_data=True)
100+
res.row_indices = np.array([0, 50, 100], dtype=int)
101+
res.actual = np.array([0., np.nan, 2.])
102+
res.predicted = np.array([[np.nan, 1, 2],
103+
[np.nan, np.nan, np.nan]])
104+
res.probabilities = np.zeros((1, 3, 3))
105+
self.send_signal("Evaluation Results", res)
106+
self.assertTrue(self.widget.Error.invalid_values.is_shown())
107+
self.send_signal("Evaluation Results", None)
108+
self.assertFalse(self.widget.Error.invalid_values.is_shown())
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import copy
2+
3+
import numpy as np
4+
5+
import Orange.data
6+
import Orange.evaluation
7+
import Orange.classification
8+
9+
from Orange.widgets.tests.base import WidgetTest
10+
from Orange.widgets.tests.utils import simulate
11+
from Orange.widgets.evaluate.owliftcurve import OWLiftCurve
12+
13+
14+
class TestOWLiftCurve(WidgetTest):
15+
@classmethod
16+
def setUpClass(cls):
17+
super().setUpClass()
18+
cls.lenses = data = Orange.data.Table("lenses")
19+
cls.res = Orange.evaluation.TestOnTestData(
20+
train_data=data[::2], test_data=data[1::2],
21+
learners=[Orange.classification.MajorityLearner(),
22+
Orange.classification.KNNLearner()],
23+
store_data=True,
24+
)
25+
26+
def setUp(self):
27+
super().setUp()
28+
self.widget = self.create_widget(
29+
OWLiftCurve,
30+
stored_settings={
31+
"display_convex_hull": True
32+
}
33+
) # type: OWLiftCurve
34+
35+
def test_basic(self):
36+
self.send_signal("Evaluation Results", self.res)
37+
simulate.combobox_run_through_all(self.widget.target_cb)
38+
39+
def test_empty_input(self):
40+
res = copy.copy(self.res)
41+
res.actual = res.actual[:0]
42+
res.row_indices = res.row_indices[:0]
43+
res.predicted = res.predicted[:, :0]
44+
res.probabilities = res.probabilities[:, :0, :]
45+
self.send_signal("Evaluation Results", res)
46+
47+
def test_nan_input(self):
48+
res = copy.copy(self.res)
49+
res.actual[0] = np.nan
50+
self.send_signal("Evaluation Results", res)
51+
self.assertTrue(self.widget.Error.invalid_results.is_shown())
52+
self.send_signal("Evaluation Results", None)
53+
self.assertFalse(self.widget.Error.invalid_results.is_shown())

0 commit comments

Comments
 (0)