diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index fe9c3406ec3..bad6da6549a 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -144,7 +144,7 @@ def __init__(self, data=None, nmethods=0, *, learners=None, train_data=None, if nmethods is not None: self.failed = [False] * nmethods - if data: + if data is not None: self.data = data if self.store_data else None self.domain = data.domain self.dtype = getattr(data.Y, 'dtype', self.dtype) diff --git a/Orange/widgets/evaluate/owcalibrationplot.py b/Orange/widgets/evaluate/owcalibrationplot.py index 090aa3db398..4a5a8879e97 100644 --- a/Orange/widgets/evaluate/owcalibrationplot.py +++ b/Orange/widgets/evaluate/owcalibrationplot.py @@ -37,6 +37,10 @@ class OWCalibrationPlot(widget.OWWidget): priority = 1030 inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")] + class Warning(widget.OWWidget.Warning): + empty_input = widget.Msg( + "Empty result on input. Nothing to display.") + target_index = settings.Setting(0) selected_classifiers = settings.Setting([]) display_rug = settings.Setting(True) @@ -84,7 +88,12 @@ def __init__(self): def set_results(self, results): self.clear() - self.results = check_results_adequacy(results, self.Error) + results = check_results_adequacy(results, self.Error) + if results is not None and not results.actual.size: + self.Warning.empty_input() + else: + self.Warning.empty_input.clear() + self.results = results if self.results is not None: self._initialize(results) self._replot() @@ -125,11 +134,15 @@ def plot_curve(self, clf_idx, target): sortind = numpy.argsort(probs) probs = probs[sortind] ytrue = ytrue[sortind] -# x = numpy.unique(probs) - xmin, xmax = probs.min(), probs.max() - x = numpy.linspace(xmin, xmax, 100) - f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin)) - observed = f(x) + if probs.size: + xmin, xmax = probs.min(), probs.max() + x = numpy.linspace(xmin, xmax, 100) + f = 
gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin)) + observed = f(x) + else: + x = numpy.array([]) + observed = numpy.array([]) + curve = Curve(x, observed) curve_item = pg.PlotDataItem( x, observed, pen=pg.mkPen(self.colors[clf_idx], width=1), diff --git a/Orange/widgets/evaluate/owconfusionmatrix.py b/Orange/widgets/evaluate/owconfusionmatrix.py index 0ed20198508..66b387f4bfb 100644 --- a/Orange/widgets/evaluate/owconfusionmatrix.py +++ b/Orange/widgets/evaluate/owconfusionmatrix.py @@ -16,6 +16,7 @@ import sklearn.metrics as skl_metrics import Orange +import Orange.evaluation from Orange.widgets import widget, settings, gui from Orange.widgets.utils.annotated_data import (create_annotated_table, ANNOTATED_DATA_SIGNAL_NAME) @@ -32,8 +33,13 @@ def confusion_matrix(res, index): Returns: Confusion matrix """ - return skl_metrics.confusion_matrix( - res.actual, res.predicted[index]) + labels = numpy.arange(len(res.domain.class_var.values)) + if not res.actual.size: + # scikit-learn will not return a zero matrix + return numpy.zeros((len(labels), len(labels))) + else: + return skl_metrics.confusion_matrix( + res.actual, res.predicted[index], labels=labels) BorderRole = next(gui.OrangeUserRole) @@ -109,6 +115,7 @@ class OWConfusionMatrix(widget.OWWidget): class Error(widget.OWWidget.Error): no_regression = Msg("Confusion Matrix cannot show regression results.") + invalid_values = Msg("Evaluation Results input contains invalid values") def __init__(self): super().__init__() @@ -238,6 +245,21 @@ def set_results(self, results): else: self.Error.no_regression.clear() + nan_values = False + if results is not None: + assert isinstance(results, Orange.evaluation.Results) + if numpy.any(numpy.isnan(results.actual)) or \ + numpy.any(numpy.isnan(results.predicted)): + # Error out here (could filter them out with a warning + # instead). 
+ nan_values = True + results = data = None + + if nan_values: + self.Error.invalid_values() + else: + self.Error.invalid_values.clear() + self.results = results self.data = data @@ -487,7 +509,6 @@ def send_report(self): @classmethod def migrate_settings(cls, settings, version): - super().migrate_settings(settings, version) if not version: # For some period of time the 'selected_learner' property was # changed from List[int] -> int diff --git a/Orange/widgets/evaluate/owliftcurve.py b/Orange/widgets/evaluate/owliftcurve.py index ddcbf28f19e..13b9ad197b6 100644 --- a/Orange/widgets/evaluate/owliftcurve.py +++ b/Orange/widgets/evaluate/owliftcurve.py @@ -200,6 +200,15 @@ def _setup_plot(self): pen.setCosmetic(True) self.plot.plot([0, 1], [0, 1], pen=pen, antialias=True) + warning = "" + if not all(c.curve.is_valid for c in curves): + if any(c.curve.is_valid for c in curves): + warning = "Some lift curves are undefined" + else: + warning = "All lift curves are undefined" + + self.warning(warning) + def _replot(self): self.plot.clear() if self.results is not None: @@ -231,6 +240,11 @@ def lift_curve_from_results(results, target, clf_idx, subset=slice(0, -1)): def lift_curve(ytrue, ypred, target=1): P = numpy.sum(ytrue == target) N = ytrue.size - P + + if P == 0 or N == 0: + # Undefined TP and FP rate + return numpy.array([]), numpy.array([]), numpy.array([]) + fpr, tpr, thresholds = skl_metrics.roc_curve(ytrue, ypred, target) rpp = fpr * (N / (P + N)) + tpr * (P / (P + N)) return rpp, tpr, thresholds diff --git a/Orange/widgets/evaluate/owpredictions.py b/Orange/widgets/evaluate/owpredictions.py index 0ccaca026a4..6dea4af7a57 100644 --- a/Orange/widgets/evaluate/owpredictions.py +++ b/Orange/widgets/evaluate/owpredictions.py @@ -463,17 +463,22 @@ def commit(self): predictions.metas[:, -newcolumns.shape[1]:] = newcolumns results = None + # if the input data set contains the true target values, output a + # simple evaluation.Results instance if 
self.data.domain.class_var == class_var: - N = len(self.data) - results = Orange.evaluation.Results(self.data, store_data=True) + # omit rows with unknown target values + nanmask = numpy.isnan(self.data.get_column_view(class_var)[0]) + data = self.data[~nanmask] + N = len(data) + results = Orange.evaluation.Results(data, store_data=True) results.folds = None results.row_indices = numpy.arange(N) - results.actual = self.data.Y.ravel() + results.actual = data.Y.ravel() results.predicted = numpy.vstack( - tuple(p.results[0] for p in slots)) + tuple(p.results[0][~nanmask] for p in slots)) if classification: results.probabilities = numpy.array( - [p.results[1] for p in slots]) + [p.results[1][~nanmask] for p in slots]) results.learner_names = [p.name for p in slots] self.send("Predictions", predictions) diff --git a/Orange/widgets/evaluate/owrocanalysis.py b/Orange/widgets/evaluate/owrocanalysis.py index d2ac6680961..ebcb2f23699 100644 --- a/Orange/widgets/evaluate/owrocanalysis.py +++ b/Orange/widgets/evaluate/owrocanalysis.py @@ -298,6 +298,10 @@ class OWROCAnalysis(widget.OWWidget): priority = 1010 inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")] + class Warning(widget.OWWidget.Warning): + empty_results = widget.Msg( + "Empty results on input. 
There is nothing to display.") + target_index = settings.Setting(0) selected_classifiers = [] @@ -418,8 +422,10 @@ def set_results(self, results): self.clear() self.results = check_results_adequacy(results, self.Error) if self.results is not None: - self._initialize(results) + self._initialize(self.results) self._setup_plot() + else: + self.warning() def clear(self): """Clear the widget state.""" @@ -517,7 +523,7 @@ def _setup_plot(self): if self.display_convex_curve: self.plot.addItem(graphics.hull_item) - if self.display_def_threshold: + if self.display_def_threshold and curve.is_valid: points = curve.points ind = numpy.argmin(numpy.abs(points.thresholds - 0.5)) item = pg.TextItem( @@ -559,6 +565,8 @@ def _setup_plot(self): if self.display_convex_curve: self.plot.addItem(fold.hull_item) hull_curves = [fold.hull for curve in selected for fold in curve.folds] + else: + assert False if self.display_convex_hull and hull_curves: hull = convex_hull(hull_curves) @@ -578,6 +586,14 @@ def _setup_plot(self): if self.roc_averaging == OWROCAnalysis.Merge: self._update_perf_line() + warning = "" + if not all(c.is_valid for c in hull_curves): + if any(c.is_valid for c in hull_curves): + warning = "Some ROC curves are undefined" + else: + warning = "All ROC curves are undefined" + self.warning(warning) + def _on_target_changed(self): self.plot.clear() self._setup_plot() @@ -612,10 +628,13 @@ def _update_perf_line(self): self.fp_cost, self.fn_cost, self.target_prior / 100.0) hull = self._rocch - ind = roc_iso_performance_line(m, hull) - angle = numpy.arctan2(m, 1) # in radians - self._perf_line.setAngle(angle * 180 / numpy.pi) - self._perf_line.setPos((hull.fpr[ind[0]], hull.tpr[ind[0]])) + if hull.is_valid: + ind = roc_iso_performance_line(m, hull) + angle = numpy.arctan2(m, 1) # in radians + self._perf_line.setAngle(angle * 180 / numpy.pi) + self._perf_line.setPos((hull.fpr[ind[0]], hull.tpr[ind[0]])) + else: + self._perf_line.setVisible(False) def onDeleteWidget(self): 
self.clear() diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py new file mode 100644 index 00000000000..164c534b668 --- /dev/null +++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py @@ -0,0 +1,51 @@ +import copy + +import numpy as np + +import Orange.data +import Orange.evaluation +import Orange.classification + +from Orange.widgets.tests.base import WidgetTest +from Orange.widgets.evaluate.owcalibrationplot import OWCalibrationPlot + + +class TestOWCalibrationPlot(WidgetTest): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.lenses = data = Orange.data.Table("lenses") + cls.res = Orange.evaluation.TestOnTestData( + train_data=data[::2], test_data=data[1::2], + learners=[Orange.classification.MajorityLearner(), + Orange.classification.KNNLearner()], + store_data=True, + ) + + def setUp(self): + super().setUp() + self.widget = self.create_widget(OWCalibrationPlot) # type: OWCalibrationPlot + + def test_basic(self): + self.send_signal("Evaluation Results", self.res) + self.widget.controls.display_rug.click() + + def test_empty(self): + res = copy.copy(self.res) + res.row_indices = res.row_indices[:0] + res.actual = res.actual[:0] + res.predicted = res.predicted[:, 0] + res.probabilities = res.probabilities[:, :0, :] + self.send_signal("Evaluation Results", res) + + def test_nan_input(self): + res = copy.copy(self.res) + res.actual = res.actual.copy() + res.probabilities = res.probabilities.copy() + + res.actual[0] = np.nan + res.probabilities[:, [0, 3], :] = np.nan + self.send_signal("Evaluation Results", res) + self.assertTrue(self.widget.Error.invalid_results.is_shown()) + self.send_signal("Evaluation Results", None) + self.assertFalse(self.widget.Error.invalid_results.is_shown()) diff --git a/Orange/widgets/evaluate/tests/test_owconfusionmatrix.py b/Orange/widgets/evaluate/tests/test_owconfusionmatrix.py index e02e6c650b0..9ec60c553ec 100644 --- 
a/Orange/widgets/evaluate/tests/test_owconfusionmatrix.py +++ b/Orange/widgets/evaluate/tests/test_owconfusionmatrix.py @@ -5,7 +5,7 @@ from Orange.classification import NaiveBayesLearner, TreeLearner from Orange.regression import MeanLearner from Orange.evaluation.testing import CrossValidation, TestOnTrainingData, \ - ShuffleSplit + ShuffleSplit, Results from Orange.widgets.evaluate.owconfusionmatrix import OWConfusionMatrix from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin @@ -82,3 +82,27 @@ def test_row_indices(self): correct_indices = results.row_indices[correct] self.assertSetEqual(set(self.iris[correct_indices].ids), set(selected.ids)) + + def test_empty_results(self): + """Test on empty results.""" + res = Results(data=self.iris[:0], store_data=True) + res.row_indices = np.array([], dtype=int) + res.actual = np.array([]) + res.predicted = np.array([[]]) + res.probabilities = np.zeros((1, 0, 3)) + self.send_signal("Evaluation Results", res) + self.widget.select_correct() + self.widget.select_wrong() + + def test_nan_results(self): + """Test on results with nan values in actual/predicted""" + res = Results(data=self.iris, nmethods=2, store_data=True) + res.row_indices = np.array([0, 50, 100], dtype=int) + res.actual = np.array([0., np.nan, 2.]) + res.predicted = np.array([[np.nan, 1, 2], + [np.nan, np.nan, np.nan]]) + res.probabilities = np.zeros((1, 3, 3)) + self.send_signal("Evaluation Results", res) + self.assertTrue(self.widget.Error.invalid_values.is_shown()) + self.send_signal("Evaluation Results", None) + self.assertFalse(self.widget.Error.invalid_values.is_shown()) \ No newline at end of file diff --git a/Orange/widgets/evaluate/tests/test_owliftcurve.py b/Orange/widgets/evaluate/tests/test_owliftcurve.py new file mode 100644 index 00000000000..221e8b00b96 --- /dev/null +++ b/Orange/widgets/evaluate/tests/test_owliftcurve.py @@ -0,0 +1,53 @@ +import copy + +import numpy as np + +import Orange.data +import Orange.evaluation 
+import Orange.classification + +from Orange.widgets.tests.base import WidgetTest +from Orange.widgets.tests.utils import simulate +from Orange.widgets.evaluate.owliftcurve import OWLiftCurve + + +class TestOWLiftCurve(WidgetTest): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.lenses = data = Orange.data.Table("lenses") + cls.res = Orange.evaluation.TestOnTestData( + train_data=data[::2], test_data=data[1::2], + learners=[Orange.classification.MajorityLearner(), + Orange.classification.KNNLearner()], + store_data=True, + ) + + def setUp(self): + super().setUp() + self.widget = self.create_widget( + OWLiftCurve, + stored_settings={ + "display_convex_hull": True + } + ) # type: OWLiftCurve + + def test_basic(self): + self.send_signal("Evaluation Results", self.res) + simulate.combobox_run_through_all(self.widget.target_cb) + + def test_empty_input(self): + res = copy.copy(self.res) + res.actual = res.actual[:0] + res.row_indices = res.row_indices[:0] + res.predicted = res.predicted[:, :0] + res.probabilities = res.probabilities[:, :0, :] + self.send_signal("Evaluation Results", res) + + def test_nan_input(self): + res = copy.copy(self.res) + res.actual[0] = np.nan + self.send_signal("Evaluation Results", res) + self.assertTrue(self.widget.Error.invalid_results.is_shown()) + self.send_signal("Evaluation Results", None) + self.assertFalse(self.widget.Error.invalid_results.is_shown()) diff --git a/Orange/widgets/evaluate/tests/test_owpredictions.py b/Orange/widgets/evaluate/tests/test_owpredictions.py index 66ffe3902a4..432ce89bf21 100644 --- a/Orange/widgets/evaluate/tests/test_owpredictions.py +++ b/Orange/widgets/evaluate/tests/test_owpredictions.py @@ -1,16 +1,47 @@ """Tests for OWPredictions""" +import numpy as np + from Orange.widgets.tests.base import WidgetTest from Orange.widgets.evaluate.owpredictions import OWPredictions from Orange.data import Table +from Orange.classification import MajorityLearner +from Orange.evaluation import Results 
+ + class TestOWPredictions(WidgetTest): def setUp(self): - self.widget = self.create_widget(OWPredictions) + self.widget = self.create_widget(OWPredictions) # type: OWPredictions self.iris = Table("iris") def test_rowCount_from_model(self): """Don't crash if the bottom row is visible""" self.send_signal("Data", self.iris[:5]) self.widget.dataview.sizeHintForColumn(0) + + def test_nan_target_input(self): + data = self.iris[::10].copy() + data.Y[1] = np.nan + yvec, _ = data.get_column_view(data.domain.class_var) + nanmask = np.isnan(yvec) + self.send_signal("Data", data) + self.send_signal("Predictors", MajorityLearner()(data), 1) + pred = self.get_output("Predictions", ) + self.assertIsInstance(pred, Table) + np.testing.assert_array_equal( + yvec, pred.get_column_view(data.domain.class_var)[0]) + + evres = self.get_output("Evaluation Results") + self.assertIsInstance(evres, Results) + self.assertIsInstance(evres.data, Table) + ev_yvec, _ = evres.data.get_column_view(data.domain.class_var) + + self.assertTrue(np.all(~np.isnan(ev_yvec))) + self.assertTrue(np.all(~np.isnan(evres.actual))) + + data.Y[:] = np.nan + self.send_signal("Data", data) + evres = self.get_output("Evaluation Results") + self.assertEqual(len(evres.data), 0) diff --git a/Orange/widgets/evaluate/tests/test_owrocanalysis.py b/Orange/widgets/evaluate/tests/test_owrocanalysis.py index 5af20fdc5f2..cf6a17bf438 100644 --- a/Orange/widgets/evaluate/tests/test_owrocanalysis.py +++ b/Orange/widgets/evaluate/tests/test_owrocanalysis.py @@ -1,4 +1,5 @@ import unittest +import copy import numpy import Orange.data @@ -6,6 +7,8 @@ import Orange.classification from Orange.widgets.evaluate import owrocanalysis +from Orange.widgets.evaluate.owrocanalysis import OWROCAnalysis +from Orange.widgets.tests.base import WidgetTest class TestROC(unittest.TestCase): @@ -55,3 +58,96 @@ def test_ROCData_from_results(self): self.assertTrue(all(not c.is_valid for c in rocdata.folds)) 
self.assertFalse(rocdata.avg_vertical.is_valid) self.assertFalse(rocdata.avg_threshold.is_valid) + + +class TestOWROCAnalysis(WidgetTest): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.lenses = data = Orange.data.Table("lenses") + cls.res = Orange.evaluation.TestOnTestData( + train_data=data[::2], test_data=data[1::2], + learners=[Orange.classification.MajorityLearner(), + Orange.classification.KNNLearner()], + store_data=True, + ) + + def setUp(self): + super().setUp() + self.widget = self.create_widget( + OWROCAnalysis, + stored_settings={ + "display_perf_line": True, + "display_def_threshold": True, + "display_convex_hull": True, + "display_convex_curve": True + } + ) # type: OWROCAnalysis + + def tearDown(self): + super().tearDown() + self.widget.onDeleteWidget() + self.widgets.remove(self.widget) + self.widget = None + + def test_basic(self): + res = self.res + self.send_signal("Evaluation Results", res) + self.widget.roc_averaging = OWROCAnalysis.Merge + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.Vertical + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.Threshold + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.NoAveraging + self.widget._replot() + self.send_signal("Evaluation Results", None) + + def test_empty_input(self): + res = Orange.evaluation.Results( + data=self.lenses[:0], nmethods=2, store_data=True) + res.row_indices = numpy.array([], dtype=int) + res.actual = numpy.array([]) + res.predicted = numpy.zeros((2, 0)) + res.probabilities = numpy.zeros((2, 0, 3)) + + self.send_signal("Evaluation Results", res) + self.widget.roc_averaging = OWROCAnalysis.Merge + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.Vertical + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.Threshold + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.NoAveraging + self.widget._replot() + + res.row_indices = numpy.array([1], dtype=int) + 
res.actual = numpy.array([0.0]) + res.predicted = numpy.zeros((2, 1)) + res.probabilities = numpy.zeros((2, 1, 3)) + + self.send_signal("Evaluation Results", res) + self.widget.roc_averaging = OWROCAnalysis.Merge + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.Vertical + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.Threshold + self.widget._replot() + self.widget.roc_averaging = OWROCAnalysis.NoAveraging + self.widget._replot() + + def test_nan_input(self): + res = copy.copy(self.res) + res.actual = res.actual.copy() + res.predicted = res.predicted.copy() + res.probabilities = res.probabilities.copy() + + res.actual[0] = numpy.nan + res.predicted[:, 1] = numpy.nan + res.probabilities[0, 1, :] = numpy.nan + + self.send_signal("Evaluation Results", res) + self.assertTrue(self.widget.Error.invalid_results.is_shown()) + self.send_signal("Evaluation Results", None) + self.assertFalse(self.widget.Error.invalid_results.is_shown()) \ No newline at end of file diff --git a/Orange/widgets/evaluate/utils.py b/Orange/widgets/evaluate/utils.py index fd9e7590ae2..8378dee0446 100644 --- a/Orange/widgets/evaluate/utils.py +++ b/Orange/widgets/evaluate/utils.py @@ -1,6 +1,13 @@ -def check_results_adequacy(results, error_group): +import numpy + + +def check_results_adequacy(results, error_group, check_nan=True): error_group.add_message("invalid_results") error_group.invalid_results.clear() + + def anynan(a): + return numpy.any(numpy.isnan(a)) + if results is None: return None if results.data is None: @@ -9,5 +16,11 @@ def check_results_adequacy(results, error_group): elif not results.data.domain.has_discrete_class: error_group.invalid_results( "Discrete outcome variable is required") + elif check_nan and (anynan(results.actual) or + anynan(results.predicted) or + (results.probabilities is not None and + anynan(results.probabilities))): + error_group.invalid_results( + "Results contains invalid values") else: return results