From 004324e650bb00a150d71a7883879c3f8ac7b457 Mon Sep 17 00:00:00 2001 From: Jaka Date: Wed, 16 Feb 2022 12:02:43 +0100 Subject: [PATCH 1/4] Enable multi target data for testing and predictions --- Orange/classification/base_classification.py | 10 ++++- Orange/evaluation/testing.py | 14 +++---- Orange/regression/base_regression.py | 10 ++++- Orange/widgets/evaluate/owpredictions.py | 40 ++++++++++++-------- Orange/widgets/evaluate/owtestandscore.py | 31 ++++++++------- 5 files changed, 65 insertions(+), 40 deletions(-) diff --git a/Orange/classification/base_classification.py b/Orange/classification/base_classification.py index 6a38bef6374..2a485c30a2e 100644 --- a/Orange/classification/base_classification.py +++ b/Orange/classification/base_classification.py @@ -5,10 +5,16 @@ class LearnerClassification(Learner): - learner_adequacy_err_msg = "Categorical class variable expected." def check_learner_adequacy(self, domain): - return domain.has_discrete_class + is_adequate = True + if len(domain.class_vars) > 1: + is_adequate = False + self.learner_adequacy_err_msg = "Too many target variables." + elif not domain.has_discrete_class: + is_adequate = False + self.learner_adequacy_err_msg = "Categorical class variable expected." + return is_adequate class ModelClassification(Model): diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 8d37dafa232..34a798241b6 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -9,7 +9,7 @@ import sklearn.model_selection as skl -from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable +from Orange.data import Domain, ContinuousVariable, DiscreteVariable from Orange.data.util import get_unique_names __all__ = ["Results", "CrossValidation", "LeaveOneOut", "TestOnTrainingData", @@ -37,9 +37,10 @@ def _mp_worker(fold_i, train_data, test_data, learner_i, learner, train_time = time() - t0 t0 = time() # testing - if train_data.domain.has_discrete_class: + class_var = train_data.domain.class_var + if class_var and class_var.is_discrete: predicted, probs = model(test_data, model.ValueProbs) - elif train_data.domain.has_continuous_class: + else: predicted = model(test_data, model.Value) test_time = time() - t0 # Different models can fail at any time raising any exception @@ -269,7 +270,7 @@ def get_augmented_data(self, model_names, new_meta_vals = np.empty((len(data), 0)) names = [var.name for var in chain(domain.attributes, domain.metas, - [class_var])] + domain.class_vars)] if classification: # predictions @@ -501,8 +502,7 @@ def prepare_arrays(cls, data, indices): ptr += len(test) row_indices = np.concatenate(row_indices, axis=0) - actual = data[row_indices].Y.ravel() - return folds, row_indices, actual + return folds, row_indices, data[row_indices].Y @staticmethod def get_indices(data): @@ -751,7 +751,7 @@ def __call__(self, data, test_data, learners, preprocessor=None, nrows=len(test_data), learners=learners, row_indices=np.arange(len(test_data)), folds=(Ellipsis, ), - actual=test_data.Y.ravel(), + actual=test_data.Y, score_by_folds=self.score_by_folds, train_time=np.zeros((len(learners),)), test_time=np.zeros((len(learners),))) diff --git a/Orange/regression/base_regression.py b/Orange/regression/base_regression.py index 3f0f7620f4e..d004564f668 100644 --- a/Orange/regression/base_regression.py +++ b/Orange/regression/base_regression.py @@ -5,10 +5,16 @@ class LearnerRegression(Learner): - learner_adequacy_err_msg = "Numeric class variable expected." def check_learner_adequacy(self, domain): - return domain.has_continuous_class + is_adequate = True + if len(domain.class_vars) > 1: + is_adequate = False + self.learner_adequacy_err_msg = "Too many target variables." + elif not domain.has_continuous_class: + is_adequate = False + self.learner_adequacy_err_msg = "Numeric class variable expected." + return is_adequate class ModelRegression(Model): diff --git a/Orange/widgets/evaluate/owpredictions.py b/Orange/widgets/evaluate/owpredictions.py index f2d435b1834..05271b72e98 100644 --- a/Orange/widgets/evaluate/owpredictions.py +++ b/Orange/widgets/evaluate/owpredictions.py @@ -282,7 +282,7 @@ def _set_class_values(self): self.class_values += self.data.domain.class_var.values for slot in self.predictors: class_var = slot.predictor.domain.class_var - if class_var.is_discrete: + if class_var and class_var.is_discrete: for value in class_var.values: if value not in self.class_values: self.class_values.append(value) @@ -318,7 +318,8 @@ def _call_predictors(self): predictor = slot.predictor try: - if predictor.domain.class_var.is_discrete: + class_var = predictor.domain.class_var + if class_var and predictor.domain.class_var.is_discrete: pred, prob = predictor(classless_data, Model.ValueProbs) else: pred = predictor(classless_data, Model.Value) @@ -371,7 +372,10 @@ def _update_scores(self): predicted = results.predicted probabilities = results.probabilities - mask = numpy.isnan(results.actual) + if self.class_var: + mask = numpy.isnan(results.actual) + else: + mask = numpy.any(numpy.isnan(results.actual), axis=1) no_targets = mask.sum() == len(results.actual) results.actual = results.actual[~mask] results.predicted = results.predicted[:, ~mask] @@ -486,7 +490,7 @@ def _update_predictions_model(self): for p in self._non_errored_predictors(): values = p.results.unmapped_predicted target = p.predictor.domain.class_var - if target.is_discrete: + if target and target.is_discrete: # order probabilities in order from Show prob. for prob = self._reordered_probabilities(p) values = numpy.array(target.values)[values.astype(int)] @@ -558,7 +562,9 @@ def _all_color_values(self): p.predictor.domain.class_var.colors, p.predictor.domain.class_var.values ), key=itemgetter(1)))) - for p in predictors if p.predictor.domain.class_var.is_discrete + for p in predictors + if p.predictor.domain.class_var and + p.predictor.domain.class_var.is_discrete ] return color_values if color_values else [([], [])] @@ -613,12 +619,7 @@ def _update_prediction_delegate(self): sort_col_indices = [] for col, slot in enumerate(self.predictors): target = slot.predictor.domain.class_var - if target.is_continuous: - delegate = PredictionsItemDelegate( - None, colors, (), (), target.format_str, - parent=self.predictionsview) - sort_col_indices.append(None) - else: + if target is not None and target.is_discrete: shown_probs = self._shown_prob_indices(target, in_target=True) if self.shown_probs in (self.MODEL_PROBS, self.BOTH_PROBS): tooltip_probs = [self.class_values[i] @@ -628,6 +629,13 @@ def _update_prediction_delegate(self): parent=self.predictionsview) sort_col_indices.append([col for col in shown_probs if col is not None]) + + else: + delegate = PredictionsItemDelegate( + None, colors, (), (), target.format_str if target is not None else None, + parent=self.predictionsview) + sort_col_indices.append(None) + # QAbstractItemView does not take ownership of delegates, so we must self._delegates.append(delegate) self.predictionsview.setItemDelegateForColumn(col, delegate) @@ -680,7 +688,7 @@ def commit(self): def _commit_evaluation_results(self): slots = [p for p in self._non_errored_predictors() if p.results.predicted is not None] - if not slots: + if not slots or not self.class_var: self.Outputs.evaluation_results.send(None) return @@ -707,7 +715,8 @@ def _commit_predictions(self): newmetas = [] newcolumns = [] for slot in self._non_errored_predictors(): - if slot.predictor.domain.class_var.is_discrete: + target = slot.predictor.domain.class_var + if target and target.is_discrete: self._add_classification_out_columns(slot, newmetas, newcolumns) else: self._add_regression_out_columns(slot, newmetas, newcolumns) @@ -724,7 +733,7 @@ def _commit_predictions(self): names.append(uniq) metas += uniq_newmetas - domain = Orange.data.Domain(attrs, self.class_var, metas=metas) + domain = Orange.data.Domain(attrs, self.data.domain.class_vars, metas=metas) predictions = self.data.transform(domain) if newcolumns: newcolumns = numpy.hstack( @@ -865,7 +874,8 @@ def __init__( super().__init__(parent) self.class_values = class_values # will be None for continuous self.colors = [QColor(*c) for c in colors] - self.target_format = target_format # target format for cont. vars + # target format for cont. vars + self.target_format = target_format if target_format else '%.2f' self.shown_probabilities = self.fmt = self.tooltip = None # set below self.setFormat(shown_probabilities, tooltip_probabilities) diff --git a/Orange/widgets/evaluate/owtestandscore.py b/Orange/widgets/evaluate/owtestandscore.py index e73f016d277..50b93639e45 100644 --- a/Orange/widgets/evaluate/owtestandscore.py +++ b/Orange/widgets/evaluate/owtestandscore.py @@ -221,6 +221,8 @@ class Information(OWWidget.Information): test_data_transformed = Msg( "Test data has been transformed to match the train data.") cant_stratify_numeric = Msg("Stratification is ignored for regression") + cant_stratify_multitarget = Msg("Stratification is ignored when there are" + " multiple target variables.") def __init__(self): super().__init__() @@ -416,7 +418,6 @@ def set_train_data(self, data): "Train data input requires a target variable.", not data.domain.class_vars ), - ("Too many target variables.", len(data.domain.class_vars) > 1), ("Target variable has no values.", np.isnan(data.Y).all()), ( "Target variable has only one value.", @@ -475,7 +476,8 @@ def set_test_data(self, data): if data is not None and not data: self.Error.test_data_empty() data = None - if data and not data.domain.class_var: + + if data and not data.domain.class_vars: self.Error.class_required_test() data = None else: @@ -914,6 +916,7 @@ def __update(self): self.Warning.test_data_missing.clear() self.Warning.cant_stratify.clear() self.Information.cant_stratify_numeric.clear() + self.Information.cant_stratify_multitarget.clear() self.Information.test_data_transformed( shown=self.resampling == self.TestOnTest and self.data is not None @@ -942,7 +945,10 @@ def __update(self): return do_stratify = self.cv_stratified if do_stratify: - if self.data.domain.class_var.is_discrete: + if len(self.data.domain.class_vars) > 1: + self.Information.cant_stratify_multitarget() + do_stratify = False + elif self.data.domain.class_var.is_discrete: least = min(filter(None, np.bincount(self.data.Y.astype(int)))) if least < k: @@ -1085,18 +1091,15 @@ def __task_complete(self, f: 'Future[Results]'): assert all(learner in learner_key for learner in learners) # Update the results for individual learners - class_var = results.domain.class_var for learner, result in zip(learners, results.split_by_model()): - stats = None - if class_var.is_primitive(): - ex = result.failed[0] - if ex: - stats = [Try.Fail(ex)] * len(self.scorers) - result = Try.Fail(ex) - else: - stats = [Try(scorer_caller(scorer, result)) - for scorer in self.scorers] - result = Try.Success(result) + ex = result.failed[0] + if ex: + stats = [Try.Fail(ex)] * len(self.scorers) + result = Try.Fail(ex) + else: + stats = [Try(scorer_caller(scorer, result)) + for scorer in self.scorers] + result = Try.Success(result) key = learner_key.get(learner) self.learners[key] = \ self.learners[key]._replace(results=result, stats=stats) From 19c1be783e17e1c41d4cef24bfde272c030889f9 Mon Sep 17 00:00:00 2001 From: Jaka Date: Wed, 16 Feb 2022 12:08:07 +0100 Subject: [PATCH 2/4] scoring: find usable scorers for non-built-in problem types --- Orange/evaluation/clustering.py | 4 ++++ Orange/evaluation/scoring.py | 14 ++++++++++- Orange/widgets/evaluate/owpredictions.py | 2 +- Orange/widgets/evaluate/owtestandscore.py | 8 +++---- .../evaluate/tests/test_owtestandscore.py | 4 ++++ Orange/widgets/evaluate/utils.py | 23 +++++++++++-------- 6 files changed, 40 insertions(+), 15 deletions(-) diff --git a/Orange/evaluation/clustering.py b/Orange/evaluation/clustering.py index 9d4fbfc52b0..acb81c2a3e8 100644 --- a/Orange/evaluation/clustering.py +++ b/Orange/evaluation/clustering.py @@ -32,6 +32,10 @@ def get_fold(self, fold): class ClusteringScore(Score): considers_actual = False + @staticmethod + def is_compatible(domain) -> bool: + return True + # pylint: disable=arguments-differ def from_predicted(self, results, score_function): # Clustering scores from labels diff --git a/Orange/evaluation/scoring.py b/Orange/evaluation/scoring.py index e2bb763104c..6d8aabd9e34 100644 --- a/Orange/evaluation/scoring.py +++ b/Orange/evaluation/scoring.py @@ -16,7 +16,7 @@ import sklearn.metrics as skl_metrics from sklearn.metrics import confusion_matrix -from Orange.data import DiscreteVariable, ContinuousVariable +from Orange.data import DiscreteVariable, ContinuousVariable, Domain from Orange.misc.wrapper_meta import WrapperMeta __all__ = ["CA", "Precision", "Recall", "F1", "PrecisionRecallFSupport", "AUC", @@ -112,14 +112,26 @@ def from_predicted(results, score_function, **kwargs): for predicted in results.predicted), dtype=np.float64, count=len(results.predicted)) + @staticmethod + def is_compatible(domain: Domain) -> bool: + raise NotImplementedError + class ClassificationScore(Score, abstract=True): class_types = (DiscreteVariable, ) + @staticmethod + def is_compatible(domain: Domain) -> bool: + return domain.has_discrete_class + class RegressionScore(Score, abstract=True): class_types = (ContinuousVariable, ) + @staticmethod + def is_compatible(domain: Domain) -> bool: + return domain.has_continuous_class + # pylint: disable=invalid-name class CA(ClassificationScore): diff --git a/Orange/widgets/evaluate/owpredictions.py b/Orange/widgets/evaluate/owpredictions.py index 05271b72e98..6af1b036a53 100644 --- a/Orange/widgets/evaluate/owpredictions.py +++ b/Orange/widgets/evaluate/owpredictions.py @@ -358,7 +358,7 @@ def _update_scores(self): else: target = None model.clear() - scorers = usable_scorers(self.class_var) if self.class_var else [] + scorers = usable_scorers(self.data.domain) if self.data else [] self.score_table.update_header(scorers) self.scorer_errors = errors = [] for pred in self.predictors: diff --git a/Orange/widgets/evaluate/owtestandscore.py b/Orange/widgets/evaluate/owtestandscore.py index 50b93639e45..99d99d4a11f 100644 --- a/Orange/widgets/evaluate/owtestandscore.py +++ b/Orange/widgets/evaluate/owtestandscore.py @@ -517,10 +517,10 @@ def _which_missing_data(self): # - we don't gain much with it # - it complicates the unit tests def _update_scorers(self): - if self.data and self.data.domain.class_var: - new_scorers = usable_scorers(self.data.domain.class_var) - else: - new_scorers = [] + new_scorers = [] + if self.data: + new_scorers = usable_scorers(self.data.domain) + # Don't unnecessarily reset the combo because this would always reset # comparison_criterion; we also set it explicitly, though, for clarity if new_scorers != self.scorers: diff --git a/Orange/widgets/evaluate/tests/test_owtestandscore.py b/Orange/widgets/evaluate/tests/test_owtestandscore.py index 08ac61ae561..aadaa4808d7 100644 --- a/Orange/widgets/evaluate/tests/test_owtestandscore.py +++ b/Orange/widgets/evaluate/tests/test_owtestandscore.py @@ -225,6 +225,10 @@ def test_addon_scorers(self): class NewScore(Score): class_types = (DiscreteVariable, ContinuousVariable) + @staticmethod + def is_compatible(domain: Domain) -> bool: + return True + class NewClassificationScore(ClassificationScore): pass diff --git a/Orange/widgets/evaluate/utils.py b/Orange/widgets/evaluate/utils.py index b63d3400020..21a45bfbb33 100644 --- a/Orange/widgets/evaluate/utils.py +++ b/Orange/widgets/evaluate/utils.py @@ -11,7 +11,7 @@ QSortFilterProxyModel from sklearn.exceptions import UndefinedMetricWarning -from Orange.data import Variable, DiscreteVariable, ContinuousVariable +from Orange.data import DiscreteVariable, ContinuousVariable, Domain from Orange.evaluation import scoring from Orange.widgets import gui from Orange.widgets.utils.tableview import table_selection_to_mime_data @@ -78,16 +78,23 @@ def learner_name(learner): return getattr(learner, "name", type(learner).__name__) -def usable_scorers(target: Variable): +def usable_scorers(domain: Domain): + if domain is None: + return [] + order = {name: i - for i, name in enumerate(BUILTIN_SCORERS_ORDER[type(target)])} + for i, name in enumerate(chain.from_iterable(BUILTIN_SCORERS_ORDER.values()))} + # 'abstract' is retrieved from __dict__ to avoid inheriting - usable = (cls for cls in scoring.Score.registry.values() - if cls.is_scalar and not cls.__dict__.get("abstract") - and isinstance(target, cls.class_types)) + scorer_candidates = [cls for cls in scoring.Score.registry.values() + if cls.is_scalar and not cls.__dict__.get("abstract")] + + usable = [scorer for scorer in scorer_candidates if + scorer.is_compatible(domain) and scorer.class_types] return sorted(usable, key=lambda cls: order.get(cls.name, 99)) + def scorer_caller(scorer, ovr_results, target=None): def thunked(): with warnings.catch_warnings(): @@ -131,9 +138,7 @@ def is_bad(x): class ScoreTable(OWComponent, QObject): - shown_scores = \ - Setting(set(chain(*BUILTIN_SCORERS_ORDER.values()))) - + shown_scores = Setting(set(chain(*BUILTIN_SCORERS_ORDER.values()))) shownScoresChanged = Signal() class ItemDelegate(QStyledItemDelegate): From e1e741987b63b7d697b5bc03d0b84e0bece40366 Mon Sep 17 00:00:00 2001 From: Jaka Date: Sun, 6 Mar 2022 19:15:29 +0100 Subject: [PATCH 3/4] learner adequacy check refactor --- Orange/base.py | 32 +++++++++++++++++--- Orange/classification/base_classification.py | 14 ++++----- Orange/preprocess/impute.py | 5 +-- Orange/regression/base_regression.py | 14 ++++----- Orange/tests/dummy_learners.py | 5 +-- Orange/tests/test_base.py | 31 +++++++++++++++++++ Orange/widgets/utils/owlearnerwidget.py | 25 +++++++++++++-- 7 files changed, 100 insertions(+), 26 deletions(-) diff --git a/Orange/base.py b/Orange/base.py index fc85ebd3e30..a44f38ad36a 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -3,12 +3,12 @@ from collections.abc import Iterable import re import warnings -from typing import Callable, Dict +from typing import Callable, Dict, Optional import numpy as np import scipy -from Orange.data import Table, Storage, Instance, Value +from Orange.data import Table, Storage, Instance, Value, Domain from Orange.data.filter import HasClass from Orange.data.table import DomainTransformationError from Orange.data.util import one_hot @@ -86,6 +86,9 @@ class Learner(ReprableWithPreprocessors): #: A sequence of data preprocessors to apply on data prior to #: fitting the model preprocessors = () + + # Note: Do not use this class attribute. + # It remains here for compatibility reasons. learner_adequacy_err_msg = '' def __init__(self, preprocessors=None): @@ -95,6 +98,7 @@ def __init__(self, preprocessors=None): elif preprocessors: self.preprocessors = (preprocessors,) + # pylint: disable=R0201 def fit(self, X, Y, W=None): raise RuntimeError( "Descendants of Learner must overload method fit or fit_storage") @@ -106,8 +110,23 @@ def fit_storage(self, data): return self.fit(X, Y, W) def __call__(self, data, progress_callback=None): - if not self.check_learner_adequacy(data.domain): - raise ValueError(self.learner_adequacy_err_msg) + + for cls in type(self).mro(): + if 'incompatibility_reason' in cls.__dict__: + incompatibility_reason = \ + self.incompatibility_reason(data.domain) # pylint: disable=assignment-from-none + if incompatibility_reason is not None: + raise ValueError(incompatibility_reason) + break + if 'check_learner_adequacy' in cls.__dict__: + warnings.warn( + "check_learner_adequacy is deprecated and will be removed " + "in upcoming releases. Learners should instead implement " + "the incompatibility_reason method.", + OrangeDeprecationWarning) + if not self.check_learner_adequacy(data.domain): + raise ValueError(self.learner_adequacy_err_msg) + break origdomain = data.domain @@ -176,6 +195,11 @@ def active_preprocessors(self): def check_learner_adequacy(self, _): return True + # pylint: disable=no-self-use + def incompatibility_reason(self, _: Domain) -> Optional[str]: + """Return None if a learner can fit domain or string explaining why it can not.""" + return None + @property def name(self): """Return a short name derived from Learner type name""" diff --git a/Orange/classification/base_classification.py b/Orange/classification/base_classification.py index 2a485c30a2e..d579ccecd6c 100644 --- a/Orange/classification/base_classification.py +++ b/Orange/classification/base_classification.py @@ -6,15 +6,13 @@ class LearnerClassification(Learner): - def check_learner_adequacy(self, domain): - is_adequate = True - if len(domain.class_vars) > 1: - is_adequate = False - self.learner_adequacy_err_msg = "Too many target variables." + def incompatibility_reason(self, domain): + reason = None + if len(domain.class_vars) > 1 and not self.supports_multiclass: + reason = "Too many target variables." elif not domain.has_discrete_class: - is_adequate = False - self.learner_adequacy_err_msg = "Categorical class variable expected." - return is_adequate + reason = "Categorical class variable expected." + return reason class ModelClassification(Model): diff --git a/Orange/preprocess/impute.py b/Orange/preprocess/impute.py index f11674cba4f..ce0142eabd1 100644 --- a/Orange/preprocess/impute.py +++ b/Orange/preprocess/impute.py @@ -224,7 +224,8 @@ def __call__(self, data, variable): variable = data.domain[variable] domain = domain_with_class_var(data.domain, variable) - if self.learner.check_learner_adequacy(domain): + incompatibility_reason = self.learner.incompatibility_reason(domain) + if incompatibility_reason is None: data = data.transform(domain) model = self.learner(data) assert model.domain.class_var == variable @@ -239,7 +240,7 @@ def copy(self): def supports_variable(self, variable): domain = Orange.data.Domain([], class_vars=variable) - return self.learner.check_learner_adequacy(domain) + return self.learner.incompatibility_reason(domain) is None def domain_with_class_var(domain, class_var): diff --git a/Orange/regression/base_regression.py b/Orange/regression/base_regression.py index d004564f668..63dd6800616 100644 --- a/Orange/regression/base_regression.py +++ b/Orange/regression/base_regression.py @@ -6,15 +6,13 @@ class LearnerRegression(Learner): - def check_learner_adequacy(self, domain): - is_adequate = True - if len(domain.class_vars) > 1: - is_adequate = False - self.learner_adequacy_err_msg = "Too many target variables." + def incompatibility_reason(self, domain): + reason = None + if len(domain.class_vars) > 1 and not self.supports_multiclass: + reason = "Too many target variables." elif not domain.has_continuous_class: - is_adequate = False - self.learner_adequacy_err_msg = "Numeric class variable expected." - return is_adequate + reason = "Numeric class variable expected." + return reason class ModelRegression(Model): diff --git a/Orange/tests/dummy_learners.py b/Orange/tests/dummy_learners.py index 133547ff9e7..8e1896f80de 100644 --- a/Orange/tests/dummy_learners.py +++ b/Orange/tests/dummy_learners.py @@ -33,8 +33,9 @@ def __init__(self, value, prob): class DummyMulticlassLearner(SklLearner): supports_multiclass = True - def check_learner_adequacy(self, domain): - return all(c.is_discrete for c in domain.class_vars) + def incompatibility_reason(self, domain): + reason = 'Not all class variables are discrete' + return None if all(c.is_discrete for c in domain.class_vars) else reason def fit(self, X, Y, W): rows, class_vars = Y.shape diff --git a/Orange/tests/test_base.py b/Orange/tests/test_base.py index 9b1a0462a8c..1f762fd02d6 100644 --- a/Orange/tests/test_base.py +++ b/Orange/tests/test_base.py @@ -2,28 +2,59 @@ # pylint: disable=missing-docstring import pickle import unittest +from distutils.version import LooseVersion +import Orange from Orange.base import SklLearner, Learner, Model from Orange.data import Domain, Table from Orange.preprocess import Discretize, Randomize, Continuize from Orange.regression import LinearRegressionLearner +from Orange.util import OrangeDeprecationWarning + + +class DummyLearnerDeprecated(Learner): + + def fit(self, *_, **__): + return unittest.mock.Mock() + + def check_learner_adequacy(self, _): + return True class DummyLearner(Learner): + def fit(self, *_, **__): return unittest.mock.Mock() class DummySklLearner(SklLearner): + def fit(self, *_, **__): return unittest.mock.Mock() class DummyLearnerPP(Learner): + preprocessors = (Randomize(),) class TestLearner(unittest.TestCase): + + def test_if_deprecation_warning_is_raised(self): + with self.assertWarns(OrangeDeprecationWarning): + DummyLearnerDeprecated()(Table('iris')) + + def test_check_learner_adequacy_deprecated(self): + """This test is to be included in the 3.32 release and will fail in + version 3.34. This serves as a reminder to remove the deprecated method + and this test.""" + if LooseVersion(Orange.__version__) >= LooseVersion("3.34"): + self.fail( + "`Orange.base.Learner.check_learner_adequacy` was deprecated in " + "version 3.32, and there have been two minor versions in " + "between. Please remove the deprecated method." + ) + def test_uses_default_preprocessors_unless_custom_pps_specified(self): """Learners should use their default preprocessors unless custom preprocessors were passed in to the constructor""" diff --git a/Orange/widgets/utils/owlearnerwidget.py b/Orange/widgets/utils/owlearnerwidget.py index ce8d5adb64f..70359cec1a3 100644 --- a/Orange/widgets/utils/owlearnerwidget.py +++ b/Orange/widgets/utils/owlearnerwidget.py @@ -1,4 +1,5 @@ from copy import deepcopy +import warnings from AnyQt.QtCore import QTimer, Qt @@ -12,6 +13,7 @@ from Orange.widgets.utils.signals import Output, Input from Orange.widgets.utils.sql import check_sql_input from Orange.widgets.widget import OWWidget, WidgetMetaClass, Msg +from Orange.util import OrangeDeprecationWarning class OWBaseLearnerMeta(WidgetMetaClass): @@ -246,8 +248,26 @@ def check_data(self): self.Error.sparse_not_supported.clear() if self.data is not None and self.learner is not None: self.Error.data_error.clear() - if not self.learner.check_learner_adequacy(self.data.domain): - self.Error.data_error(self.learner.learner_adequacy_err_msg) + + incompatibility_reason = None + for cls in type(self.learner).mro(): + if 'incompatibility_reason' in cls.__dict__: + # pylint: disable=assignment-from-none + incompatibility_reason = \ + self.learner.incompatibility_reason(self.data.domain) + break + if 'check_learner_adequacy' in cls.__dict__: + warnings.warn( + "check_learner_adequacy is deprecated and will be removed " + "in upcoming releases. Learners should instead implement " + "the incompatibility_reason method.", + OrangeDeprecationWarning) + if not self.learner.check_learner_adequacy(self.data.domain): + incompatibility_reason = self.learner.learner_adequacy_err_msg + break + + if incompatibility_reason is not None: + self.Error.data_error(incompatibility_reason) elif not len(self.data): self.Error.data_error("Dataset is empty.") elif len(ut.unique(self.data.Y)) < 2: @@ -258,6 +278,7 @@ def check_data(self): self.Error.sparse_not_supported() else: self.valid_data = True + return self.valid_data def settings_changed(self, *args, **kwargs): From f820dea654935ecbf7b65506a99bcfda34a78254 Mon Sep 17 00:00:00 2001 From: Jaka Date: Thu, 10 Mar 2022 15:01:11 +0100 Subject: [PATCH 4/4] add multi_target_input tests --- .../evaluate/tests/test_owpredictions.py | 26 ++++++++++- .../evaluate/tests/test_owtestandscore.py | 45 +++++++++++++++++-- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/Orange/widgets/evaluate/tests/test_owpredictions.py b/Orange/widgets/evaluate/tests/test_owpredictions.py index 302e7f5b9f7..706d2205fb8 100644 --- a/Orange/widgets/evaluate/tests/test_owpredictions.py +++ b/Orange/widgets/evaluate/tests/test_owpredictions.py @@ -188,6 +188,7 @@ def set_input(data, model): (self.widget.Inputs.data, data), (self.widget.Inputs.predictors, model) ]) + iris = self.iris learner = ConstantLearner() heart_disease = Table("heart_disease") @@ -253,6 +254,7 @@ def test_sort_predictions(self): """ Test whether sorting of probabilities by FilterSortProxy is correct. """ + def get_items_order(model): return model.mapToSourceRows(np.arange(model.rowCount())) @@ -878,6 +880,27 @@ def test_change_target(self): self.assertEqual(float(table.model.data(table.model.index(0, 3))), idx) + def test_multi_target_input(self): + widget = self.widget + + domain = Domain([ContinuousVariable('var1')], + class_vars=[ + ContinuousVariable('c1'), + DiscreteVariable('c2', values=('no', 'yes')) + ]) + data = Table.from_list(domain, [[1, 5, 0], [2, 10, 1]]) + + mock_model = Mock(spec=Model, return_value=np.asarray([0.2, 0.1])) + mock_model.name = 'Mockery' + mock_model.domain = domain + mock_learner = Mock(return_value=mock_model) + model = mock_learner(data) + + self.send_signal(widget.Inputs.data, data) + self.send_signal(widget.Inputs.predictors, model, 1) + pred = self.get_output(widget.Outputs.predictions) + self.assertIsInstance(pred, Table) + def test_report(self): widget = self.widget @@ -1022,7 +1045,6 @@ def assert_called(exp_selected, exp_deselected): self.assertEqual(list(selected), exp_selected) self.assertEqual(list(deselected), exp_deselected) - store.model.setSortIndices([4, 0, 1, 2, 3]) store.select_rows({3, 4}, QItemSelectionModel.Select) assert_called([4, 0], []) @@ -1132,7 +1154,7 @@ def setUpClass(cls) -> None: cls.probs = [np.array([[80, 10, 10], [30, 70, 0], [15, 80, 5], - [0, 10, 90], + [0, 10, 90], [55, 40, 5]]) / 100, np.array([[80, 0, 20], [90, 5, 5], diff --git a/Orange/widgets/evaluate/tests/test_owtestandscore.py b/Orange/widgets/evaluate/tests/test_owtestandscore.py index aadaa4808d7..1897737fbef 100644 --- a/Orange/widgets/evaluate/tests/test_owtestandscore.py +++ b/Orange/widgets/evaluate/tests/test_owtestandscore.py @@ -16,7 +16,7 @@ from Orange.evaluation import Results, TestOnTestData, scoring from Orange.evaluation.scoring import ClassificationScore, RegressionScore, \ Score -from Orange.base import Learner +from Orange.base import Learner, Model from Orange.modelling import ConstantLearner from Orange.regression import MeanLearner from Orange.widgets.evaluate.owtestandscore import ( @@ -178,7 +178,7 @@ def test_one_class_value(self): table = Table.from_list( Domain( [ContinuousVariable("a"), ContinuousVariable("b")], - [DiscreteVariable("c", values=("y", ))]), + [DiscreteVariable("c", values=("y",))]), list(zip( [42.48, 16.84, 15.23, 23.8], [1., 2., 3., 4.], @@ -192,6 +192,7 @@ def test_one_class_value(self): def test_data_errors(self): """ Test all data_errors """ + def assertErrorShown(data, is_shown, message): self.send_signal("Data", data) self.assertEqual(is_shown, self.widget.Error.train_data_error.is_shown()) @@ -378,7 +379,7 @@ def test_scores_log_reg_overfitted(self): self.assertTupleEqual(self._test_scores( table, table, LogisticRegressionLearner(), OWTestAndScore.TestOnTest, None), - (1, 1, 1, 1, 1)) + (1, 1, 1, 1, 1)) def test_scores_log_reg_bad(self): table_train = Table.from_list( @@ -393,7 +394,7 @@ def test_scores_log_reg_bad(self): self.assertTupleEqual(self._test_scores( table_train, table_test, LogisticRegressionLearner(), OWTestAndScore.TestOnTest, None), - (0, 0, 0, 0, 0)) + (0, 0, 0, 0, 0)) def test_scores_log_reg_bad2(self): table_train = Table.from_list( @@ -724,6 +725,42 @@ def test_copy_to_clipboard(self): for i in (0, 3, 4, 5, 6, 7)]) + "\r\n" self.assertEqual(clipboard_text, view_text) + def test_multi_target_input(self): + class NewScorer(Score): + class_types = ( + ContinuousVariable, + DiscreteVariable, + ) + + @staticmethod + def is_compatible(domain: Domain) -> bool: + return True + + def compute_score(self, results): + return [0.75] + + domain = Domain([ContinuousVariable('var1')], + class_vars=[ + ContinuousVariable('c1'), + DiscreteVariable('c2', values=('no', 'yes')) + ]) + data = Table.from_list(domain, [[1, 5, 0], [2, 10, 1], [2, 10, 1]]) + + mock_model = Mock(spec=Model, return_value=np.asarray([[0.2, 0.1, 0.2]])) + mock_model.name = 'Mockery' + mock_model.domain = domain + mock_learner = Mock(spec=Learner, return_value=mock_model) + mock_learner.name = 'Mockery' + + self.widget.resampling = OWTestAndScore.TestOnTrain + self.send_signal(self.widget.Inputs.train_data, data) + self.send_signal(self.widget.Inputs.learner, MajorityLearner(), 0) + self.send_signal(self.widget.Inputs.learner, mock_learner, 1) + _ = self.get_output(self.widget.Outputs.evaluations_results, wait=5000) + self.assertTrue(len(self.widget.scorers) == 1) + self.assertTrue(NewScorer in self.widget.scorers) + self.assertTrue(len(self.widget._successful_slots()) == 1) + class TestHelpers(unittest.TestCase): def test_results_one_vs_rest(self):