From 0d0aa4aa38399bc407f3c4cc312637c6120b4bc5 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Tue, 11 Feb 2020 07:56:35 +0100 Subject: [PATCH 1/6] Outliers: Introduce runner method --- Orange/widgets/data/owoutliers.py | 41 +++++++++++++++----- Orange/widgets/data/tests/test_owoutliers.py | 39 ++++++++++++++++++- 2 files changed, 69 insertions(+), 11 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 84dac78cd89..4807baba664 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -1,4 +1,5 @@ from typing import Dict, Tuple +from types import SimpleNamespace import numpy as np @@ -7,16 +8,42 @@ from orangewidget.settings import SettingProvider +from Orange.base import Learner from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner,\ LocalOutlierFactorLearner, IsolationForestLearner from Orange.data import Table from Orange.widgets import gui from Orange.widgets.settings import Setting +from Orange.widgets.utils.concurrent import TaskState from Orange.widgets.utils.sql import check_sql_input from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.widget import Msg, Input, Output, OWWidget +class Results(SimpleNamespace): + inliers = None # type: Optional[Table] + outliers = None # type: Optional[Table] + annotated_data = None # type: Optional[Table] + + +def run(data: Table, learner: Learner, state: TaskState) -> Results: + results = Results() + if not data: + return results + + model = learner(data) + pred = model(data) # type: Table + + col = pred.get_column_view(model.outlier_var)[0] + inliers_ind = np.where(col == 1)[0] + outliers_ind = np.where(col == 0)[0] + + results.inliers = data[inliers_ind] + results.outliers = data[outliers_ind] + results.annotated_data = pred + return results + + class ParametersEditor(QWidget, gui.OWComponent): param_changed = Signal() @@ -242,12 +269,13 @@ def commit(self): inliers, outliers, data = self.detect_outliers() summary = len(inliers) if inliers else self.info.NoOutput self.info.set_output_summary(summary) + self.n_inliers = len(inliers) if inliers else None + self.n_outliers = len(outliers) if outliers else None self.Outputs.inliers.send(inliers) self.Outputs.outliers.send(outliers) self.Outputs.data.send(data) def detect_outliers(self) -> Tuple[Table, Table, Table]: - self.n_inliers = self.n_outliers = None self.Error.singular_cov.clear() self.Error.memory_error.clear() if not self.data: @@ -256,21 +284,14 @@ def detect_outliers(self) -> Tuple[Table, Table, Table]: learner_class = self.METHODS[self.outlier_method] kwargs = self.current_editor.get_parameters() learner = learner_class(**kwargs) - model = learner(self.data) - pred = model(self.data) + results = run(self.data, learner, None) + return results.inliers, results.outliers, results.annotated_data except ValueError: self.Error.singular_cov() return None, None, None except MemoryError: self.Error.memory_error() return None, None, None - else: - col = pred[:, model.outlier_var].metas - inliers_ind = np.where(col == 1)[0] - outliers_ind = np.where(col == 0)[0] - self.n_inliers = len(inliers_ind) - self.n_outliers = len(outliers_ind) - return self.data[inliers_ind], self.data[outliers_ind], pred def send_report(self): if self.n_outliers is None or self.n_inliers is None: diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 2b351c2e06f..48b91b03cf5 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -5,10 +5,36 @@ from unittest.mock import patch, Mock from Orange.data import Table -from Orange.widgets.data.owoutliers import OWOutliers +from Orange.classification import LocalOutlierFactorLearner +from Orange.widgets.data.owoutliers import OWOutliers, run from Orange.widgets.tests.base import WidgetTest, simulate +class TestRun(unittest.TestCase): + def test_results(self): + iris = Table("iris") + learner = LocalOutlierFactorLearner() + + res = run(iris, learner, Mock()) + self.assertIsInstance(res.inliers, Table) + self.assertIsInstance(res.outliers, Table) + self.assertIsInstance(res.annotated_data, Table) + + self.assertEqual(iris.domain, res.inliers.domain) + self.assertEqual(iris.domain, res.outliers.domain) + self.assertIn("Outlier", res.annotated_data.domain) + + self.assertEqual(len(res.inliers), 145) + self.assertEqual(len(res.outliers), 5) + self.assertEqual(len(res.annotated_data), 150) + + def test_no_data(self): + res = run(None, LocalOutlierFactorLearner(), Mock()) + self.assertIsNone(res.inliers) + self.assertIsNone(res.outliers) + self.assertIsNone(res.annotated_data) + + class TestOWOutliers(WidgetTest): def setUp(self): self.widget = self.create_widget(OWOutliers) @@ -133,6 +159,17 @@ def test_covariance_enabled(self): self.assertFalse(self.widget.Warning.disabled_cov.is_shown()) self.assertTrue(cov_item.isEnabled()) + @patch("Orange.widgets.data.owoutliers.OWOutliers.report_items") + def test_report(self, mocked_report: Mock): + self.send_signal(self.widget.Inputs.data, self.iris) + self.widget.send_report() + mocked_report.assert_called() + mocked_report.reset_mock() + + self.send_signal(self.widget.Inputs.data, None) + self.widget.send_report() + mocked_report.assert_not_called() + def test_migrate_settings(self): settings = {"cont": 20, "empirical_covariance": True, "gamma": 0.04, "nu": 30, "outlier_method": 0, From e615b6d8cda9812b0dd5565b1c1af9aebd5e0aa1 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Tue, 11 Feb 2020 08:21:33 +0100 Subject: [PATCH 2/6] Outliers: Extend ConcurrentWidgetMixin --- Orange/widgets/data/owoutliers.py | 53 ++++++++++++-------- Orange/widgets/data/tests/test_owoutliers.py | 5 ++ 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 4807baba664..230f2497717 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -14,7 +14,7 @@ from Orange.data import Table from Orange.widgets import gui from Orange.widgets.settings import Setting -from Orange.widgets.utils.concurrent import TaskState +from Orange.widgets.utils.concurrent import TaskState, ConcurrentWidgetMixin from Orange.widgets.utils.sql import check_sql_input from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.widget import Msg, Input, Output, OWWidget @@ -159,7 +159,7 @@ def get_parameters(self): "random_state": 42 if self.replicable else None} -class OWOutliers(OWWidget): +class OWOutliers(OWWidget, ConcurrentWidgetMixin): name = "Outliers" description = "Detect outliers." icon = "icons/Outliers.svg" @@ -200,7 +200,8 @@ class Error(OWWidget.Error): memory_error = Msg("Not enough memory") def __init__(self): - super().__init__() + OWWidget.__init__(self) + ConcurrentWidgetMixin.__init__(self) self.data = None # type: Table self.n_inliers = None # type: int self.n_outliers = None # type: int @@ -251,6 +252,7 @@ def set_current_editor(self): @Inputs.data @check_sql_input def set_data(self, data): + self.cancel() self.clear_messages() self.data = data self.info.set_input_summary(len(data) if data else self.info.NoOutput) @@ -266,32 +268,41 @@ def enable_controls(self): self.Warning.disabled_cov() def commit(self): - inliers, outliers, data = self.detect_outliers() + self.Error.singular_cov.clear() + self.Error.memory_error.clear() + self.n_inliers = self.n_outliers = None + + learner_class = self.METHODS[self.outlier_method] + kwargs = self.current_editor.get_parameters() + learner = learner_class(**kwargs) + + self.start(run, self.data, learner) + + def on_partial_result(self, _): + pass + + def on_done(self, result: Results): + inliers, outliers = result.inliers, result.outliers summary = len(inliers) if inliers else self.info.NoOutput self.info.set_output_summary(summary) self.n_inliers = len(inliers) if inliers else None self.n_outliers = len(outliers) if outliers else None + self.Outputs.inliers.send(inliers) self.Outputs.outliers.send(outliers) - self.Outputs.data.send(data) + self.Outputs.data.send(result.annotated_data) - def detect_outliers(self) -> Tuple[Table, Table, Table]: - self.Error.singular_cov.clear() - self.Error.memory_error.clear() - if not self.data: - return None, None, None - try: - learner_class = self.METHODS[self.outlier_method] - kwargs = self.current_editor.get_parameters() - learner = learner_class(**kwargs) - results = run(self.data, learner, None) - return results.inliers, results.outliers, results.annotated_data - except ValueError: - self.Error.singular_cov() - return None, None, None - except MemoryError: + def on_exception(self, ex): + if isinstance(ex, ValueError): + self.Error.singular_cov(ex) + elif isinstance(ex, MemoryError): self.Error.memory_error() - return None, None, None + else: + raise ex + + def onDeleteWidget(self): + self.shutdown() + super().onDeleteWidget() def send_report(self): if self.n_outliers is None or self.n_inliers is None: diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 48b91b03cf5..1693e0fce76 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -108,6 +108,7 @@ def test_memory_error(self, mocked_predict: Mock): self.assertFalse(self.widget.Error.memory_error.is_shown()) mocked_predict.side_effect = MemoryError self.send_signal(self.widget.Inputs.data, self.iris) + self.wait_until_finished() self.assertTrue(self.widget.Error.memory_error.is_shown()) @patch("Orange.classification.outlier_detection._OutlierModel.predict") @@ -115,6 +116,7 @@ def test_singular_cov_error(self, mocked_predict: Mock): self.assertFalse(self.widget.Error.singular_cov.is_shown()) mocked_predict.side_effect = ValueError self.send_signal(self.widget.Inputs.data, self.iris) + self.wait_until_finished() self.assertTrue(self.widget.Error.singular_cov.is_shown()) def test_nans(self): @@ -132,10 +134,12 @@ def test_in_out_summary(self): self.assertEqual(info._StateInfo__output_summary.brief, "") self.send_signal(self.widget.Inputs.data, self.iris) + self.wait_until_finished() self.assertEqual(info._StateInfo__input_summary.brief, "150") self.assertEqual(info._StateInfo__output_summary.brief, "135") self.send_signal(self.widget.Inputs.data, None) + self.wait_until_finished() self.assertEqual(info._StateInfo__input_summary.brief, "") self.assertEqual(info._StateInfo__output_summary.brief, "") @@ -162,6 +166,7 @@ def test_covariance_enabled(self): @patch("Orange.widgets.data.owoutliers.OWOutliers.report_items") def test_report(self, mocked_report: Mock): self.send_signal(self.widget.Inputs.data, self.iris) + self.wait_until_finished() self.widget.send_report() mocked_report.assert_called() mocked_report.reset_mock() From 71d0d93cc5f7eeef66de6b5e01a0a090b88e82b3 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Wed, 12 Feb 2020 09:42:44 +0100 Subject: [PATCH 3/6] Learner: Add callback to learner --- Orange/base.py | 41 +++++++++++++++++++++++++--------- Orange/data/tests/test_util.py | 20 +++++++++++++++-- Orange/data/util.py | 24 ++++++++++++++++++++ Orange/modelling/base.py | 4 ++-- Orange/tests/test_base.py | 34 +++++++++++++++++++++++++--- 5 files changed, 106 insertions(+), 17 deletions(-) diff --git a/Orange/base.py b/Orange/base.py index d504b65097f..6a8b7ab905b 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -2,6 +2,7 @@ import itertools from collections import Iterable import re +import warnings import numpy as np import scipy @@ -9,11 +10,11 @@ from Orange.data import Table, Storage, Instance, Value from Orange.data.filter import HasClass from Orange.data.table import DomainTransformationError -from Orange.data.util import one_hot +from Orange.data.util import one_hot, progress_callback, dummy_callback from Orange.misc.wrapper_meta import WrapperMeta from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute, Normalize from Orange.statistics.util import all_nan -from Orange.util import Reprable +from Orange.util import Reprable, OrangeDeprecationWarning __all__ = ["Learner", "Model", "SklLearner", "SklModel", "ReprableWithPreprocessors"] @@ -101,7 +102,7 @@ def fit_storage(self, data): X, Y, W = data.X, data.Y, data.W if data.has_weights() else None return self.fit(X, Y, W) - def __call__(self, data): + def __call__(self, data, callback=None): if not self.check_learner_adequacy(data.domain): raise ValueError(self.learner_adequacy_err_msg) @@ -110,12 +111,26 @@ def __call__(self, data): if isinstance(data, Instance): data = Table(data.domain, [data]) origdata = data - data = self.preprocess(data) + + if callback is None: + callback = dummy_callback + callback(0, "Preprocessing...") + try: + cb = progress_callback(callback, end=0.1) + data = self.preprocess(data, callback=cb) + except TypeError: + data = self.preprocess(data) + warnings.warn("A keyword argument 'callback' has been added to the" + " preprocess() signature. Implementing the method " + "without the argument is deprecated and will result " + "in an error in the future.", + OrangeDeprecationWarning) if len(data.domain.class_vars) > 1 and not self.supports_multiclass: raise TypeError("%s doesn't support multiple class variables" % self.__class__.__name__) + callback(0.1, "Fitting...") model = self._fit_model(data) model.used_vals = [np.unique(y).astype(int) for y in data.Y[:, None].T] model.domain = data.domain @@ -123,6 +138,7 @@ def __call__(self, data): model.name = self.name model.original_domain = origdomain model.original_data = origdata + callback(1) return model def _fit_model(self, data): @@ -132,10 +148,15 @@ def _fit_model(self, data): X, Y, W = data.X, data.Y, data.W if data.has_weights() else None return self.fit(X, Y, W) - def preprocess(self, data): + def preprocess(self, data, callback=None): """Apply the `preprocessors` to the data""" - for pp in self.active_preprocessors: + if callback is None: + callback = dummy_callback + n_pps = len(list(self.active_preprocessors)) + for i, pp in enumerate(self.active_preprocessors): + callback(i / n_pps) data = pp(data) + callback(1) return data @property @@ -468,8 +489,8 @@ def _get_sklparams(self, values): raise TypeError("Wrapper does not define '__wraps__'") return params - def preprocess(self, data): - data = super().preprocess(data) + def preprocess(self, data, callback=None): + data = super().preprocess(data, callback) if any(v.is_discrete and len(v.values) > 2 for v in data.domain.attributes): @@ -478,8 +499,8 @@ def preprocess(self, data): return data - def __call__(self, data): - m = super().__call__(data) + def __call__(self, data, callback=None): + m = super().__call__(data, callback) m.params = self.params return m diff --git a/Orange/data/tests/test_util.py b/Orange/data/tests/test_util.py index 0006ea4773b..11cf395f783 100644 --- a/Orange/data/tests/test_util.py +++ b/Orange/data/tests/test_util.py @@ -1,8 +1,8 @@ import unittest from Orange.data import Domain, ContinuousVariable -from Orange.data.util import \ - get_unique_names, get_unique_names_duplicates, get_unique_names_domain +from Orange.data.util import get_unique_names, get_unique_names_duplicates, \ + get_unique_names_domain, progress_callback class TestGetUniqueNames(unittest.TestCase): @@ -115,5 +115,21 @@ def test_get_unique_names_domain(self): self.assertEqual(renamed, []) +class TestProgressCallback(unittest.TestCase): + def test_wrap(self): + def func(i): + return i + + f = progress_callback(func, start=0, end=0.8) + self.assertEqual(f(0), 0) + self.assertEqual(round(f(0.1), 2), 0.08) + self.assertEqual(f(1), 0.8) + + f = progress_callback(func, start=0.1, end=0.8) + self.assertEqual(f(0), 0.1) + self.assertEqual(f(0.1), 0.17) + self.assertEqual(f(1), 0.8) + + if __name__ == "__main__": unittest.main() diff --git a/Orange/data/util.py b/Orange/data/util.py index 273a6bb7602..b94454a45e3 100644 --- a/Orange/data/util.py +++ b/Orange/data/util.py @@ -4,6 +4,7 @@ import re from collections import Counter, defaultdict from itertools import chain +from functools import wraps import numpy as np import bottleneck as bn @@ -250,3 +251,26 @@ def get_unique_names_domain(attributes, class_vars=(), metas=()): for old, new in zip(all_names, unique_names) if new != old)) return (attributes, class_vars, metas), renamed + + +def progress_callback(callback, start=0, end=1): + """ + Wraps a callback function to allocate it end-start proportion of + the progress. + + :param callback: callable + :param start: float + :param end: float + :return: callable + """ + @wraps(callback) + def func(i, *args, **kwargs): + x = start + i * (end - start) + return callback(x, *args, **kwargs) + return func + + +def dummy_callback(*_, **__): + """ A dummy callable. """ + return 1 + diff --git a/Orange/modelling/base.py b/Orange/modelling/base.py index 70fe22b073c..8993f75b1ae 100644 --- a/Orange/modelling/base.py +++ b/Orange/modelling/base.py @@ -41,8 +41,8 @@ def _fit_model(self, data): X, Y, W = data.X, data.Y, data.W if data.has_weights() else None return learner.fit(X, Y, W) - def preprocess(self, data): - return self.get_learner(data).preprocess(data) + def preprocess(self, data, callback=None): + return self.get_learner(data).preprocess(data, callback) def get_learner(self, problem_type): """Get the learner for a given problem type. diff --git a/Orange/tests/test_base.py b/Orange/tests/test_base.py index bb4429f5d79..9b1a0462a8c 100644 --- a/Orange/tests/test_base.py +++ b/Orange/tests/test_base.py @@ -4,13 +4,19 @@ import unittest from Orange.base import SklLearner, Learner, Model -from Orange.data import Domain -from Orange.preprocess import Discretize, Randomize +from Orange.data import Domain, Table +from Orange.preprocess import Discretize, Randomize, Continuize from Orange.regression import LinearRegressionLearner class DummyLearner(Learner): - pass + def fit(self, *_, **__): + return unittest.mock.Mock() + + +class DummySklLearner(SklLearner): + def fit(self, *_, **__): + return unittest.mock.Mock() class DummyLearnerPP(Learner): @@ -71,6 +77,15 @@ def test_preprocessors_can_be_passed_in_as_generator(self): 'Preprocessors should be able to be passed in as single object ' 'as well as an iterable object') + def test_callback(self): + callback = unittest.mock.Mock() + learner = DummyLearner(preprocessors=[Discretize(), Randomize()]) + learner(Table("iris"), callback) + args = [x[0][0] for x in callback.call_args_list] + self.assertEqual(min(args), 0) + self.assertEqual(max(args), 1) + self.assertListEqual(args, sorted(args)) + class TestSklLearner(unittest.TestCase): def test_sklearn_supports_weights(self): @@ -101,6 +116,15 @@ def test_linreg(self): "Either LinearRegression no longer supports weighted tables or " "SklLearner.supports_weights is out-of-date.") + def test_callback(self): + callback = unittest.mock.Mock() + learner = DummySklLearner(preprocessors=[Continuize(), Randomize()]) + learner(Table("iris"), callback) + args = [x[0][0] for x in callback.call_args_list] + self.assertEqual(min(args), 0) + self.assertEqual(max(args), 1) + self.assertListEqual(args, sorted(args)) + class TestModel(unittest.TestCase): def test_pickle(self): @@ -111,3 +135,7 @@ def test_pickle(self): self.assertEqual(model.domain, model2.domain) self.assertEqual(model.original_data, [1, 2, 3]) self.assertEqual(model2.original_data, None) + + +if __name__ == "__main__": + unittest.main() From 856798616687acb63146e69a9d917919ecfb2920 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Wed, 12 Feb 2020 09:56:27 +0100 Subject: [PATCH 4/6] _OutlierModel: Add callback to model --- Orange/classification/outlier_detection.py | 37 ++++++++++++++----- .../tests/test_outlier_detection.py | 30 +++++++++++++++ 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/Orange/classification/outlier_detection.py b/Orange/classification/outlier_detection.py index 2ddb9366b9f..344c8349f99 100644 --- a/Orange/classification/outlier_detection.py +++ b/Orange/classification/outlier_detection.py @@ -1,8 +1,8 @@ # pylint: disable=unused-argument +from typing import Callable + import numpy as np -from Orange.data.table import DomainTransformationError -from Orange.data.util import get_unique_names from sklearn.covariance import EllipticEnvelope from sklearn.ensemble import IsolationForest from sklearn.neighbors import LocalOutlierFactor @@ -11,6 +11,9 @@ from Orange.base import SklLearner, SklModel from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable, \ Variable +from Orange.data.table import DomainTransformationError +from Orange.data.util import get_unique_names, progress_callback, \ + dummy_callback from Orange.preprocess import AdaptiveNormalize from Orange.statistics.util import all_nan @@ -29,29 +32,44 @@ def predict(self, X: np.ndarray) -> np.ndarray: pred[pred == -1] = 0 return pred[:, None] - def __call__(self, data: Table) -> Table: + def __call__(self, data: Table, callback: Callable = None) -> Table: assert isinstance(data, Table) assert self.outlier_var is not None domain = Domain(data.domain.attributes, data.domain.class_vars, data.domain.metas + (self.outlier_var,)) - self._cached_data = self.data_to_model_domain(data) + if callback is None: + callback = dummy_callback + callback(0, "Preprocessing...") + self._cached_data = self.data_to_model_domain( + data, progress_callback(callback, end=0.1)) + callback(0.1, "Predicting...") metas = np.hstack((data.metas, self.predict(self._cached_data.X))) + callback(1) return Table.from_numpy(domain, data.X, data.Y, metas) - def data_to_model_domain(self, data: Table) -> Table: + def data_to_model_domain(self, data: Table, callback: Callable) -> Table: if data.domain == self.domain: return data + callback(0) if self.original_domain.attributes != data.domain.attributes \ and data.X.size \ and not all_nan(data.X): + callback(0.5) new_data = data.transform(self.original_domain) if all_nan(new_data.X): raise DomainTransformationError( "domain transformation produced no defined values") - return new_data.transform(self.domain) - return data.transform(self.domain) + callback(0.75) + data = new_data.transform(self.domain) + callback(1) + return data + + callback(0.5) + data = data.transform(self.domain) + callback(1) + return data class _OutlierLearner(SklLearner): @@ -148,8 +166,8 @@ def mahalanobis(self, observations: np.ndarray) -> np.ndarray: """ return self.skl_model.mahalanobis(observations)[:, None] - def __call__(self, data: Table) -> Table: - pred = super().__call__(data) + def __call__(self, data: Table, callback: Callable = None) -> Table: + pred = super().__call__(data, callback) domain = Domain(pred.domain.attributes, pred.domain.class_vars, pred.domain.metas + (self.mahal_var,)) metas = np.hstack((pred.metas, self.mahalanobis(self._cached_data.X))) @@ -181,4 +199,3 @@ def _fit_model(self, data: Table) -> EllipticEnvelopeClassifier: transformer.variable = variable model.mahal_var = variable return model - diff --git a/Orange/classification/tests/test_outlier_detection.py b/Orange/classification/tests/test_outlier_detection.py index 272ed93438f..dc49ad2fcce 100644 --- a/Orange/classification/tests/test_outlier_detection.py +++ b/Orange/classification/tests/test_outlier_detection.py @@ -3,12 +3,14 @@ import pickle import tempfile import unittest +from unittest.mock import Mock import numpy as np from Orange.classification import EllipticEnvelopeLearner, \ IsolationForestLearner, LocalOutlierFactorLearner, OneClassSVMLearner from Orange.data import Table, Domain, ContinuousVariable +from Orange.data.table import DomainTransformationError class _TestDetector(unittest.TestCase): @@ -207,6 +209,17 @@ def test_unique_name(self): pred = detect(table) self.assertEqual(pred.domain.metas[0].name, "Outlier (1)") + def test_predict(self): + detect = self.detector(self.iris) + subset = self.iris[:, :3] + pred = detect(subset) + self.assert_table_appended_outlier(subset, pred) + + def test_predict_all_nan(self): + detect = self.detector(self.iris[:, :2]) + subset = self.iris[:, 2:] + self.assertRaises(DomainTransformationError, detect, subset) + def test_transform(self): detect = self.detector(self.iris) pred = detect(self.iris) @@ -235,6 +248,23 @@ def test_pickle_prediction(self): pickle.dump(pred, f) f.close() + def test_fit_callback(self): + callback = Mock() + self.detector(self.iris, callback) + args = [x[0][0] for x in callback.call_args_list] + self.assertEqual(min(args), 0) + self.assertEqual(max(args), 1) + self.assertListEqual(args, sorted(args)) + + def test_predict_callback(self): + callback = Mock() + detect = self.detector(self.iris) + detect(self.iris, callback) + args = [x[0][0] for x in callback.call_args_list] + self.assertEqual(min(args), 0) + self.assertEqual(max(args), 1) + self.assertListEqual(args, sorted(args)) + if __name__ == "__main__": unittest.main() From 30ccc07f05227485d2608ed178620ce8f744893f Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Wed, 12 Feb 2020 10:00:04 +0100 Subject: [PATCH 5/6] Outliers: Semi-interruptible widget --- Orange/widgets/data/owoutliers.py | 14 ++++++++++++-- Orange/widgets/data/tests/test_owoutliers.py | 6 +++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 230f2497717..1981ab3a3b1 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -12,6 +12,7 @@ from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner,\ LocalOutlierFactorLearner, IsolationForestLearner from Orange.data import Table +from Orange.data.util import progress_callback from Orange.widgets import gui from Orange.widgets.settings import Setting from Orange.widgets.utils.concurrent import TaskState, ConcurrentWidgetMixin @@ -31,8 +32,16 @@ def run(data: Table, learner: Learner, state: TaskState) -> Results: if not data: return results - model = learner(data) - pred = model(data) # type: Table + def callback(i: float, status=""): + state.set_progress_value(i * 100) + if status: + state.set_status(status) + if state.is_interruption_requested(): + raise Exception + + callback(0, "Initializing...") + model = learner(data, progress_callback(callback, end=0.6)) + pred = model(data, progress_callback(callback, start=0.6, end=0.99)) col = pred.get_column_view(model.outlier_var)[0] inliers_ind = np.where(col == 1)[0] @@ -41,6 +50,7 @@ def run(data: Table, learner: Learner, state: TaskState) -> Results: results.inliers = data[inliers_ind] results.outliers = data[outliers_ind] results.annotated_data = pred + callback(1) return results diff --git a/Orange/widgets/data/tests/test_owoutliers.py b/Orange/widgets/data/tests/test_owoutliers.py index 1693e0fce76..f0c095ade09 100644 --- a/Orange/widgets/data/tests/test_owoutliers.py +++ b/Orange/widgets/data/tests/test_owoutliers.py @@ -13,9 +13,9 @@ class TestRun(unittest.TestCase): def test_results(self): iris = Table("iris") - learner = LocalOutlierFactorLearner() - - res = run(iris, learner, Mock()) + state = Mock() + state.is_interruption_requested = Mock(return_value=False) + res = run(iris, LocalOutlierFactorLearner(), state) self.assertIsInstance(res.inliers, Table) self.assertIsInstance(res.outliers, Table) self.assertIsInstance(res.annotated_data, Table) From 752abbdb69ee6e1a2d52203680304367603c7c30 Mon Sep 17 00:00:00 2001 From: Vesna Tanko Date: Fri, 21 Feb 2020 13:43:25 +0100 Subject: [PATCH 6/6] callback: Rename to progress_callback --- Orange/base.py | 47 +++++++++++----------- Orange/classification/outlier_detection.py | 39 +++++++++--------- Orange/data/tests/test_util.py | 18 +-------- Orange/data/util.py | 24 ----------- Orange/modelling/base.py | 4 +- Orange/tests/test_util.py | 19 +++++++++ Orange/util.py | 23 +++++++++++ Orange/widgets/data/owoutliers.py | 6 +-- 8 files changed, 93 insertions(+), 87 deletions(-) diff --git a/Orange/base.py b/Orange/base.py index 6a8b7ab905b..748cbd3dac9 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -10,11 +10,12 @@ from Orange.data import Table, Storage, Instance, Value from Orange.data.filter import HasClass from Orange.data.table import DomainTransformationError -from Orange.data.util import one_hot, progress_callback, dummy_callback +from Orange.data.util import one_hot from Orange.misc.wrapper_meta import WrapperMeta from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute, Normalize from Orange.statistics.util import all_nan -from Orange.util import Reprable, OrangeDeprecationWarning +from Orange.util import Reprable, OrangeDeprecationWarning, wrap_callback, \ + dummy_callback __all__ = ["Learner", "Model", "SklLearner", "SklModel", "ReprableWithPreprocessors"] @@ -102,7 +103,7 @@ def fit_storage(self, data): X, Y, W = data.X, data.Y, data.W if data.has_weights() else None return self.fit(X, Y, W) - def __call__(self, data, callback=None): + def __call__(self, data, progress_callback=None): if not self.check_learner_adequacy(data.domain): raise ValueError(self.learner_adequacy_err_msg) @@ -112,25 +113,25 @@ def __call__(self, data, callback=None): data = Table(data.domain, [data]) origdata = data - if callback is None: - callback = dummy_callback - callback(0, "Preprocessing...") + if progress_callback is None: + progress_callback = dummy_callback + progress_callback(0, "Preprocessing...") try: - cb = progress_callback(callback, end=0.1) - data = self.preprocess(data, callback=cb) + cb = wrap_callback(progress_callback, end=0.1) + data = self.preprocess(data, progress_callback=cb) except TypeError: data = self.preprocess(data) - warnings.warn("A keyword argument 'callback' has been added to the" - " preprocess() signature. Implementing the method " - "without the argument is deprecated and will result " - "in an error in the future.", + warnings.warn("A keyword argument 'progress_callback' has been " + "added to the preprocess() signature. Implementing " + "the method without the argument is deprecated and " + "will result in an error in the future.", OrangeDeprecationWarning) if len(data.domain.class_vars) > 1 and not self.supports_multiclass: raise TypeError("%s doesn't support multiple class variables" % self.__class__.__name__) - callback(0.1, "Fitting...") + progress_callback(0.1, "Fitting...") model = self._fit_model(data) model.used_vals = [np.unique(y).astype(int) for y in data.Y[:, None].T] model.domain = data.domain @@ -138,7 +139,7 @@ def __call__(self, data, callback=None): model.name = self.name model.original_domain = origdomain model.original_data = origdata - callback(1) + progress_callback(1) return model def _fit_model(self, data): @@ -148,15 +149,15 @@ def _fit_model(self, data): X, Y, W = data.X, data.Y, data.W if data.has_weights() else None return self.fit(X, Y, W) - def preprocess(self, data, callback=None): + def preprocess(self, data, progress_callback=None): """Apply the `preprocessors` to the data""" - if callback is None: - callback = dummy_callback + if progress_callback is None: + progress_callback = dummy_callback n_pps = len(list(self.active_preprocessors)) for i, pp in enumerate(self.active_preprocessors): - callback(i / n_pps) + progress_callback(i / n_pps) data = pp(data) - callback(1) + progress_callback(1) return data @property @@ -489,8 +490,8 @@ def _get_sklparams(self, values): raise TypeError("Wrapper does not define '__wraps__'") return params - def preprocess(self, data, callback=None): - data = super().preprocess(data, callback) + def preprocess(self, data, progress_callback=None): + data = super().preprocess(data, progress_callback) if any(v.is_discrete and len(v.values) > 2 for v in data.domain.attributes): @@ -499,8 +500,8 @@ def preprocess(self, data, callback=None): return data - def __call__(self, data, callback=None): - m = super().__call__(data, callback) + def __call__(self, data, progress_callback=None): + m = super().__call__(data, progress_callback) m.params = self.params return m diff --git a/Orange/classification/outlier_detection.py b/Orange/classification/outlier_detection.py index 344c8349f99..e7c704c6ca1 100644 --- a/Orange/classification/outlier_detection.py +++ b/Orange/classification/outlier_detection.py @@ -12,10 +12,10 @@ from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable, \ Variable from Orange.data.table import DomainTransformationError -from Orange.data.util import get_unique_names, progress_callback, \ - dummy_callback +from Orange.data.util import get_unique_names from Orange.preprocess import AdaptiveNormalize from Orange.statistics.util import all_nan +from Orange.util import wrap_callback, dummy_callback __all__ = ["LocalOutlierFactorLearner", "IsolationForestLearner", "EllipticEnvelopeLearner", "OneClassSVMLearner"] @@ -32,43 +32,45 @@ def predict(self, X: np.ndarray) -> np.ndarray: pred[pred == -1] = 0 return pred[:, None] - def __call__(self, data: Table, callback: Callable = None) -> Table: + def __call__(self, data: Table, progress_callback: Callable = None) \ + -> Table: assert isinstance(data, Table) assert self.outlier_var is not None domain = Domain(data.domain.attributes, data.domain.class_vars, data.domain.metas + (self.outlier_var,)) - if callback is None: - callback = dummy_callback - callback(0, "Preprocessing...") + if progress_callback is None: + progress_callback = dummy_callback + progress_callback(0, "Preprocessing...") self._cached_data = self.data_to_model_domain( - data, progress_callback(callback, end=0.1)) - callback(0.1, "Predicting...") + data, wrap_callback(progress_callback, end=0.1)) + progress_callback(0.1, "Predicting...") metas = np.hstack((data.metas, self.predict(self._cached_data.X))) - callback(1) + progress_callback(1) return Table.from_numpy(domain, data.X, data.Y, metas) - def data_to_model_domain(self, data: Table, callback: Callable) -> Table: + def data_to_model_domain(self, data: Table, progress_callback: Callable) \ + -> Table: if data.domain == self.domain: return data - callback(0) + progress_callback(0) if self.original_domain.attributes != data.domain.attributes \ and data.X.size \ and not all_nan(data.X): - callback(0.5) + progress_callback(0.5) new_data = data.transform(self.original_domain) if all_nan(new_data.X): raise DomainTransformationError( "domain transformation produced no defined values") - callback(0.75) + progress_callback(0.75) data = new_data.transform(self.domain) - callback(1) + progress_callback(1) return data - callback(0.5) + progress_callback(0.5) data = data.transform(self.domain) - callback(1) + progress_callback(1) return data @@ -166,8 +168,9 @@ def mahalanobis(self, observations: np.ndarray) -> np.ndarray: """ return self.skl_model.mahalanobis(observations)[:, None] - def __call__(self, data: Table, callback: Callable = None) -> Table: - pred = super().__call__(data, callback) + def __call__(self, data: Table, progress_callback: Callable = None) \ + -> Table: + pred = super().__call__(data, progress_callback) domain = Domain(pred.domain.attributes, pred.domain.class_vars, pred.domain.metas + (self.mahal_var,)) metas = np.hstack((pred.metas, self.mahalanobis(self._cached_data.X))) diff --git a/Orange/data/tests/test_util.py b/Orange/data/tests/test_util.py index 11cf395f783..2df6c287be9 100644 --- a/Orange/data/tests/test_util.py +++ b/Orange/data/tests/test_util.py @@ -2,7 +2,7 @@ from Orange.data import Domain, ContinuousVariable from Orange.data.util import get_unique_names, get_unique_names_duplicates, \ - get_unique_names_domain, progress_callback + get_unique_names_domain class TestGetUniqueNames(unittest.TestCase): @@ -115,21 +115,5 @@ def test_get_unique_names_domain(self): self.assertEqual(renamed, []) -class TestProgressCallback(unittest.TestCase): - def test_wrap(self): - def func(i): - return i - - f = progress_callback(func, start=0, end=0.8) - self.assertEqual(f(0), 0) - self.assertEqual(round(f(0.1), 2), 0.08) - self.assertEqual(f(1), 0.8) - - f = progress_callback(func, start=0.1, end=0.8) - self.assertEqual(f(0), 0.1) - self.assertEqual(f(0.1), 0.17) - self.assertEqual(f(1), 0.8) - - if __name__ == "__main__": unittest.main() diff --git a/Orange/data/util.py b/Orange/data/util.py index b94454a45e3..273a6bb7602 100644 --- a/Orange/data/util.py +++ b/Orange/data/util.py @@ -4,7 +4,6 @@ import re from collections import Counter, defaultdict from itertools import chain -from functools import wraps import numpy as np import bottleneck as bn @@ -251,26 +250,3 @@ def get_unique_names_domain(attributes, class_vars=(), metas=()): for old, new in zip(all_names, unique_names) if new != old)) return (attributes, class_vars, metas), renamed - - -def progress_callback(callback, start=0, end=1): - """ - Wraps a callback function to allocate it end-start proportion of - the progress. - - :param callback: callable - :param start: float - :param end: float - :return: callable - """ - @wraps(callback) - def func(i, *args, **kwargs): - x = start + i * (end - start) - return callback(x, *args, **kwargs) - return func - - -def dummy_callback(*_, **__): - """ A dummy callable. """ - return 1 - diff --git a/Orange/modelling/base.py b/Orange/modelling/base.py index 8993f75b1ae..b7c2a24e10f 100644 --- a/Orange/modelling/base.py +++ b/Orange/modelling/base.py @@ -41,8 +41,8 @@ def _fit_model(self, data): X, Y, W = data.X, data.Y, data.W if data.has_weights() else None return learner.fit(X, Y, W) - def preprocess(self, data, callback=None): - return self.get_learner(data).preprocess(data, callback) + def preprocess(self, data, progress_callback=None): + return self.get_learner(data).preprocess(data, progress_callback) def get_learner(self, problem_type): """Get the learner for a given problem type. diff --git a/Orange/tests/test_util.py b/Orange/tests/test_util.py index d9862343f60..eaba6448f27 100644 --- a/Orange/tests/test_util.py +++ b/Orange/tests/test_util.py @@ -11,6 +11,7 @@ from Orange.data.util import vstack, hstack, array_equal from Orange.statistics.util import stats from Orange.tests.test_statistics import dense_sparse +from Orange.util import wrap_callback SOMETHING = 0xf00babe @@ -158,3 +159,21 @@ def test_csc_unordered_array_equal(self): a1 = sp.csc_matrix(([1, 4, 5], [0, 0, 1], [0, 1, 1, 3]), shape=(2, 3)) a2 = sp.csc_matrix(([1, 5, 4], [0, 1, 0], [0, 1, 1, 3]), shape=(2, 3)) self.assertTrue(array_equal(a1, a2)) + + def test_wrap_callback(self): + def func(i): + return i + + f = wrap_callback(func, start=0, end=0.8) + self.assertEqual(f(0), 0) + self.assertEqual(round(f(0.1), 2), 0.08) + self.assertEqual(f(1), 0.8) + + f = wrap_callback(func, start=0.1, end=0.8) + self.assertEqual(f(0), 0.1) + self.assertEqual(f(0.1), 0.17) + self.assertEqual(f(1), 0.8) + + +if __name__ == "__main__": + unittest.main() diff --git a/Orange/util.py b/Orange/util.py index 9a62cbcf6f6..1c8983610fa 100644 --- a/Orange/util.py +++ b/Orange/util.py @@ -416,6 +416,29 @@ def __repr__(self): name, ", ".join("{}={!r}".format(f, v) for f, _, v in self._reprable_items()) ) + +def wrap_callback(progress_callback, start=0, end=1): + """ + Wraps a progress callback function to allocate it end-start proportion + of an execution time. + + :param progress_callback: callable + :param start: float + :param end: float + :return: callable + """ + @wraps(progress_callback) + def func(progress, *args, **kwargs): + adjusted_progress = start + progress * (end - start) + return progress_callback(adjusted_progress, *args, **kwargs) + return func + + +def dummy_callback(*_, **__): + """ A dummy callable. """ + return 1 + + # For best result, keep this at the bottom __all__ = export_globals(globals(), __name__) diff --git a/Orange/widgets/data/owoutliers.py b/Orange/widgets/data/owoutliers.py index 1981ab3a3b1..5a1c76142a9 100644 --- a/Orange/widgets/data/owoutliers.py +++ b/Orange/widgets/data/owoutliers.py @@ -12,7 +12,7 @@ from Orange.classification import OneClassSVMLearner, EllipticEnvelopeLearner,\ LocalOutlierFactorLearner, IsolationForestLearner from Orange.data import Table -from Orange.data.util import progress_callback +from Orange.util import wrap_callback from Orange.widgets import gui from Orange.widgets.settings import Setting from Orange.widgets.utils.concurrent import TaskState, ConcurrentWidgetMixin @@ -40,8 +40,8 @@ def callback(i: float, status=""): raise Exception callback(0, "Initializing...") - model = learner(data, progress_callback(callback, end=0.6)) - pred = model(data, progress_callback(callback, start=0.6, end=0.99)) + model = learner(data, wrap_callback(callback, end=0.6)) + pred = model(data, wrap_callback(callback, start=0.6, end=0.99)) col = pred.get_column_view(model.outlier_var)[0] inliers_ind = np.where(col == 1)[0]