diff --git a/Orange/base.py b/Orange/base.py index 1c7c6852216..5af4285605f 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -8,10 +8,10 @@ import scipy from Orange.data import Table, Storage, Instance, Value +from Orange.data.filter import HasClass from Orange.data.util import one_hot from Orange.misc.wrapper_meta import WrapperMeta -from Orange.preprocess import (RemoveNaNClasses, Continuize, - RemoveNaNColumns, SklImpute, Normalize) +from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute, Normalize from Orange.util import Reprable __all__ = ["Learner", "Model", "SklLearner", "SklModel"] @@ -341,7 +341,7 @@ class SklLearner(Learner, metaclass=WrapperMeta): _params = {} preprocessors = default_preprocessors = [ - RemoveNaNClasses(), + HasClass(), Continuize(), RemoveNaNColumns(), SklImpute()] diff --git a/Orange/classification/rules.py b/Orange/classification/rules.py index 9acf01d6b2e..53f6c691c1c 100644 --- a/Orange/classification/rules.py +++ b/Orange/classification/rules.py @@ -15,10 +15,11 @@ import numpy as np from scipy.stats import chi2 -from Orange.data import Table, _contingency from Orange.classification import Learner, Model +from Orange.data import Table, _contingency +from Orange.data.filter import HasClass from Orange.preprocess.discretize import EntropyMDL -from Orange.preprocess import RemoveNaNColumns, RemoveNaNClasses, Impute +from Orange.preprocess import RemoveNaNColumns, Impute __all__ = ["CN2Learner", "CN2UnorderedLearner", "CN2SDLearner", "CN2SDUnorderedLearner"] @@ -901,7 +902,7 @@ class _RuleLearner(Learner): .. 
[1] "Separate-and-Conquer Rule Learning", Johannes Fürnkranz, Artificial Intelligence Review 13, 3-54, 1999 """ - preprocessors = [RemoveNaNColumns(), RemoveNaNClasses(), Impute()] + preprocessors = [RemoveNaNColumns(), HasClass(), Impute()] def __init__(self, preprocessors=None, base_rules=None): """ diff --git a/Orange/classification/softmax_regression.py b/Orange/classification/softmax_regression.py index 2e0deb615dc..0978bb5ebad 100644 --- a/Orange/classification/softmax_regression.py +++ b/Orange/classification/softmax_regression.py @@ -2,8 +2,8 @@ from scipy.optimize import fmin_l_bfgs_b from Orange.classification import Learner, Model -from Orange.preprocess import (RemoveNaNClasses, Continuize, RemoveNaNColumns, - Impute, Normalize) +from Orange.data.filter import HasClass +from Orange.preprocess import Continuize, RemoveNaNColumns, Impute, Normalize __all__ = ["SoftmaxRegressionLearner"] @@ -40,7 +40,7 @@ class SoftmaxRegressionLearner(Learner): Parameters for L-BFGS algorithm. """ name = 'softmax' - preprocessors = [RemoveNaNClasses(), + preprocessors = [HasClass(), RemoveNaNColumns(), Impute(), Continuize(), diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index 684652872e8..4c7ce44454c 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -9,9 +9,10 @@ import bottleneck as bn import Orange.data +from Orange.data.filter import HasClass from Orange.preprocess.util import _RefuseDataInConstructor from Orange.statistics import distribution -from Orange.util import Reprable, Enum +from Orange.util import Reprable, Enum, deprecated from . 
import impute, discretize, transformation __all__ = ["Continuize", "Discretize", "Impute", @@ -197,6 +198,7 @@ def __call__(self, data): return data.transform(domain) +@deprecated("Orange.data.filter.HasClass") class RemoveNaNClasses(Preprocess): """ Construct preprocessor that removes examples with missing class @@ -216,11 +218,7 @@ def __call__(self, data): ------- data : data set without rows with missing classes """ - if len(data.Y.shape) > 1: - nan_cls = np.any(np.isnan(data.Y), axis=1) - else: - nan_cls = np.isnan(data.Y) - return data[~nan_cls] + return HasClass()(data) class Normalize(Preprocess): diff --git a/Orange/preprocess/score.py b/Orange/preprocess/score.py index f5b047a99ae..78c0e0090e8 100644 --- a/Orange/preprocess/score.py +++ b/Orange/preprocess/score.py @@ -4,12 +4,13 @@ import numpy as np from sklearn import feature_selection as skl_fss -from Orange.misc.wrapper_meta import WrapperMeta -from Orange.statistics import contingency, distribution from Orange.data import Domain, Variable, DiscreteVariable, ContinuousVariable -from Orange.preprocess.preprocess import Discretize, Impute, RemoveNaNClasses +from Orange.data.filter import HasClass +from Orange.misc.wrapper_meta import WrapperMeta +from Orange.preprocess.preprocess import Discretize, Impute from Orange.preprocess.util import _RefuseDataInConstructor +from Orange.statistics import contingency, distribution from Orange.util import Reprable __all__ = ["Chi2", @@ -27,9 +28,7 @@ class Scorer(_RefuseDataInConstructor, Reprable): feature_type = None class_type = None supports_sparse_data = None - preprocessors = [ - RemoveNaNClasses() - ] + preprocessors = [HasClass()] @property def friendly_name(self): diff --git a/Orange/regression/linear_bfgs.py b/Orange/regression/linear_bfgs.py index 885dfd51f49..f266c00e28e 100644 --- a/Orange/regression/linear_bfgs.py +++ b/Orange/regression/linear_bfgs.py @@ -1,9 +1,9 @@ import numpy as np from scipy.optimize import fmin_l_bfgs_b +from Orange.data.filter 
import HasClass +from Orange.preprocess import Normalize, Continuize, Impute, RemoveNaNColumns from Orange.regression import Learner, Model -from Orange.preprocess import (RemoveNaNClasses, Normalize, Continuize, - Impute, RemoveNaNColumns) __all__ = ["LinearRegressionLearner"] @@ -51,7 +51,7 @@ class LinearRegressionLearner(Learner): print(c(data)) # predict ''' name = 'linear_bfgs' - preprocessors = [RemoveNaNClasses(), + preprocessors = [HasClass(), Normalize(), Continuize(), Impute(), diff --git a/Orange/tests/test_filter.py b/Orange/tests/test_filter.py index b70c2fe3e10..d7615797802 100644 --- a/Orange/tests/test_filter.py +++ b/Orange/tests/test_filter.py @@ -92,6 +92,19 @@ def test_has_class_filter_table(self): self.assertEqual(len(without_class), self.n_missing) self.assertTrue(without_class.has_missing_class()) + def test_has_class_multiclass(self): + domain = Domain([DiscreteVariable("x", values="01")], + [DiscreteVariable("y1", values="01"), + DiscreteVariable("y2", values="01")]) + table = Table(domain, [[0, 1, np.nan], + [1, np.nan, 0], + [1, 0, 1], + [1, np.nan, np.nan]]) + table = HasClass()(table) + self.assertTrue(not np.isnan(table).any()) + self.assertEqual(table.domain, domain) + self.assertEqual(len(table), 1) + def test_has_class_filter_instance(self): class_missing = self.table[9] class_present = self.table[0] diff --git a/Orange/tests/test_preprocess.py b/Orange/tests/test_preprocess.py index d6357abaa88..69c2a757942 100644 --- a/Orange/tests/test_preprocess.py +++ b/Orange/tests/test_preprocess.py @@ -4,21 +4,21 @@ import os import pickle import unittest -from unittest.mock import Mock, MagicMock, patch +from unittest.mock import Mock import numpy as np -import Orange -from Orange.data import Domain, Table, DiscreteVariable -from Orange.preprocess import * -from Orange.preprocess.discretize import * -from Orange.preprocess.fss import * -from Orange.preprocess.impute import * +from Orange.data import Table +from Orange.preprocess import 
EntropyMDL, DoNotImpute, Default, Average, SelectRandomFeatures, EqualFreq, \ + RemoveNaNColumns, DropInstances +from Orange.preprocess import EqualWidth, SelectBestFeatures +from Orange.preprocess.preprocess import Preprocess, Scale, Randomize, Continuize, Discretize, Impute, SklImpute, \ + Normalize, ProjectCUR, ProjectPCA, RemoveConstant from Orange.util import OrangeDeprecationWarning class TestPreprocess(unittest.TestCase): def test_read_data_calls_reader(self): - class MockPreprocessor(Orange.preprocess.preprocess.Preprocess): + class MockPreprocessor(Preprocess): __init__ = Mock(return_value=None) __call__ = Mock() @classmethod @@ -26,7 +26,7 @@ def reset(cls): cls.__init__.reset_mock() cls.__call__.reset_mock() - table = Mock(Orange.data.Table) + table = Mock(Table) MockPreprocessor(1, 2, a=3)(table) MockPreprocessor.__init__.assert_called_with(1, 2, a=3) MockPreprocessor.__call__.assert_called_with(table) @@ -52,53 +52,32 @@ def test_refuse_data_in_constructor(self): expected = self.assertRaises if is_CI else self.assertWarns with expected(OrangeDeprecationWarning): try: - Orange.preprocess.preprocess.Preprocess(Table('iris')) + Preprocess(Table('iris')) except NotImplementedError: # Expected from default Preprocess.__call__ pass -class RemoveConstant(unittest.TestCase): +class TestRemoveConstant(unittest.TestCase): def test_remove_columns(self): X = np.random.rand(6, 4) X[:, (1,3)] = 5 X[3, 1] = np.nan X[1, 1] = np.nan - data = Orange.data.Table(X) - d = Orange.preprocess.preprocess.RemoveConstant()(data) + data = Table(X) + d = RemoveConstant()(data) self.assertEqual(len(d.domain.attributes), 2) - pp_rc = Orange.preprocess.preprocess.RemoveConstant() + pp_rc = RemoveConstant() d = pp_rc(data) self.assertEqual(len(d.domain.attributes), 2) def test_nothing_to_remove(self): - data = Orange.data.Table("iris") - d = Orange.preprocess.preprocess.RemoveConstant()(data) + data = Table("iris") + d = RemoveConstant()(data) 
self.assertEqual(len(d.domain.attributes), 4) -class TestRemoveNanClass(unittest.TestCase): - def test_remove_nan_classes(self): - table = Table("imports-85") - self.assertTrue(np.isnan(table.Y).any()) - table = RemoveNaNClasses()(table) - self.assertTrue(not np.isnan(table.Y).any()) - - def test_remove_nan_classes_multiclass(self): - domain = Domain([DiscreteVariable("a", values="01")], - [DiscreteVariable("b", values="01"), - DiscreteVariable("c", values="01")]) - table = Table(domain, [[0, 1, np.nan], - [1, np.nan, 0], - [1, 0, 1], - [1, np.nan, np.nan]]) - table = RemoveNaNClasses()(table) - self.assertTrue(not np.isnan(table).any()) - self.assertEqual(table.domain, domain) - self.assertEqual(len(table), 1) - - class TestScaling(unittest.TestCase): @classmethod def setUpClass(cls): @@ -122,7 +101,7 @@ def test_scaling_median_stddev(self): class TestReprs(unittest.TestCase): def test_reprs(self): preprocs = [Continuize, Discretize, Impute, SklImpute, Normalize, - Randomize, RemoveNaNClasses, ProjectPCA, ProjectCUR, Scale, + Randomize, ProjectPCA, ProjectCUR, Scale, EqualFreq, EqualWidth, EntropyMDL, SelectBestFeatures, SelectRandomFeatures, RemoveNaNColumns, DoNotImpute, DropInstances, Average, Default] @@ -132,6 +111,7 @@ def test_reprs(self): new_preproc = eval(repr_str) self.assertEqual(repr(new_preproc), repr_str) + class TestEnumPickling(unittest.TestCase): def test_continuize_pickling(self): c = Continuize(multinomial_treatment=Continuize.FirstAsBase) diff --git a/Orange/tests/test_rules.py b/Orange/tests/test_rules.py index fecd97eff39..a0398544a0a 100644 --- a/Orange/tests/test_rules.py +++ b/Orange/tests/test_rules.py @@ -3,18 +3,17 @@ import unittest import numpy as np -from Orange.data import Table -from Orange.preprocess import RemoveNaNClasses, Impute -from Orange.classification.rules import main as rules_main +from Orange.classification import (CN2Learner, CN2UnorderedLearner, + CN2SDLearner, CN2SDUnorderedLearner) from Orange.classification.rules 
import (_RuleLearner, _RuleClassifier, RuleHunter, Rule, EntropyEvaluator, LaplaceAccuracyEvaluator, WeightedRelativeAccuracyEvaluator, argmaxrnd, hash_dist) - -from Orange.classification import (CN2Learner, CN2UnorderedLearner, - CN2SDLearner, CN2SDUnorderedLearner) +from Orange.data import Table +from Orange.data.filter import HasClass +from Orange.preprocess import Impute class TestRuleInduction(unittest.TestCase): @@ -41,7 +40,7 @@ def test_base_RuleLearner(self): self.assertEqual(len(list(base_rule_learner.active_preprocessors)), 3) # preprocessor types preprocessor_types = [type(x) for x in base_rule_learner.active_preprocessors] - self.assertIn(RemoveNaNClasses, preprocessor_types) + self.assertIn(HasClass, preprocessor_types) self.assertIn(Impute, preprocessor_types) # test find_rules diff --git a/Orange/widgets/evaluate/owtestlearners.py b/Orange/widgets/evaluate/owtestlearners.py index 73108d8eb79..9f88d813bcd 100644 --- a/Orange/widgets/evaluate/owtestlearners.py +++ b/Orange/widgets/evaluate/owtestlearners.py @@ -2,7 +2,6 @@ # pylint: disable=invalid-sequence-index import sys -import functools from itertools import chain import abc import enum @@ -13,7 +12,6 @@ import concurrent.futures from concurrent.futures import Future - from collections import OrderedDict, namedtuple try: @@ -31,21 +29,21 @@ from AnyQt.QtCore import Qt, QSize, QThread, QMetaObject, Q_ARG from AnyQt.QtCore import pyqtSlot as Slot +from Orange.base import Learner +import Orange.classification from Orange.data import Table, DiscreteVariable, ContinuousVariable +from Orange.data.filter import HasClass from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT import Orange.evaluation -import Orange.classification -import Orange.regression - -from Orange.base import Learner from Orange.evaluation import scoring, Results from Orange.preprocess.preprocess import Preprocess -from Orange.preprocess import RemoveNaNClasses +import Orange.regression from Orange.widgets import gui, settings, 
widget from Orange.widgets.utils.itemmodels import DomainModel from Orange.widgets.widget import OWWidget, Msg, Input, Output from Orange.widgets.utils.concurrent import ThreadExecutor + log = logging.getLogger(__name__) InputLearner = namedtuple( @@ -389,7 +387,7 @@ def set_train_data(self, data): if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: - data = RemoveNaNClasses(data) + data = HasClass()(data) else: self.Warning.missing_data.clear() @@ -439,7 +437,7 @@ def set_test_data(self, data): if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: - data = RemoveNaNClasses()(data) + data = HasClass()(data) else: self.Warning.missing_data.clear()