Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Orange/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
import scipy

from Orange.data import Table, Storage, Instance, Value
from Orange.data.filter import HasClass
from Orange.data.util import one_hot
from Orange.misc.wrapper_meta import WrapperMeta
from Orange.preprocess import (RemoveNaNClasses, Continuize,
RemoveNaNColumns, SklImpute, Normalize)
from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute, Normalize
from Orange.util import Reprable

__all__ = ["Learner", "Model", "SklLearner", "SklModel"]
Expand Down Expand Up @@ -341,7 +341,7 @@ class SklLearner(Learner, metaclass=WrapperMeta):
_params = {}

preprocessors = default_preprocessors = [
RemoveNaNClasses(),
HasClass(),
Continuize(),
RemoveNaNColumns(),
SklImpute()]
Expand Down
7 changes: 4 additions & 3 deletions Orange/classification/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
import numpy as np
from scipy.stats import chi2

from Orange.data import Table, _contingency
from Orange.classification import Learner, Model
from Orange.data import Table, _contingency
from Orange.data.filter import HasClass
from Orange.preprocess.discretize import EntropyMDL
from Orange.preprocess import RemoveNaNColumns, RemoveNaNClasses, Impute
from Orange.preprocess import RemoveNaNColumns, Impute

__all__ = ["CN2Learner", "CN2UnorderedLearner", "CN2SDLearner",
"CN2SDUnorderedLearner"]
Expand Down Expand Up @@ -901,7 +902,7 @@ class _RuleLearner(Learner):
.. [1] "Separate-and-Conquer Rule Learning", Johannes Fürnkranz,
Artificial Intelligence Review 13, 3-54, 1999
"""
preprocessors = [RemoveNaNColumns(), RemoveNaNClasses(), Impute()]
preprocessors = [RemoveNaNColumns(), HasClass(), Impute()]

def __init__(self, preprocessors=None, base_rules=None):
"""
Expand Down
6 changes: 3 additions & 3 deletions Orange/classification/softmax_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from scipy.optimize import fmin_l_bfgs_b

from Orange.classification import Learner, Model
from Orange.preprocess import (RemoveNaNClasses, Continuize, RemoveNaNColumns,
Impute, Normalize)
from Orange.data.filter import HasClass
from Orange.preprocess import Continuize, RemoveNaNColumns, Impute, Normalize

__all__ = ["SoftmaxRegressionLearner"]

Expand Down Expand Up @@ -40,7 +40,7 @@ class SoftmaxRegressionLearner(Learner):
Parameters for L-BFGS algorithm.
"""
name = 'softmax'
preprocessors = [RemoveNaNClasses(),
preprocessors = [HasClass(),
RemoveNaNColumns(),
Impute(),
Continuize(),
Expand Down
10 changes: 4 additions & 6 deletions Orange/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
import bottleneck as bn

import Orange.data
from Orange.data.filter import HasClass
from Orange.preprocess.util import _RefuseDataInConstructor
from Orange.statistics import distribution
from Orange.util import Reprable, Enum
from Orange.util import Reprable, Enum, deprecated
from . import impute, discretize, transformation

__all__ = ["Continuize", "Discretize", "Impute",
Expand Down Expand Up @@ -197,6 +198,7 @@ def __call__(self, data):
return data.transform(domain)


@deprecated("Orange.data.filter.HasClass")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HasClas -> HasClass

class RemoveNaNClasses(Preprocess):
"""
Construct preprocessor that removes examples with missing class
Expand All @@ -216,11 +218,7 @@ def __call__(self, data):
-------
data : data set without rows with missing classes
"""
if len(data.Y.shape) > 1:
nan_cls = np.any(np.isnan(data.Y), axis=1)
else:
nan_cls = np.isnan(data.Y)
return data[~nan_cls]
return HasClass()(data)


class Normalize(Preprocess):
Expand Down
11 changes: 5 additions & 6 deletions Orange/preprocess/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@

import numpy as np
from sklearn import feature_selection as skl_fss
from Orange.misc.wrapper_meta import WrapperMeta

from Orange.statistics import contingency, distribution
from Orange.data import Domain, Variable, DiscreteVariable, ContinuousVariable
from Orange.preprocess.preprocess import Discretize, Impute, RemoveNaNClasses
from Orange.data.filter import HasClass
from Orange.misc.wrapper_meta import WrapperMeta
from Orange.preprocess.preprocess import Discretize, Impute
from Orange.preprocess.util import _RefuseDataInConstructor
from Orange.statistics import contingency, distribution
from Orange.util import Reprable

__all__ = ["Chi2",
Expand All @@ -27,9 +28,7 @@ class Scorer(_RefuseDataInConstructor, Reprable):
feature_type = None
class_type = None
supports_sparse_data = None
preprocessors = [
RemoveNaNClasses()
]
preprocessors = [HasClass()]

@property
def friendly_name(self):
Expand Down
6 changes: 3 additions & 3 deletions Orange/regression/linear_bfgs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from Orange.data.filter import HasClass
from Orange.preprocess import Normalize, Continuize, Impute, RemoveNaNColumns
from Orange.regression import Learner, Model
from Orange.preprocess import (RemoveNaNClasses, Normalize, Continuize,
Impute, RemoveNaNColumns)

__all__ = ["LinearRegressionLearner"]

Expand Down Expand Up @@ -51,7 +51,7 @@ class LinearRegressionLearner(Learner):
print(c(data)) # predict
'''
name = 'linear_bfgs'
preprocessors = [RemoveNaNClasses(),
preprocessors = [HasClass(),
Normalize(),
Continuize(),
Impute(),
Expand Down
13 changes: 13 additions & 0 deletions Orange/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ def test_has_class_filter_table(self):
self.assertEqual(len(without_class), self.n_missing)
self.assertTrue(without_class.has_missing_class())

def test_has_class_multiclass(self):
    """HasClass keeps only the rows in which every class value is defined."""
    attributes = [DiscreteVariable("x", values="01")]
    class_vars = [DiscreteVariable("y1", values="01"),
                  DiscreteVariable("y2", values="01")]
    domain = Domain(attributes, class_vars)
    # Only the third row has both class values present.
    rows = [[0, 1, np.nan],
            [1, np.nan, 0],
            [1, 0, 1],
            [1, np.nan, np.nan]]
    filtered = HasClass()(Table(domain, rows))
    self.assertFalse(np.isnan(filtered).any())
    self.assertEqual(filtered.domain, domain)
    self.assertEqual(len(filtered), 1)

def test_has_class_filter_instance(self):
class_missing = self.table[9]
class_present = self.table[0]
Expand Down
56 changes: 18 additions & 38 deletions Orange/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,29 @@
import os
import pickle
import unittest
from unittest.mock import Mock, MagicMock, patch
from unittest.mock import Mock
import numpy as np

import Orange
from Orange.data import Domain, Table, DiscreteVariable
from Orange.preprocess import *
from Orange.preprocess.discretize import *
from Orange.preprocess.fss import *
from Orange.preprocess.impute import *
from Orange.data import Table
from Orange.preprocess import EntropyMDL, DoNotImpute, Default, Average, SelectRandomFeatures, EqualFreq, \
RemoveNaNColumns, DropInstances
from Orange.preprocess import EqualWidth, SelectBestFeatures
from Orange.preprocess.preprocess import Preprocess, Scale, Randomize, Continuize, Discretize, Impute, SklImpute, \
Normalize, ProjectCUR, ProjectPCA, RemoveConstant
from Orange.util import OrangeDeprecationWarning


class TestPreprocess(unittest.TestCase):
def test_read_data_calls_reader(self):
class MockPreprocessor(Orange.preprocess.preprocess.Preprocess):
class MockPreprocessor(Preprocess):
__init__ = Mock(return_value=None)
__call__ = Mock()
@classmethod
def reset(cls):
cls.__init__.reset_mock()
cls.__call__.reset_mock()

table = Mock(Orange.data.Table)
table = Mock(Table)
MockPreprocessor(1, 2, a=3)(table)
MockPreprocessor.__init__.assert_called_with(1, 2, a=3)
MockPreprocessor.__call__.assert_called_with(table)
Expand All @@ -52,53 +52,32 @@ def test_refuse_data_in_constructor(self):
expected = self.assertRaises if is_CI else self.assertWarns
with expected(OrangeDeprecationWarning):
try:
Orange.preprocess.preprocess.Preprocess(Table('iris'))
Preprocess(Table('iris'))
except NotImplementedError:
# Expected from default Preprocess.__call__
pass


class RemoveConstant(unittest.TestCase):
class TestRemoveConstant(unittest.TestCase):
def test_remove_columns(self):
X = np.random.rand(6, 4)
X[:, (1,3)] = 5
X[3, 1] = np.nan
X[1, 1] = np.nan
data = Orange.data.Table(X)
d = Orange.preprocess.preprocess.RemoveConstant()(data)
data = Table(X)
d = RemoveConstant()(data)
self.assertEqual(len(d.domain.attributes), 2)

pp_rc = Orange.preprocess.preprocess.RemoveConstant()
pp_rc = RemoveConstant()
d = pp_rc(data)
self.assertEqual(len(d.domain.attributes), 2)

def test_nothing_to_remove(self):
data = Orange.data.Table("iris")
d = Orange.preprocess.preprocess.RemoveConstant()(data)
data = Table("iris")
d = RemoveConstant()(data)
self.assertEqual(len(d.domain.attributes), 4)


class TestRemoveNanClass(unittest.TestCase):
    """Checks that RemoveNaNClasses drops rows whose class values are missing."""

    def test_remove_nan_classes(self):
        """After preprocessing, no NaN remains in Y of a single-class table."""
        raw = Table("imports-85")
        # The data set must contain missing classes for the check to mean anything.
        self.assertTrue(np.isnan(raw.Y).any())
        cleaned = RemoveNaNClasses()(raw)
        self.assertFalse(np.isnan(cleaned.Y).any())

    def test_remove_nan_classes_multiclass(self):
        """With two class variables, a row survives only if both are defined."""
        attributes = [DiscreteVariable("a", values="01")]
        class_vars = [DiscreteVariable("b", values="01"),
                      DiscreteVariable("c", values="01")]
        domain = Domain(attributes, class_vars)
        # Only the third row has both class values present.
        rows = [[0, 1, np.nan],
                [1, np.nan, 0],
                [1, 0, 1],
                [1, np.nan, np.nan]]
        cleaned = RemoveNaNClasses()(Table(domain, rows))
        self.assertFalse(np.isnan(cleaned).any())
        self.assertEqual(cleaned.domain, domain)
        self.assertEqual(len(cleaned), 1)


class TestScaling(unittest.TestCase):
@classmethod
def setUpClass(cls):
Expand All @@ -122,7 +101,7 @@ def test_scaling_median_stddev(self):
class TestReprs(unittest.TestCase):
def test_reprs(self):
preprocs = [Continuize, Discretize, Impute, SklImpute, Normalize,
Randomize, RemoveNaNClasses, ProjectPCA, ProjectCUR, Scale,
Randomize, ProjectPCA, ProjectCUR, Scale,
EqualFreq, EqualWidth, EntropyMDL, SelectBestFeatures,
SelectRandomFeatures, RemoveNaNColumns, DoNotImpute, DropInstances,
Average, Default]
Expand All @@ -132,6 +111,7 @@ def test_reprs(self):
new_preproc = eval(repr_str)
self.assertEqual(repr(new_preproc), repr_str)


class TestEnumPickling(unittest.TestCase):
def test_continuize_pickling(self):
c = Continuize(multinomial_treatment=Continuize.FirstAsBase)
Expand Down
13 changes: 6 additions & 7 deletions Orange/tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@

import unittest
import numpy as np
from Orange.data import Table
from Orange.preprocess import RemoveNaNClasses, Impute

from Orange.classification.rules import main as rules_main
from Orange.classification import (CN2Learner, CN2UnorderedLearner,
CN2SDLearner, CN2SDUnorderedLearner)
from Orange.classification.rules import (_RuleLearner, _RuleClassifier,
RuleHunter, Rule, EntropyEvaluator,
LaplaceAccuracyEvaluator,
WeightedRelativeAccuracyEvaluator,
argmaxrnd, hash_dist)

from Orange.classification import (CN2Learner, CN2UnorderedLearner,
CN2SDLearner, CN2SDUnorderedLearner)
from Orange.data import Table
from Orange.data.filter import HasClass
from Orange.preprocess import Impute


class TestRuleInduction(unittest.TestCase):
Expand All @@ -41,7 +40,7 @@ def test_base_RuleLearner(self):
self.assertEqual(len(list(base_rule_learner.active_preprocessors)), 3)
# preprocessor types
preprocessor_types = [type(x) for x in base_rule_learner.active_preprocessors]
self.assertIn(RemoveNaNClasses, preprocessor_types)
self.assertIn(HasClass, preprocessor_types)
self.assertIn(Impute, preprocessor_types)

# test find_rules
Expand Down
16 changes: 7 additions & 9 deletions Orange/widgets/evaluate/owtestlearners.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# pylint: disable=invalid-sequence-index

import sys
import functools
from itertools import chain
import abc
import enum
Expand All @@ -13,7 +12,6 @@

import concurrent.futures
from concurrent.futures import Future

from collections import OrderedDict, namedtuple

try:
Expand All @@ -31,21 +29,21 @@
from AnyQt.QtCore import Qt, QSize, QThread, QMetaObject, Q_ARG
from AnyQt.QtCore import pyqtSlot as Slot

from Orange.base import Learner
import Orange.classification
from Orange.data import Table, DiscreteVariable, ContinuousVariable
from Orange.data.filter import HasClass
from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
import Orange.evaluation
import Orange.classification
import Orange.regression

from Orange.base import Learner
from Orange.evaluation import scoring, Results
from Orange.preprocess.preprocess import Preprocess
from Orange.preprocess import RemoveNaNClasses
import Orange.regression
from Orange.widgets import gui, settings, widget
from Orange.widgets.utils.itemmodels import DomainModel
from Orange.widgets.widget import OWWidget, Msg, Input, Output
from Orange.widgets.utils.concurrent import ThreadExecutor


log = logging.getLogger(__name__)

InputLearner = namedtuple(
Expand Down Expand Up @@ -389,7 +387,7 @@ def set_train_data(self, data):
if self.train_data_missing_vals or self.test_data_missing_vals:
self.Warning.missing_data(self._which_missing_data())
if data:
data = RemoveNaNClasses(data)
data = HasClass()(data)
else:
self.Warning.missing_data.clear()

Expand Down Expand Up @@ -439,7 +437,7 @@ def set_test_data(self, data):
if self.train_data_missing_vals or self.test_data_missing_vals:
self.Warning.missing_data(self._which_missing_data())
if data:
data = RemoveNaNClasses()(data)
data = HasClass()(data)
else:
self.Warning.missing_data.clear()

Expand Down