Skip to content

Commit 56aee9e

Browse files
authored
Merge branch 'master' into fix-remove-unused-values-sparse
2 parents 9daec5e + feea5ef commit 56aee9e

File tree

10 files changed

+66
-78
lines changed

10 files changed

+66
-78
lines changed

Orange/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
import scipy
99

1010
from Orange.data import Table, Storage, Instance, Value
11+
from Orange.data.filter import HasClass
1112
from Orange.data.util import one_hot
1213
from Orange.misc.wrapper_meta import WrapperMeta
13-
from Orange.preprocess import (RemoveNaNClasses, Continuize,
14-
RemoveNaNColumns, SklImpute, Normalize)
14+
from Orange.preprocess import Continuize, RemoveNaNColumns, SklImpute, Normalize
1515
from Orange.util import Reprable
1616

1717
__all__ = ["Learner", "Model", "SklLearner", "SklModel"]
@@ -341,7 +341,7 @@ class SklLearner(Learner, metaclass=WrapperMeta):
341341
_params = {}
342342

343343
preprocessors = default_preprocessors = [
344-
RemoveNaNClasses(),
344+
HasClass(),
345345
Continuize(),
346346
RemoveNaNColumns(),
347347
SklImpute()]

Orange/classification/rules.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515
import numpy as np
1616
from scipy.stats import chi2
1717

18-
from Orange.data import Table, _contingency
1918
from Orange.classification import Learner, Model
19+
from Orange.data import Table, _contingency
20+
from Orange.data.filter import HasClass
2021
from Orange.preprocess.discretize import EntropyMDL
21-
from Orange.preprocess import RemoveNaNColumns, RemoveNaNClasses, Impute
22+
from Orange.preprocess import RemoveNaNColumns, Impute
2223

2324
__all__ = ["CN2Learner", "CN2UnorderedLearner", "CN2SDLearner",
2425
"CN2SDUnorderedLearner"]
@@ -901,7 +902,7 @@ class _RuleLearner(Learner):
901902
.. [1] "Separate-and-Conquer Rule Learning", Johannes Fürnkranz,
902903
Artificial Intelligence Review 13, 3-54, 1999
903904
"""
904-
preprocessors = [RemoveNaNColumns(), RemoveNaNClasses(), Impute()]
905+
preprocessors = [RemoveNaNColumns(), HasClass(), Impute()]
905906

906907
def __init__(self, preprocessors=None, base_rules=None):
907908
"""

Orange/classification/softmax_regression.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from scipy.optimize import fmin_l_bfgs_b
33

44
from Orange.classification import Learner, Model
5-
from Orange.preprocess import (RemoveNaNClasses, Continuize, RemoveNaNColumns,
6-
Impute, Normalize)
5+
from Orange.data.filter import HasClass
6+
from Orange.preprocess import Continuize, RemoveNaNColumns, Impute, Normalize
77

88
__all__ = ["SoftmaxRegressionLearner"]
99

@@ -40,7 +40,7 @@ class SoftmaxRegressionLearner(Learner):
4040
Parameters for L-BFGS algorithm.
4141
"""
4242
name = 'softmax'
43-
preprocessors = [RemoveNaNClasses(),
43+
preprocessors = [HasClass(),
4444
RemoveNaNColumns(),
4545
Impute(),
4646
Continuize(),

Orange/preprocess/preprocess.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
import bottleneck as bn
1010

1111
import Orange.data
12+
from Orange.data.filter import HasClass
1213
from Orange.preprocess.util import _RefuseDataInConstructor
1314
from Orange.statistics import distribution
14-
from Orange.util import Reprable, Enum
15+
from Orange.util import Reprable, Enum, deprecated
1516
from . import impute, discretize, transformation
1617

1718
__all__ = ["Continuize", "Discretize", "Impute",
@@ -197,6 +198,7 @@ def __call__(self, data):
197198
return data.transform(domain)
198199

199200

201+
@deprecated("Orange.data.filter.HasClass")
200202
class RemoveNaNClasses(Preprocess):
201203
"""
202204
Construct preprocessor that removes examples with missing class
@@ -216,11 +218,7 @@ def __call__(self, data):
216218
-------
217219
data : data set without rows with missing classes
218220
"""
219-
if len(data.Y.shape) > 1:
220-
nan_cls = np.any(np.isnan(data.Y), axis=1)
221-
else:
222-
nan_cls = np.isnan(data.Y)
223-
return data[~nan_cls]
221+
return HasClass()(data)
224222

225223

226224
class Normalize(Preprocess):

Orange/preprocess/score.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44

55
import numpy as np
66
from sklearn import feature_selection as skl_fss
7-
from Orange.misc.wrapper_meta import WrapperMeta
87

9-
from Orange.statistics import contingency, distribution
108
from Orange.data import Domain, Variable, DiscreteVariable, ContinuousVariable
11-
from Orange.preprocess.preprocess import Discretize, Impute, RemoveNaNClasses
9+
from Orange.data.filter import HasClass
10+
from Orange.misc.wrapper_meta import WrapperMeta
11+
from Orange.preprocess.preprocess import Discretize, Impute
1212
from Orange.preprocess.util import _RefuseDataInConstructor
13+
from Orange.statistics import contingency, distribution
1314
from Orange.util import Reprable
1415

1516
__all__ = ["Chi2",
@@ -27,9 +28,7 @@ class Scorer(_RefuseDataInConstructor, Reprable):
2728
feature_type = None
2829
class_type = None
2930
supports_sparse_data = None
30-
preprocessors = [
31-
RemoveNaNClasses()
32-
]
31+
preprocessors = [HasClass()]
3332

3433
@property
3534
def friendly_name(self):

Orange/regression/linear_bfgs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import numpy as np
22
from scipy.optimize import fmin_l_bfgs_b
33

4+
from Orange.data.filter import HasClass
5+
from Orange.preprocess import Normalize, Continuize, Impute, RemoveNaNColumns
46
from Orange.regression import Learner, Model
5-
from Orange.preprocess import (RemoveNaNClasses, Normalize, Continuize,
6-
Impute, RemoveNaNColumns)
77

88
__all__ = ["LinearRegressionLearner"]
99

@@ -51,7 +51,7 @@ class LinearRegressionLearner(Learner):
5151
print(c(data)) # predict
5252
'''
5353
name = 'linear_bfgs'
54-
preprocessors = [RemoveNaNClasses(),
54+
preprocessors = [HasClass(),
5555
Normalize(),
5656
Continuize(),
5757
Impute(),

Orange/tests/test_filter.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,19 @@ def test_has_class_filter_table(self):
9292
self.assertEqual(len(without_class), self.n_missing)
9393
self.assertTrue(without_class.has_missing_class())
9494

95+
def test_has_class_multiclass(self):
96+
domain = Domain([DiscreteVariable("x", values="01")],
97+
[DiscreteVariable("y1", values="01"),
98+
DiscreteVariable("y2", values="01")])
99+
table = Table(domain, [[0, 1, np.nan],
100+
[1, np.nan, 0],
101+
[1, 0, 1],
102+
[1, np.nan, np.nan]])
103+
table = HasClass()(table)
104+
self.assertTrue(not np.isnan(table).any())
105+
self.assertEqual(table.domain, domain)
106+
self.assertEqual(len(table), 1)
107+
95108
def test_has_class_filter_instance(self):
96109
class_missing = self.table[9]
97110
class_present = self.table[0]

Orange/tests/test_preprocess.py

Lines changed: 18 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,29 @@
44
import os
55
import pickle
66
import unittest
7-
from unittest.mock import Mock, MagicMock, patch
7+
from unittest.mock import Mock
88
import numpy as np
99

10-
import Orange
11-
from Orange.data import Domain, Table, DiscreteVariable
12-
from Orange.preprocess import *
13-
from Orange.preprocess.discretize import *
14-
from Orange.preprocess.fss import *
15-
from Orange.preprocess.impute import *
10+
from Orange.data import Table
11+
from Orange.preprocess import EntropyMDL, DoNotImpute, Default, Average, SelectRandomFeatures, EqualFreq, \
12+
RemoveNaNColumns, DropInstances
13+
from Orange.preprocess import EqualWidth, SelectBestFeatures
14+
from Orange.preprocess.preprocess import Preprocess, Scale, Randomize, Continuize, Discretize, Impute, SklImpute, \
15+
Normalize, ProjectCUR, ProjectPCA, RemoveConstant
1616
from Orange.util import OrangeDeprecationWarning
1717

1818

1919
class TestPreprocess(unittest.TestCase):
2020
def test_read_data_calls_reader(self):
21-
class MockPreprocessor(Orange.preprocess.preprocess.Preprocess):
21+
class MockPreprocessor(Preprocess):
2222
__init__ = Mock(return_value=None)
2323
__call__ = Mock()
2424
@classmethod
2525
def reset(cls):
2626
cls.__init__.reset_mock()
2727
cls.__call__.reset_mock()
2828

29-
table = Mock(Orange.data.Table)
29+
table = Mock(Table)
3030
MockPreprocessor(1, 2, a=3)(table)
3131
MockPreprocessor.__init__.assert_called_with(1, 2, a=3)
3232
MockPreprocessor.__call__.assert_called_with(table)
@@ -52,53 +52,32 @@ def test_refuse_data_in_constructor(self):
5252
expected = self.assertRaises if is_CI else self.assertWarns
5353
with expected(OrangeDeprecationWarning):
5454
try:
55-
Orange.preprocess.preprocess.Preprocess(Table('iris'))
55+
Preprocess(Table('iris'))
5656
except NotImplementedError:
5757
# Expected from default Preprocess.__call__
5858
pass
5959

6060

61-
class RemoveConstant(unittest.TestCase):
61+
class TestRemoveConstant(unittest.TestCase):
6262
def test_remove_columns(self):
6363
X = np.random.rand(6, 4)
6464
X[:, (1,3)] = 5
6565
X[3, 1] = np.nan
6666
X[1, 1] = np.nan
67-
data = Orange.data.Table(X)
68-
d = Orange.preprocess.preprocess.RemoveConstant()(data)
67+
data = Table(X)
68+
d = RemoveConstant()(data)
6969
self.assertEqual(len(d.domain.attributes), 2)
7070

71-
pp_rc = Orange.preprocess.preprocess.RemoveConstant()
71+
pp_rc = RemoveConstant()
7272
d = pp_rc(data)
7373
self.assertEqual(len(d.domain.attributes), 2)
7474

7575
def test_nothing_to_remove(self):
76-
data = Orange.data.Table("iris")
77-
d = Orange.preprocess.preprocess.RemoveConstant()(data)
76+
data = Table("iris")
77+
d = RemoveConstant()(data)
7878
self.assertEqual(len(d.domain.attributes), 4)
7979

8080

81-
class TestRemoveNanClass(unittest.TestCase):
82-
def test_remove_nan_classes(self):
83-
table = Table("imports-85")
84-
self.assertTrue(np.isnan(table.Y).any())
85-
table = RemoveNaNClasses()(table)
86-
self.assertTrue(not np.isnan(table.Y).any())
87-
88-
def test_remove_nan_classes_multiclass(self):
89-
domain = Domain([DiscreteVariable("a", values="01")],
90-
[DiscreteVariable("b", values="01"),
91-
DiscreteVariable("c", values="01")])
92-
table = Table(domain, [[0, 1, np.nan],
93-
[1, np.nan, 0],
94-
[1, 0, 1],
95-
[1, np.nan, np.nan]])
96-
table = RemoveNaNClasses()(table)
97-
self.assertTrue(not np.isnan(table).any())
98-
self.assertEqual(table.domain, domain)
99-
self.assertEqual(len(table), 1)
100-
101-
10281
class TestScaling(unittest.TestCase):
10382
@classmethod
10483
def setUpClass(cls):
@@ -122,7 +101,7 @@ def test_scaling_median_stddev(self):
122101
class TestReprs(unittest.TestCase):
123102
def test_reprs(self):
124103
preprocs = [Continuize, Discretize, Impute, SklImpute, Normalize,
125-
Randomize, RemoveNaNClasses, ProjectPCA, ProjectCUR, Scale,
104+
Randomize, ProjectPCA, ProjectCUR, Scale,
126105
EqualFreq, EqualWidth, EntropyMDL, SelectBestFeatures,
127106
SelectRandomFeatures, RemoveNaNColumns, DoNotImpute, DropInstances,
128107
Average, Default]
@@ -132,6 +111,7 @@ def test_reprs(self):
132111
new_preproc = eval(repr_str)
133112
self.assertEqual(repr(new_preproc), repr_str)
134113

114+
135115
class TestEnumPickling(unittest.TestCase):
136116
def test_continuize_pickling(self):
137117
c = Continuize(multinomial_treatment=Continuize.FirstAsBase)

Orange/tests/test_rules.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,17 @@
33

44
import unittest
55
import numpy as np
6-
from Orange.data import Table
7-
from Orange.preprocess import RemoveNaNClasses, Impute
86

9-
from Orange.classification.rules import main as rules_main
7+
from Orange.classification import (CN2Learner, CN2UnorderedLearner,
8+
CN2SDLearner, CN2SDUnorderedLearner)
109
from Orange.classification.rules import (_RuleLearner, _RuleClassifier,
1110
RuleHunter, Rule, EntropyEvaluator,
1211
LaplaceAccuracyEvaluator,
1312
WeightedRelativeAccuracyEvaluator,
1413
argmaxrnd, hash_dist)
15-
16-
from Orange.classification import (CN2Learner, CN2UnorderedLearner,
17-
CN2SDLearner, CN2SDUnorderedLearner)
14+
from Orange.data import Table
15+
from Orange.data.filter import HasClass
16+
from Orange.preprocess import Impute
1817

1918

2019
class TestRuleInduction(unittest.TestCase):
@@ -41,7 +40,7 @@ def test_base_RuleLearner(self):
4140
self.assertEqual(len(list(base_rule_learner.active_preprocessors)), 3)
4241
# preprocessor types
4342
preprocessor_types = [type(x) for x in base_rule_learner.active_preprocessors]
44-
self.assertIn(RemoveNaNClasses, preprocessor_types)
43+
self.assertIn(HasClass, preprocessor_types)
4544
self.assertIn(Impute, preprocessor_types)
4645

4746
# test find_rules

Orange/widgets/evaluate/owtestlearners.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# pylint: disable=invalid-sequence-index
33

44
import sys
5-
import functools
65
from itertools import chain
76
import abc
87
import enum
@@ -13,7 +12,6 @@
1312

1413
import concurrent.futures
1514
from concurrent.futures import Future
16-
1715
from collections import OrderedDict, namedtuple
1816

1917
try:
@@ -31,21 +29,21 @@
3129
from AnyQt.QtCore import Qt, QSize, QThread, QMetaObject, Q_ARG
3230
from AnyQt.QtCore import pyqtSlot as Slot
3331

32+
from Orange.base import Learner
33+
import Orange.classification
3434
from Orange.data import Table, DiscreteVariable, ContinuousVariable
35+
from Orange.data.filter import HasClass
3536
from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
3637
import Orange.evaluation
37-
import Orange.classification
38-
import Orange.regression
39-
40-
from Orange.base import Learner
4138
from Orange.evaluation import scoring, Results
4239
from Orange.preprocess.preprocess import Preprocess
43-
from Orange.preprocess import RemoveNaNClasses
40+
import Orange.regression
4441
from Orange.widgets import gui, settings, widget
4542
from Orange.widgets.utils.itemmodels import DomainModel
4643
from Orange.widgets.widget import OWWidget, Msg, Input, Output
4744
from Orange.widgets.utils.concurrent import ThreadExecutor
4845

46+
4947
log = logging.getLogger(__name__)
5048

5149
InputLearner = namedtuple(
@@ -389,7 +387,7 @@ def set_train_data(self, data):
389387
if self.train_data_missing_vals or self.test_data_missing_vals:
390388
self.Warning.missing_data(self._which_missing_data())
391389
if data:
392-
data = RemoveNaNClasses(data)
390+
data = HasClass()(data)
393391
else:
394392
self.Warning.missing_data.clear()
395393

@@ -439,7 +437,7 @@ def set_test_data(self, data):
439437
if self.train_data_missing_vals or self.test_data_missing_vals:
440438
self.Warning.missing_data(self._which_missing_data())
441439
if data:
442-
data = RemoveNaNClasses()(data)
440+
data = HasClass()(data)
443441
else:
444442
self.Warning.missing_data.clear()
445443

0 commit comments

Comments
 (0)