Skip to content

Commit 7c02c34

Browse files
committed
Rank: Fix crash on dataset with missing values
1 parent acd5dc9 commit 7c02c34

File tree

4 files changed: 25 additions (+25) and 7 deletions (-7).

Orange/preprocess/fss.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
import Orange
99
from Orange.util import Reprable
1010
from Orange.preprocess.preprocess import Preprocess
11-
from Orange.preprocess.score import ANOVA, GainRatio, UnivariateLinearRegression
1211

1312
__all__ = ["SelectBestFeatures", "RemoveNaNColumns", "SelectRandomFeatures"]
1413

@@ -57,6 +56,10 @@ def __call__(self, data):
5756
discr_ratio = (sum(a.is_discrete
5857
for a in data.domain.attributes)
5958
/ len(data.domain.attributes))
59+
60+
from Orange.preprocess.score import ANOVA, GainRatio, \
61+
UnivariateLinearRegression
62+
6063
if data.domain.has_discrete_class:
6164
if discr_ratio >= 0.5:
6265
method = GainRatio()

Orange/preprocess/score.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from Orange.data import Domain, Variable, DiscreteVariable, ContinuousVariable
99
from Orange.data.filter import HasClass
1010
from Orange.misc.wrapper_meta import WrapperMeta
11+
from Orange.preprocess.fss import RemoveNaNColumns
1112
from Orange.preprocess.preprocess import Discretize, SklImpute
1213
from Orange.preprocess.util import _RefuseDataInConstructor
1314
from Orange.statistics import contingency, distribution
@@ -51,7 +52,7 @@ def _friendly_vartype_name(vartype):
5152
return name.lower()[:-8]
5253
return name
5354

54-
def __call__(self, data, feature=None):
55+
def __call__(self, data, feature=None, preprocess=True):
5556
if not data.domain.class_var:
5657
raise ValueError(
5758
"{} requires data with a target variable."
@@ -66,8 +67,9 @@ def __call__(self, data, feature=None):
6667
f = data.domain[feature]
6768
data = data.transform(Domain([f], data.domain.class_vars))
6869

69-
for pp in self.preprocessors:
70-
data = pp(data)
70+
if preprocess:
71+
for pp in self.preprocessors:
72+
data = pp(data)
7173

7274
for var in data.domain.attributes:
7375
if not isinstance(var, self.feature_type):
@@ -340,6 +342,7 @@ class ReliefF(Scorer):
340342
class_type = DiscreteVariable
341343
supports_sparse_data = False
342344
friendly_name = "ReliefF"
345+
preprocessors = Scorer.preprocessors + [RemoveNaNColumns()]
343346

344347
def __init__(self, n_iterations=50, k_nearest=10, random_state=None):
345348
self.n_iterations = n_iterations
@@ -374,6 +377,7 @@ class RReliefF(Scorer):
374377
class_type = ContinuousVariable
375378
supports_sparse_data = False
376379
friendly_name = "RReliefF"
380+
preprocessors = Scorer.preprocessors + [RemoveNaNColumns()]
377381

378382
def __init__(self, n_iterations=50, k_nearest=50, random_state=None):
379383
self.n_iterations = n_iterations

Orange/widgets/data/owrank.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,13 @@ def get_method_scores(self, method):
371371
estimator = method.scorer()
372372
data = self.data
373373
try:
374-
scores = np.asarray(estimator(data))
374+
scores = np.full(len(data.domain.attributes), np.nan)
375+
pp_data = data
376+
for pp in estimator.preprocessors:
377+
pp_data = pp(pp_data)
378+
names = [a.name for a in pp_data.domain.attributes]
379+
mask = np.array([a.name in names for a in data.domain.attributes])
380+
scores[mask] = np.asarray(estimator(pp_data, preprocess=False))
375381
except ValueError:
376382
log.warning("Scorer %s wasn't able to compute all scores at once",
377383
method.name)
@@ -382,7 +388,6 @@ def get_method_scores(self, method):
382388
log.error(
383389
"Scorer %s wasn't able to compute scores at all",
384390
method.name)
385-
scores = np.full(len(data.domain.attributes), np.nan)
386391
return scores
387392

388393
@memoize_method()

Orange/widgets/data/tests/test_owrank.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from Orange.regression import LinearRegressionLearner
1111
from Orange.projection import PCA
1212
from Orange.widgets.data.owrank import OWRank, ProblemType, CLS_SCORES, REG_SCORES
13-
from Orange.widgets.tests.base import WidgetTest
13+
from Orange.widgets.tests.base import WidgetTest, datasets
1414
from Orange.widgets.widget import AttributeList
1515

1616

@@ -347,3 +347,9 @@ def test_no_attributes(self):
347347
self.assertTrue(self.widget.Error.no_attributes.is_shown())
348348
self.send_signal(self.widget.Inputs.data, data)
349349
self.assertFalse(self.widget.Error.no_attributes.is_shown())
350+
351+
def test_dataset(self):
352+
for method in CLS_SCORES + REG_SCORES:
353+
self._get_checkbox(method.shortname).setChecked(True)
354+
for ds in datasets.datasets():
355+
self.send_signal(self.widget.Inputs.data, ds)

0 commit comments

Comments
 (0)