Skip to content

Commit 1dfb316

Browse files
authored
Merge pull request #2355 from kernc/fcbf-dist
[FIX] score.FCBF: don't segfault on continuous variables w/ <0 values
2 parents 9d4e8af + 3e235f1 commit 1dfb316

File tree

2 files changed

+17
-12
lines changed

2 files changed

+17
-12
lines changed

Orange/preprocess/score.py

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -235,13 +235,11 @@ def _gini(D):
235235
np.sum(D, axis=0) / np.sum(D))
236236

237237

238-
def _symmetrical_uncertainty(X, Y):
238+
def _symmetrical_uncertainty(data, attr1, attr2):
239239
"""Symmetrical uncertainty, Press et al., 1988."""
240-
from Orange.preprocess._relieff import contingency_table
241-
X, Y = np.around(X), np.around(Y)
242-
cont = contingency_table(X, Y)
240+
cont = np.asarray(contingency.Discrete(data, attr1, attr2), dtype=float)
243241
ig = InfoGain().from_contingency(cont, 1)
244-
return 2 * ig / (_entropy(cont.sum(0)) + _entropy(cont.sum(1)))
242+
return 2 * ig / (_entropy(cont) + _entropy(cont.T))
245243

246244

247245
class FCBF(ClassificationScorer):
@@ -253,9 +251,10 @@ class FCBF(ClassificationScorer):
253251
2003. http://www.aaai.org/Papers/ICML/2003/ICML03-111.pdf
254252
"""
255253
def score_data(self, data, feature=None):
254+
attributes = data.domain.attributes
256255
S = []
257-
for i, a in enumerate(data.X.T):
258-
S.append((_symmetrical_uncertainty(a, data.Y), i))
256+
for i, attr in enumerate(attributes):
257+
S.append((_symmetrical_uncertainty(data, attr, data.domain.class_var), i))
259258
S.sort()
260259
worst = []
261260

@@ -267,9 +266,7 @@ def score_data(self, data, feature=None):
267266
while True:
268267
try: SUqc, Fq = S[-q]
269268
except IndexError: break
270-
# TODO: cache
271-
if _symmetrical_uncertainty(data.X.T[Fp],
272-
data.X.T[Fq]) >= SUqc:
269+
if _symmetrical_uncertainty(data, attributes[Fp], attributes[Fq]) >= SUqc:
273270
del S[-q]
274271
worst.append((1e-4*SUqc, Fq))
275272
else:

Orange/tests/test_score_feature.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from Orange.data import Table, Domain, DiscreteVariable
8+
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
99
from Orange import preprocess
1010
from Orange.preprocess.score import InfoGain, GainRatio, Gini, Chi2, ANOVA,\
1111
UnivariateLinearRegression, ReliefF, FCBF, RReliefF
@@ -131,5 +131,13 @@ def test_fcbf(self):
131131
scorer = FCBF()
132132
weights = scorer(self.zoo, None)
133133
found = [self.zoo.domain[attr].name for attr in reversed(weights.argsort()[-5:])]
134-
reference = ['legs', 'backbone', 'toothed', 'hair', 'aquatic']
134+
reference = ['legs', 'milk', 'toothed', 'feathers', 'backbone']
135135
self.assertEqual(found, reference)
136+
137+
# GH-1916
138+
data = Table(Domain([ContinuousVariable('1'), ContinuousVariable('2')],
139+
DiscreteVariable('target')),
140+
np.full((2, 2), np.nan),
141+
np.r_[0., 1])
142+
weights = scorer(data, None)
143+
np.testing.assert_equal(weights, np.nan)

0 commit comments

Comments
 (0)