Skip to content

Commit 1d37f63

Browse files
authored
Merge pull request #2144 from jerneju/zerodivision_index-continuize
[FIX] Continuize: prevent crashing - column with equal and NaN values
2 parents: 9274a20 + e8d303f; commit: 1d37f63

File tree

2 files changed: +71 additions, -14 deletions

Orange/widgets/data/owcontinuize.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,16 @@
1+
from functools import reduce
2+
3+
import numpy as np
4+
15
from AnyQt import QtWidgets
26
from AnyQt.QtCore import Qt
37

48
import Orange.data
59
from Orange.util import Reprable
610
from Orange.statistics import distribution
711
from Orange.preprocess import Continuize, Normalize
12+
from Orange.preprocess.transformation import \
13+
Identity, Indicator, Indicator1, Normalizer
814
from Orange.data.table import Table
915
from Orange.widgets import gui, widget
1016
from Orange.widgets.settings import Setting
@@ -138,12 +144,6 @@ def send_report(self):
138144
("Value range", self.value_ranges[self.zero_based])])
139145

140146

141-
from Orange.preprocess.transformation import \
142-
Identity, Indicator, Indicator1, Normalizer
143-
144-
from functools import reduce
145-
146-
147147
class WeightedIndicator(Indicator):
148148
def __init__(self, variable, value, weight=1.0):
149149
super().__init__(variable, value)
@@ -156,7 +156,7 @@ def transform(self, c):
156156
return t
157157

158158

159-
class WeightedIndicator_1(Indicator1):
159+
class WeightedIndicator1(Indicator1):
160160
def __init__(self, variable, value, weight=1.0):
161161
super().__init__(variable, value)
162162
self.weight = weight
@@ -176,7 +176,7 @@ def make_indicator_var(source, value_ind, weight=None, zero_based=True):
176176
elif weight is None:
177177
indicator = Indicator1(source, value=value_ind)
178178
else:
179-
indicator = WeightedIndicator_1(source, value=value_ind, weight=weight)
179+
indicator = WeightedIndicator1(source, value=value_ind, weight=weight)
180180
return Orange.data.ContinuousVariable(
181181
"{}={}".format(source.name, source.values[value_ind]),
182182
compute_value=indicator
@@ -279,7 +279,7 @@ def continuize_var(var,
279279
elif multinomial_treatment == Continuize.AsOrdinal:
280280
return [ordinal_to_continuous(var)]
281281
elif multinomial_treatment == Continuize.AsNormalizedOrdinal:
282-
return [ordinal_to_normalized_continuous(var, zero_based)]
282+
return [ordinal_to_norm_continuous(var, zero_based)]
283283
elif multinomial_treatment == Continuize.Indicators:
284284
return one_hot_coding(var, zero_based)
285285
elif multinomial_treatment == Continuize.FirstAsBase or \
@@ -320,7 +320,7 @@ def ordinal_to_continuous(var):
320320
compute_value=Identity(var))
321321

322322

323-
def ordinal_to_normalized_continuous(var, zero_based=True):
323+
def ordinal_to_norm_continuous(var, zero_based=True):
324324
n_values = len(var.values)
325325
if zero_based:
326326
return normalized_var(var, 0, 1 / (n_values - 1))
@@ -330,8 +330,11 @@ def ordinal_to_normalized_continuous(var, zero_based=True):
330330

331331
def normalize_by_span(var, data_or_dist, zero_based=True):
332332
dist = _ensure_dist(var, data_or_dist)
333-
v_max, v_min = dist.max(), dist.min()
334-
span = v_max - v_min
333+
if dist.shape[1] > 0:
334+
v_max, v_min = dist.max(), dist.min()
335+
else:
336+
v_max, v_min = 0, 0
337+
span = (v_max - v_min)
335338
if span < 1e-15:
336339
span = 1
337340

@@ -343,7 +346,11 @@ def normalize_by_span(var, data_or_dist, zero_based=True):
343346

344347
def normalize_by_sd(var, data_or_dist):
345348
dist = _ensure_dist(var, data_or_dist)
346-
mean, sd = dist.mean(), dist.standard_deviation()
349+
if dist.shape[1] > 0:
350+
mean, sd = dist.mean(), dist.standard_deviation()
351+
else:
352+
mean, sd = 0, 1
353+
sd = sd if sd > 1e-10 else 1
347354
return normalized_var(var, mean, 1 / sd)
348355

349356

@@ -365,7 +372,7 @@ def __call__(self, data):
365372
domain = data.domain
366373

367374
if (treat == Continuize.ReportError and
368-
any(var.is_discrete and len(var.values) > 2 for var in domain)):
375+
any(var.is_discrete and len(var.values) > 2 for var in domain)):
369376
raise ValueError("Domain has multinomial attributes")
370377

371378
newdomain = continuize_domain(

Orange/widgets/data/tests/test_owcontinuize.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,53 @@ def test_empty_data(self):
3333
widget.unconditional_commit()
3434
imp_data = self.get_output("Data")
3535
self.assertIsNone(imp_data)
36+
37+
def test_one_column_equal_values(self):
38+
"""
39+
No crash on a column with equal values and with selected option
40+
normalize by standard deviation.
41+
GH-2144
42+
"""
43+
table = Table("iris")
44+
table = table[:, 1]
45+
table[:] = 42.0
46+
self.send_signal("Data", table)
47+
# Normalize.NormalizeBySD
48+
self.widget.continuous_treatment = 2
49+
self.widget.unconditional_commit()
50+
51+
def test_one_column_nan_values_normalize_sd(self):
52+
"""
53+
No crash on a column with NaN values and with selected option
54+
normalize by standard deviation (Not the same issue which is
55+
tested above).
56+
GH-2144
57+
"""
58+
table = Table("iris")
59+
table[:, 2] = np.NaN
60+
self.send_signal("Data", table)
61+
# Normalize.NormalizeBySD
62+
self.widget.continuous_treatment = 2
63+
self.widget.unconditional_commit()
64+
table = Table("iris")
65+
table[1, 2] = np.NaN
66+
self.send_signal("Data", table)
67+
self.widget.unconditional_commit()
68+
69+
70+
def test_one_column_nan_values_normalize_span(self):
71+
"""
72+
No crash on a column with NaN values and with selected option
73+
normalize by span.
74+
GH-2144
75+
"""
76+
table = Table("iris")
77+
table[:, 2] = np.NaN
78+
self.send_signal("Data", table)
79+
# Normalize.NormalizeBySpan
80+
self.widget.continuous_treatment = 1
81+
self.widget.unconditional_commit()
82+
table = Table("iris")
83+
table[1, 2] = np.NaN
84+
self.send_signal("Data", table)
85+
self.widget.unconditional_commit()

0 commit comments

Comments (0)