Skip to content

Commit 66c2911

Browse files
authored
Merge pull request #4125 from VesnaT/normalize_nan
[FIX] Normalize: Fix crash with nan column
2 parents 32b2bd4 + 027f7af commit 66c2911

File tree

5 files changed: 42 additions and 40 deletions

Orange/preprocess/normalize.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import numpy as np
2+
13
from Orange.data import ContinuousVariable, Domain
24
from Orange.statistics import distribution
35
from Orange.util import Reprable
@@ -57,7 +59,7 @@ def normalize_by_sd(self, dist, var):
5759
)
5860

5961
def normalize_by_span(self, dist, var):
60-
dma, dmi = dist.max(), dist.min()
62+
dma, dmi = (dist.max(), dist.min()) if dist.shape[1] else (np.nan, np.nan)
6163
diff = dma - dmi
6264
if diff < 1e-15:
6365
diff = 1

Orange/tests/datasets/test10.tab

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
c1 c2 d1 d2 n1 n2 c3 d3 cl1 cl2
2-
c t d d c d c d d c
3-
class class
4-
1 1995-01-21 a a ? a 2 a a 2
5-
1 2003-07-23 a b 1 ? 0 b b 0
6-
1 1967-03-12 a b 2 b -2 c c 1
1+
c1 c2 d1 d2 n1 n2 c3 d3 c4 cl1 cl2
2+
c t d d c d c d c d c
3+
class class
4+
1 1995-01-21 a a ? a 2 a a 2
5+
1 2003-07-23 a b 1 ? 0 b b 0
6+
1 1967-03-12 a b 2 b -2 c c 1

Orange/tests/datasets/test5.tab

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
c1 c2 d1 d2 n1 n2 c3 d3 cl1 cl2
2-
c c d d c d c d d c
3-
class class
4-
1 2 a a ? a 2 a a 2
5-
1 0 a b 1 ? 0 b b 0
6-
1 1 a b 2 b -2 c c 1
1+
c1 c2 d1 d2 n1 n2 c3 d3 c4 cl1 cl2
2+
c c d d c d c d c d c
3+
class class
4+
1 2 a a ? a 2 a a 2
5+
1 0 a b 1 ? 0 b b 0
6+
1 1 a b 2 b -2 c c 1

Orange/tests/test_distances.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -970,8 +970,8 @@ def test_preprocess_multiclass(self):
970970
new_table = _preprocess(table)
971971
np.testing.assert_equal(new_table.Y, table.Y)
972972
self.assertEqual([a.name for a in new_table.domain.attributes],
973-
[a.name for a in table.domain.attributes
974-
if a.is_continuous])
973+
[a.name for a in table.domain.attributes if
974+
a.is_continuous and not all(np.isnan(table[:, a].X))])
975975
self.assertEqual(new_table.domain.class_vars, table.domain.class_vars)
976976

977977
def test_preprocess_impute(self):

Orange/tests/test_normalize.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def compare_tables(self, dataNorm, solution):
2020
else:
2121
self.assertEqual(dataNorm[i, j], solution[i][j])
2222
self.assertEqual([attr.name for attr in dataNorm.domain.attributes],
23-
["c1", "c2", "d1", "d2", "n1", "n2", "c3", "d3"])
23+
["c1", "c2", "d1", "d2", "n1", "n2", "c3", "d3", "c4"])
2424
self.assertEqual([attr.name for attr in dataNorm.domain.class_vars],
2525
["cl1", "cl2"])
2626
@classmethod
@@ -30,69 +30,69 @@ def setUpClass(cls):
3030
def test_normalize_default(self):
3131
normalizer = Normalize()
3232
data_norm = normalizer(self.data)
33-
solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', 'a', 2],
34-
[0., -1.225, 'a', 'b', -1., '?', 0., 'b', 'b', 0],
35-
[0., 0., 'a', 'b', 1., 'b', -1.225, 'c', 'c', 1]]
33+
solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 2],
34+
[0., -1.225, 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0],
35+
[0., 0., 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 1]]
3636
self.compare_tables(data_norm, solution)
3737

3838
def test_normalize_transform_by_sd(self):
3939
normalizer = Normalize(zero_based=False,
4040
norm_type=Normalize.NormalizeBySD,
4141
transform_class=False)
4242
data_norm = normalizer(self.data)
43-
solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', 'a', 2],
44-
[0., -1.225, 'a', 'b', -1., '?', 0., 'b', 'b', 0],
45-
[0., 0., 'a', 'b', 1., 'b', -1.225, 'c', 'c', 1]]
43+
solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 2],
44+
[0., -1.225, 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0],
45+
[0., 0., 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 1]]
4646
self.compare_tables(data_norm, solution)
4747

4848
def test_normalize_transform_class(self):
4949
normalizer = Normalize(zero_based=True,
5050
norm_type=Normalize.NormalizeBySD,
5151
transform_class=True)
5252
data_norm = normalizer(self.data)
53-
solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', 'a', 1.225],
54-
[0., -1.225, 'a', 'b', -1., '?', 0., 'b', 'b', -1.225],
55-
[0., 0., 'a', 'b', 1., 'b', -1.225, 'c', 'c', 0.]]
53+
solution = [[0., 1.225, 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 1.225],
54+
[0., -1.225, 'a', 'b', -1., '?', 0., 'b', '?', 'b', -1.225],
55+
[0., 0., 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 0.]]
5656
self.compare_tables(data_norm, solution)
5757

5858
def test_normalize_transform_by_span(self):
5959
normalizer = Normalize(zero_based=False,
6060
norm_type=Normalize.NormalizeBySpan,
6161
transform_class=False)
6262
data_norm = normalizer(self.data)
63-
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', 'a', 2.],
64-
[0., -1., 'a', 'b', -1., '?', 0., 'b', 'b', 0.],
65-
[0., 0., 'a', 'b', 1., 'b', -1., 'c', 'c', 1.]]
63+
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', '?', 'a', 2.],
64+
[0., -1., 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0.],
65+
[0., 0., 'a', 'b', 1., 'b', -1., 'c', '?', 'c', 1.]]
6666
self.compare_tables(data_norm, solution)
6767

6868
def test_normalize_transform_by_span_zero(self):
6969
normalizer = Normalize(zero_based=True,
7070
norm_type=Normalize.NormalizeBySpan,
7171
transform_class=False)
7272
data_norm = normalizer(self.data)
73-
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', 'a', 2.],
74-
[0., 0., 'a', 'b', 0., '?', 0.5, 'b', 'b', 0.],
75-
[0., 0.5, 'a', 'b', 1., 'b', 0., 'c', 'c', 1.]]
73+
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', '?', 'a', 2.],
74+
[0., 0., 'a', 'b', 0., '?', 0.5, 'b', '?', 'b', 0.],
75+
[0., 0.5, 'a', 'b', 1., 'b', 0., 'c', '?', 'c', 1.]]
7676
self.compare_tables(data_norm, solution)
7777

7878
def test_normalize_transform_by_span_class(self):
7979
normalizer = Normalize(zero_based=False,
8080
norm_type=Normalize.NormalizeBySpan,
8181
transform_class=True)
8282
data_norm = normalizer(self.data)
83-
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', 'a', 1.],
84-
[0., -1., 'a', 'b', -1., '?', 0., 'b', 'b', -1.],
85-
[0., 0., 'a', 'b', 1., 'b', -1., 'c', 'c', 0.]]
83+
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', '?', 'a', 1.],
84+
[0., -1., 'a', 'b', -1., '?', 0., 'b', '?', 'b', -1.],
85+
[0., 0., 'a', 'b', 1., 'b', -1., 'c', '?', 'c', 0.]]
8686
self.compare_tables(data_norm, solution)
8787

8888
def test_normalize_transform_by_span_zero_class(self):
8989
normalizer = Normalize(zero_based=True,
9090
norm_type=Normalize.NormalizeBySpan,
9191
transform_class=True)
9292
data_norm = normalizer(self.data)
93-
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', 'a', 1.],
94-
[0., 0., 'a', 'b', 0., '?', 0.5, 'b', 'b', 0.],
95-
[0., 0.5, 'a', 'b', 1., 'b', 0., 'c', 'c', 0.5]]
93+
solution = [[0., 1., 'a', 'a', '?', 'a', 1., 'a', '?', 'a', 1.],
94+
[0., 0., 'a', 'b', 0., '?', 0.5, 'b', '?', 'b', 0.],
95+
[0., 0.5, 'a', 'b', 1., 'b', 0., 'c', '?', 'c', 0.5]]
9696
self.compare_tables(data_norm, solution)
9797

9898
def test_normalize_sparse(self):
@@ -138,7 +138,7 @@ def test_datetime_normalization(self):
138138
norm_type=Normalize.NormalizeBySD,
139139
transform_class=False)
140140
data_norm = normalizer(data)
141-
solution = [[0., '1995-01-21', 'a', 'a', '?', 'a', 1.225, 'a', 'a', 2],
142-
[0., '2003-07-23', 'a', 'b', -1., '?', 0., 'b', 'b', 0],
143-
[0., '1967-03-12', 'a', 'b', 1., 'b', -1.225, 'c', 'c', 1]]
141+
solution = [[0., '1995-01-21', 'a', 'a', '?', 'a', 1.225, 'a', '?', 'a', 2],
142+
[0., '2003-07-23', 'a', 'b', -1., '?', 0., 'b', '?', 'b', 0],
143+
[0., '1967-03-12', 'a', 'b', 1., 'b', -1.225, 'c', '?', 'c', 1]]
144144
self.compare_tables(data_norm, solution)

0 commit comments