Skip to content

Commit 34b5e94

Browse files
authored
Merge pull request #2064 from jerneju/zerodivisionerror-owpreprocess
[FIX] owpreprocess: Handle columns with only NaN values
2 parents c3e2f7a + 08c7255 commit 34b5e94

File tree

3 files changed

+48
-18
lines changed

3 files changed

+48
-18
lines changed

Orange/widgets/data/owpreprocess.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,22 @@
11
import sys
2-
import bisect
3-
import contextlib
4-
import warnings
52
from collections import OrderedDict
63
import pkg_resources
74

85
import numpy
96

107
from AnyQt.QtWidgets import (
11-
QWidget, QButtonGroup, QGroupBox, QRadioButton, QSlider, QFocusFrame,
12-
QDoubleSpinBox, QComboBox, QSpinBox, QListView, QDockWidget, QLabel,
13-
QScrollArea, QVBoxLayout, QHBoxLayout, QFormLayout, QSpacerItem,
14-
QSizePolicy, QStyle, QStylePainter, QAction, QLabel,
15-
QApplication, QCheckBox
8+
QWidget, QButtonGroup, QGroupBox, QRadioButton, QSlider,
9+
QDoubleSpinBox, QComboBox, QSpinBox, QListView, QLabel,
10+
QScrollArea, QVBoxLayout, QHBoxLayout, QFormLayout,
11+
QSizePolicy, QApplication, QCheckBox
1612
)
1713

1814
from AnyQt.QtGui import (
19-
QCursor, QIcon, QPainter, QPixmap, QStandardItemModel, QStandardItem,
20-
QDrag, QKeySequence
15+
QIcon, QStandardItemModel, QStandardItem
2116
)
2217

2318
from AnyQt.QtCore import (
24-
Qt, QObject, QEvent, QSize, QModelIndex, QMimeData, QTimer
19+
Qt, QEvent, QSize, QMimeData, QTimer
2520
)
2621

2722
from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot
@@ -34,7 +29,6 @@
3429
from Orange.widgets import widget, gui, settings
3530
from Orange.widgets.utils.overlay import OverlayWidget
3631
from Orange.widgets.utils.sql import check_sql_input
37-
from Orange.util import Reprable
3832

3933
from Orange.widgets.data.utils.preprocess import (
4034
BaseEditor, blocked, StandardItemModel, DescriptionRole,
@@ -271,9 +265,9 @@ class ImputeEditor(BaseEditor):
271265

272266
Imputers = {
273267
NoImputation: (None, {}),
274-
# Constant: (None, {"value": 0})
268+
# Constant: (None, {"value": 0})
275269
Average: (preprocess.impute.Average(), {}),
276-
# Model: (preprocess.impute.Model, {}),
270+
# Model: (preprocess.impute.Model, {}),
277271
Random: (preprocess.impute.Random(), {}),
278272
DropRows: (None, {})
279273
}
@@ -1175,7 +1169,7 @@ def apply(self):
11751169
self.error()
11761170
try:
11771171
data = preprocessor(self.data)
1178-
except ValueError as e:
1172+
except (ValueError, ZeroDivisionError) as e:
11791173
self.error(str(e))
11801174
return
11811175
else:
@@ -1258,4 +1252,3 @@ def test_main(argv=sys.argv):
12581252

12591253
if __name__ == "__main__":
12601254
sys.exit(test_main())
1261-

Orange/widgets/data/tests/test_owpreprocess.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from Orange.preprocess import discretize, impute, fss, score
1010
from Orange.widgets.data import owpreprocess
1111
from Orange.widgets.data.owpreprocess import OWPreprocess
12-
from Orange.widgets.tests.base import WidgetTest
12+
from Orange.widgets.tests.base import WidgetTest, datasets
1313

1414

1515
class TestOWPreprocess(WidgetTest):
@@ -44,6 +44,20 @@ def test_normalize(self):
4444
np.testing.assert_allclose(output.X.mean(0), 0, atol=1e-7)
4545
np.testing.assert_allclose(output.X.std(0), 1, atol=1e-7)
4646

47+
def test_data_column_nans(self):
48+
"""
49+
ZeroDivisionError - Weights sum to zero, can't be normalized
50+
This error is raised when all rows in a column are NaN.
51+
GH-2064
52+
"""
53+
table = datasets.data_one_column_nans()
54+
saved = {"preprocessors": [("orange.preprocess.scale",
55+
{"center": Scale.CenteringType.Mean,
56+
"scale": Scale.ScalingType.Std})]}
57+
model = self.widget.load(saved)
58+
self.widget.set_model(model)
59+
self.send_signal("Data", table)
60+
4761

4862
# Test for editors
4963
class TestDiscretizeEditor(WidgetTest):

Orange/widgets/tests/base.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from Orange.classification.base_classification import (
1616
LearnerClassification, ModelClassification
1717
)
18-
from Orange.data import Table
18+
from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
1919
from Orange.modelling import Fitter
2020
from Orange.preprocess import RemoveNaNColumns, Randomize
2121
from Orange.preprocess.preprocess import PreprocessorList
@@ -709,3 +709,26 @@ def missing_data_3(cls):
709709
data : Orange.data.Table
710710
"""
711711
return Table(cls.path("missing_data_3.tab"))
712+
713+
@classmethod
714+
def data_one_column_nans(cls):
715+
"""
716+
Data set with two continuous features and one discrete. One continuous
717+
column has only missing values (NaN).
718+
719+
Returns
720+
-------
721+
data : Orange.data.Table
722+
"""
723+
table = Table(
724+
Domain(
725+
[ContinuousVariable("a"),
726+
ContinuousVariable("b"),
727+
DiscreteVariable("c", values=["y", "n"])]
728+
),
729+
list(zip(
730+
[42.48, 16.84, 15.23, 23.8],
731+
["", "", "", ""],
732+
"ynyn"
733+
)))
734+
return table

0 commit comments

Comments
 (0)