Skip to content

Commit 0b42e84

Browse files
committed
Transform: Replace 'Preprocess' input with 'Template Data' input
1 parent eca8c46 commit 0b42e84

File tree

2 files changed

+85
-68
lines changed

2 files changed

+85
-68
lines changed

Orange/widgets/data/owtransform.py

Lines changed: 36 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1+
import numpy as np
12
from Orange.data import Table, Domain
2-
from Orange.preprocess.preprocess import Preprocess, Discretize
33
from Orange.widgets import gui
44
from Orange.widgets.settings import Setting
55
from Orange.widgets.utils.sql import check_sql_input
@@ -18,29 +18,29 @@ class OWTransform(OWWidget):
1818

1919
class Inputs:
2020
data = Input("Data", Table, default=True)
21-
preprocessor = Input("Preprocessor", Preprocess)
21+
template_data = Input("Template Data", Table)
2222

2323
class Outputs:
2424
transformed_data = Output("Transformed Data", Table)
2525

2626
class Error(OWWidget.Error):
27-
pp_error = Msg("An error occurred while transforming data.\n{}")
27+
error = Msg("An error occurred while transforming data.\n{}")
2828

2929
resizing_enabled = False
3030
want_main_area = False
3131

3232
def __init__(self):
3333
super().__init__()
3434
self.data = None
35-
self.preprocessor = None
35+
self.template_domain = None
3636
self.transformed_data = None
3737

3838
info_box = gui.widgetBox(self.controlArea, "Info")
3939
self.input_label = gui.widgetLabel(info_box, "")
40-
self.preprocessor_label = gui.widgetLabel(info_box, "")
40+
self.template_label = gui.widgetLabel(info_box, "")
4141
self.output_label = gui.widgetLabel(info_box, "")
4242
self.set_input_label_text()
43-
self.set_preprocessor_label_text()
43+
self.set_template_label_text()
4444

4545
box = gui.widgetBox(self.controlArea, "Output")
4646
gui.checkBox(box, self, "retain_all_data", "Retain all data",
@@ -54,13 +54,14 @@ def set_input_label_text(self):
5454
len(self.data.domain.attributes))
5555
self.input_label.setText(text)
5656

57-
def set_preprocessor_label_text(self):
58-
text = "No preprocessor on input."
57+
def set_template_label_text(self):
58+
text = "No template data on input."
5959
if self.transformed_data is not None:
60-
text = "Preprocessor {} applied.".format(self.preprocessor)
61-
elif self.preprocessor is not None:
62-
text = "Preprocessor {} on input.".format(self.preprocessor)
63-
self.preprocessor_label.setText(text)
60+
text = "Template domain applied."
61+
elif self.template_domain is not None:
62+
text = "Template data includes {:,} features.".format(
63+
len(self.template_domain.attributes))
64+
self.template_label.setText(text)
6465

6566
def set_output_label_text(self):
6667
text = ""
@@ -75,49 +76,55 @@ def set_data(self, data):
7576
self.data = data
7677
self.set_input_label_text()
7778

78-
@Inputs.preprocessor
79-
def set_preprocessor(self, preprocessor):
80-
self.preprocessor = preprocessor
79+
@Inputs.template_data
80+
@check_sql_input
81+
def set_template_data(self, data):
82+
self.template_domain = data and data.domain
8183

8284
def handleNewSignals(self):
8385
self.apply()
8486

8587
def apply(self):
8688
self.clear_messages()
8789
self.transformed_data = None
88-
if self.data is not None and self.preprocessor is not None:
90+
if self.data is not None and self.template_domain is not None:
8991
try:
90-
self.transformed_data = self.preprocessor(self.data)
91-
except Exception as ex: # pylint: disable=broad-except
92-
self.Error.pp_error(ex)
92+
self.transformed_data = self.data.transform(
93+
self.template_domain)
94+
except Exception as ex: # pylint: disable=broad-except
95+
self.Error.error(ex)
9396

9497
data = self.merged_data() if self.retain_all_data \
9598
else self.transformed_data
9699
self.Outputs.transformed_data.send(data)
97-
98-
self.set_preprocessor_label_text()
100+
self.set_template_label_text()
99101
self.set_output_label_text()
100102

101103
def merged_data(self):
104+
if not self.transformed_data:
105+
return None
102106
domain = self.data.domain
103-
metas = domain.metas + self.transformed_data.domain.attributes
107+
t_domain = self.transformed_data.domain
108+
metas = domain.metas + t_domain.attributes + t_domain.metas
104109
domain = Domain(domain.attributes, domain.class_vars, metas)
105110
data = self.data.transform(domain)
106-
n = self.transformed_data.X.shape[1]
107-
data.metas[:, -n:] = self.transformed_data.X
111+
metas = np.hstack((self.transformed_data.X,
112+
self.transformed_data.metas))
113+
data.metas[:, -metas.shape[1]:] = metas
108114
return data
109115

110116
def send_report(self):
111-
if self.preprocessor is not None:
112-
self.report_items("Settings",
113-
(("Preprocessor", self.preprocessor),))
114117
if self.data is not None:
115118
self.report_data("Data", self.data)
119+
if self.template_domain is not None:
120+
self.report_domain("Template data", self.template_domain)
116121
if self.transformed_data is not None:
117122
self.report_data("Transformed data", self.transformed_data)
118123

119124

120125
if __name__ == "__main__": # pragma: no cover
126+
from Orange.preprocess import Discretize
127+
128+
table = Table("iris")
121129
WidgetPreview(OWTransform).run(
122-
set_data=Table("iris"),
123-
set_preprocessor=Discretize())
130+
set_data=table, set_template_data=Discretize()(table))

Orange/widgets/data/tests/test_owtransform.py

Lines changed: 49 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
11
# Test methods with long descriptive names can omit docstrings
22
# pylint: disable=missing-docstring
3+
from unittest.mock import Mock
4+
5+
from numpy import testing as npt
6+
37
from Orange.data import Table
48
from Orange.preprocess import Discretize, Continuize
5-
from Orange.preprocess.preprocess import Preprocess
69
from Orange.widgets.data.owtransform import OWTransform
710
from Orange.widgets.tests.base import WidgetTest
811
from Orange.widgets.unsupervised.owpca import OWPCA
@@ -12,39 +15,39 @@ class TestOWTransform(WidgetTest):
1215
def setUp(self):
1316
self.widget = self.create_widget(OWTransform)
1417
self.data = Table("iris")
15-
self.preprocessor = Discretize()
18+
self.disc_data = Discretize()(self.data)
1619

1720
def test_output(self):
18-
# send data and preprocessor
19-
self.send_signal(self.widget.Inputs.data, self.data)
20-
self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
21+
# send data and template data
22+
self.send_signal(self.widget.Inputs.data, self.data[::15])
23+
self.send_signal(self.widget.Inputs.template_data, self.disc_data)
2124
output = self.get_output(self.widget.Outputs.transformed_data)
22-
self.assertIsInstance(output, Table)
23-
self.assertEqual("Input data with 150 instances and 4 features.",
25+
self.assertTableEqual(output, self.disc_data[::15])
26+
self.assertEqual("Input data with 10 instances and 4 features.",
2427
self.widget.input_label.text())
25-
self.assertEqual("Preprocessor Discretize() applied.",
26-
self.widget.preprocessor_label.text())
28+
self.assertEqual("Template domain applied.",
29+
self.widget.template_label.text())
2730
self.assertEqual("Output data includes 4 features.",
2831
self.widget.output_label.text())
2932

30-
# remove preprocessor
31-
self.send_signal(self.widget.Inputs.preprocessor, None)
33+
# remove template data
34+
self.send_signal(self.widget.Inputs.template_data, None)
3235
output = self.get_output(self.widget.Outputs.transformed_data)
3336
self.assertIsNone(output)
34-
self.assertEqual("Input data with 150 instances and 4 features.",
37+
self.assertEqual("Input data with 10 instances and 4 features.",
3538
self.widget.input_label.text())
36-
self.assertEqual("No preprocessor on input.",
37-
self.widget.preprocessor_label.text())
39+
self.assertEqual("No template data on input.",
40+
self.widget.template_label.text())
3841
self.assertEqual("", self.widget.output_label.text())
3942

40-
# send preprocessor
41-
self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
43+
# send template data
44+
self.send_signal(self.widget.Inputs.template_data, self.disc_data)
4245
output = self.get_output(self.widget.Outputs.transformed_data)
43-
self.assertIsInstance(output, Table)
44-
self.assertEqual("Input data with 150 instances and 4 features.",
46+
self.assertTableEqual(output, self.disc_data[::15])
47+
self.assertEqual("Input data with 10 instances and 4 features.",
4548
self.widget.input_label.text())
46-
self.assertEqual("Preprocessor Discretize() applied.",
47-
self.widget.preprocessor_label.text())
49+
self.assertEqual("Template domain applied.",
50+
self.widget.template_label.text())
4851
self.assertEqual("Output data includes 4 features.",
4952
self.widget.output_label.text())
5053

@@ -53,48 +56,55 @@ def test_output(self):
5356
output = self.get_output(self.widget.Outputs.transformed_data)
5457
self.assertIsNone(output)
5558
self.assertEqual("No data on input.", self.widget.input_label.text())
56-
self.assertEqual("Preprocessor Discretize() on input.",
57-
self.widget.preprocessor_label.text())
59+
self.assertEqual("Template data includes 4 features.",
60+
self.widget.template_label.text())
5861
self.assertEqual("", self.widget.output_label.text())
5962

60-
# remove preprocessor
61-
self.send_signal(self.widget.Inputs.preprocessor, None)
63+
# remove template data
64+
self.send_signal(self.widget.Inputs.template_data, None)
6265
self.assertEqual("No data on input.", self.widget.input_label.text())
63-
self.assertEqual("No preprocessor on input.",
64-
self.widget.preprocessor_label.text())
66+
self.assertEqual("No template data on input.",
67+
self.widget.template_label.text())
6568
self.assertEqual("", self.widget.output_label.text())
6669

67-
def test_input_pca_preprocessor(self):
70+
def assertTableEqual(self, table1, table2):
71+
self.assertIs(table1.domain, table2.domain)
72+
npt.assert_array_equal(table1.X, table2.X)
73+
npt.assert_array_equal(table1.Y, table2.Y)
74+
npt.assert_array_equal(table1.metas, table2.metas)
75+
76+
def test_input_pca_output(self):
6877
owpca = self.create_widget(OWPCA)
6978
self.send_signal(owpca.Inputs.data, self.data, widget=owpca)
7079
owpca.components_spin.setValue(2)
71-
pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca)
72-
self.assertIsNotNone(pp, Preprocess)
80+
pca_out = self.get_output(owpca.Outputs.transformed_data, widget=owpca)
7381

74-
self.send_signal(self.widget.Inputs.data, self.data)
75-
self.send_signal(self.widget.Inputs.preprocessor, pp)
82+
self.send_signal(self.widget.Inputs.data, self.data[::10])
83+
self.send_signal(self.widget.Inputs.template_data, pca_out)
7684
output = self.get_output(self.widget.Outputs.transformed_data)
77-
self.assertIsInstance(output, Table)
78-
self.assertEqual(output.X.shape, (len(self.data), 2))
85+
npt.assert_array_equal(pca_out.X[::10], output.X)
7986

8087
def test_retain_all_data(self):
8188
data = Table("zoo")
89+
cont_data = Continuize()(data)
8290
self.send_signal(self.widget.Inputs.data, data)
83-
self.send_signal(self.widget.Inputs.preprocessor, Continuize())
91+
self.send_signal(self.widget.Inputs.template_data, cont_data)
8492
self.widget.controls.retain_all_data.click()
8593
output = self.get_output(self.widget.Outputs.transformed_data)
8694
self.assertIsInstance(output, Table)
8795
self.assertEqual(output.X.shape, (len(data), 16))
88-
self.assertEqual(output.metas.shape, (len(data), 37))
96+
self.assertEqual(output.metas.shape, (len(data), 38))
8997

9098
def test_error_transforming(self):
91-
self.send_signal(self.widget.Inputs.data, self.data)
92-
self.send_signal(self.widget.Inputs.preprocessor, Preprocess())
93-
self.assertTrue(self.widget.Error.pp_error.is_shown())
99+
data = self.data[::10]
100+
data.transform = Mock(side_effect=Exception())
101+
self.send_signal(self.widget.Inputs.data, data)
102+
self.send_signal(self.widget.Inputs.template_data, self.disc_data)
103+
self.assertTrue(self.widget.Error.error.is_shown())
94104
output = self.get_output(self.widget.Outputs.transformed_data)
95105
self.assertIsNone(output)
96106
self.send_signal(self.widget.Inputs.data, None)
97-
self.assertFalse(self.widget.Error.pp_error.is_shown())
107+
self.assertFalse(self.widget.Error.error.is_shown())
98108

99109
def test_send_report(self):
100110
self.send_signal(self.widget.Inputs.data, self.data)

0 commit comments

Comments
 (0)