Skip to content

Commit 8690622

Browse files
authored
Merge pull request #3346 from VesnaT/owtransform
[ENH] Transform: Add new widget
2 parents aea66c6 + 741245f commit 8690622

File tree

6 files changed

+252
-1
lines changed

6 files changed

+252
-1
lines changed

Orange/preprocess/preprocess.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,18 @@ def transform(var):
488488
return data.transform(domain)
489489

490490

491+
class ApplyDomain(Preprocess):
    """Preprocessor that converts data into a fixed, precomputed domain.

    Parameters
    ----------
    domain
        Target domain; handed unchanged to ``data.transform`` on each call.
    name
        Label returned by ``str()`` for this preprocessor.
    """

    def __init__(self, domain, name):
        self._domain, self._name = domain, name

    def __call__(self, data):
        """Return ``data`` transformed into the stored domain."""
        target_domain = self._domain
        return data.transform(target_domain)

    def __str__(self):
        """Return the name supplied at construction."""
        return self._name
501+
502+
491503
class PreprocessorList(Preprocess):
492504
"""
493505
Store a list of preprocessors and on call apply them to the dataset.
Lines changed: 15 additions & 0 deletions
Loading

Orange/widgets/data/owtransform.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
from Orange.data import Table
2+
from Orange.preprocess.preprocess import Preprocess, Discretize
3+
from Orange.widgets import gui
4+
from Orange.widgets.utils.sql import check_sql_input
5+
from Orange.widgets.widget import OWWidget, Input, Output, Msg
6+
7+
8+
class OWTransform(OWWidget):
    """Widget that applies a preprocessor received on input to a data table.

    The widget takes a data table and a preprocessor, calls the preprocessor
    on the data and sends the result to the "Transformed Data" output. Three
    labels in the "Info" box describe the input data, the preprocessor and
    the output.
    """
    name = "Transform"
    description = "Transform data table."
    icon = "icons/Transform.svg"
    priority = 2110
    keywords = []

    class Inputs:
        data = Input("Data", Table, default=True)
        preprocessor = Input("Preprocessor", Preprocess)

    class Outputs:
        transformed_data = Output("Transformed Data", Table)

    class Error(OWWidget.Error):
        # The placeholder is filled with the exception raised by the
        # preprocessor (see apply()).
        pp_error = Msg("An error occurred while transforming data.\n{}")

    resizing_enabled = False
    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None              # table from the "Data" input
        self.preprocessor = None      # object from the "Preprocessor" input
        self.transformed_data = None  # result of the last successful apply()

        info_box = gui.widgetBox(self.controlArea, "Info")
        self.input_label = gui.widgetLabel(info_box, "")
        self.preprocessor_label = gui.widgetLabel(info_box, "")
        self.output_label = gui.widgetLabel(info_box, "")
        self.set_input_label_text()
        self.set_preprocessor_label_text()

    def set_input_label_text(self):
        """Describe the input data (instance and feature counts) in the label."""
        text = "No data on input."
        if self.data is not None:
            text = "Input data with {:,} instances and {:,} features.".format(
                len(self.data),
                len(self.data.domain.attributes))
        self.input_label.setText(text)

    def set_preprocessor_label_text(self):
        """Show whether a preprocessor is absent, waiting on input, or applied."""
        text = "No preprocessor on input."
        if self.transformed_data is not None:
            text = "Preprocessor {} applied.".format(self.preprocessor)
        elif self.preprocessor is not None:
            text = "Preprocessor {} on input.".format(self.preprocessor)
        self.preprocessor_label.setText(text)

    def set_output_label_text(self):
        """Show the feature count of the transformed data, or clear the label."""
        text = ""
        # Compare with None explicitly, as the other label setters do. Table
        # supports len(), so truth-testing it can be False for a result with
        # zero instances, which would leave this label blank although the
        # data was produced and sent.
        if self.transformed_data is not None:
            text = "Output data includes {:,} features.".format(
                len(self.transformed_data.domain.attributes))
        self.output_label.setText(text)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the input table and refresh the input label."""
        self.data = data
        self.set_input_label_text()

    @Inputs.preprocessor
    def set_preprocessor(self, preprocessor):
        """Store the input preprocessor; it is applied in handleNewSignals()."""
        self.preprocessor = preprocessor

    def handleNewSignals(self):
        """Re-run the transformation with the current inputs."""
        self.apply()

    def apply(self):
        """Apply the preprocessor to the data and send the result.

        ``None`` is sent when either input is missing or when the
        preprocessor raises; in the latter case ``Error.pp_error`` is shown
        with the exception.
        """
        self.clear_messages()
        self.transformed_data = None
        if self.data is not None and self.preprocessor is not None:
            try:
                self.transformed_data = self.preprocessor(self.data)
            except Exception as ex:  # pylint: disable=broad-except
                # Any failure inside the preprocessor is reported in the
                # widget instead of propagating.
                self.Error.pp_error(ex)
        # Always send, so a previously sent table is cleared when the
        # transformation is no longer possible.
        self.Outputs.transformed_data.send(self.transformed_data)

        self.set_preprocessor_label_text()
        self.set_output_label_text()

    def send_report(self):
        """Report the preprocessor, the input data and the transformed data."""
        if self.preprocessor is not None:
            self.report_items("Settings",
                              (("Preprocessor", self.preprocessor),))
        if self.data is not None:
            self.report_data("Data", self.data)
        if self.transformed_data is not None:
            self.report_data("Transformed data", self.transformed_data)
98+
99+
100+
if __name__ == "__main__":
    # Manual smoke test: show the widget with the iris data set and a
    # Discretize preprocessor.
    from AnyQt.QtWidgets import QApplication

    application = QApplication([])
    widget = OWTransform()
    iris = Table("iris")
    discretizer = Discretize()
    widget.set_data(iris)
    widget.set_preprocessor(discretizer)
    # The setters only store the inputs; handleNewSignals() is what calls
    # apply() and produces the output.
    widget.handleNewSignals()
    widget.show()
    application.exec_()
    widget.saveSettings()
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
from Orange.data import Table
4+
from Orange.preprocess import Discretize
5+
from Orange.preprocess.preprocess import Preprocess
6+
from Orange.widgets.data.owtransform import OWTransform
7+
from Orange.widgets.tests.base import WidgetTest
8+
from Orange.widgets.unsupervised.owpca import OWPCA
9+
10+
11+
class TestOWTransform(WidgetTest):
    """Tests for the OWTransform widget: output, info labels, errors, report."""

    def setUp(self):
        self.widget = self.create_widget(OWTransform)
        self.data = Table("iris")
        self.preprocessor = Discretize()

    def _assert_labels(self, input_text, preprocessor_text, output_text):
        # Compare the three info labels of the widget with the expected texts.
        self.assertEqual(input_text, self.widget.input_label.text())
        self.assertEqual(preprocessor_text,
                         self.widget.preprocessor_label.text())
        self.assertEqual(output_text, self.widget.output_label.text())

    def test_output(self):
        # send data and preprocessor
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self._assert_labels("Input data with 150 instances and 4 features.",
                            "Preprocessor Discretize() applied.",
                            "Output data includes 4 features.")

        # remove preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self._assert_labels("Input data with 150 instances and 4 features.",
                            "No preprocessor on input.",
                            "")

        # send preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self._assert_labels("Input data with 150 instances and 4 features.",
                            "Preprocessor Discretize() applied.",
                            "Output data includes 4 features.")

        # remove data
        self.send_signal(self.widget.Inputs.data, None)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        self._assert_labels("No data on input.",
                            "Preprocessor Discretize() on input.",
                            "")

        # remove preprocessor
        self.send_signal(self.widget.Inputs.preprocessor, None)
        self._assert_labels("No data on input.",
                            "No preprocessor on input.",
                            "")

    def test_input_pca_preprocessor(self):
        owpca = self.create_widget(OWPCA)
        self.send_signal(owpca.Inputs.data, self.data, widget=owpca)
        owpca.components_spin.setValue(2)
        pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca)
        # assertIsNotNone(pp, Preprocess) would use Preprocess only as the
        # failure message; the intent is to verify the type of the output.
        self.assertIsInstance(pp, Preprocess)

        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, pp)
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsInstance(output, Table)
        self.assertEqual(output.X.shape, (len(self.data), 2))

    def test_error_transforming(self):
        self.send_signal(self.widget.Inputs.data, self.data)
        self.send_signal(self.widget.Inputs.preprocessor, Preprocess())
        self.assertTrue(self.widget.Error.pp_error.is_shown())
        output = self.get_output(self.widget.Outputs.transformed_data)
        self.assertIsNone(output)
        # Removing the data must clear the error as well.
        self.send_signal(self.widget.Inputs.data, None)
        self.assertFalse(self.widget.Error.pp_error.is_shown())

    def test_send_report(self):
        # Reporting must not fail either with or without data on input.
        self.send_signal(self.widget.Inputs.data, self.data)
        self.widget.report_button.click()
        self.send_signal(self.widget.Inputs.data, None)
        self.widget.report_button.click()

Orange/widgets/unsupervised/owpca.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from Orange.data import Table, Domain, StringVariable, ContinuousVariable
1111
from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
1212
from Orange.preprocess import Normalize
13+
from Orange.preprocess.preprocess import Preprocess, ApplyDomain
1314
from Orange.projection import PCA, TruncatedSVD
1415
from Orange.widgets import widget, gui, settings
1516
from Orange.widgets.widget import Input, Output
@@ -44,6 +45,7 @@ class Outputs:
4445
transformed_data = Output("Transformed data", Table)
4546
components = Output("Components", Table)
4647
pca = Output("PCA", PCA, dynamic=False)
48+
preprocessor = Output("Preprocessor", Preprocess)
4749

4850
settingsHandler = settings.DomainContextHandler()
4951

@@ -290,6 +292,7 @@ def clear_outputs(self):
290292
self.Outputs.transformed_data.send(None)
291293
self.Outputs.components.send(None)
292294
self.Outputs.pca.send(self._pca_projector)
295+
self.Outputs.preprocessor.send(None)
293296

294297
def get_model(self):
295298
if self.rpca is None:
@@ -455,7 +458,7 @@ def _update_axis(self):
455458
axis.setTicks([[(i, str(i+1)) for i in range(0, p, d)]])
456459

457460
def commit(self):
458-
transformed = components = None
461+
transformed = components = pp = None
459462
if self._pca is not None:
460463
if self._transformed is None:
461464
# Compute the full transform (MAX_COMPONENTS components) only once.
@@ -479,10 +482,13 @@ def commit(self):
479482
metas=metas)
480483
components.name = 'components'
481484

485+
pp = ApplyDomain(domain, "PCA")
486+
482487
self._pca_projector.component = self.ncomponents
483488
self.Outputs.transformed_data.send(transformed)
484489
self.Outputs.components.send(components)
485490
self.Outputs.pca.send(self._pca_projector)
491+
self.Outputs.preprocessor.send(pp)
486492

487493
def send_report(self):
488494
if self.data is None:

Orange/widgets/unsupervised/tests/test_owpca.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import scipy.sparse as sp
55

66
from Orange.data import Table, Domain, ContinuousVariable, TimeVariable
7+
from Orange.preprocess.preprocess import Preprocess
78
from Orange.widgets.tests.base import WidgetTest
89
from Orange.widgets.unsupervised.owpca import OWPCA, DECOMPOSITIONS
910

@@ -131,3 +132,16 @@ def test_do_not_mask_features(self):
131132
self.widget.set_data(data)
132133
ndata = Table("iris.tab")
133134
self.assertEqual(data.domain[0], ndata.domain[0])
135+
136+
def test_output_preprocessor(self):
    # The preprocessor output must reproduce the widget's own projection
    # when applied to a subset of the same data.
    iris = Table("iris")
    self.send_signal(self.widget.Inputs.data, iris)
    preprocessor = self.get_output(self.widget.Outputs.preprocessor)
    self.assertIsInstance(preprocessor, Preprocess)

    # Apply it to every tenth instance.
    projected = preprocessor(iris[::10])
    self.assertIsInstance(projected, Table)
    self.assertEqual(projected.X.shape, (15, 2))

    widget_output = self.get_output(self.widget.Outputs.transformed_data)
    np.testing.assert_array_equal(projected.X, widget_output.X[::10])
    projected_names = [var.name for var in projected.domain.attributes]
    output_names = [var.name for var in widget_output.domain.attributes]
    self.assertEqual(projected_names, output_names)

0 commit comments

Comments
 (0)