diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index dbdfeba4872..90eb0fc033f 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -536,18 +536,6 @@ def transform(var): return data.transform(domain) -class ApplyDomain(Preprocess): - def __init__(self, domain, name): - self._domain = domain - self._name = name - - def __call__(self, data): - return data.transform(self._domain) - - def __str__(self): - return self._name - - class PreprocessorList(Preprocess): """ Store a list of preprocessors and on call apply them to the dataset. diff --git a/Orange/widgets/data/owtransform.py b/Orange/widgets/data/owtransform.py index 9a240c3d397..9063a07a416 100644 --- a/Orange/widgets/data/owtransform.py +++ b/Orange/widgets/data/owtransform.py @@ -1,8 +1,10 @@ +from typing import Optional + import numpy as np from Orange.data import Table, Domain -from Orange.preprocess.preprocess import Preprocess, Discretize from Orange.widgets import gui +from Orange.widgets.report.report import describe_data from Orange.widgets.settings import Setting from Orange.widgets.utils.sql import check_sql_input from Orange.widgets.utils.widgetpreview import WidgetPreview @@ -10,66 +12,66 @@ class OWTransform(OWWidget): - name = "Transform" - description = "Transform data table." + name = "Apply Domain" + description = "Applies template domain on data table." icon = "icons/Transform.svg" priority = 2110 - keywords = [] + keywords = ["transform"] retain_all_data = Setting(False) class Inputs: data = Input("Data", Table, default=True) - preprocessor = Input("Preprocessor", Preprocess) + template_data = Input("Template Data", Table) class Outputs: transformed_data = Output("Transformed Data", Table) class Error(OWWidget.Error): - pp_error = Msg("An error occurred while transforming data.\n{}") + error = Msg("An error occurred while transforming data.\n{}") resizing_enabled = False want_main_area = False def __init__(self): super().__init__() - self.data = None - self.preprocessor = None - self.transformed_data = None + self.data = None # type: Optional[Table] + self.template_domain = None # type: Optional[Domain] + self.transformed_info = describe_data(None) # type: OrderedDict info_box = gui.widgetBox(self.controlArea, "Info") self.input_label = gui.widgetLabel(info_box, "") - self.preprocessor_label = gui.widgetLabel(info_box, "") + self.template_label = gui.widgetLabel(info_box, "") self.output_label = gui.widgetLabel(info_box, "") self.set_input_label_text() - self.set_preprocessor_label_text() + self.set_template_label_text() - self.retain_all_data_cb = gui.checkBox( - self.controlArea, self, "retain_all_data", label="Retain all data", - callback=self.apply - ) + box = gui.widgetBox(self.controlArea, "Output") + gui.checkBox(box, self, "retain_all_data", "Retain all data", + callback=self.apply) def set_input_label_text(self): text = "No data on input." - if self.data is not None: + if self.data: text = "Input data with {:,} instances and {:,} features.".format( len(self.data), len(self.data.domain.attributes)) self.input_label.setText(text) - def set_preprocessor_label_text(self): - text = "No preprocessor on input." - if self.transformed_data is not None: - text = "Preprocessor {} applied.".format(self.preprocessor) - elif self.preprocessor is not None: - text = "Preprocessor {} on input.".format(self.preprocessor) - self.preprocessor_label.setText(text) + def set_template_label_text(self): + text = "No template data on input." + if self.data and self.template_domain is not None: + text = "Template domain applied." + elif self.template_domain is not None: + text = "Template data includes {:,} features.".format( + len(self.template_domain.attributes)) + self.template_label.setText(text) - def set_output_label_text(self): + def set_output_label_text(self, data): text = "" - if self.transformed_data: + if data: text = "Output data includes {:,} features.".format( - len(self.transformed_data.domain.attributes)) + len(data.domain.attributes)) self.output_label.setText(text) @Inputs.data @@ -78,56 +80,53 @@ def set_data(self, data): self.data = data self.set_input_label_text() - @Inputs.preprocessor - def set_preprocessor(self, preprocessor): - self.preprocessor = preprocessor + @Inputs.template_data + @check_sql_input + def set_template_data(self, data): + self.template_domain = data and data.domain def handleNewSignals(self): self.apply() def apply(self): self.clear_messages() - self.transformed_data = None - if self.data is not None and self.preprocessor is not None: + transformed_data = None + if self.data and self.template_domain is not None: try: - self.transformed_data = self.preprocessor(self.data) - except Exception as ex: # pylint: disable=broad-except - self.Error.pp_error(ex) - - if self.retain_all_data: - self.Outputs.transformed_data.send(self.merge_data()) - else: - self.Outputs.transformed_data.send(self.transformed_data) - - self.set_preprocessor_label_text() - self.set_output_label_text() - - def merge_data(self): - attributes = getattr(self.data.domain, 'attributes') - cls_vars = getattr(self.data.domain, 'class_vars') - metas_v = getattr(self.data.domain, 'metas')\ - + getattr(self.transformed_data.domain, 'attributes') - domain = Domain(attributes, cls_vars, metas_v) - X = self.data.X - Y = self.data.Y - metas = np.hstack((self.data.metas, self.transformed_data.X)) - table = Table.from_numpy(domain, X, Y, metas) - table.name = getattr(self.data, 'name', '') - table.attributes = getattr(self.data, 'attributes', {}) - table.ids = self.data.ids - return table + transformed_data = self.data.transform(self.template_domain) + except Exception as ex: # pylint: disable=broad-except + self.Error.error(ex) + + data = transformed_data + if data and self.retain_all_data: + data = self.merged_data(data) + self.transformed_info = describe_data(data) + self.Outputs.transformed_data.send(data) + self.set_template_label_text() + self.set_output_label_text(data) + + def merged_data(self, t_data): + domain = self.data.domain + t_domain = t_data.domain + metas = domain.metas + t_domain.attributes + t_domain.metas + domain = Domain(domain.attributes, domain.class_vars, metas) + data = self.data.transform(domain) + metas = np.hstack((t_data.X, t_data.metas)) + data.metas[:, -metas.shape[1]:] = metas + return data def send_report(self): - if self.preprocessor is not None: - self.report_items("Settings", - (("Preprocessor", self.preprocessor),)) - if self.data is not None: + if self.data: self.report_data("Data", self.data) - if self.transformed_data is not None: - self.report_data("Transformed data", self.transformed_data) + if self.template_domain is not None: + self.report_domain("Template data", self.template_domain) + if self.transformed_info: + self.report_items("Transformed data", self.transformed_info) if __name__ == "__main__": # pragma: no cover + from Orange.preprocess import Discretize + + table = Table("iris") WidgetPreview(OWTransform).run( - set_data=Table("iris"), - set_preprocessor=Discretize()) + set_data=table, set_template_data=Discretize()(table)) diff --git a/Orange/widgets/data/tests/test_owtransform.py b/Orange/widgets/data/tests/test_owtransform.py index 571dd5525cd..28ad90b3818 100644 --- a/Orange/widgets/data/tests/test_owtransform.py +++ b/Orange/widgets/data/tests/test_owtransform.py @@ -1,8 +1,11 @@ # Test methods with long descriptive names can omit docstrings # pylint: disable=missing-docstring +from unittest.mock import Mock + +from numpy import testing as npt + from Orange.data import Table -from Orange.preprocess import Discretize -from Orange.preprocess.preprocess import Preprocess +from Orange.preprocess import Discretize, Continuize from Orange.widgets.data.owtransform import OWTransform from Orange.widgets.tests.base import WidgetTest from Orange.widgets.unsupervised.owpca import OWPCA @@ -12,38 +15,39 @@ class TestOWTransform(WidgetTest): def setUp(self): self.widget = self.create_widget(OWTransform) self.data = Table("iris") - self.preprocessor = Discretize() + self.disc_data = Discretize()(self.data) def test_output(self): - # send data and preprocessor - self.send_signal(self.widget.Inputs.data, self.data) - self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor) + # send data and template data + self.send_signal(self.widget.Inputs.data, self.data[::15]) + self.send_signal(self.widget.Inputs.template_data, self.disc_data) output = self.get_output(self.widget.Outputs.transformed_data) - self.assertIsInstance(output, Table) - self.assertEqual("Input data with 150 instances and 4 features.", + self.assertTableEqual(output, self.disc_data[::15]) + self.assertEqual("Input data with 10 instances and 4 features.", self.widget.input_label.text()) - self.assertEqual("Preprocessor Discretize() applied.", - self.widget.preprocessor_label.text()) + self.assertEqual("Template domain applied.", + self.widget.template_label.text()) self.assertEqual("Output data includes 4 features.", self.widget.output_label.text()) - # remove preprocessor - self.send_signal(self.widget.Inputs.preprocessor, None) + # remove template data + self.send_signal(self.widget.Inputs.template_data, None) output = self.get_output(self.widget.Outputs.transformed_data) self.assertIsNone(output) - self.assertEqual("Input data with 150 instances and 4 features.", + self.assertEqual("Input data with 10 instances and 4 features.", self.widget.input_label.text()) - self.assertEqual("No preprocessor on input.", self.widget.preprocessor_label.text()) + self.assertEqual("No template data on input.", + self.widget.template_label.text()) self.assertEqual("", self.widget.output_label.text()) - # send preprocessor - self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor) + # send template data + self.send_signal(self.widget.Inputs.template_data, self.disc_data) output = self.get_output(self.widget.Outputs.transformed_data) - self.assertIsInstance(output, Table) - self.assertEqual("Input data with 150 instances and 4 features.", + self.assertTableEqual(output, self.disc_data[::15]) + self.assertEqual("Input data with 10 instances and 4 features.", self.widget.input_label.text()) - self.assertEqual("Preprocessor Discretize() applied.", - self.widget.preprocessor_label.text()) + self.assertEqual("Template domain applied.", + self.widget.template_label.text()) self.assertEqual("Output data includes 4 features.", self.widget.output_label.text()) @@ -52,49 +56,63 @@ def test_output(self): output = self.get_output(self.widget.Outputs.transformed_data) self.assertIsNone(output) self.assertEqual("No data on input.", self.widget.input_label.text()) - self.assertEqual("Preprocessor Discretize() on input.", - self.widget.preprocessor_label.text()) + self.assertEqual("Template data includes 4 features.", + self.widget.template_label.text()) self.assertEqual("", self.widget.output_label.text()) - # remove preprocessor - self.send_signal(self.widget.Inputs.preprocessor, None) + # remove template data + self.send_signal(self.widget.Inputs.template_data, None) self.assertEqual("No data on input.", self.widget.input_label.text()) - self.assertEqual("No preprocessor on input.", - self.widget.preprocessor_label.text()) + self.assertEqual("No template data on input.", + self.widget.template_label.text()) self.assertEqual("", self.widget.output_label.text()) - def test_input_pca_preprocessor(self): + def assertTableEqual(self, table1, table2): + self.assertIs(table1.domain, table2.domain) + npt.assert_array_equal(table1.X, table2.X) + npt.assert_array_equal(table1.Y, table2.Y) + npt.assert_array_equal(table1.metas, table2.metas) + + def test_input_pca_output(self): owpca = self.create_widget(OWPCA) self.send_signal(owpca.Inputs.data, self.data, widget=owpca) owpca.components_spin.setValue(2) - pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca) - self.assertIsNotNone(pp, Preprocess) + pca_out = self.get_output(owpca.Outputs.transformed_data, widget=owpca) - self.send_signal(self.widget.Inputs.data, self.data) - self.send_signal(self.widget.Inputs.preprocessor, pp) + self.send_signal(self.widget.Inputs.data, self.data[::10]) + self.send_signal(self.widget.Inputs.template_data, pca_out) output = self.get_output(self.widget.Outputs.transformed_data) - self.assertIsInstance(output, Table) - self.assertEqual(output.X.shape, (len(self.data), 2)) + npt.assert_array_equal(pca_out.X[::10], output.X) - # test retain data functionality - self.widget.retain_all_data = True - self.widget.apply() + def test_retain_all_data(self): + data = Table("zoo") + cont_data = Continuize()(data) + self.send_signal(self.widget.Inputs.data, data) + self.send_signal(self.widget.Inputs.template_data, cont_data) + self.widget.controls.retain_all_data.click() output = self.get_output(self.widget.Outputs.transformed_data) self.assertIsInstance(output, Table) - self.assertEqual(output.X.shape, (len(self.data), 4)) - self.assertEqual(output.metas.shape, (len(self.data), 2)) + self.assertEqual(output.X.shape, (len(data), 16)) + self.assertEqual(output.metas.shape, (len(data), 38)) def test_error_transforming(self): - self.send_signal(self.widget.Inputs.data, self.data) - self.send_signal(self.widget.Inputs.preprocessor, Preprocess()) - self.assertTrue(self.widget.Error.pp_error.is_shown()) + data = self.data[::10] + data.transform = Mock(side_effect=Exception()) + self.send_signal(self.widget.Inputs.data, data) + self.send_signal(self.widget.Inputs.template_data, self.disc_data) + self.assertTrue(self.widget.Error.error.is_shown()) output = self.get_output(self.widget.Outputs.transformed_data) self.assertIsNone(output) self.send_signal(self.widget.Inputs.data, None) - self.assertFalse(self.widget.Error.pp_error.is_shown()) + self.assertFalse(self.widget.Error.error.is_shown()) def test_send_report(self): self.send_signal(self.widget.Inputs.data, self.data) self.widget.report_button.click() self.send_signal(self.widget.Inputs.data, None) self.widget.report_button.click() + + +if __name__ == "__main__": + import unittest + unittest.main() diff --git a/Orange/widgets/tests/base.py b/Orange/widgets/tests/base.py index 8d5ddc34bec..dd619f57328 100644 --- a/Orange/widgets/tests/base.py +++ b/Orange/widgets/tests/base.py @@ -31,7 +31,7 @@ ) from Orange.modelling import Fitter from Orange.preprocess import RemoveNaNColumns, Randomize, Continuize -from Orange.preprocess.preprocess import PreprocessorList, Preprocess +from Orange.preprocess.preprocess import PreprocessorList from Orange.regression.base_regression import ( LearnerRegression, ModelRegression ) @@ -1074,18 +1074,6 @@ def test_manual_move(self): self.assertEqual(len(self.widget.graph.scatterplot_item.data), nvalid) np.testing.assert_equal(self.widget.graph.selection, selection) - def test_output_preprocessor(self): - self.send_signal(self.widget.Inputs.data, self.data) - pp = self.get_output(self.widget.Outputs.preprocessor) - self.assertIsInstance(pp, Preprocess) - transformed = pp(self.data[::10]) - self.assertIsInstance(transformed, Table) - self.assertEqual(transformed.X.shape, (len(self.data) / 10, 2)) - output = self.get_output(self.widget.Outputs.annotated_data) - np.testing.assert_array_equal(transformed.X, output.metas[::10, :2]) - self.assertEqual([a.name for a in transformed.domain.attributes], - [m.name for m in output.domain.metas[:2]]) - class datasets: @staticmethod diff --git a/Orange/widgets/unsupervised/owpca.py b/Orange/widgets/unsupervised/owpca.py index 3eb232104bd..8023352d9fa 100644 --- a/Orange/widgets/unsupervised/owpca.py +++ b/Orange/widgets/unsupervised/owpca.py @@ -40,7 +40,6 @@ class Outputs: transformed_data = Output("Transformed data", Table) components = Output("Components", Table) pca = Output("PCA", PCA, dynamic=False) - preprocessor = Output("Preprocessor", preprocess.Preprocess) settingsHandler = settings.DomainContextHandler() @@ -265,7 +264,6 @@ def clear_outputs(self): self.Outputs.transformed_data.send(None) self.Outputs.components.send(None) self.Outputs.pca.send(self._pca_projector) - self.Outputs.preprocessor.send(None) def get_model(self): if self.rpca is None: @@ -426,7 +424,7 @@ def _update_axis(self): axis.setTicks([[(i, str(i+1)) for i in range(0, p, d)]]) def commit(self): - transformed = components = pp = None + transformed = components = None if self._pca is not None: if self._transformed is None: # Compute the full transform (MAX_COMPONENTS components) only once. @@ -450,13 +448,10 @@ def commit(self): metas=metas) components.name = 'components' - pp = preprocess.ApplyDomain(domain, "PCA") - self._pca_projector.component = self.ncomponents self.Outputs.transformed_data.send(transformed) self.Outputs.components.send(components) self.Outputs.pca.send(self._pca_projector) - self.Outputs.preprocessor.send(pp) def send_report(self): if self.data is None: diff --git a/Orange/widgets/unsupervised/owtsne.py b/Orange/widgets/unsupervised/owtsne.py index 50c9e8e621a..254c12555ad 100644 --- a/Orange/widgets/unsupervised/owtsne.py +++ b/Orange/widgets/unsupervised/owtsne.py @@ -14,7 +14,7 @@ from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.visualize.owscatterplotgraph import OWScatterPlotBase from Orange.widgets.visualize.utils.widget import OWDataProjectionWidget -from Orange.widgets.widget import Msg, Output +from Orange.widgets.widget import Msg class TSNERunner: @@ -86,9 +86,6 @@ class OWtSNE(OWDataProjectionWidget): #: Runtime state Running, Finished, Waiting, Paused = 1, 2, 3, 4 - class Outputs(OWDataProjectionWidget.Outputs): - preprocessor = Output("Preprocessor", preprocess.Preprocess) - class Error(OWDataProjectionWidget.Error): not_enough_rows = Msg("Input data needs at least 2 rows") constant_data = Msg("Input data is constant") @@ -370,10 +367,6 @@ def setup_plot(self): super().setup_plot() self.start() - def commit(self): - super().commit() - self.send_preprocessor() - def _get_projection_data(self): if self.data is None: return None @@ -389,12 +382,6 @@ def _get_projection_data(self): self.data.domain.metas + self.projection.domain.attributes) return data - def send_preprocessor(self): - prep = None - if self.data is not None and self.projection is not None: - prep = preprocess.ApplyDomain(self.projection.domain, self.projection.name) - self.Outputs.preprocessor.send(prep) - def clear(self): super().clear() self.__state = OWtSNE.Waiting diff --git a/Orange/widgets/unsupervised/tests/test_owpca.py b/Orange/widgets/unsupervised/tests/test_owpca.py index af8defc0ac1..aec73834b17 100644 --- a/Orange/widgets/unsupervised/tests/test_owpca.py +++ b/Orange/widgets/unsupervised/tests/test_owpca.py @@ -6,7 +6,7 @@ from Orange.data import Table, Domain, ContinuousVariable, TimeVariable from Orange.preprocess import preprocess -from Orange.preprocess.preprocess import Preprocess, Normalize +from Orange.preprocess.preprocess import Normalize from Orange.widgets.tests.base import WidgetTest from Orange.widgets.tests.utils import table_dense_sparse from Orange.widgets.unsupervised.owpca import OWPCA @@ -202,15 +202,3 @@ def test_do_not_mask_features(self): self.widget.set_data(data) ndata = Table("iris.tab") self.assertEqual(data.domain[0], ndata.domain[0]) - - def test_output_preprocessor(self): - self.send_signal(self.widget.Inputs.data, self.iris) - pp = self.get_output(self.widget.Outputs.preprocessor) - self.assertIsInstance(pp, Preprocess) - transformed_data = pp(self.iris[::10]) - self.assertIsInstance(transformed_data, Table) - self.assertEqual(transformed_data.X.shape, (15, 2)) - output = self.get_output(self.widget.Outputs.transformed_data) - np.testing.assert_array_equal(transformed_data.X, output.X[::10]) - self.assertEqual([a.name for a in transformed_data.domain.attributes], - [m.name for m in output.domain.attributes]) diff --git a/Orange/widgets/unsupervised/tests/test_owtsne.py b/Orange/widgets/unsupervised/tests/test_owtsne.py index bf270383777..479c5d2c8e9 100644 --- a/Orange/widgets/unsupervised/tests/test_owtsne.py +++ b/Orange/widgets/unsupervised/tests/test_owtsne.py @@ -3,7 +3,7 @@ import numpy as np from Orange.data import DiscreteVariable, ContinuousVariable, Domain, Table -from Orange.preprocess import Preprocess, Normalize +from Orange.preprocess import Normalize from Orange.projection.manifold import TSNE from Orange.widgets.tests.base import ( WidgetTest, WidgetOutputsTestMixin, ProjectionWidgetTestMixin @@ -111,28 +111,6 @@ def test_attr_models(self): self.assertNotIn(var, controls.attr_size.model()) self.assertIn(var, controls.attr_shape.model()) - def test_output_preprocessor(self): - # To test the validity of the preprocessor, we'll have to actually - # compute the projections - self.restore_mocked_functions() - - self.send_signal(self.widget.Inputs.data, self.data) - self.wait_until_stop_blocking(wait=20000) - output_data = self.get_output(self.widget.Outputs.annotated_data) - - # We send the same data to the widget, we expect the point locations to - # be fairly close to their original ones - pp = self.get_output(self.widget.Outputs.preprocessor) - self.assertIsInstance(pp, Preprocess) - - transformed_data = pp(self.data) - self.assertIsInstance(transformed_data, Table) - self.assertEqual(transformed_data.X.shape, (len(self.data), 2)) - np.testing.assert_allclose(transformed_data.X, output_data.metas[:, :2], - rtol=1, atol=3) - self.assertEqual([a.name for a in transformed_data.domain.attributes], - [m.name for m in output_data.domain.metas[:2]]) - def test_multiscale_changed(self): self.assertFalse(self.widget.controls.multiscale.isChecked()) self.assertTrue(self.widget.perplexity_spin.isEnabled()) diff --git a/Orange/widgets/visualize/utils/widget.py b/Orange/widgets/visualize/utils/widget.py index 56dac68dd25..72cb6dc952f 100644 --- a/Orange/widgets/visualize/utils/widget.py +++ b/Orange/widgets/visualize/utils/widget.py @@ -10,7 +10,6 @@ ) from Orange.data.util import get_unique_names, array_equal from Orange.data.sql.table import SqlTable -from Orange.preprocess.preprocess import Preprocess, ApplyDomain from Orange.statistics.util import bincount from Orange.widgets import gui, report @@ -624,7 +623,6 @@ class OWAnchorProjectionWidget(OWDataProjectionWidget): class Outputs(OWDataProjectionWidget.Outputs): components = Output("Components", Table) - preprocessor = Output("Preprocessor", Preprocess) class Error(OWDataProjectionWidget.Error): sparse_data = Msg("Sparse data is not supported") @@ -702,7 +700,6 @@ def _get_projection_data(self): def commit(self): super().commit() self.send_components() - self.send_preprocessor() def send_components(self): components = None @@ -721,12 +718,6 @@ def _send_components_metas(self): variable_names = [a.name for a in self.projection.domain.attributes] return np.array(variable_names, dtype=object)[:, None] - def send_preprocessor(self): - prep = None - if self.data is not None and self.projection is not None: - prep = ApplyDomain(self.projection.domain, self.projection.name) - self.Outputs.preprocessor.send(prep) - def clear(self): super().clear() self.projector = self.projection = None