diff --git a/Orange/data/table.py b/Orange/data/table.py index 4b7c9b5f5fe..4cc8b785378 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -20,7 +20,7 @@ ) from Orange.data.util import SharedComputeValue, vstack, hstack, \ assure_array_dense, assure_array_sparse, \ - assure_column_dense, assure_column_sparse + assure_column_dense, assure_column_sparse, get_unique_names_duplicates from Orange.statistics.util import bincount, countnans, contingency, \ stats as fast_stats, sparse_has_implicit_zeros, sparse_count_implicit_zeros, \ sparse_implicit_zero_weights @@ -1604,8 +1604,8 @@ def _compute_contingency(self, col_vars=None, row_var=None): return contingencies, unknown_rows @classmethod - def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name", - feature_name="Feature"): + def transpose(cls, table, feature_names_column="", + meta_attr_name="Feature name", feature_name="Feature"): """ Transpose the table. @@ -1614,6 +1614,7 @@ def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name" use for feature names :param meta_attr_name: str - name of new meta attribute into which feature names are mapped + :param feature_name: str - default feature name prefix :return: Table - transposed table """ @@ -1625,10 +1626,14 @@ def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name" # - classes and metas to attributes of attributes # - arbitrary meta column to feature names self.X = table.X.T - attributes = [ContinuousVariable(str(row[feature_names_column])) - for row in table] if feature_names_column else \ - [ContinuousVariable(feature_name + " " + str(i + 1).zfill( - int(np.ceil(np.log10(n_cols))))) for i in range(n_cols)] + if feature_names_column: + names = [str(row[feature_names_column]) for row in table] + names = get_unique_names_duplicates(names) + attributes = [ContinuousVariable(name) for name in names] + else: + places = int(np.ceil(np.log10(n_cols))) + attributes = 
def get_unique_names_duplicates(proposed: list) -> list:
    """
    Return a list of unique names built from `proposed`.

    Names that occur only once are kept unchanged. Every occurrence of a
    name that appears more than once gets an index in parentheses
    appended, e.g. ``["a", "b", "a"]`` becomes ``["a (1)", "b", "a (2)"]``.

    An index is skipped when the resulting name is already taken, either
    by another proposed name or by a previously generated one, so the
    result never contains duplicates: ``["a", "a", "a (1)"]`` becomes
    ``["a (2)", "a (3)", "a (1)"]``.

    :param proposed: list of str - proposed names, possibly with duplicates
    :return: list of str - unique names, in the same order as `proposed`
    """
    occurrences = Counter(proposed)
    # Every proposed name is reserved up front: a generated "name (i)" must
    # not clash with a literal "name (i)" appearing anywhere in the input.
    taken = set(proposed)
    last_index = Counter()
    names = []
    for name in proposed:
        if occurrences[name] > 1:
            # Advance to the next index whose decorated name is still free.
            while True:
                last_index[name] += 1
                candidate = f"{name} ({last_index[name]})"
                if candidate not in taken:
                    break
            taken.add(candidate)
            name = candidate
        names.append(name)
    return names
augmented with indices.") + class Error(OWWidget.Error): value_error = Msg("{}") @@ -53,10 +58,10 @@ def __init__(self): placeholderText="Type a prefix ...", toolTip="Custom feature name") edit.editingFinished.connect(self._apply_editing) - self.meta_button = gui.appendRadioButton(box, "From meta attribute:") + self.meta_button = gui.appendRadioButton(box, "From variable:") self.feature_model = DomainModel( - order=DomainModel.METAS, valid_types=StringVariable, - alphabetical=True) + valid_types=(ContinuousVariable, StringVariable), + alphabetical=False) self.feature_combo = gui.comboBox( gui.indentedBox(box, gui.checkButtonOffsetHint(button)), self, "feature_names_column", contentsLength=12, @@ -75,7 +80,7 @@ def _apply_editing(self): self.apply() def _feature_combo_changed(self): - self.feature_type = self.FROM_META_ATTR + self.feature_type = self.FROM_VAR self.apply() @Inputs.data @@ -95,7 +100,7 @@ def set_controls(self): self.meta_button.setEnabled(bool(self.feature_model)) if self.feature_model: self.feature_names_column = self.feature_model[0] - self.feature_type = self.FROM_META_ATTR + self.feature_type = self.FROM_VAR else: self.feature_names_column = None @@ -104,10 +109,15 @@ def apply(self): transposed = None if self.data: try: + variable = self.feature_type == self.FROM_VAR and \ + self.feature_names_column transposed = Table.transpose( - self.data, - self.feature_type == self.FROM_META_ATTR and self.feature_names_column, + self.data, variable, feature_name=self.feature_name or self.DEFAULT_PREFIX) + if variable: + names = self.data.get_column_view(variable)[0] + if len(names) != len(set(names)): + self.Warning.duplicate_names(variable) except ValueError as e: self.Error.value_error(e) self.Outputs.data.send(transposed) @@ -116,7 +126,7 @@ def send_report(self): if self.feature_type == self.GENERIC: names = self.feature_name or self.DEFAULT_PREFIX else: - names = "from meta attribute" + names = "from variable" if self.feature_names_column: names += " 
'{}'".format(self.feature_names_column.name) self.report_items("", [("Feature names", names)]) @@ -125,4 +135,4 @@ def send_report(self): if __name__ == "__main__": # pragma: no cover - WidgetPreview(OWTranspose).run(Table("zoo")) + WidgetPreview(OWTranspose).run(Table("iris")) diff --git a/Orange/widgets/data/tests/test_owtranspose.py b/Orange/widgets/data/tests/test_owtranspose.py index ae4ab96d80d..16ebce6d4b1 100644 --- a/Orange/widgets/data/tests/test_owtranspose.py +++ b/Orange/widgets/data/tests/test_owtranspose.py @@ -5,7 +5,7 @@ import numpy as np -from Orange.data import Table, Domain +from Orange.data import Table from Orange.widgets.data.owtranspose import OWTranspose from Orange.widgets.tests.base import WidgetTest @@ -32,18 +32,11 @@ def test_feature_type(self): widget = self.widget data = Table("conferences.tab") metas = data.domain.metas - domain = data.domain - # Put one non-string column to metas, so widget must skip it - domain2 = Domain(domain.attributes[:-1], - domain.class_vars, - (domain.attributes[0], ) + domain.metas) - data2 = Table(domain2, data) - widget.feature_type = widget.GENERIC - self.send_signal(widget.Inputs.data, data2) + self.send_signal(widget.Inputs.data, data) - # By default, the widget switches from GENERIC to the first string meta - self.assertEqual(widget.feature_type, widget.FROM_META_ATTR) + # By default, the widget switches from GENERIC to the first meta + self.assertEqual(widget.feature_type, widget.FROM_VAR) self.assertIs(widget.feature_names_column, metas[0]) output = self.get_output(widget.Outputs.data) self.assertListEqual( @@ -54,9 +47,9 @@ def test_feature_type(self): widget.feature_names_column = metas[1] widget.apply() output = self.get_output(widget.Outputs.data) - self.assertListEqual( - [a.name for a in output.domain.attributes], - [metas[1].to_val(m) for m in data.metas[:, 1]]) + self.assertTrue( + all(a.name.startswith(metas[1].to_val(m)) + for a, m in zip(output.domain.attributes, data.metas[:, 1]))) # 
Switch to generic self.assertEqual(widget.DEFAULT_PREFIX, "Feature") @@ -84,13 +77,13 @@ def test_feature_type(self): def test_send_report(self): widget = self.widget - widget.feature_type = widget.FROM_META_ATTR + widget.feature_type = widget.FROM_VAR widget.report_button.click() widget.feature_type = widget.GENERIC widget.report_button.click() self.send_signal(widget.Inputs.data, self.zoo) - widget.feature_type = widget.FROM_META_ATTR + widget.feature_type = widget.FROM_VAR widget.report_button.click() widget.feature_type = widget.GENERIC widget.report_button.click() @@ -110,7 +103,7 @@ def test_gui_behaviour(self): # Data with metas: default type is meta, radio enabled self.send_signal(widget.Inputs.data, self.zoo) self.assertTrue(widget.meta_button.isEnabled()) - self.assertEqual(widget.feature_type, widget.FROM_META_ATTR) + self.assertEqual(widget.feature_type, widget.FROM_VAR) self.assertIs(widget.feature_names_column, widget.feature_model[0]) self.assertTrue(widget.apply.called) @@ -123,7 +116,7 @@ def test_gui_behaviour(self): # Changing combo changes the radio button to meta widget.apply.reset_mock() widget.feature_combo.activated.emit(0) - self.assertEqual(widget.feature_type, widget.FROM_META_ATTR) + self.assertEqual(widget.feature_type, widget.FROM_VAR) self.assertTrue(widget.apply.called) def test_all_whitespace(self): @@ -141,5 +134,19 @@ def test_error(self): self.send_signal(widget.Inputs.data, self.zoo) self.assertFalse(widget.Error.value_error.is_shown()) + def test_feature_names_from_cont_vars(self): + table = Table("iris") + self.send_signal(self.widget.Inputs.data, table) + self.assertListEqual(self.widget.feature_model[:], + list(table.domain.attributes)) + self.widget.feature_combo.activated.emit(3) + output = self.get_output(self.widget.Outputs.data) + self.assertListEqual([f.name for f in output.domain.attributes[:10]], + ["0.2 (1)", "0.2 (2)", "0.2 (3)", "0.2 (4)", + "0.2 (5)", "0.4 (1)", "0.3 (1)", "0.2 (6)", + "0.2 (7)", "0.1 (1)"]) + 
self.assertTrue(self.widget.Warning.duplicate_names.is_shown()) + + if __name__ == "__main__": unittest.main()