Skip to content

Commit 9a15fa0

Browse files
committed
Transform: Values of continuous variables as feature names
1 parent 31a9c08 commit 9a15fa0

File tree

6 files changed

+93
-37
lines changed

6 files changed

+93
-37
lines changed

Orange/data/table.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
)
2121
from Orange.data.util import SharedComputeValue, vstack, hstack, \
2222
assure_array_dense, assure_array_sparse, \
23-
assure_column_dense, assure_column_sparse
23+
assure_column_dense, assure_column_sparse, get_unique_names_duplicates
2424
from Orange.statistics.util import bincount, countnans, contingency, \
2525
stats as fast_stats, sparse_has_implicit_zeros, sparse_count_implicit_zeros, \
2626
sparse_implicit_zero_weights
@@ -1604,8 +1604,8 @@ def _compute_contingency(self, col_vars=None, row_var=None):
16041604
return contingencies, unknown_rows
16051605

16061606
@classmethod
1607-
def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name",
1608-
feature_name="Feature"):
1607+
def transpose(cls, table, feature_names_column="",
1608+
meta_attr_name="Feature name", feature_name="Feature"):
16091609
"""
16101610
Transpose the table.
16111611
@@ -1614,6 +1614,7 @@ def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name"
16141614
use for feature names
16151615
:param meta_attr_name: str - name of new meta attribute into which
16161616
feature names are mapped
1617+
:param feature_name: str - default feature name prefix
16171618
:return: Table - transposed table
16181619
"""
16191620

@@ -1625,10 +1626,14 @@ def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name"
16251626
# - classes and metas to attributes of attributes
16261627
# - arbitrary meta column to feature names
16271628
self.X = table.X.T
1628-
attributes = [ContinuousVariable(str(row[feature_names_column]))
1629-
for row in table] if feature_names_column else \
1630-
[ContinuousVariable(feature_name + " " + str(i + 1).zfill(
1631-
int(np.ceil(np.log10(n_cols))))) for i in range(n_cols)]
1629+
if feature_names_column:
1630+
names = [str(row[feature_names_column]) for row in table]
1631+
names = get_unique_names_duplicates(names)
1632+
attributes = [ContinuousVariable(name) for name in names]
1633+
else:
1634+
places = int(np.ceil(np.log10(n_cols)))
1635+
attributes = [ContinuousVariable(f"{feature_name} {i:0{places}}")
1636+
for i in range(1, n_cols + 1)]
16321637
if old_domain is not None and feature_names_column:
16331638
for i, _ in enumerate(attributes):
16341639
if attributes[i].name in old_domain:

Orange/data/tests/test_util.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22

33
from Orange.data import Domain, ContinuousVariable
4-
from Orange.data.util import get_unique_names
4+
from Orange.data.util import get_unique_names, get_unique_names_duplicates
55

66

77
class TestGetUniqueNames(unittest.TestCase):
@@ -46,6 +46,14 @@ def test_get_unique_names_with_domain(self):
4646
self.assertEqual(get_unique_names(domain, "foo"), "foo (1)")
4747
self.assertEqual(get_unique_names(domain, "baz"), "baz (4)")
4848

49+
def test_get_unique_names_from_duplicates(self):
50+
self.assertEqual(
51+
get_unique_names_duplicates(["foo", "bar", "baz"]),
52+
["foo", "bar", "baz"])
53+
self.assertEqual(
54+
get_unique_names_duplicates(["foo", "bar", "baz", "bar"]),
55+
["foo", "bar (1)", "baz", "bar (2)"])
56+
4957

5058
if __name__ == "__main__":
5159
unittest.main()

Orange/data/util.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Data-manipulation utilities.
33
"""
44
import re
5+
from collections import Counter
56
from itertools import chain
67

78
import numpy as np
@@ -196,3 +197,19 @@ def get_unique_names(names, proposed):
196197
return proposed
197198
max_index = max(map(max, indicess), default=0) + 1
198199
return [f"{name} ({max_index})" for name in proposed]
200+
201+
202+
def get_unique_names_duplicates(proposed: list) -> list:
    """
    Return a list of unique names built from `proposed`.

    Names that occur only once are kept unchanged. Every occurrence of a
    name that appears more than once gets an index in parentheses appended,
    counting from 1 in order of appearance, e.g.
    ``["foo", "bar", "baz", "bar"]`` -> ``["foo", "bar (1)", "baz", "bar (2)"]``.

    An index is skipped when the resulting name would clash with a name that
    is already used in the result (for instance, when the input already
    contains ``"bar (1)"``), so the returned names are always distinct.

    :param proposed: list of proposed names, possibly containing duplicates
    :return: list of the same length and order with all names unique
    """
    occurrences = Counter(proposed)
    # Names kept verbatim in the output; generated names must avoid them.
    taken = {name for name, cnt in occurrences.items() if cnt == 1}
    last_index = Counter()
    names = []
    for name in proposed:
        if occurrences[name] > 1:
            # Advance to the next index whose decorated name is still free.
            while True:
                last_index[name] += 1
                candidate = f"{name} ({last_index[name]})"
                if candidate not in taken:
                    break
            taken.add(candidate)
            name = candidate
        names.append(name)
    return names

Orange/tests/test_table.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2751,6 +2751,15 @@ def test_transpose_class_metas_attributes(self):
27512751
self.assertDictEqual(data.domain.attributes[0].attributes,
27522752
{"attr1": "a1", "attr2": "aa1"})
27532753

2754+
def test_transpose_duplicate_feature_names(self):
2755+
table = Table("iris")
2756+
domain = table.domain
2757+
attrs, metas = domain.attributes[:3], domain.attributes[3:]
2758+
table = table.transform(Domain(attrs, domain.class_vars, metas))
2759+
transposed = Table.transpose(table, domain.attributes[3].name)
2760+
names = [f.name for f in transposed.domain.attributes]
2761+
self.assertEqual(len(names), len(set(names)))
2762+
27542763
def test_transpose(self):
27552764
zoo = Table("zoo")
27562765
t1 = Table.transpose(zoo)

Orange/widgets/data/owtranspose.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from Orange.data import Table, StringVariable
1+
from Orange.data import Table, ContinuousVariable, StringVariable
22
from Orange.widgets.settings import (Setting, ContextSetting,
33
DomainContextHandler)
44
from Orange.widgets.utils.itemmodels import DomainModel
@@ -21,7 +21,7 @@ class Inputs:
2121
class Outputs:
2222
data = Output("Data", Table, dynamic=False)
2323

24-
GENERIC, FROM_META_ATTR = range(2)
24+
GENERIC, FROM_VAR = range(2)
2525

2626
resizing_enabled = False
2727
want_main_area = False
@@ -34,6 +34,11 @@ class Outputs:
3434
feature_names_column = ContextSetting(None)
3535
auto_apply = Setting(True)
3636

37+
class Warning(OWWidget.Warning):
38+
duplicate_names = Msg("Values are not unique.\nTo avoid multiple "
39+
"features with the same name, values \nof "
40+
"'{}' have been augmented with indices.")
41+
3742
class Error(OWWidget.Error):
3843
value_error = Msg("{}")
3944

@@ -53,10 +58,10 @@ def __init__(self):
5358
placeholderText="Type a prefix ...", toolTip="Custom feature name")
5459
edit.editingFinished.connect(self._apply_editing)
5560

56-
self.meta_button = gui.appendRadioButton(box, "From meta attribute:")
61+
self.meta_button = gui.appendRadioButton(box, "From variable:")
5762
self.feature_model = DomainModel(
58-
order=DomainModel.METAS, valid_types=StringVariable,
59-
alphabetical=True)
63+
valid_types=(ContinuousVariable, StringVariable),
64+
alphabetical=False)
6065
self.feature_combo = gui.comboBox(
6166
gui.indentedBox(box, gui.checkButtonOffsetHint(button)), self,
6267
"feature_names_column", contentsLength=12,
@@ -75,7 +80,7 @@ def _apply_editing(self):
7580
self.apply()
7681

7782
def _feature_combo_changed(self):
78-
self.feature_type = self.FROM_META_ATTR
83+
self.feature_type = self.FROM_VAR
7984
self.apply()
8085

8186
@Inputs.data
@@ -95,7 +100,7 @@ def set_controls(self):
95100
self.meta_button.setEnabled(bool(self.feature_model))
96101
if self.feature_model:
97102
self.feature_names_column = self.feature_model[0]
98-
self.feature_type = self.FROM_META_ATTR
103+
self.feature_type = self.FROM_VAR
99104
else:
100105
self.feature_names_column = None
101106

@@ -104,10 +109,15 @@ def apply(self):
104109
transposed = None
105110
if self.data:
106111
try:
112+
variable = self.feature_type == self.FROM_VAR and \
113+
self.feature_names_column
107114
transposed = Table.transpose(
108-
self.data,
109-
self.feature_type == self.FROM_META_ATTR and self.feature_names_column,
115+
self.data, variable,
110116
feature_name=self.feature_name or self.DEFAULT_PREFIX)
117+
if variable:
118+
names = self.data.get_column_view(variable)[0]
119+
if len(names) != len(set(names)):
120+
self.Warning.duplicate_names(variable)
111121
except ValueError as e:
112122
self.Error.value_error(e)
113123
self.Outputs.data.send(transposed)
@@ -116,7 +126,7 @@ def send_report(self):
116126
if self.feature_type == self.GENERIC:
117127
names = self.feature_name or self.DEFAULT_PREFIX
118128
else:
119-
names = "from meta attribute"
129+
names = "from variable"
120130
if self.feature_names_column:
121131
names += " '{}'".format(self.feature_names_column.name)
122132
self.report_items("", [("Feature names", names)])
@@ -125,4 +135,4 @@ def send_report(self):
125135

126136

127137
if __name__ == "__main__": # pragma: no cover
128-
WidgetPreview(OWTranspose).run(Table("zoo"))
138+
WidgetPreview(OWTranspose).run(Table("iris"))

Orange/widgets/data/tests/test_owtranspose.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from Orange.data import Table, Domain
8+
from Orange.data import Table
99
from Orange.widgets.data.owtranspose import OWTranspose
1010
from Orange.widgets.tests.base import WidgetTest
1111

@@ -32,18 +32,11 @@ def test_feature_type(self):
3232
widget = self.widget
3333
data = Table("conferences.tab")
3434
metas = data.domain.metas
35-
domain = data.domain
36-
# Put one non-string column to metas, so widget must skip it
37-
domain2 = Domain(domain.attributes[:-1],
38-
domain.class_vars,
39-
(domain.attributes[0], ) + domain.metas)
40-
data2 = Table(domain2, data)
41-
4235
widget.feature_type = widget.GENERIC
43-
self.send_signal(widget.Inputs.data, data2)
36+
self.send_signal(widget.Inputs.data, data)
4437

45-
# By default, the widget switches from GENERIC to the first string meta
46-
self.assertEqual(widget.feature_type, widget.FROM_META_ATTR)
38+
# By default, the widget switches from GENERIC to the first meta
39+
self.assertEqual(widget.feature_type, widget.FROM_VAR)
4740
self.assertIs(widget.feature_names_column, metas[0])
4841
output = self.get_output(widget.Outputs.data)
4942
self.assertListEqual(
@@ -54,9 +47,9 @@ def test_feature_type(self):
5447
widget.feature_names_column = metas[1]
5548
widget.apply()
5649
output = self.get_output(widget.Outputs.data)
57-
self.assertListEqual(
58-
[a.name for a in output.domain.attributes],
59-
[metas[1].to_val(m) for m in data.metas[:, 1]])
50+
self.assertTrue(
51+
all(a.name.startswith(metas[1].to_val(m))
52+
for a, m in zip(output.domain.attributes, data.metas[:, 1])))
6053

6154
# Switch to generic
6255
self.assertEqual(widget.DEFAULT_PREFIX, "Feature")
@@ -84,13 +77,13 @@ def test_feature_type(self):
8477

8578
def test_send_report(self):
8679
widget = self.widget
87-
widget.feature_type = widget.FROM_META_ATTR
80+
widget.feature_type = widget.FROM_VAR
8881
widget.report_button.click()
8982
widget.feature_type = widget.GENERIC
9083
widget.report_button.click()
9184

9285
self.send_signal(widget.Inputs.data, self.zoo)
93-
widget.feature_type = widget.FROM_META_ATTR
86+
widget.feature_type = widget.FROM_VAR
9487
widget.report_button.click()
9588
widget.feature_type = widget.GENERIC
9689
widget.report_button.click()
@@ -110,7 +103,7 @@ def test_gui_behaviour(self):
110103
# Data with metas: default type is meta, radio enabled
111104
self.send_signal(widget.Inputs.data, self.zoo)
112105
self.assertTrue(widget.meta_button.isEnabled())
113-
self.assertEqual(widget.feature_type, widget.FROM_META_ATTR)
106+
self.assertEqual(widget.feature_type, widget.FROM_VAR)
114107
self.assertIs(widget.feature_names_column, widget.feature_model[0])
115108
self.assertTrue(widget.apply.called)
116109

@@ -123,7 +116,7 @@ def test_gui_behaviour(self):
123116
# Changing combo changes the radio button to meta
124117
widget.apply.reset_mock()
125118
widget.feature_combo.activated.emit(0)
126-
self.assertEqual(widget.feature_type, widget.FROM_META_ATTR)
119+
self.assertEqual(widget.feature_type, widget.FROM_VAR)
127120
self.assertTrue(widget.apply.called)
128121

129122
def test_all_whitespace(self):
@@ -141,5 +134,19 @@ def test_error(self):
141134
self.send_signal(widget.Inputs.data, self.zoo)
142135
self.assertFalse(widget.Error.value_error.is_shown())
143136

137+
def test_feature_names_from_cont_vars(self):
138+
table = Table("iris")
139+
self.send_signal(self.widget.Inputs.data, table)
140+
self.assertListEqual(self.widget.feature_model[:],
141+
list(table.domain.attributes))
142+
self.widget.feature_combo.activated.emit(3)
143+
output = self.get_output(self.widget.Outputs.data)
144+
self.assertListEqual([f.name for f in output.domain.attributes[:10]],
145+
["0.2 (1)", "0.2 (2)", "0.2 (3)", "0.2 (4)",
146+
"0.2 (5)", "0.4 (1)", "0.3 (1)", "0.2 (6)",
147+
"0.2 (7)", "0.1 (1)"])
148+
self.assertTrue(self.widget.Warning.duplicate_names.is_shown())
149+
150+
144151
if __name__ == "__main__":
145152
unittest.main()

0 commit comments

Comments
 (0)