Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from Orange.data.util import SharedComputeValue, vstack, hstack, \
assure_array_dense, assure_array_sparse, \
assure_column_dense, assure_column_sparse
assure_column_dense, assure_column_sparse, get_unique_names_duplicates
from Orange.statistics.util import bincount, countnans, contingency, \
stats as fast_stats, sparse_has_implicit_zeros, sparse_count_implicit_zeros, \
sparse_implicit_zero_weights
Expand Down Expand Up @@ -1604,8 +1604,8 @@ def _compute_contingency(self, col_vars=None, row_var=None):
return contingencies, unknown_rows

@classmethod
def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name",
feature_name="Feature"):
def transpose(cls, table, feature_names_column="",
meta_attr_name="Feature name", feature_name="Feature"):
"""
Transpose the table.

Expand All @@ -1614,6 +1614,7 @@ def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name"
use for feature names
:param meta_attr_name: str - name of new meta attribute into which
feature names are mapped
:param feature_name: str - default feature name prefix
:return: Table - transposed table
"""

Expand All @@ -1625,10 +1626,14 @@ def transpose(cls, table, feature_names_column="", meta_attr_name="Feature name"
# - classes and metas to attributes of attributes
# - arbitrary meta column to feature names
self.X = table.X.T
attributes = [ContinuousVariable(str(row[feature_names_column]))
for row in table] if feature_names_column else \
[ContinuousVariable(feature_name + " " + str(i + 1).zfill(
int(np.ceil(np.log10(n_cols))))) for i in range(n_cols)]
if feature_names_column:
names = [str(row[feature_names_column]) for row in table]
names = get_unique_names_duplicates(names)
attributes = [ContinuousVariable(name) for name in names]
else:
places = int(np.ceil(np.log10(n_cols)))
attributes = [ContinuousVariable(f"{feature_name} {i:0{places}}")
for i in range(1, n_cols + 1)]
if old_domain is not None and feature_names_column:
for i, _ in enumerate(attributes):
if attributes[i].name in old_domain:
Expand Down
10 changes: 9 additions & 1 deletion Orange/data/tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest

from Orange.data import Domain, ContinuousVariable
from Orange.data.util import get_unique_names
from Orange.data.util import get_unique_names, get_unique_names_duplicates


class TestGetUniqueNames(unittest.TestCase):
Expand Down Expand Up @@ -46,6 +46,14 @@ def test_get_unique_names_with_domain(self):
self.assertEqual(get_unique_names(domain, "foo"), "foo (1)")
self.assertEqual(get_unique_names(domain, "baz"), "baz (4)")

def test_get_unique_names_from_duplicates(self):
    """Unique names pass through; repeated names get running indices."""
    cases = (
        # No repetition: the list comes back as given.
        (["foo", "bar", "baz"],
         ["foo", "bar", "baz"]),
        # Only the repeated name is decorated, in order of appearance.
        (["foo", "bar", "baz", "bar"],
         ["foo", "bar (1)", "baz", "bar (2)"]),
    )
    for proposed, expected in cases:
        self.assertEqual(get_unique_names_duplicates(proposed), expected)


if __name__ == "__main__":
unittest.main()
17 changes: 17 additions & 0 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Data-manipulation utilities.
"""
import re
from collections import Counter
from itertools import chain

import numpy as np
Expand Down Expand Up @@ -196,3 +197,19 @@ def get_unique_names(names, proposed):
return proposed
max_index = max(map(max, indicess), default=0) + 1
return [f"{name} ({max_index})" for name in proposed]


def get_unique_names_duplicates(proposed: list) -> list:
    """
    Return a list of unique names built from `proposed`.

    Names that occur only once are kept unchanged. Every occurrence of a
    name that appears more than once gets an index in parentheses appended,
    in order of appearance: `["a", "b", "a"]` becomes
    `["a (1)", "b", "a (2)"]`.

    Indices whose decorated form is already present in the result (either
    as one of the proposed names or as a previously generated name) are
    skipped, so the returned names are guaranteed to be distinct:
    `["x", "x", "x (1)"]` becomes `["x (2)", "x (3)", "x (1)"]`.

    :param proposed: list of str - proposed names, possibly with duplicates
    :return: list of str - names of the same length and order, all distinct
    """
    duplicated = {name for name, cnt in Counter(proposed).items() if cnt > 1}
    # Every name that is (or may end up) in the result; generated names
    # must not collide with any of these.
    taken = set(proposed)
    # Next index to try for each duplicated name.
    next_index = dict.fromkeys(duplicated, 1)
    names = []
    for name in proposed:
        if name in duplicated:
            index = next_index[name]
            candidate = f"{name} ({index})"
            while candidate in taken:
                index += 1
                candidate = f"{name} ({index})"
            next_index[name] = index + 1
            taken.add(candidate)
            name = candidate
        names.append(name)
    return names
9 changes: 9 additions & 0 deletions Orange/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2751,6 +2751,15 @@ def test_transpose_class_metas_attributes(self):
self.assertDictEqual(data.domain.attributes[0].attributes,
{"attr1": "a1", "attr2": "aa1"})

def test_transpose_duplicate_feature_names(self):
    """Transposing by a column with repeated values gives unique names."""
    iris = Table("iris")
    original = iris.domain
    # Move the fourth attribute into metas so it can serve as the
    # feature-names column; its values repeat across rows.
    reshaped = Domain(original.attributes[:3],
                      original.class_vars,
                      original.attributes[3:])
    transposed = Table.transpose(iris.transform(reshaped),
                                 original.attributes[3].name)
    feature_names = [var.name for var in transposed.domain.attributes]
    self.assertEqual(len(feature_names), len(set(feature_names)))

def test_transpose(self):
zoo = Table("zoo")
t1 = Table.transpose(zoo)
Expand Down
32 changes: 21 additions & 11 deletions Orange/widgets/data/owtranspose.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from Orange.data import Table, StringVariable
from Orange.data import Table, ContinuousVariable, StringVariable
from Orange.widgets.settings import (Setting, ContextSetting,
DomainContextHandler)
from Orange.widgets.utils.itemmodels import DomainModel
Expand All @@ -21,7 +21,7 @@ class Inputs:
class Outputs:
data = Output("Data", Table, dynamic=False)

GENERIC, FROM_META_ATTR = range(2)
GENERIC, FROM_VAR = range(2)

resizing_enabled = False
want_main_area = False
Expand All @@ -34,6 +34,11 @@ class Outputs:
feature_names_column = ContextSetting(None)
auto_apply = Setting(True)

class Warning(OWWidget.Warning):
duplicate_names = Msg("Values are not unique.\nTo avoid multiple "
"features with the same name, values \nof "
"'{}' have been augmented with indices.")

class Error(OWWidget.Error):
value_error = Msg("{}")

Expand All @@ -53,10 +58,10 @@ def __init__(self):
placeholderText="Type a prefix ...", toolTip="Custom feature name")
edit.editingFinished.connect(self._apply_editing)

self.meta_button = gui.appendRadioButton(box, "From meta attribute:")
self.meta_button = gui.appendRadioButton(box, "From variable:")
self.feature_model = DomainModel(
order=DomainModel.METAS, valid_types=StringVariable,
alphabetical=True)
valid_types=(ContinuousVariable, StringVariable),
alphabetical=False)
self.feature_combo = gui.comboBox(
gui.indentedBox(box, gui.checkButtonOffsetHint(button)), self,
"feature_names_column", contentsLength=12,
Expand All @@ -75,7 +80,7 @@ def _apply_editing(self):
self.apply()

def _feature_combo_changed(self):
self.feature_type = self.FROM_META_ATTR
self.feature_type = self.FROM_VAR
self.apply()

@Inputs.data
Expand All @@ -95,7 +100,7 @@ def set_controls(self):
self.meta_button.setEnabled(bool(self.feature_model))
if self.feature_model:
self.feature_names_column = self.feature_model[0]
self.feature_type = self.FROM_META_ATTR
self.feature_type = self.FROM_VAR
else:
self.feature_names_column = None

Expand All @@ -104,10 +109,15 @@ def apply(self):
transposed = None
if self.data:
try:
variable = self.feature_type == self.FROM_VAR and \
self.feature_names_column
transposed = Table.transpose(
self.data,
self.feature_type == self.FROM_META_ATTR and self.feature_names_column,
self.data, variable,
feature_name=self.feature_name or self.DEFAULT_PREFIX)
if variable:
names = self.data.get_column_view(variable)[0]
if len(names) != len(set(names)):
self.Warning.duplicate_names(variable)
except ValueError as e:
self.Error.value_error(e)
self.Outputs.data.send(transposed)
Expand All @@ -116,7 +126,7 @@ def send_report(self):
if self.feature_type == self.GENERIC:
names = self.feature_name or self.DEFAULT_PREFIX
else:
names = "from meta attribute"
names = "from variable"
if self.feature_names_column:
names += " '{}'".format(self.feature_names_column.name)
self.report_items("", [("Feature names", names)])
Expand All @@ -125,4 +135,4 @@ def send_report(self):


if __name__ == "__main__": # pragma: no cover
WidgetPreview(OWTranspose).run(Table("zoo"))
WidgetPreview(OWTranspose).run(Table("iris"))
43 changes: 25 additions & 18 deletions Orange/widgets/data/tests/test_owtranspose.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np

from Orange.data import Table, Domain
from Orange.data import Table
from Orange.widgets.data.owtranspose import OWTranspose
from Orange.widgets.tests.base import WidgetTest

Expand All @@ -32,18 +32,11 @@ def test_feature_type(self):
widget = self.widget
data = Table("conferences.tab")
metas = data.domain.metas
domain = data.domain
# Put one non-string column to metas, so widget must skip it
domain2 = Domain(domain.attributes[:-1],
domain.class_vars,
(domain.attributes[0], ) + domain.metas)
data2 = Table(domain2, data)

widget.feature_type = widget.GENERIC
self.send_signal(widget.Inputs.data, data2)
self.send_signal(widget.Inputs.data, data)

# By default, the widget switches from GENERIC to the first string meta
self.assertEqual(widget.feature_type, widget.FROM_META_ATTR)
# By default, the widget switches from GENERIC to the first meta
self.assertEqual(widget.feature_type, widget.FROM_VAR)
self.assertIs(widget.feature_names_column, metas[0])
output = self.get_output(widget.Outputs.data)
self.assertListEqual(
Expand All @@ -54,9 +47,9 @@ def test_feature_type(self):
widget.feature_names_column = metas[1]
widget.apply()
output = self.get_output(widget.Outputs.data)
self.assertListEqual(
[a.name for a in output.domain.attributes],
[metas[1].to_val(m) for m in data.metas[:, 1]])
self.assertTrue(
all(a.name.startswith(metas[1].to_val(m))
for a, m in zip(output.domain.attributes, data.metas[:, 1])))

# Switch to generic
self.assertEqual(widget.DEFAULT_PREFIX, "Feature")
Expand Down Expand Up @@ -84,13 +77,13 @@ def test_feature_type(self):

def test_send_report(self):
widget = self.widget
widget.feature_type = widget.FROM_META_ATTR
widget.feature_type = widget.FROM_VAR
widget.report_button.click()
widget.feature_type = widget.GENERIC
widget.report_button.click()

self.send_signal(widget.Inputs.data, self.zoo)
widget.feature_type = widget.FROM_META_ATTR
widget.feature_type = widget.FROM_VAR
widget.report_button.click()
widget.feature_type = widget.GENERIC
widget.report_button.click()
Expand All @@ -110,7 +103,7 @@ def test_gui_behaviour(self):
# Data with metas: default type is meta, radio enabled
self.send_signal(widget.Inputs.data, self.zoo)
self.assertTrue(widget.meta_button.isEnabled())
self.assertEqual(widget.feature_type, widget.FROM_META_ATTR)
self.assertEqual(widget.feature_type, widget.FROM_VAR)
self.assertIs(widget.feature_names_column, widget.feature_model[0])
self.assertTrue(widget.apply.called)

Expand All @@ -123,7 +116,7 @@ def test_gui_behaviour(self):
# Changing combo changes the radio button to meta
widget.apply.reset_mock()
widget.feature_combo.activated.emit(0)
self.assertEqual(widget.feature_type, widget.FROM_META_ATTR)
self.assertEqual(widget.feature_type, widget.FROM_VAR)
self.assertTrue(widget.apply.called)

def test_all_whitespace(self):
Expand All @@ -141,5 +134,19 @@ def test_error(self):
self.send_signal(widget.Inputs.data, self.zoo)
self.assertFalse(widget.Error.value_error.is_shown())

def test_feature_names_from_cont_vars(self):
    """Continuous variables can name features; duplicates raise a warning."""
    widget = self.widget
    iris = Table("iris")
    self.send_signal(widget.Inputs.data, iris)
    self.assertListEqual(widget.feature_model[:],
                         list(iris.domain.attributes))

    # Pick the fourth variable in the combo as the source of feature names.
    widget.feature_combo.activated.emit(3)
    output = self.get_output(widget.Outputs.data)
    expected_names = [
        "0.2 (1)", "0.2 (2)", "0.2 (3)", "0.2 (4)", "0.2 (5)",
        "0.4 (1)", "0.3 (1)", "0.2 (6)", "0.2 (7)", "0.1 (1)",
    ]
    first_ten = [var.name for var in output.domain.attributes[:10]]
    self.assertListEqual(first_ten, expected_names)
    self.assertTrue(widget.Warning.duplicate_names.is_shown())


if __name__ == "__main__":
unittest.main()