Skip to content

Commit 1a4ac2d

Browse files
committed
Table (transpose): Use heuristic to guess data type of attributes of attributes
1 parent 0e4b7cf commit 1a4ac2d

File tree

2 files changed

+57
-13
lines changed

2 files changed

+57
-13
lines changed

Orange/data/table.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import bottleneck as bn
1414
from scipy import sparse as sp
1515

16+
from Orange import data # import for io.py
1617
from Orange.data import (
1718
_contingency, _valuecount,
1819
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance,
@@ -1453,6 +1454,7 @@ def transpose(cls, table, feature_names_column="",
14531454
feature names are mapped
14541455
:return: Table - transposed table
14551456
"""
1457+
14561458
self = cls()
14571459
n_cols, self.n_rows = table.X.shape
14581460
old_domain = table.attributes.get("old_domain")
@@ -1521,10 +1523,22 @@ def get_table_from_attributes_of_attributes(_vars, _dtype=float):
15211523
names = chain.from_iterable(list(attr.attributes)
15221524
for attr in table.domain.attributes)
15231525
names = sorted(set(names) - {var.name for var in class_vars})
1526+
1527+
def guessed_var(i, var_name):
# Helper for the transposed metas: re-type column ``i`` of the object
# array ``M`` and return the variable that describes it as ``var_name``.
# ``M``, ``_metas`` and ``data`` are not parameters; they are resolved from
# the enclosing scopes at call time, so this must only be called after
# ``M`` has been built.
1528+
orig_values = M[:, i]
# Let the io helper inspect the raw column; it returns a
# (value_map, values, var_type) triple.
# NOTE(review): the guessing heuristic itself lives in data.io and is not
# visible here.
1529+
value_map, values, var_type = data.io.guess_data_type(orig_values)
# Turn the guess into a concrete variable plus the values to store.
# NOTE(review): the meaning of the positional ``{}``, ``_metas`` and
# ``None`` arguments is defined by sanitize_variable -- confirm against
# its signature.
1530+
values, variable = data.io.sanitize_variable(
1531+
value_map, values, orig_values, var_type,
1532+
{}, _metas, None, var_name)
# Write the returned values back into column ``i``; this mutates the
# enclosing ``M`` in place.
1533+
M[:, i] = values
1534+
return variable
1535+
15241536
_metas = [StringVariable(n) for n in names]
15251537
if old_domain:
15261538
_metas = [m for m in old_domain.metas if m.name != meta_attr_name]
15271539
M = get_table_from_attributes_of_attributes(_metas, _dtype=object)
1540+
if not old_domain:
1541+
_metas = [guessed_var(i, m.name) for i, m in enumerate(_metas)]
15281542
if _metas:
15291543
self.metas = np.hstack((self.metas, M))
15301544
metas.extend(_metas)

Orange/tests/test_table.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,7 +2449,7 @@ def test_transpose_class_and_metas(self):
24492449
# original should not change
24502450
self.assertDictEqual(data.domain.attributes[0].attributes, {})
24512451

2452-
def test_transpose_attributes_of_attributes(self):
2452+
def test_transpose_attributes_of_attributes_discrete(self):
24532453
attrs = [ContinuousVariable("c1"), ContinuousVariable("c2")]
24542454
attrs[0].attributes = {"attr1": "a", "attr2": "aa"}
24552455
attrs[1].attributes = {"attr1": "b", "attr2": "bb"}
@@ -2458,11 +2458,12 @@ def test_transpose_attributes_of_attributes(self):
24582458

24592459
att = [ContinuousVariable("Feature 1"), ContinuousVariable("Feature 2"),
24602460
ContinuousVariable("Feature 3"), ContinuousVariable("Feature 4")]
2461-
metas = [StringVariable("Feature name"), StringVariable("attr1"),
2462-
StringVariable("attr2")]
2461+
metas = [StringVariable("Feature name"),
2462+
DiscreteVariable("attr1", values=("a", "b")),
2463+
DiscreteVariable("attr2", values=("aa", "bb"))]
24632464
domain = Domain(att, metas=metas)
2464-
result = Table(domain, np.arange(8).reshape((4, 2)).T,
2465-
metas=np.array([["c1", "a", "aa"], ["c2", "b", "bb"]]))
2465+
M = np.array([["c1", 0, 0], ["c2", 1, 1]], dtype=object)
2466+
result = Table(domain, np.arange(8).reshape((4, 2)).T, metas=M)
24662467

24672468
# transpose and compare
24682469
self._compare_tables(result, Table.transpose(data))
@@ -2475,6 +2476,33 @@ def test_transpose_attributes_of_attributes(self):
24752476
self.assertDictEqual(data.domain.attributes[0].attributes,
24762477
{"attr1": "a", "attr2": "aa"})
24772478

2479+
def test_transpose_attributes_of_attributes_continuous(self):
# Transposing a table whose attributes carry numeric-looking string
# attributes ("1.100", "2.200", ...) is expected to yield
# ContinuousVariable metas "attr1"/"attr2" holding the parsed floats.
2480+
attrs = [ContinuousVariable("c1"), ContinuousVariable("c2")]
2481+
attrs[0].attributes = {"attr1": "1.100", "attr2": "1.300"}
2482+
attrs[1].attributes = {"attr1": "2.200", "attr2": "2.300"}
2483+
domain = Domain(attrs)
2484+
data = Table(domain, np.arange(8).reshape((4, 2)))
2485+
# Expected transposed table: four "Feature N" columns, with the original
# column names and their attributes moved into the metas.
# NOTE(review): the visible hunk of _compare_tables checks
# ``table1.metas == table1.metas``, so the meta values below may not
# actually be compared against the transposed result -- confirm.
2486+
att = [ContinuousVariable("Feature 1"), ContinuousVariable("Feature 2"),
2487+
ContinuousVariable("Feature 3"), ContinuousVariable("Feature 4")]
2488+
metas = [StringVariable("Feature name"), ContinuousVariable("attr1"),
2489+
ContinuousVariable("attr2")]
2490+
domain = Domain(att, metas=metas)
2491+
result = Table(domain, np.arange(8).reshape((4, 2)).T,
2492+
metas=np.array([["c1", 1.1, 1.3],
2493+
["c2", 2.2, 2.3]], dtype=object))
2494+
2495+
# transpose and compare
2496+
self._compare_tables(result, Table.transpose(data))
2497+
2498+
# transpose of transpose
# Passing "Feature name" as the second argument should restore the
# original table, including the string-valued attributes.
2499+
t = Table.transpose(Table.transpose(data), "Feature name")
2500+
self._compare_tables(data, t)
2501+
2502+
# original should not change
2503+
self.assertDictEqual(data.domain.attributes[0].attributes,
2504+
{"attr1": "1.100", "attr2": "1.300"})
2505+
24782506
def test_transpose_attributes_of_attributes_missings(self):
24792507
attrs = [ContinuousVariable("c1"), ContinuousVariable("c2")]
24802508
attrs[0].attributes = {"attr1": "a", "attr2": "aa"}
@@ -2484,11 +2512,12 @@ def test_transpose_attributes_of_attributes_missings(self):
24842512

24852513
att = [ContinuousVariable("Feature 1"), ContinuousVariable("Feature 2"),
24862514
ContinuousVariable("Feature 3"), ContinuousVariable("Feature 4")]
2487-
metas = [StringVariable("Feature name"), StringVariable("attr1"),
2488-
StringVariable("attr2")]
2515+
metas = [StringVariable("Feature name"),
2516+
DiscreteVariable("attr1", values=("a", "b")),
2517+
DiscreteVariable("attr2", values=("aa",))]
24892518
domain = Domain(att, metas=metas)
2490-
result = Table(domain, np.arange(8).reshape((4, 2)).T,
2491-
metas=np.array([["c1", "a", "aa"], ["c2", "b", ""]]))
2519+
M = np.array([["c1", 0, 0], ["c2", 1, np.nan]], dtype=object)
2520+
result = Table(domain, np.arange(8).reshape((4, 2)).T, metas=M)
24922521

24932522
# transpose and compare
24942523
self._compare_tables(result, Table.transpose(data))
@@ -2517,10 +2546,11 @@ def test_transpose_class_metas_attributes(self):
25172546
att[1].attributes = {"cls": "2.000", "m1": "bb", "m2": "bbb"}
25182547
att[2].attributes = {"cls": "3.000", "m1": "cc", "m2": "ccc"}
25192548
att[3].attributes = {"cls": "4.000", "m1": "dd", "m2": "ddd"}
2520-
metas = [StringVariable("Feature name"), StringVariable("attr1"),
2521-
StringVariable("attr2")]
2549+
metas = [StringVariable("Feature name"),
2550+
DiscreteVariable("attr1", values=("a1", "b1")),
2551+
DiscreteVariable("attr2", values=("aa1", "bb1"))]
25222552
domain = Domain(att, metas=metas)
2523-
M = np.array([["c1", "a1", "aa1"], ["c2", "b1", "bb1"]])
2553+
M = np.array([["c1", 0, 0], ["c2", 1, 1]], dtype=object)
25242554
result = Table(domain, np.arange(8).reshape((4, 2)).T, metas=M)
25252555

25262556
# transpose and compare
@@ -2546,7 +2576,7 @@ def _compare_tables(self, table1, table2):
25462576
self.assertEqual(table1.n_rows, table2.n_rows)
25472577
np.testing.assert_array_equal(table1.X, table2.X)
25482578
np.testing.assert_array_equal(table1.Y, table2.Y)
2549-
np.testing.assert_array_equal(table1.metas, table2.metas)
2579+
self.assertTrue((table1.metas == table1.metas).all())
25502580
np.testing.assert_array_equal(table1.W, table2.W)
25512581

25522582
self.assertEqual([(type(x), x.name, x.attributes)

0 commit comments

Comments
 (0)