Skip to content

Commit 92dbfef

Browse files
committed
Table: Add transpose method
1 parent 050004c commit 92dbfef

File tree

2 files changed

+505
-3
lines changed

2 files changed

+505
-3
lines changed

Orange/data/table.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515

1616
from Orange.data import (
1717
_contingency, _valuecount,
18-
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance
18+
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance,
19+
ContinuousVariable, DiscreteVariable, MISSING_VALUES
1920
)
2021
from Orange.data.util import SharedComputeValue
2122
from Orange.statistics.util import bincount, countnans, contingency, stats as fast_stats
@@ -1430,6 +1431,101 @@ def _compute_contingency(self, col_vars=None, row_var=None):
14301431

14311432
return contingencies, unknown_rows
14321433

1434+
@classmethod
def transpose(cls, table, feature_names_column="",
              meta_attr_name="Feature name"):
    """
    Transpose the table.

    Rows of `table` become attributes (columns) of the result and the
    attributes of `table` become its rows. Class and meta values of
    `table` are stored in the `attributes` dictionaries of the new
    variables; conversely, entries found in the `attributes` dictionaries
    of the variables of `table` are turned into class and meta columns.
    The domain of `table` is remembered under the ``"old_domain"`` key of
    the result's `attributes`, and is consulted when a table carrying
    that key is transposed again.

    :param table: Table - table to transpose
    :param feature_names_column: str - name of (String) meta attribute to
        use for feature names; when empty, names of the form
        ``"Feature <n>"`` (zero-padded to a common width) are generated
    :param meta_attr_name: str - name of new meta attribute into which
        feature names are mapped
    :return: Table - transposed table
    """
    self = cls()
    # Shape of the *new* table: old rows become columns and vice versa.
    n_cols, self.n_rows = table.X.shape
    old_domain = table.attributes.get("old_domain")

    # attributes
    # - classes and metas to attributes of attributes
    # - arbitrary meta column to feature names
    self.X = table.X.T
    if feature_names_column:
        attributes = [ContinuousVariable(str(row[feature_names_column]))
                      for row in table]
    else:
        # Pad to the number of digits of the largest index. A log10-based
        # width is one digit short when n_cols is an exact power of ten.
        width = len(str(n_cols))
        attributes = [
            ContinuousVariable("Feature " + str(i + 1).zfill(width))
            for i in range(n_cols)]
    if old_domain and feature_names_column:
        # Restore the type and the attributes of variables that are
        # present in the remembered domain.
        for i, attribute in enumerate(attributes):
            if attribute.name in old_domain:
                var = old_domain[attribute.name]
                attr = ContinuousVariable(var.name) if var.is_continuous \
                    else DiscreteVariable(var.name, var.values)
                attr.attributes = var.attributes.copy()
                attributes[i] = attr

    def set_attributes_of_attributes(_vars, _table):
        # Store the value of each variable in `_vars` for the j-th row of
        # `_table` in the `attributes` dict of the j-th new attribute.
        for i, variable in enumerate(_vars):
            if variable.name == feature_names_column:
                continue
            for j, row in enumerate(_table):
                if np.isscalar(row):
                    # 1-d array (e.g. a single class column)
                    value = variable.repr_val(row)
                elif isinstance(row[i], str):
                    value = row[i]
                else:
                    value = variable.repr_val(row[i])

                if value not in MISSING_VALUES:
                    attributes[j].attributes[variable.name] = value

    set_attributes_of_attributes(table.domain.class_vars, table.Y)
    set_attributes_of_attributes(table.domain.metas, table.metas)

    # weights
    self.W = np.empty((self.n_rows, 0))

    def get_table_from_attributes_of_attributes(_vars, _dtype=float):
        # Build an (n_rows x len(_vars)) array from the `attributes`
        # dicts of the attributes of the original table.
        T = np.empty((self.n_rows, len(_vars)), dtype=_dtype)
        for i, _attr in enumerate(table.domain.attributes):
            for j, _var in enumerate(_vars):
                val = str(_attr.attributes.get(_var.name, ""))
                if not _var.is_string:
                    val = np.nan if val in MISSING_VALUES else \
                        _var.values.index(val) if \
                        _var.is_discrete else float(val)
                T[i, j] = val
        return T

    # class_vars - attributes of attributes to class - from old domain
    class_vars = []
    if old_domain:
        class_vars = old_domain.class_vars
    self.Y = get_table_from_attributes_of_attributes(class_vars)

    # metas
    # - feature names and attributes of attributes to metas
    self.metas, metas = np.empty((self.n_rows, 0), dtype=object), []
    if meta_attr_name not in [m.name for m in table.domain.metas]:
        # reshape keeps the column 2-d even when there are no attributes,
        # so that it can still be stacked with further meta columns
        self.metas = np.array(
            [[a.name] for a in table.domain.attributes],
            dtype=object).reshape(-1, 1)
        metas.append(StringVariable(meta_attr_name))

    if old_domain:
        _metas = [m for m in old_domain.metas if m.name != meta_attr_name]
    else:
        # Without a remembered domain, every key found in the attributes
        # of attributes (except class names) becomes a string meta.
        names = chain.from_iterable(list(attr.attributes)
                                    for attr in table.domain.attributes)
        names = sorted(set(names) - {var.name for var in class_vars})
        _metas = [StringVariable(n) for n in names]
    if _metas:
        M = get_table_from_attributes_of_attributes(_metas, _dtype=object)
        self.metas = np.hstack((self.metas, M))
        metas.extend(_metas)

    self.domain = Domain(attributes, class_vars, metas)
    cls._init_ids(self)
    self.attributes = table.attributes.copy()
    # Remember the source domain so that transposing back can restore it.
    self.attributes["old_domain"] = table.domain
    return self
1528+
14331529

14341530
def _check_arrays(*arrays, dtype=None):
14351531
checked = []

0 commit comments

Comments
 (0)