Skip to content

Commit f3f5ca0

Browse files
authored
Merge pull request #1738 from VesnaT/transpose
[ENH] OWTranspose: Add a new widget
2 parents bf61805 + bc7b988 commit f3f5ca0

File tree

5 files changed

+702
-3
lines changed

5 files changed

+702
-3
lines changed

Orange/data/table.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515

1616
from Orange.data import (
1717
_contingency, _valuecount,
18-
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance
18+
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance,
19+
ContinuousVariable, DiscreteVariable, MISSING_VALUES
1920
)
2021
from Orange.data.util import SharedComputeValue
2122
from Orange.statistics.util import bincount, countnans, contingency, stats as fast_stats
@@ -1432,6 +1433,101 @@ def _compute_contingency(self, col_vars=None, row_var=None):
14321433

14331434
return contingencies, unknown_rows
14341435

1436+
@classmethod
def transpose(cls, table, feature_names_column="",
              meta_attr_name="Feature name"):
    """
    Transpose the table.

    Rows of `table` become attributes (columns) of the result and the
    attributes of `table` become its rows. Class and meta values of
    `table` are stored in the `attributes` dictionaries of the new
    variables; conversely, the `attributes` dictionaries of the old
    variables are turned into class and meta columns of the result.

    The domain of `table` is stored in the result under
    ``attributes["old_domain"]``. When `table` itself carries such an
    entry, it is used to restore the original variables (types, class
    variables and metas) instead of creating new ones.

    :param table: Table - table to transpose
    :param feature_names_column: str - name of (String) meta attribute to
        use for feature names; if empty, names "Feature 1", "Feature 2",
        ... (zero-padded to equal width) are generated
    :param meta_attr_name: str - name of new meta attribute into which
        feature names are mapped
    :return: Table - transposed table
    """
    self = cls()
    # `n_cols` is the number of columns of the *result*, i.e. the number
    # of rows of `table`; `self.n_rows` is the number of old attributes.
    n_cols, self.n_rows = table.X.shape
    old_domain = table.attributes.get("old_domain")

    # attributes
    # - classes and metas to attributes of attributes
    # - arbitrary meta column to feature names
    self.X = table.X.T
    if feature_names_column:
        attributes = [ContinuousVariable(str(row[feature_names_column]))
                      for row in table]
    else:
        # Pad to the number of digits of the largest index. The digit
        # count is taken from the decimal representation because
        # ceil(log10(n)) is one short whenever n is an exact power of
        # ten (e.g. 10 -> 1 digit), which left names unevenly padded.
        width = len(str(n_cols))
        attributes = [
            ContinuousVariable("Feature " + str(i + 1).zfill(width))
            for i in range(n_cols)]
    if old_domain and feature_names_column:
        # Restore variables known from the old domain, keeping their
        # type and their attributes dictionary.
        for i, attribute in enumerate(attributes):
            if attribute.name in old_domain:
                var = old_domain[attribute.name]
                attr = ContinuousVariable(var.name) if var.is_continuous \
                    else DiscreteVariable(var.name, var.values)
                attr.attributes = var.attributes.copy()
                attributes[i] = attr

    def set_attributes_of_attributes(_vars, _table):
        # Store the value of every variable in `_vars` for row j of
        # `_table` in the attributes dictionary of the j-th new attribute.
        for i, variable in enumerate(_vars):
            if variable.name == feature_names_column:
                # already used for the names of the new attributes
                continue
            for j, row in enumerate(_table):
                if np.isscalar(row):
                    # `_table` is one-dimensional: one value per row
                    value = variable.repr_val(row)
                elif isinstance(row[i], str):
                    value = row[i]
                else:
                    value = variable.repr_val(row[i])

                if value not in MISSING_VALUES:
                    attributes[j].attributes[variable.name] = value

    set_attributes_of_attributes(table.domain.class_vars, table.Y)
    set_attributes_of_attributes(table.domain.metas, table.metas)

    # weights
    # the result gets an empty weight array; weights are not carried over
    self.W = np.empty((self.n_rows, 0))

    def get_table_from_attributes_of_attributes(_vars, _dtype=float):
        # Build an (n_rows x len(_vars)) array from the attributes
        # dictionaries of the old attributes; absent entries become ""
        # for string variables and NaN otherwise.
        T = np.empty((self.n_rows, len(_vars)), dtype=_dtype)
        for i, _attr in enumerate(table.domain.attributes):
            for j, _var in enumerate(_vars):
                val = str(_attr.attributes.get(_var.name, ""))
                if not _var.is_string:
                    val = np.nan if val in MISSING_VALUES else \
                        _var.values.index(val) if \
                        _var.is_discrete else float(val)
                T[i, j] = val
        return T

    # class_vars - attributes of attributes to class - from old domain
    class_vars = []
    if old_domain:
        class_vars = old_domain.class_vars
    # two-dimensional, with zero columns when there are no class variables
    self.Y = get_table_from_attributes_of_attributes(class_vars)

    # metas
    # - feature names and attributes of attributes to metas
    self.metas = np.empty((self.n_rows, 0), dtype=object)
    metas = []
    if meta_attr_name not in [m.name for m in table.domain.metas]:
        # names of the old attributes become the first meta column
        self.metas = np.array([[a.name] for a in table.domain.attributes],
                              dtype=object)
        metas.append(StringVariable(meta_attr_name))

    # every key found in any attributes dictionary, except those that
    # were turned into class variables, becomes a string meta ...
    names = chain.from_iterable(list(attr.attributes)
                                for attr in table.domain.attributes)
    names = sorted(set(names) - {var.name for var in class_vars})
    _metas = [StringVariable(n) for n in names]
    if old_domain:
        # ... unless the old domain is known; then its metas are restored
        _metas = [m for m in old_domain.metas if m.name != meta_attr_name]
    M = get_table_from_attributes_of_attributes(_metas, _dtype=object)
    if _metas:
        self.metas = np.hstack((self.metas, M))
        metas.extend(_metas)

    self.domain = Domain(attributes, class_vars, metas)
    cls._init_ids(self)
    self.attributes = table.attributes.copy()
    # remember the domain so that a later transpose can restore it
    self.attributes["old_domain"] = table.domain
    return self
1530+
14351531

14361532
def _check_arrays(*arrays, dtype=None):
14371533
checked = []

0 commit comments

Comments
 (0)