|
15 | 15 |
|
16 | 16 | from Orange.data import ( |
17 | 17 | _contingency, _valuecount, |
18 | | - Domain, Variable, Storage, StringVariable, Unknown, Value, Instance |
| 18 | + Domain, Variable, Storage, StringVariable, Unknown, Value, Instance, |
| 19 | + ContinuousVariable, DiscreteVariable, MISSING_VALUES |
19 | 20 | ) |
20 | 21 | from Orange.data.util import SharedComputeValue |
21 | 22 | from Orange.statistics.util import bincount, countnans, contingency, stats as fast_stats |
@@ -1432,6 +1433,101 @@ def _compute_contingency(self, col_vars=None, row_var=None): |
1432 | 1433 |
|
1433 | 1434 | return contingencies, unknown_rows |
1434 | 1435 |
|
@classmethod
def transpose(cls, table, feature_names_column="",
              meta_attr_name="Feature name"):
    """
    Transpose the table.

    Rows of `table` become attributes of the result and attributes of
    `table` become rows. Class and meta values of the original rows are
    stored in the `attributes` dictionaries of the new variables; the
    `attributes` dictionaries of the original variables are turned back
    into class and meta columns. The domain of `table` is stored under
    the key "old_domain" in the result's `attributes`, and an
    "old_domain" entry found on `table` is used to restore the original
    variables.

    :param table: Table - table to transpose
    :param feature_names_column: str - name of (String) meta attribute to
        use for feature names; when empty, names "Feature <n>" are
        generated, zero-padded to a common width
    :param meta_attr_name: str - name of new meta attribute into which
        feature names are mapped
    :return: Table - transposed table
    """
    self = cls()
    # Shape of the transposed data: one new column per original row.
    n_cols, self.n_rows = table.X.shape
    old_domain = table.attributes.get("old_domain")

    # attributes
    # - classes and metas to attributes of attributes
    # - arbitrary meta column to feature names
    self.X = table.X.T
    if feature_names_column:
        attributes = [ContinuousVariable(str(row[feature_names_column]))
                      for row in table]
    else:
        # Pad to the number of digits of the largest index. The former
        # ceil(log10(n_cols)) was one digit short for exact powers of
        # ten (e.g. 10 columns gave "Feature 1" ... "Feature 10").
        width = len(str(n_cols))
        attributes = [
            ContinuousVariable("Feature " + str(i + 1).zfill(width))
            for i in range(n_cols)]
    if old_domain and feature_names_column:
        # Re-create variables known to the remembered domain so that
        # their type, values and annotations are restored.
        for i, attribute in enumerate(attributes):
            if attribute.name in old_domain:
                var = old_domain[attribute.name]
                attr = ContinuousVariable(var.name) if var.is_continuous \
                    else DiscreteVariable(var.name, var.values)
                attr.attributes = var.attributes.copy()
                attributes[i] = attr

    def set_attributes_of_attributes(_vars, _table):
        # Store the value that row j has for each variable in `_vars`
        # in the `attributes` dictionary of the j-th new attribute.
        for i, variable in enumerate(_vars):
            if variable.name == feature_names_column:
                # This column already supplies the feature names.
                continue
            for j, row in enumerate(_table):
                # Rows are scalars when `_table` is one-dimensional;
                # str entries are used as they are.
                value = variable.repr_val(row) if np.isscalar(row) \
                    else row[i] if isinstance(row[i], str) \
                    else variable.repr_val(row[i])

                if value not in MISSING_VALUES:
                    attributes[j].attributes[variable.name] = value

    set_attributes_of_attributes(table.domain.class_vars, table.Y)
    set_attributes_of_attributes(table.domain.metas, table.metas)

    # weights
    self.W = np.empty((self.n_rows, 0))

    def get_table_from_attributes_of_attributes(_vars, _dtype=float):
        # Build an (n_rows x len(_vars)) array from the `attributes`
        # dictionaries of the original attributes, one row for each.
        T = np.empty((self.n_rows, len(_vars)), dtype=_dtype)
        for i, _attr in enumerate(table.domain.attributes):
            for j, _var in enumerate(_vars):
                val = str(_attr.attributes.get(_var.name, ""))
                if not _var.is_string:
                    # Discrete values are stored as indices into
                    # `_var.values`, other values as floats.
                    val = np.nan if val in MISSING_VALUES else \
                        _var.values.index(val) if \
                        _var.is_discrete else float(val)
                T[i, j] = val
        return T

    # class_vars - attributes of attributes to class - from old domain
    class_vars = []
    if old_domain:
        class_vars = old_domain.class_vars
    self.Y = get_table_from_attributes_of_attributes(class_vars)

    # metas
    # - feature names and attributes of attributes to metas
    self.metas, metas = np.empty((self.n_rows, 0), dtype=object), []
    if meta_attr_name not in [m.name for m in table.domain.metas]:
        self.metas = np.array([[a.name] for a in table.domain.attributes],
                              dtype=object)
        metas.append(StringVariable(meta_attr_name))

    if old_domain:
        _metas = [m for m in old_domain.metas if m.name != meta_attr_name]
    else:
        # Without a remembered domain, every annotation key that is not
        # a class variable becomes a string meta attribute.
        names = chain.from_iterable(list(attr.attributes)
                                    for attr in table.domain.attributes)
        names = sorted(set(names) - {var.name for var in class_vars})
        _metas = [StringVariable(n) for n in names]
    M = get_table_from_attributes_of_attributes(_metas, _dtype=object)
    if _metas:
        self.metas = np.hstack((self.metas, M))
        metas.extend(_metas)

    self.domain = Domain(attributes, class_vars, metas)
    cls._init_ids(self)
    self.attributes = table.attributes.copy()
    # Remember the source domain for use by a later transpose.
    self.attributes["old_domain"] = table.domain
    return self
| 1530 | + |
1435 | 1531 |
|
1436 | 1532 | def _check_arrays(*arrays, dtype=None): |
1437 | 1533 | checked = [] |
|
0 commit comments