|
11 | 11 | ContinuousVariable, |
12 | 12 | ) |
13 | 13 |
|
14 | | -__all__ = ['table_from_frame'] |
| 14 | +__all__ = ['table_from_frame', 'table_to_frame'] |
15 | 15 |
|
16 | 16 |
|
17 | 17 | def table_from_frame(df, *, force_nominal=False): |
@@ -76,3 +76,52 @@ def _is_datetime(s): |
76 | 76 | np.column_stack(X) if X else np.empty((df.shape[0], 0)), |
77 | 77 | None, |
78 | 78 | np.column_stack(M) if M else None) |
| 79 | + |
| 80 | + |
def table_to_frame(tab, include_metas=False):
    """
    Convert Orange.data.Table to pandas.DataFrame

    Parameters
    ----------
    tab : Table
        Table to convert. The function reads ``tab.domain`` (``attributes``,
        ``class_vars``, ``metas``, ``variables``) and the arrays ``tab.X``,
        ``tab.Y`` and ``tab.metas``.
    include_metas : bool, optional (default=False)
        If True, the columns described by ``tab.domain.metas`` are appended
        after the columns listed in ``tab.domain.variables``. If False, meta
        columns are neither converted nor returned.

    Returns
    -------
    pandas.DataFrame
        One column per variable, named after the variable and ordered as in
        ``tab.domain.variables`` (followed by metas when requested):

        - discrete variables become ``pandas.Categorical`` columns, with
          missing values kept as missing;
        - time variables become datetime columns (values are interpreted as
          seconds since the Unix epoch);
        - continuous variables become ``float`` columns, or ``int`` columns
          when the variable declares zero decimals and every value is finite;
        - string variables are copied as they are.

    Raises
    ------
    ValueError
        If a variable is neither discrete, time, continuous nor string.
    """
    def _column_to_series(col, vals):
        # Convert one column of raw values to a (name, pandas data) pair.
        if col.is_discrete:
            # Code -1 is the sentinel ``Categorical.from_codes`` uses for
            # missing values, so NaN entries stay missing.
            codes = pd.Series(vals).fillna(-1).astype(int)
            data = pd.Categorical.from_codes(
                codes=codes, categories=col.values, ordered=col.ordered)
        elif col.is_time:
            # Tested before ``is_continuous`` on purpose; presumably time
            # variables also report ``is_continuous`` -- TODO confirm.
            data = pd.Series(pd.to_datetime(vals, unit='s'))
        elif col.is_continuous:
            data = pd.Series(vals).astype(float)
            # NaN and +/-inf cannot be stored in an integer column, so only
            # switch to int when every value is finite.
            if col.number_of_decimals == 0 and np.isfinite(data).all():
                data = data.astype(int)
        elif col.is_string:
            data = pd.Series(vals)
        else:
            raise ValueError(
                "Cannot convert variable '{}' of type {} to a pandas column"
                .format(col.name, type(col).__name__))
        return col.name, data

    def _columns_to_series(cols, vals):
        # Column i of the 2-D array ``vals`` belongs to ``cols[i]``.
        return [_column_to_series(col, vals[:, i])
                for i, col in enumerate(cols)]

    domain = tab.domain
    named_columns = []
    if domain.attributes:
        named_columns += _columns_to_series(domain.attributes, tab.X)
    if domain.class_vars:
        # Y may be one-dimensional; force one column per class variable.
        y_values = tab.Y.reshape(tab.Y.shape[0], len(domain.class_vars))
        named_columns += _columns_to_series(domain.class_vars, y_values)

    ordered_vars = list(domain.variables)
    if include_metas and domain.metas:
        named_columns += _columns_to_series(domain.metas, tab.metas)
        ordered_vars += list(domain.metas)

    # Select by name so the column order follows the domain rather than the
    # iteration order of the intermediate dict.
    column_order = [var.name for var in ordered_vars]
    return pd.DataFrame(dict(named_columns))[column_order]
0 commit comments