forked from biolab/orange3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathannotated_data.py
More file actions
129 lines (106 loc) · 4.36 KB
/
annotated_data.py
File metadata and controls
129 lines (106 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from typing import Union
import numpy as np
from orangewidget.utils.signals import LazyValue
from Orange.data import Domain, DiscreteVariable, Table
from Orange.data.util import get_unique_names
# Signal name constant; not referenced by the functions visible in this
# module -- presumably used by widgets that output annotated data (confirm).
ANNOTATED_DATA_SIGNAL_NAME = "Data"
# Default name of the annotation column added by the functions below.
ANNOTATED_DATA_FEATURE_NAME = "Selected"
def add_columns(domain, attributes=(), class_vars=(), metas=()):
    """Build a new domain that extends ``domain`` with additional variables.

    Parameters
    ----------
    domain : Domain
        source domain
    attributes
        iterable of variables appended after the source domain's attributes
    class_vars
        iterable of variables appended after the source domain's class_vars
    metas
        iterable of variables appended after the source domain's metas

    Returns
    -------
    Domain
        a newly constructed domain holding the source variables followed by
        the added ones in each of the three parts
    """
    return Domain(
        domain.attributes + tuple(attributes),
        domain.class_vars + tuple(class_vars),
        domain.metas + tuple(metas),
    )
def domain_with_annotation_column(
        data: Union[Table, Domain],
        values=("No", "Yes"),
        var_name=ANNOTATED_DATA_FEATURE_NAME):
    """Return a domain extended with a discrete annotation variable.

    Parameters
    ----------
    data : Table or Domain
        a domain, or an object whose ``domain`` attribute is used
    values
        values of the new discrete variable
    var_name : str
        proposed variable name; it is passed through ``get_unique_names``
        together with the domain before the variable is created

    Returns
    -------
    (Domain, DiscreteVariable)
        the extended domain and the newly created variable
    """
    if isinstance(data, Domain):
        domain = data
    else:
        domain = data.domain

    unique_name = get_unique_names(domain, var_name)
    var = DiscreteVariable(unique_name, values)

    # The new variable becomes the class when the domain has none;
    # otherwise it is appended to the metas.
    if domain.class_vars:
        class_vars = domain.class_vars
        metas = domain.metas + (var, )
    else:
        class_vars = domain.class_vars + (var, )
        metas = domain.metas
    return Domain(domain.attributes, class_vars, metas), var
def _table_with_annotation_column(data, values, column_data, var_name):
    """Return ``data`` transformed to a domain with an extra annotation column.

    The column is built by ``domain_with_annotation_column`` and filled with
    ``column_data``. It is stored as the class when ``data`` has no class
    variables and as a meta otherwise.
    """
    domain, var = domain_with_annotation_column(data, values, var_name)
    annotation_is_class = not data.domain.class_vars

    # A flat vector is assigned to the class column, a single-column 2d
    # array to the meta column.
    if annotation_is_class:
        new_shape = (len(data), )
    else:
        new_shape = (len(data), 1)
    column_data = column_data.reshape(new_shape)

    table = data.transform(domain)
    # Unlock only the part of the table that receives the new column.
    with table.unlocked(table.Y if annotation_is_class else table.metas):
        table[:, var] = column_data
    return table
def create_annotated_table(data, selected_indices):
    """
    Return data extended with a flag column. The flag tells whether a data
    instance is selected (Yes) or not (No), as given by selected_indices.

    :param data: Table (or None, in which case None is returned)
    :param selected_indices: list or ndarray of selected rows; None means
        that no instance is selected
    :return: Table
    """
    if data is None:
        return None

    # One row per instance; 0 stands for "No", 1 for "Yes".
    flags = np.zeros((len(data), 1))
    if selected_indices is not None:
        flags[selected_indices] = 1

    return _table_with_annotation_column(
        data,
        values=("No", "Yes"),
        column_data=flags,
        var_name=ANNOTATED_DATA_FEATURE_NAME)
def lazy_annotated_table(data, selected_indices):
    """Return a ``LazyValue`` that builds the annotated table on demand.

    The length and the domain of the future table are passed to the lazy
    value up front; the table itself is produced by
    ``create_annotated_table`` only when the value is computed.
    """
    domain, _ = domain_with_annotation_column(data)

    def compute():
        return create_annotated_table(data, selected_indices)

    return LazyValue[Table](compute, length=len(data), domain=domain)
def create_groups_table(data, selection,
                        include_unselected=True,
                        var_name=ANNOTATED_DATA_FEATURE_NAME,
                        values=None):
    """Return ``data`` with a column telling the group of each instance.

    Parameters
    ----------
    data : Table or None
        source data
    selection : np.ndarray
        one entry per instance: 0 marks an unselected instance, a positive
        number ``k`` puts the instance into the k-th group
    include_unselected : bool
        if true, all instances are kept and the unselected ones are put
        into an additional group; otherwise only selected instances are kept
    var_name : str
        proposed name of the new column
    values : list of str, optional
        group labels; when None they are generated by ``group_values``

    Returns
    -------
    Table or None
        the annotated table, or None when ``data`` is None
    """
    if data is None:
        return None
    values, max_sel = group_values(selection, include_unselected, values)
    if include_unselected:
        # Place Unselected instances in the "last group", so that the group
        # colors and scatter diagram marker colors will match
        mask = (selection != 0)
        # Work on a copy so that the caller's array is left untouched.
        selection = selection.copy()
        selection[mask] = selection[mask] - 1
        selection[~mask] = max_sel
    else:
        selected = np.flatnonzero(selection)
        data = data[selected]
        # Group numbers are 1-based; value indices are 0-based.
        selection = selection[selected] - 1
    return _table_with_annotation_column(data, values, selection, var_name)
def lazy_groups_table(data, selection, include_unselected=True,
                      var_name=ANNOTATED_DATA_FEATURE_NAME, values=None):
    """Return a ``LazyValue`` that builds the groups table on demand.

    The length and the domain of the future table are computed up front:
    the length is the number of all instances when unselected instances are
    included, and the number of non-zero entries of ``selection`` otherwise.
    The table itself is produced by ``create_groups_table`` only when the
    value is computed.
    """
    if include_unselected:
        length = len(data)
    else:
        length = np.sum(selection != 0)

    # Resolve the labels once; the same labels are used for the advertised
    # domain and for the table created later.
    labels, _ = group_values(selection, include_unselected, values)
    domain, _ = domain_with_annotation_column(data, labels, var_name)

    def compute():
        return create_groups_table(
            data, selection, include_unselected, var_name, labels)

    return LazyValue[Table](compute, length=length, domain=domain)
def group_values(selection, include_unselected, values):
    """Return the group labels and the largest group number in ``selection``.

    Parameters
    ----------
    selection : np.ndarray
        group number of each instance
    include_unselected : bool
        if true, generated labels end with an additional "Unselected" label
    values : list of str or None
        labels supplied by the caller; they are returned as they are.
        When None, labels "G1", "G2", ... are generated, one for each group
        number up to the largest one.

    Returns
    -------
    (list of str, int)
        the labels and the largest value in ``selection`` (0 if it is empty)
    """
    # np.max raises ValueError on an empty array; an empty selection simply
    # contains no groups.
    max_sel = np.max(selection) if np.size(selection) else 0
    if values is None:
        values = ["G{}".format(i + 1) for i in range(max_sel)]
        # Only generated labels are extended; a caller-supplied list must
        # not be mutated.
        if include_unselected:
            values.append("Unselected")
    return values, max_sel