Skip to content

Commit 69796a2

Browse files
committed
annotated_data: Add functions for lazy tables
1 parent 6ff6ebd commit 69796a2

File tree

2 files changed

+141
-13
lines changed

2 files changed

+141
-13
lines changed

Orange/widgets/utils/annotated_data.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
from typing import Union
2+
13
import numpy as np
2-
from Orange.data import Domain, DiscreteVariable
4+
5+
from orangewidget.utils.signals import LazyValue
6+
7+
from Orange.data import Domain, DiscreteVariable, Table
38
from Orange.data.util import get_unique_names
49

510
ANNOTATED_DATA_SIGNAL_NAME = "Data"
@@ -30,16 +35,26 @@ def add_columns(domain, attributes=(), class_vars=(), metas=()):
3035
return Domain(attributes, class_vars, metas)
3136

3237

38+
def domain_with_annotation_column(
        data: Union[Table, Domain],
        values=("No", "Yes"),
        var_name=ANNOTATED_DATA_FEATURE_NAME):
    """
    Build a domain extended with a discrete annotation variable.

    Args:
        data: a ``Domain``, or an object exposing one as ``data.domain``.
        values: values of the new discrete variable.
        var_name: proposed variable name; it is passed through
            ``get_unique_names`` together with the source domain.

    Returns:
        A ``(domain, variable)`` pair: the new domain and the added variable.
        The variable becomes a class variable when the source domain has no
        class variables; otherwise it is appended to the metas.
    """
    source = data.domain if not isinstance(data, Domain) else data
    annotation = DiscreteVariable(
        get_unique_names(source, var_name), values)
    extra = (annotation, )

    if source.class_vars:
        # The class slot is taken, so the annotation goes among the metas.
        extended = Domain(
            source.attributes, source.class_vars, source.metas + extra)
    else:
        extended = Domain(
            source.attributes, source.class_vars + extra, source.metas)
    return extended, annotation
50+
51+
3352
def _table_with_annotation_column(data, values, column_data, var_name):
34-
var = DiscreteVariable(get_unique_names(data.domain, var_name), values)
35-
class_vars, metas = data.domain.class_vars, data.domain.metas
53+
domain, var = domain_with_annotation_column(data, values, var_name)
3654
if not data.domain.class_vars:
37-
class_vars += (var, )
3855
column_data = column_data.reshape((len(data), ))
3956
else:
40-
metas += (var, )
4157
column_data = column_data.reshape((len(data), 1))
42-
domain = Domain(data.domain.attributes, class_vars, metas)
4358
table = data.transform(domain)
4459
with table.unlocked(table.Y if not data.domain.class_vars else table.metas):
4560
table[:, var] = column_data
@@ -65,17 +80,20 @@ def create_annotated_table(data, selected_indices):
6580
data, ("No", "Yes"), annotated, ANNOTATED_DATA_FEATURE_NAME)
6681

6782

83+
def lazy_annotated_table(data, selected_indices):
    """
    Return a ``LazyValue[Table]`` that defers ``create_annotated_table``.

    The length (``len(data)``) and the annotated domain are computed up front
    and handed to the lazy value as ``length`` and ``domain``; the annotated
    table itself is only built when the wrapped callable is invoked.
    """
    annotated_domain, _ = domain_with_annotation_column(data)

    def build_table():
        return create_annotated_table(data, selected_indices)

    return LazyValue[Table](
        build_table, length=len(data), domain=annotated_domain)
88+
89+
6890
def create_groups_table(data, selection,
6991
include_unselected=True,
7092
var_name=ANNOTATED_DATA_FEATURE_NAME,
7193
values=None):
7294
if data is None:
7395
return None
74-
max_sel = np.max(selection)
75-
if values is None:
76-
values = ["G{}".format(i + 1) for i in range(max_sel)]
77-
if include_unselected:
78-
values.append("Unselected")
96+
values, max_sel = group_values(selection, include_unselected, values)
7997
if include_unselected:
8098
# Place Unselected instances in the "last group", so that the group
8199
# colors and scatter diagram marker colors will match
@@ -88,3 +106,24 @@ def create_groups_table(data, selection,
88106
data = data[mask]
89107
selection = selection[mask] - 1
90108
return _table_with_annotation_column(data, values, selection, var_name)
109+
110+
111+
def lazy_groups_table(data, selection, include_unselected=True,
                      var_name=ANNOTATED_DATA_FEATURE_NAME, values=None):
    """
    Return a ``LazyValue[Table]`` that defers ``create_groups_table``.

    The expected length and the resulting domain are computed immediately and
    passed to the lazy value as ``length`` and ``domain``; the table is built
    only when the wrapped callable is invoked.

    Args:
        data: source table.
        selection: per-row group indices; 0 is counted as unselected.
        include_unselected: if true, the length is ``len(data)``; otherwise
            it is the number of non-zero entries in ``selection``.
        var_name: proposed name of the group variable.
        values: group names; when ``None`` they come from ``group_values``.
    """
    if include_unselected:
        length = len(data)
    else:
        length = np.sum(selection != 0)

    # The resolved values (not the raw argument) are used both for the
    # domain and for the deferred table construction.
    values, _ = group_values(selection, include_unselected, values)
    groups_domain, _ = domain_with_annotation_column(data, values, var_name)

    def build_table():
        return create_groups_table(
            data, selection, include_unselected, var_name, values)

    return LazyValue[Table](build_table, length=length, domain=groups_domain)
121+
122+
123+
def group_values(selection, include_unselected, values):
    """
    Determine group value names and the highest group index.

    Args:
        selection: array-like of group indices.
        include_unselected: when ``values`` is ``None``, whether to append
            ``"Unselected"`` after the generated group names.
        values: explicit value names; returned unchanged if not ``None``.

    Returns:
        A ``(values, max_sel)`` pair. ``max_sel`` is the maximum of
        ``selection``, or 0 when ``selection`` is empty. Generated values
        are ``"G1"`` … ``"G<max_sel>"``.
    """
    # np.max raises ValueError on a zero-size array; an empty selection
    # simply contains no groups.
    max_sel = np.max(selection) if np.size(selection) else 0
    if values is None:
        values = [f"G{i + 1}" for i in range(max_sel)]
        if include_unselected:
            values.append("Unselected")
    return values, max_sel

Orange/widgets/utils/tests/test_annotated_data.py

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
1+
from unittest.mock import patch
2+
13
import random
24
import unittest
35

46
import numpy as np
57

6-
from Orange.data import Table, Domain, StringVariable, DiscreteVariable
8+
from Orange.data import Table, Domain, StringVariable, DiscreteVariable, \
9+
ContinuousVariable
710
from Orange.data.filter import SameValue
811
from Orange.widgets.utils.annotated_data import (
9-
create_annotated_table, create_groups_table, ANNOTATED_DATA_FEATURE_NAME
12+
create_annotated_table, create_groups_table, ANNOTATED_DATA_FEATURE_NAME,
13+
lazy_annotated_table, lazy_groups_table, domain_with_annotation_column
1014
)
1115

1216

@@ -15,6 +19,42 @@ def setUp(self):
1519
random.seed(42)
1620
self.zoo = Table("zoo")
1721

22+
def test_domain_with_annotation_column(self):
    """Check placement, naming and values of the annotation variable."""
    a, b, c = (ContinuousVariable(name) for name in "abc")
    rows = [[1, 2, 3], [4, 5, 6]]
    xyz = tuple("xyz")

    # Without a class variable, the annotation becomes the class variable.
    classless = Domain([a, b, c])
    for data in (classless, Table.from_list(classless, rows)):
        new_domain, annotation = domain_with_annotation_column(data)
        self.assertEqual(new_domain.attributes, (a, b, c))
        self.assertIs(new_domain.class_var, annotation)
        self.assertEqual(annotation.name, ANNOTATED_DATA_FEATURE_NAME)
        self.assertEqual(annotation.values, ("No", "Yes"))

        new_domain, annotation = domain_with_annotation_column(
            data, values=xyz, var_name="d")
        self.assertEqual(new_domain.attributes, (a, b, c))
        self.assertIs(new_domain.class_var, annotation)
        self.assertEqual(annotation.name, "d")
        self.assertEqual(annotation.values, xyz)

    # With a class variable present, the annotation goes to the metas.
    with_class = Domain([a, b], c)
    for data in (with_class, Table.from_list(with_class, rows)):
        new_domain, annotation = domain_with_annotation_column(
            data, values=xyz, var_name="d")
        self.assertEqual(new_domain.attributes, (a, b))
        self.assertIs(new_domain.class_var, c)
        self.assertEqual(new_domain.metas, (annotation, ))
        self.assertEqual(annotation.name, "d")
        self.assertEqual(annotation.values, xyz)

        # A name that clashes with an existing variable is made unique.
        new_domain, annotation = domain_with_annotation_column(
            data, values=xyz, var_name="c")
        self.assertEqual(new_domain.attributes, (a, b))
        self.assertIs(new_domain.class_var, c)
        self.assertEqual(new_domain.metas, (annotation, ))
        self.assertEqual(annotation.name, "c (1)")
        self.assertEqual(annotation.values, xyz)
57+
1858
def test_create_annotated_table(self):
1959
annotated = create_annotated_table(self.zoo, list(range(10)))
2060

@@ -129,3 +169,52 @@ def test_create_groups_table_set_values(self):
129169
values = ("this", "that", "rest")
130170
table = create_groups_table(self.zoo, selection, values=values)
131171
self.assertEqual(tuple(table.domain["Selected"].values), values)
172+
173+
@patch("Orange.widgets.utils.annotated_data.create_annotated_table")
def test_lazy_annotated_table(self, creator):
    """The lazy table exposes length and domain without building the table."""
    zoo = self.zoo
    lazy = lazy_annotated_table(zoo, np.array([1, 2, 3]))

    self.assertEqual(lazy.length, len(zoo))
    lazy_domain = lazy.domain
    self.assertEqual(lazy_domain.attributes, zoo.domain.attributes)
    self.assertEqual(lazy_domain.class_var, zoo.domain.class_var)
    self.assertEqual(len(lazy_domain.metas), 2)
    annotation = lazy_domain.metas[1]
    self.assertIsInstance(annotation, DiscreteVariable)
    self.assertEqual(annotation.name, ANNOTATED_DATA_FEATURE_NAME)

    # Nothing is computed until the value is requested.
    creator.assert_not_called()
    self.assertIs(lazy.get_value(), creator.return_value)
186+
187+
@patch("Orange.widgets.utils.annotated_data.create_groups_table")
def test_lazy_groups_table(self, creator):
    """The lazy groups table reports length and domain before creation."""
    zoo = self.zoo
    groups = np.zeros(len(zoo), dtype=int)
    groups[10:15] = 1

    def check_domain(lazy, expected_name):
        # Shared domain checks; returns the added group variable.
        self.assertEqual(lazy.domain.attributes, zoo.domain.attributes)
        self.assertEqual(lazy.domain.class_var, zoo.domain.class_var)
        self.assertEqual(len(lazy.domain.metas), 2)
        group_var = lazy.domain.metas[1]
        self.assertIsInstance(group_var, DiscreteVariable)
        self.assertEqual(group_var.name, expected_name)
        return group_var

    # Default arguments: unselected rows are included.
    lazy = lazy_groups_table(zoo, groups)
    self.assertEqual(lazy.length, len(zoo))
    check_domain(lazy, ANNOTATED_DATA_FEATURE_NAME)
    creator.assert_not_called()
    self.assertIs(lazy.get_value(), creator.return_value)
    creator.reset_mock()

    # Explicit name and values, unselected rows excluded.
    lazy = lazy_groups_table(
        zoo, groups, include_unselected=False, var_name="foo",
        values=("bar", "baz"))
    self.assertEqual(lazy.length, 5)
    group_var = check_domain(lazy, "foo")
    self.assertEqual(group_var.values, ("bar", "baz"))
    creator.assert_not_called()
    self.assertIs(lazy.get_value(), creator.return_value)
217+
218+
219+
if __name__ == "__main__":
220+
unittest.main()

0 commit comments

Comments
 (0)