Skip to content

Commit eb3633c

Browse files
authored
Merge pull request #6348 from janezd/lazy-signals
[ENH] Lazy signals for Hierarchical Clustering
2 parents 1725baa + 2fc2717 commit eb3633c

File tree

8 files changed

+321
-102
lines changed

8 files changed

+321
-102
lines changed

Orange/widgets/unsupervised/owhierarchicalclustering.py

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@
1919

2020
from Orange.widgets.utils.localization import pl
2121
from orangewidget.utils.itemmodels import PyListModel
22+
from orangewidget.utils.signals import LazyValue
2223

2324
import Orange.data
2425
from Orange.data.domain import filter_visible
2526
from Orange.data import Domain, DiscreteVariable, ContinuousVariable, \
26-
StringVariable
27+
StringVariable, Table
2728
import Orange.misc
2829
from Orange.clustering.hierarchical import \
2930
postorder, preorder, Tree, tree_from_linkage, dist_matrix_linkage, \
@@ -32,8 +33,11 @@
3233

3334
from Orange.widgets import widget, gui, settings
3435
from Orange.widgets.utils import itemmodels, combobox
35-
from Orange.widgets.utils.annotated_data import (create_annotated_table,
36-
ANNOTATED_DATA_SIGNAL_NAME)
36+
from Orange.widgets.utils.annotated_data import (lazy_annotated_table,
37+
ANNOTATED_DATA_SIGNAL_NAME,
38+
domain_with_annotation_column,
39+
add_columns,
40+
create_annotated_table)
3741
from Orange.widgets.utils.widgetpreview import WidgetPreview
3842
from Orange.widgets.visualize.utils.plotutils import AxisItem
3943
from Orange.widgets.widget import Input, Output, Msg
@@ -776,71 +780,73 @@ def commit(self):
776780
for node in selection]
777781

778782
selected_indices = list(chain(*maps))
779-
unselected_indices = sorted(set(range(self.root.value.last)) -
780-
set(selected_indices))
781783

782784
if not selected_indices:
783785
self.Outputs.selected_data.send(None)
784-
annotated_data = create_annotated_table(items, []) \
786+
annotated_data = lazy_annotated_table(items, []) \
785787
if self.selection_method == 0 and self.matrix.axis else None
786788
self.Outputs.annotated_data.send(annotated_data)
787789
return
788790

789-
selected_data = None
791+
selected_data = annotated_data = None
790792

791793
if isinstance(items, Orange.data.Table) and self.matrix.axis == 1:
792794
# Select rows
793-
c = np.zeros(self.matrix.shape[0])
795+
data, domain = items, items.domain
794796

797+
c = np.full(self.matrix.shape[0], len(maps))
795798
for i, indices in enumerate(maps):
796799
c[indices] = i
797-
c[unselected_indices] = len(maps)
798-
799-
mask = c != len(maps)
800-
801-
data, domain = items, items.domain
802-
attrs = domain.attributes
803-
classes = domain.class_vars
804-
metas = domain.metas
805800

806-
var_name = get_unique_names(domain, "Cluster")
801+
clust_name = get_unique_names(domain, "Cluster")
807802
values = [f"C{i + 1}" for i in range(len(maps))]
808803

809-
clust_var = Orange.data.DiscreteVariable(
810-
var_name, values=values + ["Other"])
811-
domain = Orange.data.Domain(attrs, classes, metas + (clust_var,))
812-
data = items.transform(domain)
813-
with data.unlocked(data.metas):
814-
data.set_column(clust_var, c)
815-
816-
if selected_indices:
817-
selected_data = data[mask]
818-
clust_var = Orange.data.DiscreteVariable(
819-
var_name, values=values)
820-
selected_data.domain = Domain(
821-
attrs, classes, metas + (clust_var, ))
822-
823-
annotated_data = create_annotated_table(data, selected_indices)
804+
sel_clust_var = Orange.data.DiscreteVariable(
805+
name=clust_name, values=values)
806+
sel_domain = add_columns(domain, metas=(sel_clust_var,))
807+
selected_data = LazyValue[Table](
808+
lambda: items.add_column(
809+
sel_clust_var, c, to_metas=True)[c != len(maps)],
810+
domain=sel_domain, length=len(selected_indices))
811+
812+
ann_clust_var = Orange.data.DiscreteVariable(
813+
name=clust_name, values=values + ["Other"]
814+
)
815+
ann_domain = add_columns(
816+
domain_with_annotation_column(data)[0], metas=(ann_clust_var, ))
817+
annotated_data = LazyValue[Table](
818+
lambda: create_annotated_table(
819+
data=items.add_column(ann_clust_var, c, to_metas=True),
820+
selected_indices=selected_indices),
821+
domain=ann_domain, length=len(items)
822+
)
824823

825824
elif isinstance(items, Orange.data.Table) and self.matrix.axis == 0:
826825
# Select columns
827826
attrs = []
827+
unselected_indices = sorted(set(range(self.root.value.last)) -
828+
set(selected_indices))
828829
for clust, indices in chain(enumerate(maps, start=1),
829830
[(0, unselected_indices)]):
830831
for i in indices:
831832
attr = items.domain[i].copy()
832833
attr.attributes["cluster"] = clust
833834
attrs.append(attr)
834-
domain = Orange.data.Domain(
835+
all_domain = Orange.data.Domain(
835836
# len(unselected_indices) can be 0
836837
attrs[:len(attrs) - len(unselected_indices)],
837838
items.domain.class_vars, items.domain.metas)
838-
selected_data = items.from_table(domain, items)
839839

840-
domain = Orange.data.Domain(
840+
selected_data = LazyValue[Table](
841+
lambda: items.from_table(all_domain, items),
842+
domain=all_domain, length=len(items))
843+
844+
sel_domain = Orange.data.Domain(
841845
attrs,
842846
items.domain.class_vars, items.domain.metas)
843-
annotated_data = items.from_table(domain, items)
847+
annotated_data = LazyValue[Table](
848+
lambda: items.from_table(sel_domain, items),
849+
domain=sel_domain, length=len(items))
844850

845851
self.Outputs.selected_data.send(selected_data)
846852
self.Outputs.annotated_data.send(annotated_data)

Orange/widgets/utils/annotated_data.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
from typing import Union
2+
13
import numpy as np
2-
from Orange.data import Domain, DiscreteVariable
4+
5+
from orangewidget.utils.signals import LazyValue
6+
7+
from Orange.data import Domain, DiscreteVariable, Table
38
from Orange.data.util import get_unique_names
49

510
ANNOTATED_DATA_SIGNAL_NAME = "Data"
@@ -30,16 +35,26 @@ def add_columns(domain, attributes=(), class_vars=(), metas=()):
3035
return Domain(attributes, class_vars, metas)
3136

3237

38+
def domain_with_annotation_column(
39+
data: Union[Table, Domain],
40+
values=("No", "Yes"),
41+
var_name=ANNOTATED_DATA_FEATURE_NAME):
42+
domain = data if isinstance(data, Domain) else data.domain
43+
var = DiscreteVariable(get_unique_names(domain, var_name), values)
44+
class_vars, metas = domain.class_vars, domain.metas
45+
if not domain.class_vars:
46+
class_vars += (var, )
47+
else:
48+
metas += (var, )
49+
return Domain(domain.attributes, class_vars, metas), var
50+
51+
3352
def _table_with_annotation_column(data, values, column_data, var_name):
34-
var = DiscreteVariable(get_unique_names(data.domain, var_name), values)
35-
class_vars, metas = data.domain.class_vars, data.domain.metas
53+
domain, var = domain_with_annotation_column(data, values, var_name)
3654
if not data.domain.class_vars:
37-
class_vars += (var, )
3855
column_data = column_data.reshape((len(data), ))
3956
else:
40-
metas += (var, )
4157
column_data = column_data.reshape((len(data), 1))
42-
domain = Domain(data.domain.attributes, class_vars, metas)
4358
table = data.transform(domain)
4459
with table.unlocked(table.Y if not data.domain.class_vars else table.metas):
4560
table[:, var] = column_data
@@ -65,17 +80,20 @@ def create_annotated_table(data, selected_indices):
6580
data, ("No", "Yes"), annotated, ANNOTATED_DATA_FEATURE_NAME)
6681

6782

83+
def lazy_annotated_table(data, selected_indices):
84+
domain, _ = domain_with_annotation_column(data)
85+
return LazyValue[Table](
86+
lambda: create_annotated_table(data, selected_indices),
87+
length=len(data), domain=domain)
88+
89+
6890
def create_groups_table(data, selection,
6991
include_unselected=True,
7092
var_name=ANNOTATED_DATA_FEATURE_NAME,
7193
values=None):
7294
if data is None:
7395
return None
74-
max_sel = np.max(selection)
75-
if values is None:
76-
values = ["G{}".format(i + 1) for i in range(max_sel)]
77-
if include_unselected:
78-
values.append("Unselected")
96+
values, max_sel = group_values(selection, include_unselected, values)
7997
if include_unselected:
8098
# Place Unselected instances in the "last group", so that the group
8199
# colors and scatter diagram marker colors will match
@@ -88,3 +106,24 @@ def create_groups_table(data, selection,
88106
data = data[mask]
89107
selection = selection[mask] - 1
90108
return _table_with_annotation_column(data, values, selection, var_name)
109+
110+
111+
def lazy_groups_table(data, selection, include_unselected=True,
112+
var_name=ANNOTATED_DATA_FEATURE_NAME, values=None):
113+
length = len(data) if include_unselected else np.sum(selection != 0)
114+
values, _ = group_values(selection, include_unselected, values)
115+
domain, _ = domain_with_annotation_column(data, values, var_name)
116+
return LazyValue[Table](
117+
lambda: create_groups_table(data, selection, include_unselected,
118+
var_name, values),
119+
length=length, domain=domain
120+
)
121+
122+
123+
def group_values(selection, include_unselected, values):
124+
max_sel = np.max(selection)
125+
if values is None:
126+
values = ["G{}".format(i + 1) for i in range(max_sel)]
127+
if include_unselected:
128+
values.append("Unselected")
129+
return values, max_sel

0 commit comments

Comments
 (0)