Skip to content

Commit c838736

Browse files
authored
Merge pull request #4835 from ales-erjavec/heatmap-limits-split
owheatmap: Disable/enable clustering on estimated cost
2 parents 46932b5 + 79b7b6f commit c838736

File tree

1 file changed

+24
-15
lines changed

1 file changed

+24
-15
lines changed

Orange/widgets/visualize/owheatmap.py

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -879,11 +879,11 @@ def construct_heatmaps(self, data, group_var=None, column_split_key=None) -> 'Pa
879879

880880
self.effective_data = effective_data
881881

882-
self.__update_clustering_enable_state(effective_data)
883-
884882
parts = self._make_parts(
885883
effective_data, group_var,
886884
column_split_key.name if column_split_key is not None else None)
885+
886+
self.__update_clustering_enable_state(parts)
887887
# Restore/update the row/columns items descriptions from cache if
888888
# available
889889
rows_cache_key = (group_var,
@@ -1005,17 +1005,26 @@ def __aspect_mode_changed(self):
10051005
sp.setVerticalPolicy(QSizePolicy.Preferred)
10061006
widget.setSizePolicy(sp)
10071007

1008-
def __update_clustering_enable_state(self, data):
1009-
if data is not None:
1010-
N = len(data)
1011-
M = len(data.domain.attributes)
1008+
def __update_clustering_enable_state(self, parts: Optional['Parts']):
1009+
def c_cost(sizes: Iterable[int]) -> int:
1010+
"""Estimated cost for clustering of `sizes`"""
1011+
return sum(n ** 2 for n in sizes)
1012+
1013+
def co_cost(sizes: Iterable[int]) -> int:
1014+
"""Estimated cost for cluster ordering of `sizes`"""
1015+
# ~O(N ** 3) but O(N ** 4) worst case.
1016+
return sum(n ** 4 for n in sizes)
1017+
1018+
if parts is not None:
1019+
Ns = [len(p.indices) for p in parts.rows]
1020+
Ms = [len(p.indices) for p in parts.columns]
10121021
else:
1013-
N = M = 0
1022+
Ns = Ms = [0]
10141023

1015-
rc_enabled = N <= self.MaxClustering
1016-
rco_enabled = N <= self.MaxOrderedClustering
1017-
cc_enabled = M <= self.MaxClustering
1018-
cco_enabled = M <= self.MaxOrderedClustering
1024+
rc_enabled = c_cost(Ns) <= c_cost([self.MaxClustering])
1025+
rco_enabled = co_cost(Ns) <= co_cost([self.MaxOrderedClustering])
1026+
cc_enabled = c_cost(Ms) <= c_cost([self.MaxClustering])
1027+
cco_enabled = co_cost(Ms) <= co_cost([self.MaxOrderedClustering])
10191028
row_clust, col_clust = self.row_clustering, self.col_clustering
10201029

10211030
row_clust_msg = ""
@@ -1024,20 +1033,20 @@ def __update_clustering_enable_state(self, data):
10241033
if not rco_enabled and row_clust == Clustering.OrderedClustering:
10251034
row_clust = Clustering.Clustering
10261035
row_clust_msg = "Row cluster ordering was disabled due to the " \
1027-
"input matrix being to big"
1036+
"estimated runtime cost"
10281037
if not rc_enabled and row_clust == Clustering.Clustering:
10291038
row_clust = Clustering.None_
10301039
row_clust_msg = "Row clustering was was disabled due to the " \
1031-
"input matrix being to big"
1040+
"estimated runtime cost"
10321041

10331042
if not cco_enabled and col_clust == Clustering.OrderedClustering:
10341043
col_clust = Clustering.Clustering
10351044
col_clust_msg = "Column cluster ordering was disabled due to " \
1036-
"the input matrix being to big"
1045+
"estimated runtime cost"
10371046
if not cc_enabled and col_clust == Clustering.Clustering:
10381047
col_clust = Clustering.None_
10391048
col_clust_msg = "Column clustering was disabled due to the " \
1040-
"input matrix being to big"
1049+
"estimated runtime cost"
10411050

10421051
self.col_clustering = col_clust
10431052
self.row_clustering = row_clust

0 commit comments

Comments
 (0)