Skip to content

Commit 1416da5

Browse files
committed
Simplify varying_between code.
1 parent d3a75b5 commit 1416da5

File tree

1 file changed

+18
-58
lines changed

1 file changed

+18
-58
lines changed

Orange/widgets/visualize/owvenndiagram.py

Lines changed: 18 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from AnyQt.QtCore import pyqtSignal as Signal
2525

2626
import Orange.data
27+
import Orange.statistics.util as util
2728
from Orange.widgets import widget, gui, settings
2829
from Orange.widgets.utils import itemmodels, colorpalette
2930
from Orange.widgets.utils.annotated_data import (create_annotated_table,
@@ -883,74 +884,33 @@ def unique_non_nan(ar):
883884
return uniq[~numpy.isnan(uniq)]
884885

885886

886-
def varying_matrix(X, idmap):
    """
    Enables efficient implementation of `varying_between`
    for continuous and discrete variables.

    Return indices of non-constant matrix columns between groups
    defined by indices. Supports sparse data.

    Only the entries reported by ``X.nonzero()`` are visited, so zeros are
    never read individually; they are accounted for by comparing the number
    of visited entries with the size of the group.

    :param X: 2-D matrix providing ``nonzero()`` and ``X[row, col]`` access.
    :param idmap: mapping from group key to the row indices in that group.
    :return: set of column indices that hold more than one distinct
        (non-NaN) value inside at least one group.
    :raises KeyError: if a row with a non-zero entry belongs to no group
        in `idmap`.
    """
    # Map rows to groups
    inv_idmap = {}
    for g, rows in idmap.items():
        for r in rows:
            inv_idmap[r] = g

    # Per (group, column): distinct non-NaN values seen, and how many
    # non-zero entries (NaN included) were visited.
    vals_per_group = {}
    rows_per_group = {}
    varying = set()

    # Columns with two or more distinct non-zero, non-NaN values in a group
    for r, c in zip(*X.nonzero()):
        key = (inv_idmap[r], c)
        rows_per_group[key] = rows_per_group.get(key, 0) + 1
        val = X[r, c]
        # Strings are checked first because numpy.isnan rejects them.
        if isinstance(val, str) or not numpy.isnan(val):
            seen = vals_per_group.setdefault(key, set())
            seen.add(val)
            if len(seen) > 1:
                varying.add(c)

    # Columns mixing zero and non-zero values in a group: fewer entries
    # were visited than the group has rows, so the rest must be zeros.
    for (g, c), cnt in rows_per_group.items():
        if (g, c) in vals_per_group and cnt < len(idmap[g]):
            varying.add(c)
    return varying
924-
925-
926887
def varying_between(table, idvar):
    """
    Return a list of all variables with non constant values between
    groups defined by `idvar`.

    A variable is reported when, inside at least one of the row groups
    returned by ``group_table_indices(table, idvar)``, its column holds more
    than one distinct value. NaN values are dropped before counting for
    non-string variables.

    :param table: data table; its attributes, class variables and metas are
        all considered.
    :param idvar: the grouping variable; it is never reported itself.
    :return: list of varying variables, ordered as they appear in
        ``table.domain.variables + table.domain.metas``.
    """
    all_possible = [var for var in table.domain.variables + table.domain.metas
                    if var != idvar]
    candidate_set = set(all_possible)

    idmap = group_table_indices(table, idvar)

    varying = set()
    for indices in idmap.values():
        # Every variable is already classified; slicing further groups
        # would be wasted work.
        if not candidate_set:
            break
        subset = table[indices]
        # Iterate over a copy because candidates are removed while looping.
        for var in list(candidate_set):
            column, _ = subset.get_column_view(var)
            values = util.unique(column)

            if var.is_string:
                uniq = set(values)
            else:
                uniq = unique_non_nan(values)

            if len(uniq) > 1:
                varying.add(var)
                candidate_set.remove(var)

    # First position of each variable, so sorting needs no repeated
    # linear `list.index` scans.
    position = {}
    for i, var in enumerate(all_possible):
        position.setdefault(var, i)
    return sorted(varying, key=position.__getitem__)
956916

0 commit comments

Comments
 (0)