Skip to content

Commit 93bfcc2

Browse files
committed
Mosaic Vizrank: Ranking with no class and continuous class, related changes in Mosaic
1 parent 4ae5e39 commit 93bfcc2

File tree

3 files changed

+130
-79
lines changed

3 files changed

+130
-79
lines changed

Orange/widgets/visualize/owmosaic.py

Lines changed: 94 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME
1515
from Orange.preprocess import Discretize
1616
from Orange.preprocess.discretize import EqualFreq
17-
from Orange.preprocess.score import ReliefF, RReliefF
17+
from Orange.preprocess.score import ReliefF
1818
from Orange.statistics.distribution import get_distribution, get_distributions
1919
from Orange.widgets import gui, widget
2020
from Orange.widgets.gui import OWComponent
@@ -31,7 +31,6 @@
3131
class MosaicVizRank(VizRankDialog, OWComponent):
3232
"""VizRank dialog for Mosaic"""
3333
captionTitle = "Mosaic Ranking"
34-
K = 10 # for ReliefF
3534
max_attrs = Setting(3)
3635

3736
pairSelected = Signal(Variable, Variable, Variable, Variable)
@@ -42,35 +41,45 @@ def __init__(self, master):
4241
VizRankDialog.__init__(self, master)
4342
OWComponent.__init__(self, master)
4443

45-
box = gui.vBox(self)
44+
box = gui.hBox(self)
4645
self.max_attr_spin = gui.spin(
4746
box, self, "max_attrs", 2, 4,
4847
label="Limit the number of attributes to: ",
4948
controlWidth=50, alignment=Qt.AlignRight,
5049
callback=self.max_attr_changed)
50+
gui.rubber(box)
5151
self.layout().addWidget(self.button)
52-
self.resize(320, 512)
53-
self.attrs = []
52+
self.attr_ordering = None
5453
self.marginal = {}
5554
self.last_run_max_attr = None
5655

5756
def sizeHint(self):
58-
return QSize(320, 512)
57+
return QSize(400, 512)
58+
59+
def initialize(self):
60+
"""Clear the ordering to trigger recomputation when needed"""
61+
super().initialize()
62+
self.attr_ordering = None
63+
64+
def initialize_keep_ordering(self):
65+
"""Initialize triggered by change of coloring"""
66+
super().initialize()
5967

6068
def run(self):
6169
"""
6270
Add handling of the spin box for maximal number of attributes.
6371
64-
Disable the box before running and enable afterwards.
65-
Also, if the number of attributes is different than in the last run,
66-
reset the saved state (if it was paused).
72+
Disable the spin for maximal number of attributes before running and
73+
enable afterwards. Also, if the number of attributes is different than
74+
in the last run, reset the saved state (if it was paused).
6775
"""
6876
if self.max_attrs != self.last_run_max_attr:
6977
self.saved_state = None
7078
self.saved_progress = 0
7179
if self.saved_state is None:
7280
self.scores = []
7381
self.rank_model.clear()
82+
self.compute_attr_order()
7483
self.last_run_max_attr = self.max_attrs
7584
self.max_attr_spin.setDisabled(True)
7685
try:
@@ -85,45 +94,65 @@ def max_attr_changed(self):
8594
The method does not reset anything so the user can still see the
8695
results until actually restarting the search.
8796
"""
88-
self.button.setText("Start")
97+
if self.max_attrs != self.last_run_max_attr or self.saved_state is None:
98+
self.button.setText("Start")
99+
else:
100+
self.button.setText("Continue")
89101
self.button.setEnabled(self.check_preconditions())
90102

103+
def coloring_changed(self):
104+
self.stop_and_reset(self.initialize_keep_ordering)
105+
91106
def check_preconditions(self):
92107
"""Require at least one variable to allow ranking."""
93108
self.Information.add_message("no_attributes", "No variables to rank.")
94109
self.Information.no_attributes.clear()
95-
if not super().check_preconditions():
110+
data = self.master.discrete_data
111+
if not super().check_preconditions() or data is None:
96112
return False
97-
if not self.master.discrete_data.domain.attributes:
113+
if not data.domain.attributes:
98114
self.Information.no_attributes()
99115
return False
100116
return True
101117

102-
def score_heuristic(self):
118+
def compute_attr_order(self):
103119
"""
104-
Order attributes by ReliefF or RReliefF if there is a target
105-
variable. In case of ties or without target, other by name.
120+
Order attributes by Relief if there is a target variable. In case of
121+
ties or without target, order by name.
122+
123+
Add the class variable at the beginning when not coloring by class
124+
distribution.
125+
126+
If `self.attr_ordering` is already set, keep the ordering and just add or remove
127+
the class as needed.
106128
"""
107129
data = self.master.discrete_data
108-
if data.domain.class_var is None:
109-
return data.domain.attributes
110-
relief = ReliefF if data.domain.has_discrete_class else RReliefF
111-
weights = relief(n_iterations=100, k_nearest=self.K)(data)
112-
attrs = sorted(zip(weights, data.domain.attributes),
113-
key=lambda x: (-x[0], x[1].name))
114-
return [a for _, a in attrs]
130+
class_var = data.domain.class_var
131+
if not self.attr_ordering:
132+
if class_var is None:
133+
self.attr_ordering = sorted(data.domain, key=attrgetter("name"))
134+
else:
135+
weights = ReliefF(n_iterations=100, k_nearest=10)(data)
136+
attrs = sorted(zip(weights, data.domain.attributes),
137+
key=lambda x: (-x[0], x[1].name))
138+
self.attr_ordering = [a for _, a in attrs]
139+
if class_var is not None:
140+
if self._compute_class_dists():
141+
if self.attr_ordering[0] is class_var:
142+
del self.attr_ordering[0]
143+
elif self.attr_ordering[0] is not class_var:
144+
self.attr_ordering.insert(0, class_var)
115145

116146
def _compute_class_dists(self):
117-
master = self.master
118-
return master.interior_coloring == master.CLASS_DISTRIBUTION and \
119-
master.data.domain.has_discrete_class
147+
return self.master.interior_coloring == self.master.CLASS_DISTRIBUTION
120148

121149
def state_count(self):
122150
"""
123151
Return the number of combinations, starting with a single attribute
124152
if Mosaic is colored by class distributions, and two if by Pearson
125153
"""
126-
n_attrs = len(self.master.discrete_data.domain.attributes)
154+
self.compute_attr_order()
155+
n_attrs = len(self.attr_ordering)
127156
min_attrs = 1 if self._compute_class_dists() else 2
128157
max_attrs = min(n_attrs, self.max_attrs)
129158
return sum(comb(n_attrs, k, exact=True)
@@ -136,11 +165,10 @@ def iterate_states(self, state):
136165
distributions, and two if by Pearson.
137166
"""
138167
# If we put initialization of `self.attr_ordering` to `initialize`,
139-
# `score_heuristic` would be run on every call to `set_data`.
168+
# `compute_attr_order` would be run on every call to master's `set_data`.
140169
master = self.master
141170
data = master.discrete_data
142171
if state is None: # on the first call, compute order
143-
self.attrs = self.score_heuristic()
144172
if self._compute_class_dists():
145173
self.marginal = get_distribution(data, data.domain.class_var)
146174
self.marginal.normalize()
@@ -153,12 +181,15 @@ def iterate_states(self, state):
153181
n_attrs = len(data.domain.attributes)
154182
while True:
155183
yield state
184+
# Reset while running; just abort
185+
if self.attr_ordering is None:
186+
break
156187
for up in range(len(state)):
157188
state[up] += 1
158189
if up + 1 == len(state) or state[up] < state[up + 1]:
159190
break
160191
state[up] = up
161-
if state[-1] == len(self.attrs):
192+
if state[-1] == len(self.attr_ordering):
162193
if len(state) < min(self.max_attrs, n_attrs):
163194
state = list(range(len(state) + 1))
164195
else:
@@ -175,7 +206,7 @@ def compute_score(self, state):
175206
master = self.master
176207
data = master.discrete_data
177208
domain = data.domain
178-
attrlist = [self.attrs[i] for i in state]
209+
attrlist = [self.attr_ordering[i] for i in state]
179210
cond_dist = get_conditional_distribution(data, attrlist)[0]
180211
n = cond_dist[""]
181212
ss = 0
@@ -200,7 +231,8 @@ def compute_score(self, state):
200231
mul,
201232
(self.marginal[attr_idx][ind]
202233
for attr_idx, ind in zip(attr_indices, indices)))
203-
ss += (expected - observed) ** 2 / expected
234+
if expected > 1e-6:
235+
ss += (expected - observed) ** 2 / expected
204236
if class_values:
205237
dof = (len(class_values) - 1) * \
206238
reduce(mul, (len(attr.values) for attr in attrlist))
@@ -213,8 +245,12 @@ def on_selection_changed(self, selected, deselected):
213245
self.selectionChanged.emit(attrs + (None, ) * (4 - len(attrs)))
214246

215247
def row_for_state(self, score, state):
216-
attrs = tuple(sorted((self.attrs[x] for x in state),
217-
key=attrgetter("name")))
248+
"""The row consists of attributes sorted by name; class is at the
249+
beginning, if present, so it's on the x-axis and not lost somewhere."""
250+
class_var = self.master.data.domain.class_var
251+
attrs = tuple(
252+
sorted((self.attr_ordering[x] for x in state),
253+
key=lambda attr: (1 - (attr is class_var), attr.name)))
218254
item = QStandardItem(", ".join(a.name for a in attrs))
219255
item.setData(attrs, self._AttrRole)
220256
return [item]
@@ -237,14 +273,12 @@ class OWMosaicDisplay(OWWidget):
237273

238274
settingsHandler = DomainContextHandler()
239275
use_boxes = Setting(True)
276+
interior_coloring = Setting(CLASS_DISTRIBUTION)
240277
variable1 = ContextSetting("", exclude_metas=False)
241278
variable2 = ContextSetting("", exclude_metas=False)
242279
variable3 = ContextSetting("", exclude_metas=False)
243280
variable4 = ContextSetting("", exclude_metas=False)
244281
selection = ContextSetting(set())
245-
# interior_coloring is context setting to properly reset it
246-
# if the widget switches to regression and back (set setData)
247-
interior_coloring = ContextSetting(CLASS_DISTRIBUTION)
248282

249283
BAR_WIDTH = 5
250284
SPACING = 4
@@ -305,12 +339,16 @@ def __init__(self):
305339
gui.rubber(self.controlArea)
306340

307341
def sizeHint(self):
308-
return QSize(530, 720)
342+
return QSize(720, 530)
309343

310344
def _compare_with_total(self):
311-
if self.data and self.data.domain.has_discrete_class:
345+
if self.data is not None and \
346+
self.data.domain.class_var is not None and \
347+
self.interior_coloring != self.CLASS_DISTRIBUTION:
312348
self.interior_coloring = self.CLASS_DISTRIBUTION
313-
self.coloring_changed()
349+
self.coloring_changed() # This also calls self.update_graph
350+
else:
351+
self.update_graph()
314352

315353
def init_combos(self, data):
316354
for combo in self.attr_combos:
@@ -359,28 +397,26 @@ def set_data(self, data):
359397
self.closeContext()
360398
self.data = data
361399
self.init_combos(self.data)
362-
if not self.data:
400+
if self.data is None:
363401
self.discrete_data = None
364-
return
365-
if any(attr.is_continuous for attr in data.domain):
366-
self.discrete_data = Discretize(method=EqualFreq(n=4))(data)
402+
elif any(attr.is_continuous for attr in data.domain):
403+
self.discrete_data = Discretize(
404+
method=EqualFreq(n=4), discretize_classes=True)(data)
367405
else:
368406
self.discrete_data = self.data
369407

370-
if self.data.domain.class_var is None:
371-
self.rb_colors.setDisabled(True)
372-
disc_class = False
373-
else:
374-
self.rb_colors.setDisabled(False)
375-
disc_class = self.data.domain.has_discrete_class
376-
self.rb_colors.group.button(2).setDisabled(not disc_class)
377-
self.bar_button.setDisabled(not disc_class)
378-
self.interior_coloring = \
379-
self.CLASS_DISTRIBUTION if disc_class else self.PEARSON
380408
self.vizrank.stop_and_reset()
381409
self.vizrank_button.setEnabled(
382-
self.data is not None and self.data.domain.class_var is not None
383-
and len(self.data.domain.attributes) >= 1 and len(self.data) > 1)
410+
self.data is not None and len(self.data) > 1 \
411+
and len(self.data.domain.attributes) >= 1)
412+
413+
if self.data is None:
414+
return
415+
416+
has_class = self.data.domain.class_var is not None
417+
self.rb_colors.setDisabled(not has_class)
418+
self.interior_coloring = \
419+
self.CLASS_DISTRIBUTION if has_class else self.PEARSON
384420

385421
self.openContext(self.data)
386422

@@ -411,7 +447,7 @@ def clear_selection(self):
411447
self.send_selection()
412448

413449
def coloring_changed(self):
414-
self.vizrank.stop_and_reset()
450+
self.vizrank.coloring_changed()
415451
self.update_graph()
416452

417453
def reset_graph(self):
@@ -671,7 +707,7 @@ def add_rect(x0, x1, y0, y1, condition="",
671707
if x1 - x0 + y1 - y0 == 2:
672708
y1 += 1
673709

674-
if class_var and class_var.is_discrete:
710+
if class_var:
675711
colors = [QColor(*col) for col in class_var.colors]
676712
else:
677713
colors = None
@@ -922,7 +958,7 @@ def get_conditional_distribution(data, attrs):
922958
dist = defaultdict(int)
923959
cond_dist[""] = dist[""] = len(data)
924960
all_attrs = [data.domain[a] for a in attrs]
925-
if data.domain.has_discrete_class:
961+
if data.domain.class_var is not None:
926962
all_attrs.append(data.domain.class_var)
927963

928964
for i in range(1, len(all_attrs) + 1):

0 commit comments

Comments
 (0)