Skip to content

Commit c3e2596

Browse files
committed
OWBoxPlot: Add sorting of groups
1 parent 66c2911 commit c3e2596

File tree

2 files changed

+123
-37
lines changed

2 files changed

+123
-37
lines changed

Orange/widgets/visualize/owboxplot.py

Lines changed: 78 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from Orange.widgets import widget, gui
2121
from Orange.widgets.settings import (Setting, DomainContextHandler,
2222
ContextSetting)
23-
from Orange.widgets.utils.itemmodels import DomainModel, VariableListModel
23+
from Orange.widgets.utils.itemmodels import VariableListModel
2424
from Orange.widgets.utils.annotated_data import (create_annotated_table,
2525
ANNOTATED_DATA_SIGNAL_NAME)
2626
from Orange.widgets.utils.widgetpreview import WidgetPreview
@@ -153,6 +153,7 @@ class Outputs:
153153

154154
attribute = ContextSetting(None)
155155
order_by_importance = Setting(False)
156+
order_grouping_by_importance = Setting(False)
156157
group_var = ContextSetting(None)
157158
show_annotations = Setting(True)
158159
compare = Setting(CompareMeans)
@@ -212,19 +213,20 @@ def __init__(self):
212213
# set the minimal height (see the penultimate paragraph of
213214
# http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
214215
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
215-
gui.separator(view.box, 6, 6)
216-
self.cb_order = gui.checkBox(
216+
gui.checkBox(
217217
view.box, self, "order_by_importance",
218-
"Order by relevance",
218+
"Order by relevance to subgroups",
219219
tooltip="Order by 𝜒² or ANOVA over the subgroups",
220-
callback=self.apply_sorting)
221-
self.group_vars = DomainModel(
222-
placeholder="None", separators=False,
223-
valid_types=Orange.data.DiscreteVariable)
224-
self.group_view = view = gui.listView(
220+
callback=self.apply_attr_sorting)
221+
self.group_vars = VariableListModel(placeholder="None")
222+
view = gui.listView(
225223
self.controlArea, self, "group_var", box="Subgroups",
226224
model=self.group_vars, callback=self.grouping_changed)
227-
view.setEnabled(False)
225+
gui.checkBox(
226+
view.box, self, "order_grouping_by_importance",
227+
"Order by relevance to variable",
228+
tooltip="Order by 𝜒² or ANOVA over the variable values",
229+
callback=self.apply_group_sorting)
228230
view.setMinimumSize(QSize(30, 30))
229231
# See the comment above
230232
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
@@ -257,7 +259,6 @@ def __init__(self):
257259
self.sort_cb = gui.checkBox(
258260
box, self, 'sort_freqs', "Sort by subgroup frequencies",
259261
callback=self.display_changed)
260-
gui.rubber(box)
261262

262263
gui.vBox(self.mainArea, addSpace=True)
263264
self.box_scene = QGraphicsScene()
@@ -289,12 +290,20 @@ def eventFilter(self, obj, event):
289290

290291
return super().eventFilter(obj, event)
291292

292-
def reset_attrs(self, domain):
293+
def reset_attrs(self):
294+
domain = self.dataset.domain
293295
self.attrs[:] = [
294296
var for var in chain(
295297
domain.class_vars, domain.metas, domain.attributes)
296298
if var.is_primitive()]
297299

300+
def reset_groups(self):
    """Restore the subgroup list to domain order.

    The list is rebuilt in place as ``None`` followed by every discrete
    variable of ``self.dataset.domain``, taking class variables first,
    then metas, then regular attributes.
    """
    domain = self.dataset.domain
    candidates = chain(domain.class_vars, domain.metas, domain.attributes)
    discrete = [var for var in candidates if var.is_discrete]
    # Slice assignment keeps the same model object that the view is bound to.
    self.group_vars[:] = [None] + discrete
306+
298307
# noinspection PyTypeChecker
299308
@Inputs.data
300309
def set_data(self, dataset):
@@ -308,19 +317,19 @@ def set_data(self, dataset):
308317
self.group_var = None
309318
self.attribute = None
310319
if dataset:
311-
domain = dataset.domain
312-
self.group_vars.set_domain(domain)
313-
self.group_view.setEnabled(len(self.group_vars) > 1)
314-
self.reset_attrs(domain)
315-
self.select_default_variables(domain)
320+
self.reset_attrs()
321+
self.reset_groups()
322+
self.select_default_variables()
316323
self.openContext(self.dataset)
317324
self.grouping_changed()
325+
self.attr_changed()
318326
else:
319327
self.reset_all_data()
320328
self.commit()
321329

322-
def select_default_variables(self, domain):
330+
def select_default_variables(self):
323331
# visualize first non-class variable, group by class (if present)
332+
domain = self.dataset.domain
324333
if len(self.attrs) > len(domain.class_vars):
325334
self.attribute = self.attrs[len(domain.class_vars)]
326335
elif self.attrs:
@@ -331,7 +340,7 @@ def select_default_variables(self, domain):
331340
else:
332341
self.group_var = None # Reset to trigger selection via callback
333342

334-
def apply_sorting(self):
343+
def apply_attr_sorting(self):
335344
def compute_score(attr):
336345
if attr is group_var:
337346
return 3
@@ -361,8 +370,48 @@ def compute_score(attr):
361370
include_class=True, include_metas=True) else None
362371
self.attrs.sort(key=compute_score)
363372
else:
364-
self.reset_attrs(domain)
365-
self.attribute = attribute
373+
self.reset_attrs()
374+
self.attribute = attribute # reset selection
375+
self._ensure_selection_visible(self.controls.attribute)
376+
377+
def apply_group_sorting(self):
    """Order the subgroup list by relevance to the selected variable.

    When ``order_grouping_by_importance`` is set and a variable is
    selected, ``self.group_vars`` is sorted by a score computed for each
    candidate grouping variable: the p-value of a one-way ANOVA when the
    selected variable is continuous, otherwise the second element
    returned by ``self._chi_square`` (treated as a p-value).  ``None``
    stays first and the selected variable itself goes last.

    When the option is off, or no variable is selected, the list is
    restored with ``reset_groups``.  In both cases the previous
    ``group_var`` selection is re-applied and scrolled into view.
    Does nothing when there is no dataset.
    """
    data = self.dataset
    if data is None:
        return
    attr = self.attribute
    group_var = self.group_var
    attr_col = None  # filled in below, only for a continuous variable

    def compute_stat(group):
        # Sort keys: -1 keeps the ``None`` entry on top, scored variables
        # use their p-value, 2 marks variables that cannot be scored and
        # 3 pushes the displayed variable itself to the end.
        if group is attr:
            return 3
        if group is None:
            return -1
        if attr.is_continuous:
            # Compare as floats instead of casting to int: converting
            # NaN (a missing group value) to an integer is undefined,
            # whereas ``NaN == i`` is simply False, so such rows drop
            # out of every sample.
            group_col = np.asarray(
                data.get_column_view(group)[0], dtype=float)
            samples = (attr_col[group_col == i]
                       for i in range(len(group.values)))
            samples = (col[~np.isnan(col)] for col in samples)
            samples = [col for col in samples if len(col)]
            # ANOVA needs at least two non-empty samples.
            p = f_oneway(*samples)[1] if len(samples) > 1 else 2
        else:
            p = self._chi_square(group, attr)[1]
        return 2 if math.isnan(p) else p

    # Without a selected variable there is nothing to score against;
    # fall back to domain order instead of failing on ``attr.is_continuous``.
    if self.order_grouping_by_importance and attr is not None:
        if attr.is_continuous:
            attr_col = data.get_column_view(attr)[0].astype(float)
        self.group_vars.sort(key=compute_stat)
    else:
        self.reset_groups()
    self.group_var = group_var  # reset selection
    self._ensure_selection_visible(self.controls.group_var)
409+
410+
@staticmethod
411+
def _ensure_selection_visible(view):
412+
selection = view.selectedIndexes()
413+
if len(selection) == 1:
414+
view.scrollTo(selection[0])
366415

367416
def _chi_square(self, group_var, attr):
368417
# Chi-square with the given distribution into groups
@@ -379,16 +428,14 @@ def _chi_square(self, group_var, attr):
379428
def reset_all_data(self):
380429
self.clear_scene()
381430
self.stat_test = ""
382-
self.attrs.clear()
383-
self.group_vars.set_domain(None)
384-
self.group_view.setEnabled(False)
431+
self.attrs[:] = []
432+
self.group_vars[:] = [None]
385433
self.is_continuous = False
386434
self.update_display_box()
387435

388436
def grouping_changed(self):
389-
self.cb_order.setEnabled(self.group_var is not None)
390-
self.apply_sorting()
391-
self.attr_changed()
437+
self.apply_attr_sorting()
438+
self.update_graph()
392439

393440
def select_box_items(self):
394441
temp_cond = self.conditions.copy()
@@ -398,6 +445,10 @@ def select_box_items(self):
398445
[c.conditions for c in temp_cond])
399446

400447
def attr_changed(self):
    """Handle a change of the displayed variable.

    Re-applies the subgroup ordering first, then refreshes the graph.
    """
    self.apply_group_sorting()
    self.update_graph()
450+
451+
def update_graph(self):
401452
self.compute_box_data()
402453
self.update_display_box()
403454
self.layout_changed()

Orange/widgets/visualize/tests/test_owboxplot.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,9 @@ def test_input_data(self):
4343
self.send_signal(self.widget.Inputs.data, None)
4444
self.assertEqual(len(self.widget.attrs), 0)
4545
self.assertEqual(len(self.widget.group_vars), 1)
46-
self.assertFalse(self.widget.group_view.isEnabled())
4746
self.assertTrue(self.widget.display_box.isHidden())
4847
self.assertFalse(self.widget.stretching_box.isHidden())
4948

50-
self.send_signal(self.widget.Inputs.data, self.iris)
51-
self.assertTrue(self.widget.group_view.isEnabled())
52-
5349
def test_primitive_metas(self):
5450
new_domain = Domain(attributes=[], class_vars=[], metas=(
5551
self.data.domain.attributes + self.data.domain.class_vars))
@@ -88,7 +84,6 @@ def test_input_data_missings_disc_no_group_var(self):
8884
data.X[:, 1] = np.nan
8985
data.domain.attributes[1].values = []
9086
self.send_signal("Data", data)
91-
self.widget.controls.order_by_importance.setChecked(True)
9287
self._select_list_items(self.widget.controls.attribute)
9388
self._select_list_items(self.widget.controls.group_var)
9489

@@ -100,7 +95,7 @@ def test_attribute_combinations(self):
10095
m.setCurrentIndex(group_list.model().index(i), m.ClearAndSelect)
10196
self._select_list_items(self.widget.controls.attribute)
10297

103-
def test_apply_sorting(self):
98+
def test_apply_sorting_group(self):
10499
controls = self.widget.controls
105100
group_list = controls.group_var
106101
order_check = controls.order_by_importance
@@ -115,10 +110,7 @@ def select_group(i):
115110
data = self.titanic
116111
self.send_signal("Data", data)
117112

118-
select_group(0)
119-
self.assertFalse(order_check.isEnabled())
120113
select_group(2) # First attribute
121-
self.assertTrue(order_check.isEnabled())
122114

123115
order_check.setChecked(False)
124116
self.assertEqual(tuple(attributes),
@@ -150,6 +142,50 @@ def select_group(i):
150142
'fasting blood sugar > 120',
151143
'diameter narrowing'])
152144

145+
def test_apply_sorting_vars(self):
    # Checks the order of the subgroup list: domain order while the
    # "order_grouping_by_importance" box is unchecked, and the expected
    # relevance order once it is checked, for several selected variables.
    widget = self.widget
    controls = widget.controls
    attr_model = widget.attrs
    group_model = widget.group_vars
    order_box = controls.order_grouping_by_importance

    def select_attr(row):
        selection = controls.attribute.selectionModel()
        selection.setCurrentIndex(
            attr_model.index(row), selection.ClearAndSelect)

    def group_names():
        # Names of all entries after the leading ``None``.
        return [var.name for var in group_model[1:]]

    titanic = self.titanic
    self.send_signal("Data", titanic)

    select_attr(1)  # First attribute

    order_box.setChecked(False)
    domain = titanic.domain
    self.assertEqual(
        tuple(group_model),
        (None, ) + domain.class_vars + domain.attributes)

    order_box.setChecked(True)
    self.assertIsNone(group_model[0])
    self.assertEqual(group_names(),
                     ['sex', 'survived', 'age', 'status'])

    select_attr(0)  # Class
    self.assertIsNone(group_model[0])
    self.assertEqual(group_names(),
                     ['sex', 'status', 'age', 'survived'])

    self.send_signal("Data", self.heart)
    select_attr(0)  # Class
    self.assertIsNone(group_model[0])
    self.assertEqual(group_names(),
                     ['thal',
                      'chest pain',
                      'exerc ind ang',
                      'slope peak exc ST',
                      'gender',
                      'rest ECG',
                      'fasting blood sugar > 120',
                      'diameter narrowing'])
188+
153189
def test_box_order_when_missing_stats(self):
154190
self.widget.compare = 1
155191
# The widget can't do anything smart here, but shouldn't crash
@@ -170,7 +206,6 @@ def test_continuous_metas(self):
170206
domain = Domain([], domain.class_var, metas)
171207
data = Table.from_table(domain, self.iris)
172208
self.send_signal(self.widget.Inputs.data, data)
173-
self.widget.controls.order_by_importance.setChecked(True)
174209

175210
def test_label_overlap(self):
176211
self.send_signal(self.widget.Inputs.data, self.heart)

0 commit comments

Comments
 (0)