Skip to content

Commit 5330135

Browse files
committed
OWBoxPlot: Add sorting of groups
1 parent f6fe789 commit 5330135

File tree

2 files changed

+123
-37
lines changed

2 files changed

+123
-37
lines changed

Orange/widgets/visualize/owboxplot.py

Lines changed: 78 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from Orange.widgets import widget, gui
2222
from Orange.widgets.settings import (Setting, DomainContextHandler,
2323
ContextSetting)
24-
from Orange.widgets.utils.itemmodels import DomainModel, VariableListModel
24+
from Orange.widgets.utils.itemmodels import VariableListModel
2525
from Orange.widgets.utils.annotated_data import (create_annotated_table,
2626
ANNOTATED_DATA_SIGNAL_NAME)
2727
from Orange.widgets.utils.widgetpreview import WidgetPreview
@@ -154,6 +154,7 @@ class Outputs:
154154

155155
attribute = ContextSetting(None)
156156
order_by_importance = Setting(False)
157+
order_grouping_by_importance = Setting(False)
157158
group_var = ContextSetting(None)
158159
show_annotations = Setting(True)
159160
compare = Setting(CompareMeans)
@@ -213,19 +214,20 @@ def __init__(self):
213214
# set the minimal height (see the penultimate paragraph of
214215
# http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget)
215216
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
216-
gui.separator(view.box, 6, 6)
217-
self.cb_order = gui.checkBox(
217+
gui.checkBox(
218218
view.box, self, "order_by_importance",
219-
"Order by relevance",
219+
"Order by relevance to subgroups",
220220
tooltip="Order by 𝜒² or ANOVA over the subgroups",
221-
callback=self.apply_sorting)
222-
self.group_vars = DomainModel(
223-
placeholder="None", separators=False,
224-
valid_types=Orange.data.DiscreteVariable)
225-
self.group_view = view = gui.listView(
221+
callback=self.apply_attr_sorting)
222+
self.group_vars = VariableListModel(placeholder="None")
223+
view = gui.listView(
226224
self.controlArea, self, "group_var", box="Subgroups",
227225
model=self.group_vars, callback=self.grouping_changed)
228-
view.setEnabled(False)
226+
gui.checkBox(
227+
view.box, self, "order_grouping_by_importance",
228+
"Order by relevance to variable",
229+
tooltip="Order by 𝜒² or ANOVA over the variable values",
230+
callback=self.apply_group_sorting)
229231
view.setMinimumSize(QSize(30, 30))
230232
# See the comment above
231233
view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored)
@@ -258,7 +260,6 @@ def __init__(self):
258260
self.sort_cb = gui.checkBox(
259261
box, self, 'sort_freqs', "Sort by subgroup frequencies",
260262
callback=self.display_changed)
261-
gui.rubber(box)
262263

263264
gui.vBox(self.mainArea, addSpace=True)
264265
self.box_scene = QGraphicsScene()
@@ -290,12 +291,20 @@ def eventFilter(self, obj, event):
290291

291292
return super().eventFilter(obj, event)
292293

293-
def reset_attrs(self, domain):
294+
def reset_attrs(self):
295+
domain = self.dataset.domain
294296
self.attrs[:] = [
295297
var for var in chain(
296298
domain.class_vars, domain.metas, domain.attributes)
297299
if var.is_primitive()]
298300

301+
def reset_groups(self):
    """Rebuild ``self.group_vars`` in its default order.

    The list starts with ``None`` and is followed by every discrete
    variable of the current dataset's domain: class variables first,
    then metas, then attributes.
    """
    domain = self.dataset.domain
    candidates = chain(domain.class_vars, domain.metas, domain.attributes)
    discrete = [var for var in candidates if var.is_discrete]
    # Slice assignment replaces the contents of the existing list object
    # rather than rebinding the attribute.
    self.group_vars[:] = [None] + discrete
307+
299308
# noinspection PyTypeChecker
300309
@Inputs.data
301310
def set_data(self, dataset):
@@ -309,19 +318,19 @@ def set_data(self, dataset):
309318
self.group_var = None
310319
self.attribute = None
311320
if dataset:
312-
domain = dataset.domain
313-
self.group_vars.set_domain(domain)
314-
self.group_view.setEnabled(len(self.group_vars) > 1)
315-
self.reset_attrs(domain)
316-
self.select_default_variables(domain)
321+
self.reset_attrs()
322+
self.reset_groups()
323+
self.select_default_variables()
317324
self.openContext(self.dataset)
318325
self.grouping_changed()
326+
self.attr_changed()
319327
else:
320328
self.reset_all_data()
321329
self.commit()
322330

323-
def select_default_variables(self, domain):
331+
def select_default_variables(self):
324332
# visualize first non-class variable, group by class (if present)
333+
domain = self.dataset.domain
325334
if len(self.attrs) > len(domain.class_vars):
326335
self.attribute = self.attrs[len(domain.class_vars)]
327336
elif self.attrs:
@@ -332,7 +341,7 @@ def select_default_variables(self, domain):
332341
else:
333342
self.group_var = None # Reset to trigger selection via callback
334343

335-
def apply_sorting(self):
344+
def apply_attr_sorting(self):
336345
def compute_score(attr):
337346
if attr is group_var:
338347
return 3
@@ -362,8 +371,48 @@ def compute_score(attr):
362371
include_class=True, include_metas=True) else None
363372
self.attrs.sort(key=compute_score)
364373
else:
365-
self.reset_attrs(domain)
366-
self.attribute = attribute
374+
self.reset_attrs()
375+
self.attribute = attribute # reset selection
376+
self._ensure_selection_visible(self.controls.attribute)
377+
378+
def apply_group_sorting(self):
    """Reorder the candidate grouping variables for the selected variable.

    If ``order_grouping_by_importance`` is set and a variable is selected,
    ``self.group_vars`` is sorted in place by the p-value of a test between
    each grouping variable and ``self.attribute``: one-way ANOVA
    (``f_oneway``) when the variable is continuous, ``self._chi_square``
    otherwise. In every other case the list is rebuilt in its default
    order with ``reset_groups``. The previously chosen ``group_var`` is
    then assigned again and scrolled into view.

    Does nothing when there is no dataset.
    """
    def compute_stat(group):
        # Sort key (ascending): the ``None`` entry gets -1 so it stays
        # first, computed p-values come next, tests that cannot be computed
        # get 2, and the selected variable itself gets 3 so it goes last.
        if group is attr:
            return 3
        if group is None:
            return -1
        if attr.is_continuous:
            group_col = data.get_column_view(group)[0].astype(int)
            # Split the variable's column by group value, then drop
            # missing values and empty groups before running ANOVA.
            samples = (attr_col[group_col == i]
                       for i in range(len(group.values)))
            samples = (col[~np.isnan(col)] for col in samples)
            samples = [col for col in samples if len(col)]
            p = f_oneway(*samples)[1] if len(samples) > 1 else 2
        else:
            p = self._chi_square(group, attr)[1]
        if math.isnan(p):
            return 2
        return p

    data = self.dataset
    if data is None:
        return
    attr = self.attribute
    group_var = self.group_var
    # A dataset can be present while no variable is selected; there is
    # then nothing to rank against, so fall back to the default order
    # instead of failing on ``attr.is_continuous``.
    if self.order_grouping_by_importance and attr is not None:
        if attr.is_continuous:
            attr_col = data.get_column_view(attr)[0].astype(float)
        self.group_vars.sort(key=compute_stat)
    else:
        self.reset_groups()
    self.group_var = group_var  # reset selection
    self._ensure_selection_visible(self.controls.group_var)
410+
411+
@staticmethod
def _ensure_selection_visible(view):
    """Scroll ``view`` to its selected index when exactly one is selected.

    Views with no selection, or with more than one selected index, are
    left untouched.
    """
    selected = view.selectedIndexes()
    if len(selected) != 1:
        return
    view.scrollTo(selected[0])
367416

368417
def _chi_square(self, group_var, attr):
369418
# Chi-square with the given distribution into groups
@@ -380,16 +429,14 @@ def _chi_square(self, group_var, attr):
380429
def reset_all_data(self):
381430
self.clear_scene()
382431
self.stat_test = ""
383-
self.attrs.clear()
384-
self.group_vars.set_domain(None)
385-
self.group_view.setEnabled(False)
432+
self.attrs[:] = []
433+
self.group_vars[:] = [None]
386434
self.is_continuous = False
387435
self.update_display_box()
388436

389437
def grouping_changed(self):
390-
self.cb_order.setEnabled(self.group_var is not None)
391-
self.apply_sorting()
392-
self.attr_changed()
438+
self.apply_attr_sorting()
439+
self.update_graph()
393440

394441
def select_box_items(self):
395442
temp_cond = self.conditions.copy()
@@ -399,6 +446,10 @@ def select_box_items(self):
399446
[c.conditions for c in temp_cond])
400447

401448
def attr_changed(self):
    """Re-sort the grouping variables, then redraw the plot."""
    # The grouping list is reordered first, then the graph is refreshed.
    self.apply_group_sorting()
    self.update_graph()
451+
452+
def update_graph(self):
402453
self.compute_box_data()
403454
self.update_display_box()
404455
self.layout_changed()

Orange/widgets/visualize/tests/test_owboxplot.py

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,9 @@ def test_input_data(self):
4343
self.send_signal(self.widget.Inputs.data, None)
4444
self.assertEqual(len(self.widget.attrs), 0)
4545
self.assertEqual(len(self.widget.group_vars), 1)
46-
self.assertFalse(self.widget.group_view.isEnabled())
4746
self.assertTrue(self.widget.display_box.isHidden())
4847
self.assertFalse(self.widget.stretching_box.isHidden())
4948

50-
self.send_signal(self.widget.Inputs.data, self.iris)
51-
self.assertTrue(self.widget.group_view.isEnabled())
52-
5349
def test_primitive_metas(self):
5450
new_domain = Domain(attributes=[], class_vars=[], metas=(
5551
self.data.domain.attributes + self.data.domain.class_vars))
@@ -88,7 +84,6 @@ def test_input_data_missings_disc_no_group_var(self):
8884
data.X[:, 1] = np.nan
8985
data.domain.attributes[1].values = []
9086
self.send_signal("Data", data)
91-
self.widget.controls.order_by_importance.setChecked(True)
9287
self._select_list_items(self.widget.controls.attribute)
9388
self._select_list_items(self.widget.controls.group_var)
9489

@@ -100,7 +95,7 @@ def test_attribute_combinations(self):
10095
m.setCurrentIndex(group_list.model().index(i), m.ClearAndSelect)
10196
self._select_list_items(self.widget.controls.attribute)
10297

103-
def test_apply_sorting(self):
98+
def test_apply_sorting_group(self):
10499
controls = self.widget.controls
105100
group_list = controls.group_var
106101
order_check = controls.order_by_importance
@@ -115,10 +110,7 @@ def select_group(i):
115110
data = self.titanic
116111
self.send_signal("Data", data)
117112

118-
select_group(0)
119-
self.assertFalse(order_check.isEnabled())
120113
select_group(2) # First attribute
121-
self.assertTrue(order_check.isEnabled())
122114

123115
order_check.setChecked(False)
124116
self.assertEqual(tuple(attributes),
@@ -150,6 +142,50 @@ def select_group(i):
150142
'fasting blood sugar > 120',
151143
'diameter narrowing'])
152144

145+
def test_apply_sorting_vars(self):
    # Checks the order of the grouping-variable list with the
    # "order_grouping_by_importance" check box off and on, for different
    # selected variables and datasets.
    widget = self.widget
    controls = widget.controls
    attr_list = widget.attrs
    groups = widget.group_vars
    order_check = controls.order_grouping_by_importance

    def select_attr(row):
        selection_model = controls.attribute.selectionModel()
        selection_model.setCurrentIndex(
            attr_list.index(row), selection_model.ClearAndSelect)

    def group_names():
        # Names of all entries after the leading ``None``.
        return [var.name for var in groups[1:]]

    data = self.titanic
    self.send_signal("Data", data)

    select_attr(1)  # First attribute

    # Unchecked: the default order.
    order_check.setChecked(False)
    domain = data.domain
    self.assertEqual(
        tuple(groups),
        (None, ) + domain.class_vars + domain.attributes)

    # Checked: ordered with respect to the selected variable.
    order_check.setChecked(True)
    self.assertIsNone(groups[0])
    self.assertEqual(group_names(),
                     ['sex', 'survived', 'age', 'status'])

    select_attr(0)  # Class
    self.assertIsNone(groups[0])
    self.assertEqual(group_names(),
                     ['sex', 'status', 'age', 'survived'])

    data = self.heart
    self.send_signal("Data", data)
    select_attr(0)  # Class
    self.assertIsNone(groups[0])
    expected = ['thal',
                'chest pain',
                'exerc ind ang',
                'slope peak exc ST',
                'gender',
                'rest ECG',
                'fasting blood sugar > 120',
                'diameter narrowing']
    self.assertEqual(group_names(), expected)
188+
153189
def test_box_order_when_missing_stats(self):
154190
self.widget.compare = 1
155191
# The widget can't do anything smart here, but shouldn't crash
@@ -170,7 +206,6 @@ def test_continuous_metas(self):
170206
domain = Domain([], domain.class_var, metas)
171207
data = Table.from_table(domain, self.iris)
172208
self.send_signal(self.widget.Inputs.data, data)
173-
self.widget.controls.order_by_importance.setChecked(True)
174209

175210
def test_label_overlap(self):
176211
self.send_signal(self.widget.Inputs.data, self.heart)

0 commit comments

Comments
 (0)