Skip to content

Commit 8d2429e

Browse files
authored
Merge pull request #5378 from janezd/add-unique
Unique: Move widget from prototypes
2 parents 266d365 + 294ba57 commit 8d2429e

File tree

8 files changed

+326
-0
lines changed

8 files changed

+326
-0
lines changed
Lines changed: 76 additions & 0 deletions
Loading

Orange/widgets/data/owunique.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from operator import itemgetter
2+
3+
import numpy as np
4+
5+
from AnyQt.QtCore import Qt
6+
7+
from Orange.data import Table
8+
from Orange.widgets import widget, gui, settings
9+
from Orange.widgets.utils.itemmodels import DomainModel
10+
from Orange.widgets.utils.widgetpreview import WidgetPreview
11+
12+
13+
class OWUnique(widget.OWWidget):
    """Widget that keeps one data instance per group of equal key values.

    Rows of the input table are grouped by their values in the selected
    variables (or in all variables of the model when nothing is selected).
    The chosen tiebreaker decides which row of each group is sent on.
    """
    name = 'Unique'
    icon = 'icons/Unique.svg'
    description = 'Filter instances unique by specified key attribute(s).'

    class Inputs:
        data = widget.Input("Data", Table)

    class Outputs:
        data = widget.Output("Data", Table)

    want_main_area = False

    # Combo box label -> function that receives the list of row indices of
    # one group and returns the index to keep, or None to drop the group.
    # The first key doubles as the default value of the `tiebreaker` setting.
    TIEBREAKERS = {
        'Last instance': itemgetter(-1),
        'First instance': itemgetter(0),
        'Middle instance': lambda seq: seq[len(seq) // 2],
        'Random instance': np.random.choice,
        'Discard non-unique instances':
            lambda seq: None if len(seq) != 1 else seq[0],
    }

    settingsHandler = settings.DomainContextHandler()
    selected_vars = settings.ContextSetting([])
    tiebreaker = settings.Setting(next(iter(TIEBREAKERS)))
    autocommit = settings.Setting(True)

    def __init__(self):
        """Build the control area: variable list, tiebreaker combo, commit."""
        # Commit is thunked because autocommit redefines it
        # pylint: disable=unnecessary-lambda
        super().__init__()
        self.data = None

        self.var_model = DomainModel(parent=self, order=DomainModel.MIXED)
        group_view = gui.listView(
            self.controlArea, self, "selected_vars",
            box="Group by",
            model=self.var_model,
            callback=lambda: self.commit())
        group_view.setSelectionMode(group_view.ExtendedSelection)

        gui.comboBox(
            self.controlArea, self, 'tiebreaker',
            box=True,
            label='Instance to select in each group:',
            items=tuple(self.TIEBREAKERS),
            callback=lambda: self.commit(),
            sendSelectedValue=True)
        gui.auto_commit(
            self.controlArea, self, 'autocommit', 'Commit',
            orientation=Qt.Horizontal)

    @Inputs.data
    def set_data(self, data):
        """Store the new input, reset the variable selection and commit.

        For non-empty data the model is filled from its domain, all
        variables are selected and the context is opened on that domain.
        Otherwise the model is cleared.
        """
        self.closeContext()
        self.data = data
        self.selected_vars = []
        if not data:
            self.var_model.set_domain(None)
        else:
            domain = data.domain
            self.var_model.set_domain(domain)
            self.selected_vars = self.var_model[:]
            self.openContext(domain)

        self.unconditional_commit()

    def commit(self):
        """Send the filtered table, or None when there is no input data."""
        result = None
        if self.data is not None:
            result = self._compute_unique_data()
        self.Outputs.data.send(result)

    def _compute_unique_data(self):
        """Return the rows picked by the tiebreaker, or None if none remain.

        Rows are grouped by the tuple of their values in the key variables.
        The tiebreaker is applied to each group's list of row indices, and
        the surviving indices are sorted before indexing the data.
        """
        key_vars = self.selected_vars or self.var_model
        # get_column_view returns a tuple; only its first element is used.
        columns = [self.data.get_column_view(var)[0] for var in key_vars]

        groups = {}
        for row, key in enumerate(zip(*columns)):
            groups.setdefault(key, []).append(row)

        pick = self.TIEBREAKERS[self.tiebreaker]
        picked = (pick(rows) for rows in groups.values())
        kept = sorted(row for row in picked if row is not None)
        if not kept:
            return None
        return self.data[kept]
94+
95+
96+
if __name__ == "__main__":  # pragma: no cover
    # Run the widget on its own with the "iris" table as input.
    preview = WidgetPreview(OWUnique)
    preview.run(Table("iris"))
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# Tests test protected methods
2+
# pylint: disable=protected-access
3+
import unittest
4+
from unittest.mock import Mock
5+
6+
import numpy as np
7+
8+
from Orange.data import DiscreteVariable, ContinuousVariable, Domain, Table
9+
from Orange.widgets.tests.base import WidgetTest
10+
11+
from Orange.widgets.data import owunique
12+
13+
14+
class TestOWUnique(WidgetTest):
    """Tests for OWUnique: context settings, commit and row selection."""

    def setUp(self):
        """Create the widget and a six-row table with duplicated rows."""
        self.widget = self.create_widget(owunique.OWUnique)  #: OWUnique

        def abc_variables(names):
            # One discrete variable with values a/b/c per character.
            return [DiscreteVariable(name, values=("a", "b", "c"))
                    for name in names]

        self.domain = Domain(
            abc_variables("abcd"),
            [ContinuousVariable("e")],
            abc_variables("fg"))

        # Rows 0-2 are identical in the attributes; rows 3-5 are distinct.
        attribute_values = [
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 2, 0, 0],
            [1, 2, 0, 0],
        ]
        self.table = Table.from_numpy(
            self.domain,
            attribute_values,
            np.arange(6),
            np.zeros((6, 2)))

    def test_settings(self):
        """The selection resets on None and is restored by the context."""
        widget = self.widget
        full_domain = self.domain
        widget.unconditional_commit = Mock()

        self.send_signal(widget.Inputs.data, self.table)
        widget.selected_vars = [widget.var_model[2]]

        self.send_signal(widget.Inputs.data, None)
        self.assertEqual(widget.selected_vars, [])

        reduced_domain = Domain(
            full_domain.attributes[2:],
            full_domain.class_vars,
            full_domain.metas)
        reduced_table = self.table.transform(reduced_domain)
        self.send_signal(widget.Inputs.data, reduced_table)
        self.assertEqual(widget.selected_vars, [self.domain[2]])

    def test_unconditional_commit(self):
        """New input is committed even when autocommit is off."""
        widget = self.widget
        widget.autocommit = False

        compute = Mock()
        widget._compute_unique_data = compute
        compute.return_value = self.table

        self.send_signal(widget.Inputs.data, self.table)
        self.assertIs(
            self.get_output(widget.Outputs.data), compute.return_value)

        self.send_signal(widget.Inputs.data, None)
        self.assertIs(self.get_output(widget.Outputs.data), None)

    def test_compute(self):
        """Each tiebreaker keeps the expected row of the duplicated group."""
        widget = self.widget

        self.send_signal(widget.Inputs.data, self.table)
        result = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(result.Y, self.table.Y)

        widget.selected_vars = widget.var_model[:2]

        # The class column holds the original row index, so Y names the
        # rows that were kept.
        expected_rows = (
            ("Last instance", [2, 3, 4, 5]),
            ("First instance", [0, 3, 4, 5]),
            ("Middle instance", [1, 3, 4, 5]),
            ("Discard non-unique instances", [3, 4, 5]),
        )
        for tiebreaker, rows in expected_rows:
            widget.tiebreaker = tiebreaker
            widget.commit()
            result = self.get_output(widget.Outputs.data)
            np.testing.assert_equal(result.Y, rows)

    def test_use_all_when_non_selected(self):
        """An empty selection behaves like selecting every variable."""
        widget = self.widget
        widget.tiebreaker = "First instance"

        attributes_only = Domain(self.table.domain.attributes)
        data = self.table.transform(attributes_only)

        self.send_signal(widget.Inputs.data, data)
        result = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(result.X, data.X[2:])

        widget.selected_vars.clear()
        widget.unconditional_commit()
        result = self.get_output(widget.Outputs.data)
        np.testing.assert_equal(result.X, data.X[2:])

    def test_no_output_on_no_unique(self):
        """Discarding non-unique rows of all-equal data outputs None."""
        widget = self.widget
        widget.tiebreaker = "Discard non-unique instances"

        attributes = self.table.domain.attributes
        all_equal = Table.from_numpy(
            Domain(attributes), np.zeros((5, len(attributes))))
        self.send_signal(widget.Inputs.data, all_equal)
        self.assertIsNone(self.get_output(widget.Outputs.data))
116+
117+
118+
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

doc/visual-programming/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ Data
6060
widgets/data/color
6161
widgets/data/featurestatistics
6262
widgets/data/neighbors
63+
widgets/data/unique
6364

6465

6566
Visualize
104 KB
Loading
23.9 KB
Loading
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Unique
2+
======
3+
4+
Remove duplicated data instances.
5+
6+
**Inputs**
7+
8+
- Data: data table
9+
10+
**Outputs**
11+
12+
- Data: data table without duplicates
13+
14+
The widget removes duplicated data instances. The user can choose a subset of variables to compare, so two instances are considered duplicates even if they differ in the values of the other, ignored variables.
15+
16+
![](images/Unique-stamped.png)
17+
18+
1. Select the variables that are considered in comparing data instances.
19+
2. Choose which data instance is kept in each group of duplicates. The options are to keep the first, last, middle or a random instance, or to keep none, that is, to discard every instance that has a duplicate.
20+
21+
Example
22+
-------
23+
24+
Data set *Zoo* contains two frogs. This workflow keeps only one by removing instances with the same names.
25+
26+
![](images/Unique-Example.png)

doc/widgets.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,13 @@
338338
"keywords": [
339339
"export"
340340
]
341+
},
342+
{
343+
"text": "Unique",
344+
"doc": "visual-programming/source/widgets/data/unique.md",
345+
"icon": "../Orange/widgets/data/icons/Unique.svg",
346+
"background": "#FFD39F",
347+
"keywords": []
341348
}
342349
]
343350
],

0 commit comments

Comments
 (0)