Skip to content

Commit 3052123

Browse files
committed
OWRandomize: Add a new widget
1 parent 9d3a9fd commit 3052123

File tree

5 files changed

+223
-0
lines changed

5 files changed

+223
-0
lines changed

Orange/widgets/data/owrandomize.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import random
2+
3+
from AnyQt.QtCore import Qt
4+
from AnyQt.QtWidgets import QSizePolicy
5+
6+
from Orange.data import Table
7+
from Orange.preprocess import Randomize
8+
from Orange.widgets.settings import Setting
9+
from Orange.widgets.widget import OWWidget
10+
from Orange.widgets import gui
11+
12+
13+
class OWRandomize(OWWidget):
    """Widget that shuffles class, feature and/or meta columns of a table.

    Only a random sample of rows takes part in the shuffling; its size is
    ``scope_prop`` percent of the input rows. The remaining rows are copied
    to the output unchanged.
    """

    name = "Randomize"
    description = "Randomize features, class and/or metas in data table."
    icon = "icons/Random.svg"
    priority = 2100

    inputs = [("Data", Table, "set_data")]
    outputs = [("Data", Table)]

    resizing_enabled = False
    want_main_area = False

    # Persisted settings.
    shuffle_class = Setting(True)
    shuffle_attrs = Setting(False)
    shuffle_metas = Setting(False)
    # Percentage (0-100, see the slider limits) of rows that are shuffled.
    scope_prop = Setting(80)
    # Bound to the "Replicable shuffling" check box; a truthy value is used
    # as the seed, a falsy one means "do not seed".
    random_seed = Setting(0)
    auto_apply = Setting(True)

    def __init__(self):
        super().__init__()
        self.data = None

        # GUI
        box = gui.hBox(self.controlArea, "Shuffled columns")
        box.layout().setSpacing(20)
        self.class_check = gui.checkBox(
            box, self, "shuffle_class", "Classes",
            callback=self._shuffle_check_changed)
        self.attrs_check = gui.checkBox(
            box, self, "shuffle_attrs", "Features",
            callback=self._shuffle_check_changed)
        self.metas_check = gui.checkBox(
            box, self, "shuffle_metas", "Metas",
            callback=self._shuffle_check_changed)

        box = gui.vBox(self.controlArea, "Shuffled rows")
        hbox = gui.hBox(box)
        gui.widgetLabel(hbox, "None")
        self.scope_slider = gui.hSlider(
            hbox, self, "scope_prop", minValue=0, maxValue=100, width=140,
            createLabel=False, callback=self._scope_slider_changed)
        gui.widgetLabel(hbox, "All")
        self.scope_label = gui.widgetLabel(
            box, "", alignment=Qt.AlignCenter,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
        self._set_scope_label()
        gui.separator(box, 10, 10)
        self.replicable_check = gui.checkBox(
            box, self, "random_seed", "Replicable shuffling",
            callback=self._shuffle_check_changed)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)

    @property
    def parts(self):
        """Return the three shuffle flags in class, features, metas order."""
        return [self.shuffle_class, self.shuffle_attrs, self.shuffle_metas]

    def _shuffle_check_changed(self):
        """Recompute the output after any check box is toggled."""
        self.apply()

    def _scope_slider_changed(self):
        """Refresh the percentage label and recompute the output."""
        self._set_scope_label()
        self.apply()

    def _set_scope_label(self):
        """Show the current value of ``scope_prop`` as a percentage."""
        self.scope_label.setText("{}%".format(self.scope_prop))

    def set_data(self, data):
        """Store the input table (or None) and recompute the output."""
        self.data = data
        self.apply()

    def apply(self):
        """Shuffle the selected parts of a row sample and send the result.

        Sends None when there is no (or falsy) input data.
        """
        data = None
        if self.data:
            rand_seed = self.random_seed or None
            n_rows = len(self.data)
            size = int(n_rows * self.scope_prop / 100)
            # Draw from a private generator instead of calling random.seed():
            # reseeding the module-level generator would change the random
            # stream of every other user of `random` in the process. For a
            # given seed the sample is the same as with the global generator.
            rng = random.Random(rand_seed)
            indices = sorted(rng.sample(range(n_rows), size))
            # Combine the selected Randomize.Type members into one value.
            type_ = sum(t for t, p in zip(Randomize.Type, self.parts) if p)
            randomized = Randomize(type_, rand_seed)(self.data[indices])
            # Write the shuffled rows back into a copy of the input so the
            # rows outside the sample stay as they were.
            data = self.data.copy()
            for i, instance in zip(indices, randomized):
                data[i] = instance
        self.send("Data", data)

    def send_report(self):
        """Report which columns are shuffled, the row share and the seeding."""
        labels = ["classes", "features", "metas"]
        include = [label for label, i in zip(labels, self.parts) if i]
        # Builds "a", "a and b" or "a, b and c"; "none" if nothing is chosen.
        text = "none" if not include else \
            " and ".join(filter(None, (", ".join(include[:-1]), include[-1])))
        self.report_items(
            "Settings",
            [("Shuffled columns", text),
             ("Proportion of shuffled rows", "{}%".format(self.scope_prop)),
             # Truthiness test instead of list indexing, so an unexpected
             # stored value cannot raise IndexError.
             ("Replicable", "yes" if self.random_seed else "no")])
111+
112+
113+
if __name__ == "__main__":
    # Manual smoke test: show the widget with the "iris" data set loaded.
    from AnyQt.QtWidgets import QApplication

    app = QApplication([])
    widget = OWRandomize()
    widget.set_data(Table("iris"))
    widget.show()
    app.exec_()
    # Persist the settings once the event loop has finished.
    widget.saveSettings()
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import numpy as np
5+
6+
from Orange.data import Table
7+
from Orange.widgets.data.owrandomize import OWRandomize
8+
from Orange.widgets.tests.base import WidgetTest
9+
10+
11+
class TestOWRandomize(WidgetTest):
    """Tests for the Randomize widget, run against the "zoo" data set."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.zoo = Table("zoo")

    def setUp(self):
        self.widget = self.create_widget(OWRandomize)

    def _assert_shuffled(self, actual, expected):
        """Assert `actual` differs from `expected` but has equal columns.

        Sorting along axis 0 compares the values of each column
        irrespective of row order.
        """
        self.assertTrue((actual != expected).any())
        self.assertTrue((np.sort(actual, axis=0) ==
                         np.sort(expected, axis=0)).all())

    def test_data(self):
        """Check widget's data and output with data on the input"""
        self.assertIsNone(self.widget.data)
        self.send_signal("Data", self.zoo)
        self.assertEqual(self.widget.data, self.zoo)
        output = self.get_output("Data")
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self._assert_shuffled(output.Y, self.zoo.Y)
        self.send_signal("Data", None)
        self.assertIsNone(self.widget.data)
        self.assertIsNone(self.get_output("Data"))

    def test_shuffling(self):
        """Check widget's output for all types of shuffling"""
        self.send_signal("Data", self.zoo)
        self.widget.class_check.setChecked(True)
        self.widget.attrs_check.setChecked(True)
        self.widget.metas_check.setChecked(True)
        output = self.get_output("Data")
        self._assert_shuffled(output.X, self.zoo.X)
        self._assert_shuffled(output.Y, self.zoo.Y)
        self._assert_shuffled(output.metas, self.zoo.metas)

    def test_scope(self):
        """Check that at most the requested share of rows is changed"""
        self.send_signal("Data", self.zoo)
        output = self.get_output("Data")
        n_zoo = len(self.zoo)
        s = int(self.widget.scope_prop / 100 * n_zoo)
        unchanged = np.count_nonzero(output.Y == self.zoo.Y)
        changed = np.count_nonzero(output.Y != self.zoo.Y)
        # Rows outside the sample are never touched, so at least n_zoo - s
        # class values stay the same. The bound is not strict: every sampled
        # row may end up with a different class value.
        self.assertGreaterEqual(unchanged, n_zoo - s)
        self.assertLessEqual(changed, s)

    def test_replicable_shuffling(self):
        """Check widget's output for replicable shuffling """
        self.send_signal("Data", self.zoo)
        self.widget.replicable_check.setChecked(True)
        output = self.get_output("Data")
        np.testing.assert_array_equal(output.X, self.zoo.X)
        np.testing.assert_array_equal(output.metas, self.zoo.metas)
        self._assert_shuffled(output.Y, self.zoo.Y)
        # Applying again with the same seed must reproduce the output.
        self.widget.apply()
        output2 = self.get_output("Data")
        np.testing.assert_array_equal(output.X, output2.X)
        np.testing.assert_array_equal(output.Y, output2.Y)
        np.testing.assert_array_equal(output.metas, output2.metas)

doc/visual-programming/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Data
3333
widgets/data/transpose
3434
widgets/data/discretize
3535
widgets/data/continuize
36+
widgets/data/randomize
3637
widgets/data/concatenate
3738
widgets/data/paintdata
3839
widgets/data/pythonscript
831 Bytes
Loading
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Randomize
2+
=========
3+
4+
.. figure:: icons/randomize.png
5+
6+
Shuffles classes, features and/or metas of data.
7+
8+
Signals
9+
-------
10+
11+
**Inputs**:
12+
13+
- **Data**
14+
15+
Data set.
16+
17+
**Outputs**:
18+
19+
- **Data**
20+
21+
Randomized data set.
22+
23+
Description
24+
-----------
25+
26+
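A simple widget that shuffles the class, feature and/or meta columns of the input data. The proportion of rows that are shuffled can be set with a slider, and the shuffling can be made replicable.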

0 commit comments

Comments
 (0)