Skip to content

Commit 70fd197

Browse files
authored
Merge pull request #1863 from VesnaT/owrandomize
[ENH] OWRandomize: Add a new widget
2 parents 2add4f6 + 3052123 commit 70fd197

File tree

7 files changed

+245
-9
lines changed

7 files changed

+245
-9
lines changed

Orange/preprocess/preprocess.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def __call__(self, data):
302302
class Randomize(Preprocess):
303303
"""
304304
Construct a preprocessor for randomization of classes,
305-
attributes or metas.
305+
attributes and/or metas.
306306
Given a data table, preprocessor returns a new table in
307307
which the data is shuffled.
308308
@@ -326,8 +326,8 @@ class Randomize(Preprocess):
326326
>>> randomizer = Randomize(Randomize.RandomizeClasses)
327327
>>> randomized_data = randomizer(data)
328328
"""
329-
Type = Enum("Randomize",
330-
"RandomizeClasses, RandomizeAttributes, RandomizeMetas")
329+
Type = Enum("Randomize", dict(RandomizeClasses=1, RandomizeAttributes=2,
330+
RandomizeMetas=4), type=int)
331331
RandomizeClasses, RandomizeAttributes, RandomizeMetas = Type
332332

333333
def __init__(self, rand_type=RandomizeClasses, rand_seed=None):
@@ -352,15 +352,12 @@ def __call__(self, data):
352352
new_data = Table(data)
353353
new_data.ensure_copy()
354354

355-
if self.rand_type == Randomize.RandomizeClasses:
355+
if self.rand_type & Randomize.RandomizeClasses:
356356
self.randomize(new_data.Y)
357-
elif self.rand_type == Randomize.RandomizeAttributes:
357+
if self.rand_type & Randomize.RandomizeAttributes:
358358
self.randomize(new_data.X)
359-
elif self.rand_type == Randomize.RandomizeMetas:
359+
if self.rand_type & Randomize.RandomizeMetas:
360360
self.randomize(new_data.metas)
361-
else:
362-
raise TypeError('Unsupported type')
363-
364361
return new_data
365362

366363
def randomize(self, table):

Orange/tests/test_randomize.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,22 @@ def test_randomize_metas(self):
5454
self.assertTrue((np.sort(data.metas, axis=0) == np.sort(
5555
data_rand.metas, axis=0)).all())
5656

57+
def test_randomize_all(self):
    """Randomizing classes, attributes and metas at once shuffles all three."""
    original = self.zoo
    everything = (Randomize.RandomizeClasses
                  | Randomize.RandomizeAttributes
                  | Randomize.RandomizeMetas)
    shuffled = Randomize(rand_type=everything)(original)
    # Same checks, same order as before: Y, then X, then metas.
    for part in ("Y", "X", "metas"):
        before = getattr(original, part)
        after = getattr(shuffled, part)
        # At least one value moved ...
        self.assertTrue((before != after).any())
        # ... but every column still holds the same multiset of values.
        self.assertTrue(
            (np.sort(before, axis=0) == np.sort(after, axis=0)).all())
72+
5773
def test_randomize_keep_original_data(self):
5874
data_orig = self.zoo
5975
data = Table("zoo")

Orange/widgets/data/owrandomize.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import random
2+
3+
from AnyQt.QtCore import Qt
4+
from AnyQt.QtWidgets import QSizePolicy
5+
6+
from Orange.data import Table
7+
from Orange.preprocess import Randomize
8+
from Orange.widgets.settings import Setting
9+
from Orange.widgets.widget import OWWidget
10+
from Orange.widgets import gui
11+
12+
13+
class OWRandomize(OWWidget):
    """Widget that shuffles classes, features and/or metas of a data table.

    A sample of ``scope_prop`` percent of the input rows is drawn; the
    selected column groups are shuffled within that sample by
    :class:`Randomize`, and the shuffled rows are written back into a copy
    of the input at their original positions. Rows outside the sample are
    left as they were.
    """
    name = "Randomize"
    description = "Randomize features, class and/or metas in data table."
    icon = "icons/Random.svg"
    priority = 2100

    inputs = [("Data", Table, "set_data")]
    outputs = [("Data", Table)]

    resizing_enabled = False
    want_main_area = False

    # Persisted widget settings.
    shuffle_class = Setting(True)
    shuffle_attrs = Setting(False)
    shuffle_metas = Setting(False)
    scope_prop = Setting(80)    # percentage of rows to shuffle (0-100 slider)
    random_seed = Setting(0)    # bound to the "Replicable shuffling" checkbox
    auto_apply = Setting(True)

    def __init__(self):
        super().__init__()
        self.data = None

        # GUI
        box = gui.hBox(self.controlArea, "Shuffled columns")
        box.layout().setSpacing(20)
        self.class_check = gui.checkBox(
            box, self, "shuffle_class", "Classes",
            callback=self._shuffle_check_changed)
        self.attrs_check = gui.checkBox(
            box, self, "shuffle_attrs", "Features",
            callback=self._shuffle_check_changed)
        self.metas_check = gui.checkBox(
            box, self, "shuffle_metas", "Metas",
            callback=self._shuffle_check_changed)

        box = gui.vBox(self.controlArea, "Shuffled rows")
        hbox = gui.hBox(box)
        gui.widgetLabel(hbox, "None")
        self.scope_slider = gui.hSlider(
            hbox, self, "scope_prop", minValue=0, maxValue=100, width=140,
            createLabel=False, callback=self._scope_slider_changed)
        gui.widgetLabel(hbox, "All")
        self.scope_label = gui.widgetLabel(
            box, "", alignment=Qt.AlignCenter,
            sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
        self._set_scope_label()
        gui.separator(box, 10, 10)
        self.replicable_check = gui.checkBox(
            box, self, "random_seed", "Replicable shuffling",
            callback=self._shuffle_check_changed)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)

    @property
    def parts(self):
        """Checkbox states in the order classes, features, metas."""
        return [self.shuffle_class, self.shuffle_attrs, self.shuffle_metas]

    def _shuffle_check_changed(self):
        self.apply()

    def _scope_slider_changed(self):
        self._set_scope_label()
        self.apply()

    def _set_scope_label(self):
        self.scope_label.setText("{}%".format(self.scope_prop))

    def set_data(self, data):
        """Store the input table (or None) and recompute the output."""
        self.data = data
        self.apply()

    def apply(self):
        """Shuffle the selected parts of a row sample and send the result.

        Sends ``None`` when there is no input data (or it is falsy).
        """
        data = None
        if self.data:
            # An unchecked "replicable" box (falsy setting) means "no seed".
            rand_seed = self.random_seed or None
            size = int(len(self.data) * self.scope_prop / 100)
            # Use a private generator rather than random.seed(): reseeding
            # the module-level generator would change the random stream of
            # every other user of `random` in the process. For a given seed
            # this draws the same sample as the module-level functions.
            rng = random.Random(rand_seed)
            indices = sorted(rng.sample(range(len(self.data)), size))
            # Pair each checkbox with its flag explicitly instead of relying
            # on the iteration order of Randomize.Type.
            flags = ((Randomize.RandomizeClasses, self.shuffle_class),
                     (Randomize.RandomizeAttributes, self.shuffle_attrs),
                     (Randomize.RandomizeMetas, self.shuffle_metas))
            type_ = sum(flag for flag, checked in flags if checked)
            randomized = Randomize(type_, rand_seed)(self.data[indices])
            data = self.data.copy()
            # Write shuffled rows back to the positions they were sampled from.
            for i, instance in zip(indices, randomized):
                data[i] = instance
        self.send("Data", data)

    def send_report(self):
        """Report which column groups are shuffled, the scope and the seed mode."""
        labels = ["classes", "features", "metas"]
        include = [label for label, i in zip(labels, self.parts) if i]
        # "a", "a and b" or "a, b and c"; "none" when nothing is selected.
        text = "none" if not include else \
            " and ".join(filter(None, (", ".join(include[:-1]), include[-1])))
        self.report_items(
            "Settings",
            [("Shuffled columns", text),
             ("Proportion of shuffled rows", "{}%".format(self.scope_prop)),
             # Conditional instead of list indexing, so a stored setting
             # other than 0/1 cannot raise IndexError.
             ("Replicable", "yes" if self.random_seed else "no")])
111+
112+
113+
if __name__ == "__main__":
    # Manual preview: open the widget on the iris data set, run the Qt event
    # loop until the window is closed, then persist the widget settings.
    from AnyQt.QtWidgets import QApplication

    application = QApplication([])
    widget = OWRandomize()
    widget.set_data(Table("iris"))
    widget.show()
    application.exec_()
    widget.saveSettings()
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import numpy as np
5+
6+
from Orange.data import Table
7+
from Orange.widgets.data.owrandomize import OWRandomize
8+
from Orange.widgets.tests.base import WidgetTest
9+
10+
11+
class TestOWRandomize(WidgetTest):
    """Tests for the Randomize widget's input handling and shuffling options."""

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.zoo = Table("zoo")

    def setUp(self):
        self.widget = self.create_widget(OWRandomize)

    def _assert_shuffled(self, shuffled, original):
        """Assert `shuffled` differs from `original` somewhere, yet each
        column sorts to the same values (i.e. it is a per-column permutation).
        """
        self.assertTrue((shuffled != original).any())
        self.assertTrue((np.sort(shuffled, axis=0) ==
                         np.sort(original, axis=0)).all())

    def test_data(self):
        """Check widget's data and output with data on the input"""
        zoo = self.zoo
        self.assertEqual(self.widget.data, None)
        self.send_signal("Data", zoo)
        self.assertEqual(self.widget.data, zoo)
        result = self.get_output("Data")
        # With default settings only the class column is shuffled.
        np.testing.assert_array_equal(result.X, zoo.X)
        np.testing.assert_array_equal(result.metas, zoo.metas)
        self._assert_shuffled(result.Y, zoo.Y)
        # Removing the input clears both the stored data and the output.
        self.send_signal("Data", None)
        self.assertEqual(self.widget.data, None)
        self.assertIsNone(self.get_output("Data"))

    def test_shuffling(self):
        """Check widget's output for all types of shuffling"""
        zoo = self.zoo
        self.send_signal("Data", zoo)
        for check in (self.widget.class_check,
                      self.widget.attrs_check,
                      self.widget.metas_check):
            check.setChecked(True)
        result = self.get_output("Data")
        self._assert_shuffled(result.X, zoo.X)
        self._assert_shuffled(result.Y, zoo.Y)
        self._assert_shuffled(result.metas, zoo.metas)

    def test_scope(self):
        """Check that the number of changed classes respects the scope"""
        self.send_signal("Data", self.zoo)
        result = self.get_output("Data")
        total = len(self.zoo)
        in_scope = int(self.widget.scope_prop / 100 * total)
        unchanged = sum((result.Y == self.zoo.Y).astype(int))
        changed = sum((result.Y != self.zoo.Y).astype(int))
        self.assertGreater(unchanged, total - in_scope)
        self.assertLessEqual(changed, in_scope)

    def test_replicable_shuffling(self):
        """Check widget's output for replicable shuffling"""
        zoo = self.zoo
        self.send_signal("Data", zoo)
        self.widget.replicable_check.setChecked(True)
        first = self.get_output("Data")
        np.testing.assert_array_equal(first.X, zoo.X)
        np.testing.assert_array_equal(first.metas, zoo.metas)
        self._assert_shuffled(first.Y, zoo.Y)
        # Applying again with the seed enabled must reproduce the same output.
        self.widget.apply()
        second = self.get_output("Data")
        np.testing.assert_array_equal(first.X, second.X)
        np.testing.assert_array_equal(first.Y, second.Y)
        np.testing.assert_array_equal(first.metas, second.metas)

doc/visual-programming/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Data
3333
widgets/data/transpose
3434
widgets/data/discretize
3535
widgets/data/continuize
36+
widgets/data/randomize
3637
widgets/data/concatenate
3738
widgets/data/paintdata
3839
widgets/data/pythonscript
831 Bytes
Loading
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Randomize
2+
=========
3+
4+
.. figure:: icons/randomize.png
5+
6+
Shuffles classes, features and/or metas of data.
7+
8+
Signals
9+
-------
10+
11+
**Inputs**:
12+
13+
- **Data**
14+
15+
Data set.
16+
17+
**Outputs**:
18+
19+
- **Data**
20+
21+
Randomized data set.
22+
23+
Description
24+
-----------
25+
26+
A simple widget that shuffles classes, features and/or metas of data.

0 commit comments

Comments
 (0)