Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions Orange/widgets/data/owdatasampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Warning(OWWidget.Warning):

class Error(OWWidget.Error):
too_many_folds = Msg("Number of folds exceeds data size")
sample_larger_than_data = Msg("Sample must be smaller than data")
sample_larger_than_data = Msg("Sample can't be larger than data")
not_enough_to_stratify = Msg("Data is too small to stratify")
no_data = Msg("Dataset is empty")

Expand Down Expand Up @@ -88,7 +88,7 @@ def set_sampling_type_i():
self.sampleSizePercentageSlider = gui.hSlider(
gui.indentedBox(sampling), self,
"sampleSizePercentage",
minValue=0, maxValue=99, ticks=10, labelFormat="%d %%",
minValue=0, maxValue=100, ticks=10, labelFormat="%d %%",
callback=set_sampling_type(self.FixedProportion),
addSpace=12)

Expand Down Expand Up @@ -265,7 +265,7 @@ def updateindices(self):
else:
assert self.sampling_type == self.Bootstrap

if not repl and size is not None and (data_length <= size):
if not repl and size is not None and (size > data_length):
self.Error.sample_larger_than_data()
if not repl and data_length <= num_classes and self.stratify:
self.Error.not_enough_to_stratify()
Expand Down Expand Up @@ -386,7 +386,12 @@ def __call__(self, table):
o[sample] = 0
others = np.nonzero(o)[0]
return others, sample
if self.stratified and table.domain.has_discrete_class:
if self.n == len(table):
rgen = np.random.RandomState(self.random_state)
sample = np.arange(self.n)
rgen.shuffle(sample)
return np.array([], dtype=int), sample
elif self.stratified and table.domain.has_discrete_class:
test_size = max(len(table.domain.class_var.values), self.n)
splitter = skl.StratifiedShuffleSplit(
n_splits=1, test_size=test_size,
Expand Down
16 changes: 16 additions & 0 deletions Orange/widgets/data/tests/test_owdatasampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,22 @@ def test_bigger_output_warning(self):
self.set_fixed_sample_size(3, with_replacement=True)
self.assertTrue(self.widget.Warning.bigger_sample.is_shown())

def test_shuffling(self):
    """Check that selecting every instance still yields shuffled output.

    Two configurations are exercised: a fixed sample size of 150
    (presumably the full size of the iris data -- confirm against the
    fixture) and a fixed proportion of 100 %.
    """
    def check_output_is_reordered_copy():
        # The output must hold exactly the input instances, with at
        # least one of them in a different position.
        shuffled = self.get_output("Data Sample")
        moved = self.iris.ids != shuffled.ids
        self.assertTrue(moved.any())
        self.assertEqual(set(self.iris.ids), set(shuffled.ids))

    self.send_signal('Data', self.iris)

    # Fixed sample size: the "bigger sample" warning must stay hidden.
    self.set_fixed_sample_size(150)
    bigger_sample_warning = self.widget.Warning.bigger_sample
    self.assertFalse(bigger_sample_warning.is_shown())
    check_output_is_reordered_copy()

    # Fixed proportion: set the value directly and commit explicitly.
    self.select_sampling_type(self.widget.FixedProportion)
    self.widget.sampleSizePercentage = 100
    self.widget.commit()
    check_output_is_reordered_copy()

def set_fixed_sample_size(self, sample_size, with_replacement=False):
"""Set fixed sample size and return the number of gui spin.

Expand Down
2 changes: 2 additions & 0 deletions doc/visual-programming/source/widgets/data/datasampler.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ The **Data Sampler** widget implements several data sampling methods. It outputs
input dataset.
4. Press *Sample Data* to output the data sample.

If all data instances are selected (by setting the proportion to 100 % or by setting the fixed sample size to the size of the entire dataset), the output instances are still shuffled.

Examples
--------

Expand Down