Skip to content

Commit 56706b6

Browse files
authored
Merge pull request #5411 from PrimozGodec/fix-distributions
Distributions: fix bins so that values on the edge fall in the right bin
2 parents 01843a5 + df34d90 commit 56706b6

File tree

2 files changed

+34
-3
lines changed

2 files changed

+34
-3
lines changed

Orange/preprocess/discretize.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,11 @@ def decimal_binnings(
320320
nbins = np.round((mx_ - mn_) / width)
321321
if min_bins <= nbins <= max_bins \
322322
and (not bins or bins[-1].nbins != nbins):
323-
bin_def = BinDefinition(mn_ + width * np.arange(nbins + 1),
324-
label_fmt, None, width)
323+
bins_ = mn_ + width * np.arange(nbins + 1)
324+
# prevent values on the edge of a bin from falling into the wrong bin
325+
# due to floating-point rounding error on decimals that have no exact binary representation
326+
bins_ = np.around(bins_, decimals=np.finfo(bins_.dtype).precision)
327+
bin_def = BinDefinition(bins_, label_fmt, None, width)
325328
bins.append(bin_def)
326329
return bins
327330

Orange/tests/test_discretize.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import scipy.sparse as sp
1010

11-
from Orange.preprocess import discretize, Discretize
11+
from Orange.preprocess import discretize, Discretize, decimal_binnings
1212
from Orange import data
1313
from Orange.data import Table, Instance, Domain, ContinuousVariable, DiscreteVariable
1414

@@ -95,6 +95,34 @@ def test_equalwidth_const_value(self):
9595
self.assertEqual(dvar.compute_value.points, [])
9696

9797

98+
class TestBinning(TestCase):
99+
def test_decimal_binnings(self):
100+
values = np.array([
101+
-0.2, -0.2, -0.6, 1.0, 0.2, -0.6, 0.6, 1.0, 0.4, -0.5, -0.4, -0.4,
102+
-0.6, 0.6, 0.75, 0.4, -0.2, 0.2, 0.0, 0.0, -1.0, -0.6, -0.2, -0.6,
103+
])
104+
binning = decimal_binnings(values, factors=[0.2, 0.25, 0.5])
105+
self.assertEqual(len(binning), 3)
106+
107+
np.testing.assert_array_equal(
108+
binning[0].thresholds,
109+
[-1, -0.8, -0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6, 0.8, 1]
110+
)
111+
self.assertEqual(binning[0].width, 0.2)
112+
113+
np.testing.assert_array_equal(
114+
binning[1].thresholds,
115+
[-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]
116+
)
117+
self.assertEqual(binning[1].width, 0.25)
118+
119+
np.testing.assert_array_equal(
120+
binning[2].thresholds,
121+
[-1, -0.5, 0, 0.5, 1]
122+
)
123+
self.assertEqual(binning[2].width, 0.5)
124+
125+
98126
# noinspection PyPep8Naming
99127
class TestEntropyMDL(TestCase):
100128
def test_entropy_with_two_values(self):

0 commit comments

Comments
 (0)