Skip to content

Commit fdc4024

Browse files
authored
Make BetaUnivariate reject fitted parameters that obviously don’t match the data (#473)
1 parent 4727e09 commit fdc4024

File tree

3 files changed

+117
-3
lines changed

3 files changed

+117
-3
lines changed

copulas/univariate/beta.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from scipy.stats import beta
55

66
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel
7+
from copulas.utils import EPSILON
78

89

910
class BetaUnivariate(ScipyModel):
@@ -25,9 +26,19 @@ def _fit_constant(self, X):
2526
}
2627

2728
def _fit(self, X):
28-
loc = np.min(X)
29-
scale = np.max(X) - loc
30-
a, b, loc, scale = beta.fit(X, loc=loc, scale=scale)
29+
min_x = np.min(X)
30+
max_x = np.max(X)
31+
a, b, loc, scale = beta.fit(X, loc=min_x, scale=max_x - min_x)
32+
33+
if loc > max_x or scale + loc < min_x:
34+
raise ValueError(
35+
'Converged parameters for beta distribution are '
36+
'outside the min/max range of the data.'
37+
)
38+
39+
if scale < EPSILON:
40+
raise ValueError('Converged parameters for beta distribution have a near-zero range.')
41+
3142
self._params = {'loc': loc, 'scale': scale, 'a': a, 'b': b}
3243

3344
def _is_constant(self):

tests/end-to-end/univariate/test_beta.py

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from unittest import TestCase
44

55
import numpy as np
6+
import pytest
67
from scipy.stats import beta
78

89
from copulas.univariate import BetaUnivariate
@@ -43,6 +44,67 @@ def test_fit_sample_constant(self):
4344
assert model._constant_value == 5
4445
np.testing.assert_equal(np.full(50, 5), model.sample(50))
4546

47+
def test_fit_raises(self):
48+
"""Test it for dataset that fails."""
49+
model = BetaUnivariate()
50+
data = np.array([ # From GH #472
51+
3.337169,
52+
6.461266,
53+
4.871053,
54+
4.206772,
55+
5.157541,
56+
3.437069,
57+
6.712143,
58+
5.135402,
59+
6.453203,
60+
4.623059,
61+
5.827161,
62+
5.291858,
63+
5.571134,
64+
5.441359,
65+
4.816826,
66+
3.277817,
67+
4.215228,
68+
4.48338,
69+
4.345968,
70+
6.125759,
71+
4.860464,
72+
6.511877,
73+
3.959057,
74+
4.882996,
75+
6.058503,
76+
3.337436,
77+
5.06921,
78+
4.414371,
79+
4.564768,
80+
5.1014,
81+
4.161663,
82+
5.757317,
83+
4.032375,
84+
3.907653,
85+
4.269559,
86+
4.08505,
87+
6.811531,
88+
5.02597,
89+
5.438358,
90+
3.44442,
91+
3.462209,
92+
4.871354,
93+
5.947369,
94+
4.167546,
95+
4.692054,
96+
5.542011,
97+
4.926634,
98+
4.491286,
99+
5.344663,
100+
4.526089,
101+
1.645776,
102+
])
103+
104+
err_msg = 'Converged parameters for beta distribution have a near-zero range.'
105+
with pytest.raises(ValueError, match=err_msg):
106+
model.fit(data)
107+
46108
def test_pdf(self):
47109
model = BetaUnivariate()
48110
model.fit(self.data)

tests/unit/univariate/test_beta.py

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from unittest.mock import patch
33

44
import numpy as np
5+
import pytest
56
from scipy.stats import beta
67

78
from copulas.univariate import BetaUnivariate
@@ -25,6 +26,46 @@ def test__fit(self):
2526
for key, value in distribution._params.items():
2627
np.testing.assert_allclose(value, expected[key], atol=0.3)
2728

29+
@patch('copulas.univariate.beta.beta')
30+
def test__fit_raises_value_error_if_scale_is_near_zero(self, mock_beta):
31+
mock_beta.fit.return_value = (1, 0, 1, 1e-8)
32+
distribution = BetaUnivariate()
33+
34+
data = np.array([1, 2, 3, 4])
35+
err_msg = 'Converged parameters for beta distribution have a near-zero range.'
36+
with pytest.raises(ValueError, match=err_msg):
37+
distribution._fit(data)
38+
39+
mock_beta.fit.assert_called_once_with(data, loc=1, scale=3)
40+
41+
@patch('copulas.univariate.beta.beta')
42+
def test__fit_raises_value_error_if_parameters_are_higher_than_range(self, mock_beta):
43+
mock_beta.fit.return_value = (5, 1, 5, 1)
44+
distribution = BetaUnivariate()
45+
46+
data = np.array([7, 8, 9, 10])
47+
err_msg = (
48+
'Converged parameters for beta distribution are outside the min/max range of the data.'
49+
)
50+
with pytest.raises(ValueError, match=err_msg):
51+
distribution._fit(data)
52+
53+
mock_beta.fit.assert_called_once_with(data, loc=7, scale=3)
54+
55+
@patch('copulas.univariate.beta.beta')
56+
def test__fit_raises_value_error_if_parameters_are_lower_than_range(self, mock_beta):
57+
mock_beta.fit.return_value = (5, 1, 5, 1)
58+
distribution = BetaUnivariate()
59+
60+
data = np.array([1, 2, 3, 4])
61+
err_msg = (
62+
'Converged parameters for beta distribution are outside the min/max range of the data.'
63+
)
64+
with pytest.raises(ValueError, match=err_msg):
65+
distribution._fit(data)
66+
67+
mock_beta.fit.assert_called_once_with(data, loc=1, scale=3)
68+
2869
def test__is_constant_true(self):
2970
distribution = BetaUnivariate()
3071

0 commit comments

Comments
 (0)