Skip to content

Commit 6f0f0b8

Browse files
authored
Merge pull request scipy#19475 from mdhaber/lmoment
2 parents 85c33d1 + e4331b2 commit 6f0f0b8

File tree

5 files changed

+245
-12
lines changed

5 files changed

+245
-12
lines changed

scipy/stats/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@
247247
kurtosis -- Fisher or Pearson kurtosis
248248
mode -- Modal value
249249
moment -- Central moment
250+
lmoment
250251
expectile -- Expectile
251252
skew -- Skewness
252253
kstat --

scipy/stats/_axis_nan_policy.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,14 @@ def _broadcast_shapes(shapes, axis=None):
6464
# input validation
6565
if axis is not None:
6666
axis = np.atleast_1d(axis)
67-
axis_int = axis.astype(int)
67+
message = '`axis` must be an integer, a tuple of integers, or `None`.'
68+
try:
69+
with np.errstate(invalid='ignore'):
70+
axis_int = axis.astype(int)
71+
except ValueError as e:
72+
raise AxisError(message) from e
6873
if not np.array_equal(axis_int, axis):
69-
raise AxisError('`axis` must be an integer, a '
70-
'tuple of integers, or `None`.')
74+
raise AxisError(message)
7175
axis = axis_int
7276

7377
# First, ensure all shapes have same number of dimensions by prepending 1s.

scipy/stats/_stats_py.py

Lines changed: 147 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@
103103
'rankdata', 'combine_pvalues', 'quantile_test',
104104
'wasserstein_distance', 'wasserstein_distance_nd', 'energy_distance',
105105
'brunnermunzel', 'alexandergovern',
106-
'expectile']
106+
'expectile', 'lmoment']
107107

108108

109109
def _chk_asarray(a, axis, *, xp=None):
@@ -961,11 +961,11 @@ def tsem(a, limits=None, inclusive=(True, True), axis=0, ddof=1):
961961
#####################################
962962

963963

964-
def _moment_outputs(kwds):
965-
order = np.atleast_1d(kwds.get('order', 1))
966-
if order.size == 0:
967-
raise ValueError("'order' must be a scalar or a non-empty 1D "
968-
"list/array.")
964+
def _moment_outputs(kwds, default_order=1):
965+
order = np.atleast_1d(kwds.get('order', default_order))
966+
message = "`order` must be a scalar or a non-empty 1D array."
967+
if order.size == 0 or order.ndim > 1:
968+
raise ValueError(message)
969969
return len(order)
970970

971971

@@ -1094,7 +1094,7 @@ def moment(a, order=1, axis=0, nan_policy='propagate', *, center=None):
10941094
# decorator. Currently, the `_axis_nan_policy` decorator is skipped when `a`
10951095
# is a non-NumPy array, so we need to check again. When the decorator is
10961096
# updated for array API compatibility, we can remove this second check.
1097-
raise ValueError("'order' must be a scalar or a non-empty 1D list/array.")
1097+
raise ValueError("`order` must be a scalar or a non-empty 1D array.")
10981098
if xp.any(order != xp.round(order)):
10991099
raise ValueError("All elements of `order` must be integral.")
11001100
order = order[()] if order.ndim == 0 else order
@@ -10225,6 +10225,146 @@ def first_order(t):
1022510225
return res.root
1022610226

1022710227

10228+
def _lmoment_iv(sample, order, axis, sorted, standardize):
10229+
# input validation/standardization for `lmoment`
10230+
sample = np.asarray(sample)
10231+
message = "`sample` must be an array of real numbers."
10232+
if np.issubdtype(sample.dtype, np.integer):
10233+
sample = sample.astype(np.float64)
10234+
if not np.issubdtype(sample.dtype, np.floating):
10235+
raise ValueError(message)
10236+
10237+
message = "`order` must be a scalar or a non-empty array of positive integers."
10238+
order = np.arange(1, 5) if order is None else np.asarray(order)
10239+
if not np.issubdtype(order.dtype, np.integer) or np.any(order <= 0):
10240+
raise ValueError(message)
10241+
10242+
axis = np.asarray(axis)[()]
10243+
message = "`axis` must be an integer."
10244+
if not np.issubdtype(axis.dtype, np.integer) or axis.ndim != 0:
10245+
raise ValueError(message)
10246+
10247+
sorted = np.asarray(sorted)[()]
10248+
message = "`sorted` must be True or False."
10249+
if not np.issubdtype(sorted.dtype, np.bool_) or sorted.ndim != 0:
10250+
raise ValueError(message)
10251+
10252+
standardize = np.asarray(standardize)[()]
10253+
message = "`standardize` must be True or False."
10254+
if not np.issubdtype(standardize.dtype, np.bool_) or standardize.ndim != 0:
10255+
raise ValueError(message)
10256+
10257+
sample = np.moveaxis(sample, axis, -1)
10258+
sample = np.sort(sample, axis=-1) if not sorted else sample
10259+
10260+
return sample, order, axis, sorted, standardize
10261+
10262+
10263+
def _br(x, *, r=0):
10264+
n = x.shape[-1]
10265+
x = np.expand_dims(x, axis=-2)
10266+
x = np.broadcast_to(x, x.shape[:-2] + (len(r), n))
10267+
x = np.triu(x)
10268+
j = np.arange(n, dtype=x.dtype)
10269+
n = np.asarray(n, dtype=x.dtype)[()]
10270+
return (np.sum(special.binom(j, r[:, np.newaxis])*x, axis=-1)
10271+
/ special.binom(n-1, r) / n)
10272+
10273+
10274+
def _prk(r, k):
10275+
# Writen to match [1] Equation 27 closely to facilitate review.
10276+
# This does not protect against overflow, so improvements to
10277+
# robustness would be a welcome follow-up.
10278+
return (-1)**(r-k)*special.binom(r, k)*special.binom(r+k, k)
10279+
10280+
10281+
@_axis_nan_policy_factory( # noqa: E302
10282+
_moment_result_object, n_samples=1, result_to_tuple=lambda x: (x,),
10283+
n_outputs=lambda kwds: _moment_outputs(kwds, [1, 2, 3, 4])
10284+
)
10285+
def lmoment(sample, order=None, *, axis=0, sorted=False, standardize=True):
10286+
r"""Compute L-moments of a sample from a continuous distribution
10287+
10288+
The L-moments of a probability distribution are summary statistics with
10289+
uses similar to those of conventional moments, but they are defined in
10290+
terms of the expected values of order statistics.
10291+
Sample L-moments are defined analogously to population L-moments, and
10292+
they can serve as estimators of population L-moments. They tend to be less
10293+
sensitive to extreme observations than conventional moments.
10294+
10295+
Parameters
10296+
----------
10297+
sample : array_like
10298+
The real-valued sample whose L-moments are desired.
10299+
order : array_like, optional
10300+
The (positive integer) orders of the desired L-moments.
10301+
Must be a scalar or non-empty 1D array. Default is [1, 2, 3, 4].
10302+
axis : int or None, default=0
10303+
If an int, the axis of the input along which to compute the statistic.
10304+
The statistic of each axis-slice (e.g. row) of the input will appear
10305+
in a corresponding element of the output. If None, the input will be
10306+
raveled before computing the statistic.
10307+
sorted : bool, default=False
10308+
Whether `sample` is already sorted in increasing order along `axis`.
10309+
If False (default), `sample` will be sorted.
10310+
standardize : bool, default=True
10311+
Whether to return L-moment ratios for orders 3 and higher.
10312+
L-moment ratios are analogous to standardized conventional
10313+
moments: they are the non-standardized L-moments divided
10314+
by the L-moment of order 2.
10315+
10316+
Returns
10317+
-------
10318+
lmoments : ndarray
10319+
The sample L-moments of order `order`.
10320+
10321+
See Also
10322+
--------
10323+
moment
10324+
10325+
References
10326+
----------
10327+
.. [1] D. Bilkova. "L-Moments and TL-Moments as an Alternative Tool of
10328+
Statistical Data Analysis". Journal of Applied Mathematics and
10329+
Physics. 2014. :doi:`10.4236/jamp.2014.210104`
10330+
.. [2] J. R. M. Hosking. "L-Moments: Analysis and Estimation of Distributions
10331+
Using Linear Combinations of Order Statistics". Journal of the Royal
10332+
Statistical Society. 1990. :doi:`10.1111/j.2517-6161.1990.tb01775.x`
10333+
.. [3] "L-moment". *Wikipedia*. https://en.wikipedia.org/wiki/L-moment.
10334+
10335+
Examples
10336+
--------
10337+
>>> import numpy as np
10338+
>>> from scipy import stats
10339+
>>> rng = np.random.default_rng(328458568356392)
10340+
>>> sample = rng.exponential(size=100000)
10341+
>>> stats.lmoment(sample)
10342+
array([1.00124272, 0.50111437, 0.3340092 , 0.16755338])
10343+
10344+
Note that the first four standardized population L-moments of the standard
10345+
exponential distribution are 1, 1/2, 1/3, and 1/6; the sample L-moments
10346+
provide reasonable estimates.
10347+
10348+
"""
10349+
args = _lmoment_iv(sample, order, axis, sorted, standardize)
10350+
sample, order, axis, sorted, standardize = args
10351+
10352+
n_moments = np.max(order)
10353+
k = np.arange(n_moments, dtype=sample.dtype)
10354+
prk = _prk(np.expand_dims(k, tuple(range(1, sample.ndim+1))), k)
10355+
bk = _br(sample, r=k)
10356+
10357+
n = sample.shape[-1]
10358+
bk[..., n:] = 0 # remove NaNs due to n_moments > n
10359+
10360+
lmoms = np.sum(prk * bk, axis=-1)
10361+
if standardize and n_moments > 2:
10362+
lmoms[2:] /= lmoms[1]
10363+
10364+
lmoms[n:] = np.nan # add NaNs where appropriate
10365+
return lmoms[order-1]
10366+
10367+
1022810368
LinregressResult = _make_tuple_bunch('LinregressResult',
1022910369
['slope', 'intercept', 'rvalue',
1023010370
'pvalue', 'stderr'],

scipy/stats/tests/test_axis_nan_policy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ def combine_pvalues_weighted(*args, **kwargs):
136136
(stats.alexandergovern, tuple(), {}, 2, 2, False,
137137
lambda res: (res.statistic, res.pvalue)),
138138
(stats.combine_pvalues, tuple(), {}, 1, 2, False, None),
139+
(stats.lmoment, tuple(), dict(), 1, 4, False, lambda x: tuple(x)),
139140
(combine_pvalues_weighted, tuple(), {}, 2, 2, True, None),
140141
(xp_mean_1samp, tuple(), dict(), 1, 1, False, lambda x: (x,)),
141142
(xp_mean_2samp, tuple(), dict(), 2, 1, True, lambda x: (x,)),

scipy/stats/tests/test_stats.py

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3505,8 +3505,8 @@ def test_moment_propagate_nan(self, xp):
35053505
@array_api_compatible
35063506
def test_moment_empty_order(self, xp):
35073507
# tests moment with empty `order` list
3508-
with pytest.raises(ValueError, match=r"'order' must be a scalar or a"
3509-
r" non-empty 1D list/array."):
3508+
with pytest.raises(ValueError, match=r"`order` must be a scalar or a"
3509+
r" non-empty 1D array."):
35103510
stats.moment(xp.asarray([1, 2, 3, 4]), order=[])
35113511

35123512
@array_api_compatible
@@ -9269,6 +9269,93 @@ def test_monotonicity_in_alpha(self, n):
92699269
assert np.all(np.diff(e_list) > 0)
92709270

92719271

9272+
class TestLMoment:
9273+
# data from https://github.com/scipy/scipy/issues/19460
9274+
data = [0.87, 0.87, 1.29, 1.5, 1.7, 0.66, 1.5, 0.5, 1., 1.25, 2.3,
9275+
1.03, 2.85, 0.68, 1.74, 1.94, 0.63, 2.04, 1.2, 0.64, 2.05, 0.97,
9276+
2.81, 1.02, 2.76, 0.86, 1.36, 1.29, 1.68, 0.72, 1.67, 1.15, 3.26,
9277+
0.93, 0.83, 0.91, 0.92, 2.32, 1.12, 3.21, 1.23, 1.22, 1.29, 2.08,
9278+
0.64, 2.83, 2.68, 1.77, 0.69, 1.69, 0.7, 1.83, 2.25, 1.23, 1.17,
9279+
0.94, 1.22, 0.76, 0.69, 0.48, 1.04, 2.49, 1.38, 1.57, 1.79, 1.59,
9280+
1.3, 1.54, 1.07, 1.03, 0.76, 2.35, 2.05, 2.02, 2.36, 1.59, 0.97,
9281+
1.63, 1.66, 0.94, 1.45, 1.26, 1.25, 0.68, 2.96, 0.8, 1.16, 0.82,
9282+
0.64, 0.87, 1.33, 1.28, 1.26, 1.19, 1.24, 1.12, 1.45, 1.03, 1.37,
9283+
1.4, 1.35, 1.28, 1.04, 1.31, 0.87, 0.96, 2.55, 1.72, 1.05, 1.15,
9284+
1.73, 1.03, 1.53, 2.41, 1.36, 2.08, 0.92, 0.73, 1.56, 1.94, 0.78]
9285+
9286+
not_integers = [1.5, [1, 2, 3.5], np.nan, np.inf, 'a duck']
9287+
9288+
def test_dtype_iv(self):
9289+
message = '`sample` must be an array of real numbers.'
9290+
with pytest.raises(ValueError, match=message):
9291+
stats.lmoment(np.array(self.data, dtype=np.complex128))
9292+
9293+
@skip_xp_invalid_arg
9294+
def test_dtype_iv_non_numeric(self):
9295+
message = '`sample` must be an array of real numbers.'
9296+
with pytest.raises(ValueError, match=message):
9297+
stats.lmoment(np.array(self.data, dtype=object))
9298+
9299+
@pytest.mark.parametrize('order', not_integers + [0, -1, [], [[1, 2, 3]]])
9300+
def test_order_iv(self, order):
9301+
message = '`order` must be a scalar or a non-empty...'
9302+
with pytest.raises(ValueError, match=message):
9303+
stats.lmoment(self.data, order=order)
9304+
9305+
@pytest.mark.parametrize('axis', not_integers)
9306+
def test_axis_iv(self, axis):
9307+
message = '`axis` must be an integer, a tuple'
9308+
with pytest.raises(ValueError, match=message):
9309+
stats.lmoment(self.data, axis=axis)
9310+
9311+
@pytest.mark.parametrize('sorted', not_integers)
9312+
def test_sorted_iv(self, sorted):
9313+
message = '`sorted` must be True or False.'
9314+
with pytest.raises(ValueError, match=message):
9315+
stats.lmoment(self.data, sorted=sorted)
9316+
9317+
@pytest.mark.parametrize('standardize', not_integers)
9318+
def test_standardize_iv(self, standardize):
9319+
message = '`standardize` must be True or False.'
9320+
with pytest.raises(ValueError, match=message):
9321+
stats.lmoment(self.data, standardize=standardize)
9322+
9323+
@pytest.mark.parametrize('order', [1, 4, [1, 2, 3, 4]])
9324+
@pytest.mark.parametrize('standardize', [False, True])
9325+
@pytest.mark.parametrize('sorted', [False, True])
9326+
def test_lmoment(self, order, standardize, sorted):
9327+
# Reference values from R package `lmom`
9328+
# options(digits=16)
9329+
# library(lmom)
9330+
# data= c(0.87, 0.87,..., 1.94, 0.78)
9331+
# samlmu(data)
9332+
ref = np.asarray([1.4087603305785130, 0.3415936639118458,
9333+
0.2189964482831403, 0.1328186463415905])
9334+
9335+
if not standardize:
9336+
ref[2:] *= ref[1]
9337+
9338+
data = np.sort(self.data) if sorted else self.data
9339+
9340+
res = stats.lmoment(data, order, standardize=standardize, sorted=sorted)
9341+
assert_allclose(res, ref[np.asarray(order)-1])
9342+
9343+
def test_dtype(self):
9344+
dtype = np.float32
9345+
sample = np.asarray(self.data)
9346+
res = stats.lmoment(sample.astype(dtype))
9347+
ref = stats.lmoment(sample)
9348+
assert res.dtype.type == dtype
9349+
assert_allclose(res, ref, rtol=1e-4)
9350+
9351+
dtype = np.int64
9352+
sample = np.asarray([1, 2, 3, 4, 5])
9353+
res = stats.lmoment(sample.astype(dtype))
9354+
ref = stats.lmoment(sample.astype(np.float64))
9355+
assert res.dtype.type == np.float64
9356+
assert_allclose(res, ref, rtol=1e-15)
9357+
9358+
92729359
@array_api_compatible
92739360
class TestXP_Mean:
92749361
@pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)])

0 commit comments

Comments
 (0)