From ce6fa498f7822c68584d2cc55f209af43f466435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 12:47:40 -0300 Subject: [PATCH 01/13] fix(nanops): fix kurtosis computation on low variance --- pandas/core/nanops.py | 25 ++++++++++++++++--------- pandas/tests/test_nanops.py | 10 +++++++++- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 58bcc60f9274e..da5b11e0aa674 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1261,7 +1261,8 @@ def nanskew( return np.nan with np.errstate(invalid="ignore", divide="ignore"): - mean = values.sum(axis, dtype=np.float64) / count + total = values.sum(axis, dtype=np.float64) + mean = total / count if axis is not None: mean = np.expand_dims(mean, axis) @@ -1277,8 +1278,9 @@ def nanskew( # # #18044 in _libs/windows.pyx calc_skew follow this behavior # to fix the fperr to treat m2 <1e-14 as zero - m2 = _zero_out_fperr(m2) - m3 = _zero_out_fperr(m3) + constant_tolerance = (np.finfo(m2.dtype).eps * total) ** 2 + m2 = _zero_out_fperr(m2, constant_tolerance) + m3 = _zero_out_fperr(m3, constant_tolerance) with np.errstate(invalid="ignore", divide="ignore"): result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5) @@ -1349,7 +1351,8 @@ def nankurt( return np.nan with np.errstate(invalid="ignore", divide="ignore"): - mean = values.sum(axis, dtype=np.float64) / count + total = values.sum(axis, dtype=np.float64) + mean = total / count if axis is not None: mean = np.expand_dims(mean, axis) @@ -1370,8 +1373,12 @@ def nankurt( # # #18044 in _libs/windows.pyx calc_kurt follow this behavior # to fix the fperr to treat denom <1e-14 as zero - numerator = _zero_out_fperr(numerator) - denominator = _zero_out_fperr(denominator) + # #57972 arbitrary <1e-14 tolerance leads to problematic behaviour on low variance. + # We adapted the tolerance to use one similar to scipy: + # https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429 + constant_tolerance = (np.finfo(m2.dtype).eps * total) ** 2 + numerator = _zero_out_fperr(numerator, constant_tolerance) + denominator = _zero_out_fperr(denominator, constant_tolerance) if not isinstance(denominator, np.ndarray): # if ``denom`` is a scalar, check these corner cases first before @@ -1587,12 +1594,12 @@ def check_below_min_count( return False -def _zero_out_fperr(arg): +def _zero_out_fperr(arg, tol): # #18044 reference this behavior to fix rolling skew/kurt issue if isinstance(arg, np.ndarray): - return np.where(np.abs(arg) < 1e-14, 0, arg) + return np.where(np.abs(arg) < tol, 0, arg) else: - return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg + return arg.dtype.type(0) if np.abs(arg) < tol else arg @disallow("M8", "m8") diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index e7ed8e855a762..7181916f1ae2d 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1072,7 +1072,7 @@ def test_constant_series(self, val): # xref GH 11974 data = val * np.ones(300) kurt = nanops.nankurt(data) - assert kurt == 0.0 + tm.assert_equal(kurt, 0.0) def test_all_finite(self): alpha, beta = 0.3, 0.1 @@ -1102,6 +1102,14 @@ def test_nans_skipna(self, samples, actual_kurt): kurt = nanops.nankurt(samples, skipna=True) tm.assert_almost_equal(kurt, actual_kurt) + def test_low_variance(self): + # GH#57972 + data_list = [-2.05191341e-05, -4.10391103e-05] + ([0.0] * 27) + data = np.array(data_list) + kurt = nanops.nankurt(data) + expected = 18.087646853025614 # scipy.stats.kurtosis(data, bias=False) + tm.assert_almost_equal(kurt, expected) + @property def prng(self): return np.random.default_rng(2) From fd1e6c3433ce3a86eb89fb00af5a8bbeddba8bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 13:00:31 -0300 Subject: [PATCH 02/13] docs(whatsnew): add entry --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 9210f1e0082f0..03fcb09334d30 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1148,6 +1148,7 @@ Other - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) +- Bug in :meth:`Series.kurt` resulting in zero for low variance arrays (:issue:`57972`) - Bug in :meth:`Series.map` with a ``timestamp[pyarrow]`` dtype or ``duration[pyarrow]`` dtype incorrectly returning all-``NaN`` entries (:issue:`61231`) - Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) From 01d51edb735ec84d785dfcd846d9c32530289a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 13:37:16 -0300 Subject: [PATCH 03/13] fix(nanops): make tolerance match order of m2, m3 and m4 --- pandas/core/nanops.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index da5b11e0aa674..ecd31379ad2b2 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1278,9 +1278,11 @@ def nanskew( # # #18044 in _libs/windows.pyx calc_skew follow this behavior # to fix the fperr to treat m2 <1e-14 as zero - constant_tolerance = (np.finfo(m2.dtype).eps * total) ** 2 - m2 = _zero_out_fperr(m2, constant_tolerance) - m3 = _zero_out_fperr(m3, constant_tolerance) + constant_tolerance = np.finfo(m2.dtype).eps * total + constant_tolerance2 = constant_tolerance**2 # match order of m2 + constant_tolerance3 = constant_tolerance2 * constant_tolerance # match order of m3 + m2 = _zero_out_fperr(m2, constant_tolerance2) + m3 = _zero_out_fperr(m3, constant_tolerance3) with np.errstate(invalid="ignore", divide="ignore"): result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5) @@ -1376,9 +1378,10 @@ def nankurt( # #57972 arbitrary <1e-14 tolerance leads to problematic behaviour on low variance. # We adapted the tolerance to use one similar to scipy: # https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429 - constant_tolerance = (np.finfo(m2.dtype).eps * total) ** 2 - numerator = _zero_out_fperr(numerator, constant_tolerance) - denominator = _zero_out_fperr(denominator, constant_tolerance) + constant_tolerance2 = (np.finfo(m2.dtype).eps * total) ** 2 # match order of m2 + constant_tolerance4 = constant_tolerance2**2 # match order of m4 + numerator = _zero_out_fperr(numerator, constant_tolerance2) + denominator = _zero_out_fperr(denominator, constant_tolerance4) if not isinstance(denominator, np.ndarray): # if ``denom`` is a scalar, check these corner cases first before From 8649a09f7b79d04de29a858b804a41d231b23306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 14:32:05 -0300 Subject: [PATCH 04/13] fix: apply tolerance on m2 and m4 for kurt --- pandas/core/nanops.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index ecd31379ad2b2..d2a7a445d1df6 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1366,23 +1366,19 @@ def nankurt( m2 = adjusted2.sum(axis, dtype=np.float64) m4 = adjusted4.sum(axis, dtype=np.float64) + # #57972: tolerance to consider the central moment equals to zero. + # We adapted the tolerance from scipy: + # https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429 + constant_tolerance2 = (np.finfo(m2.dtype).eps * total) ** 2 # match order of m2 + constant_tolerance4 = constant_tolerance2**2 # match order of m4 + m2 = _zero_out_fperr(m2, constant_tolerance2) + m4 = _zero_out_fperr(m4, constant_tolerance4) + with np.errstate(invalid="ignore", divide="ignore"): adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) numerator = count * (count + 1) * (count - 1) * m4 denominator = (count - 2) * (count - 3) * m2**2 - # floating point error - # - # #18044 in _libs/windows.pyx calc_kurt follow this behavior - # to fix the fperr to treat denom <1e-14 as zero - # #57972 arbitrary <1e-14 tolerance leads to problematic behaviour on low variance. - # We adapted the tolerance to use one similar to scipy: - # https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429 - constant_tolerance2 = (np.finfo(m2.dtype).eps * total) ** 2 # match order of m2 - constant_tolerance4 = constant_tolerance2**2 # match order of m4 - numerator = _zero_out_fperr(numerator, constant_tolerance2) - denominator = _zero_out_fperr(denominator, constant_tolerance4) - if not isinstance(denominator, np.ndarray): # if ``denom`` is a scalar, check these corner cases first before # doing division From 59ffa923e2a845cfd8a2c884aacf1db39a1b0176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 15:46:12 -0300 Subject: [PATCH 05/13] fix: fix negative total tolerance for m3 --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index d2a7a445d1df6..e80c5519fa3fa 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1278,7 +1278,7 @@ def nanskew( # # #18044 in _libs/windows.pyx calc_skew follow this behavior # to fix the fperr to treat m2 <1e-14 as zero - constant_tolerance = np.finfo(m2.dtype).eps * total + constant_tolerance = np.finfo(m2.dtype).eps * np.abs(total) constant_tolerance2 = constant_tolerance**2 # match order of m2 constant_tolerance3 = constant_tolerance2 * constant_tolerance # match order of m3 m2 = _zero_out_fperr(m2, constant_tolerance2) From 1e21e213bca622ff9319ce4a5bf7af326bc62988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 15:50:06 -0300 Subject: [PATCH 06/13] test(skew): add low variance test --- pandas/tests/test_nanops.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 7181916f1ae2d..5fe0fc95b5a48 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1051,6 +1051,13 @@ def test_nans_skipna(self, samples, actual_skew): skew = nanops.nanskew(samples, skipna=True) tm.assert_almost_equal(skew, actual_skew) + def test_low_variance(self): + data_list = [-2.05191341e-06, -4.10391103e-07] + ([0.0] * 27) + data = np.array(data_list) + kurt = nanops.nanskew(data) + expected = -5.092092799675377 # scipy.stats.skew(data, bias=False) + tm.assert_almost_equal(kurt, expected) + @property def prng(self): return np.random.default_rng(2) From b01d33de07467b97983ab04b14ffafad352448a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Sun, 21 Sep 2025 15:55:47 -0300 Subject: [PATCH 07/13] docs(whatsnew): update with skew bug --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 03fcb09334d30..f6e960513765f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1148,7 +1148,7 @@ Other - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) -- Bug in :meth:`Series.kurt` resulting in zero for low variance arrays (:issue:`57972`) +- Bug in :meth:`Series.kurt` and :meth:`Series.skew` resulting in zero for low variance arrays (:issue:`57972`) - Bug in :meth:`Series.map` with a ``timestamp[pyarrow]`` dtype or ``duration[pyarrow]`` dtype incorrectly returning all-``NaN`` entries (:issue:`61231`) - Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) From fdf1cac14bee7331c72a7d3095d041fd8ad5295e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Mon, 22 Sep 2025 06:52:24 -0300 Subject: [PATCH 08/13] fix: deviate from scipy --- pandas/core/nanops.py | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e80c5519fa3fa..00a0f9578316c 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1274,13 +1274,11 @@ def nanskew( m2 = adjusted2.sum(axis, dtype=np.float64) m3 = adjusted3.sum(axis, dtype=np.float64) - # floating point error - # - # #18044 in _libs/windows.pyx calc_skew follow this behavior - # to fix the fperr to treat m2 <1e-14 as zero - constant_tolerance = np.finfo(m2.dtype).eps * np.abs(total) - constant_tolerance2 = constant_tolerance**2 # match order of m2 - constant_tolerance3 = constant_tolerance2 * constant_tolerance # match order of m3 + # floating point error. See comment in [nankurt] + max_abs = np.abs(values).max(axis) + eps = np.finfo(m2.dtype).eps + constant_tolerance2 = ((eps * max_abs) ** 2) * count + constant_tolerance3 = ((eps * max_abs) ** 3) * count m2 = _zero_out_fperr(m2, constant_tolerance2) m3 = _zero_out_fperr(m3, constant_tolerance3) @@ -1366,11 +1364,28 @@ def nankurt( m2 = adjusted2.sum(axis, dtype=np.float64) m4 = adjusted4.sum(axis, dtype=np.float64) - # #57972: tolerance to consider the central moment equals to zero. - # We adapted the tolerance from scipy: - # https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429 - constant_tolerance2 = (np.finfo(m2.dtype).eps * total) ** 2 # match order of m2 - constant_tolerance4 = constant_tolerance2**2 # match order of m4 + # Several floating point errors may occur during the summation due to rounding. + # We need to estimate an upper bound to the error to consider the data constant. + # Lets call: + # x: true value in data + # y: floating point representation + # e: relative approximation error + # n: number of observations in array + # + # We have that: + # |x - y|/|x| <= e (See https://en.wikipedia.org/wiki/Machine_epsilon) + # (|x - y|/|x|)² <= e² + # Σ (|x - y|/|x|)² <= ne² + # + # Lets say that the fperr upper bound for m2 is constrained by the summation. + # |m2 - y|/|m2| <= ne² + # |m2 - y| <= n|m2|e² + # + # We will use max (x²) to estimate |m2| + max_abs = np.abs(values).max(axis) + eps = np.finfo(m2.dtype).eps + constant_tolerance2 = ((eps * max_abs) ** 2) * count + constant_tolerance4 = ((eps * max_abs) ** 4) * count m2 = _zero_out_fperr(m2, constant_tolerance2) m4 = _zero_out_fperr(m4, constant_tolerance4) From 5f638ecd6f4776001d04f22792741f1e843fcee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Mon, 22 Sep 2025 07:06:27 -0300 Subject: [PATCH 09/13] test(nanops): parametrize and utilize little and a lot of data --- pandas/tests/test_nanops.py | 40 +++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 5fe0fc95b5a48..6788f2056bb9a 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1051,12 +1051,22 @@ def test_nans_skipna(self, samples, actual_skew): skew = nanops.nanskew(samples, skipna=True) tm.assert_almost_equal(skew, actual_skew) - def test_low_variance(self): - data_list = [-2.05191341e-06, -4.10391103e-07] + ([0.0] * 27) - data = np.array(data_list) - kurt = nanops.nanskew(data) - expected = -5.092092799675377 # scipy.stats.skew(data, bias=False) - tm.assert_almost_equal(kurt, expected) + @pytest.mark.parametrize( + "initial_data, nobs", + [ + ([-2.05191341e-05, -4.10391103e-05], 27), + ([-2.05191341e-10, -4.10391103e-10], 27), + ([-2.05191341e-05, -4.10391103e-05], 10_000), + ([-2.05191341e-10, -4.10391103e-10], 10_000), + ], + ) + def test_low_variance(self, initial_data, nobs): + st = pytest.importorskip("scipy.stats") + data = np.zeros((nobs,), dtype=np.float64) + data[: len(initial_data)] = initial_data + skew = nanops.nanskew(data) + expected = st.skew(data, bias=False) + tm.assert_almost_equal(skew, expected) @property def prng(self): @@ -1109,12 +1119,22 @@ def test_nans_skipna(self, samples, actual_kurt): kurt = nanops.nankurt(samples, skipna=True) tm.assert_almost_equal(kurt, actual_kurt) - def test_low_variance(self): + @pytest.mark.parametrize( + "initial_data, nobs", + [ + ([-2.05191341e-05, -4.10391103e-05], 27), + ([-2.05191341e-10, -4.10391103e-10], 27), + ([-2.05191341e-05, -4.10391103e-05], 10_000), + ([-2.05191341e-10, -4.10391103e-10], 10_000), + ], + ) + def test_low_variance(self, initial_data, nobs): # GH#57972 - data_list = [-2.05191341e-05, -4.10391103e-05] + ([0.0] * 27) - data = np.array(data_list) + st = pytest.importorskip("scipy.stats") + data = np.zeros((nobs,), dtype=np.float64) + data[: len(initial_data)] = initial_data kurt = nanops.nankurt(data) - expected = 18.087646853025614 # scipy.stats.kurtosis(data, bias=False) + expected = st.kurtosis(data, bias=False) tm.assert_almost_equal(kurt, expected) @property From 2a8e1f876f936f4833e424f964f2816dbd192c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Mon, 22 Sep 2025 07:40:11 -0300 Subject: [PATCH 10/13] fix: handle empty arrays in kurt and skew --- pandas/core/nanops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 00a0f9578316c..697b9c8b96d50 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1275,7 +1275,7 @@ def nanskew( m3 = adjusted3.sum(axis, dtype=np.float64) # floating point error. See comment in [nankurt] - max_abs = np.abs(values).max(axis) + max_abs = np.abs(values).max(axis, initial=0.0) eps = np.finfo(m2.dtype).eps constant_tolerance2 = ((eps * max_abs) ** 2) * count constant_tolerance3 = ((eps * max_abs) ** 3) * count @@ -1382,7 +1382,7 @@ def nankurt( # |m2 - y| <= n|m2|e² # # We will use max (x²) to estimate |m2| - max_abs = np.abs(values).max(axis) + max_abs = np.abs(values).max(axis, initial=0.0) eps = np.finfo(m2.dtype).eps constant_tolerance2 = ((eps * max_abs) ** 2) * count constant_tolerance4 = ((eps * max_abs) ** 4) * count From 303465a04dac7588bd73f1c5066d017c06337fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Mon, 22 Sep 2025 07:51:06 -0300 Subject: [PATCH 11/13] fix: add type annotation to `tol` --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 697b9c8b96d50..17f7242c51042 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1608,7 +1608,7 @@ def check_below_min_count( return False -def _zero_out_fperr(arg, tol): +def _zero_out_fperr(arg, tol: float | np.ndarray): # #18044 reference this behavior to fix rolling skew/kurt issue if isinstance(arg, np.ndarray): return np.where(np.abs(arg) < tol, 0, arg) From a3a855969872eaa26b3011c9bdbc38d16d73deba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Mon, 22 Sep 2025 07:51:21 -0300 Subject: [PATCH 12/13] fix: leave mean computation as is --- pandas/core/nanops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 17f7242c51042..1e9a307209e2f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1261,8 +1261,7 @@ def nanskew( return np.nan with np.errstate(invalid="ignore", divide="ignore"): - total = values.sum(axis, dtype=np.float64) - mean = total / count + mean = values.sum(axis, dtype=np.float64) / count if axis is not None: mean = np.expand_dims(mean, axis) @@ -1351,8 +1350,7 @@ def nankurt( return np.nan with np.errstate(invalid="ignore", divide="ignore"): - total = values.sum(axis, dtype=np.float64) - mean = total / count + mean = values.sum(axis, dtype=np.float64) / count if axis is not None: mean = np.expand_dims(mean, axis) From ce0e67f3ce6f34ffe3c7c320279b51a1afd58ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= Date: Mon, 22 Sep 2025 13:52:18 -0300 Subject: [PATCH 13/13] chore: add scipy permalink and clarify changes --- pandas/core/nanops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 1e9a307209e2f..7bcf4371a0bcd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1363,6 +1363,10 @@ def nankurt( m4 = adjusted4.sum(axis, dtype=np.float64) # Several floating point errors may occur during the summation due to rounding. + # This computation is similar to the one in Scipy + # https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429 + # With a few modifications, like using the maximum value instead of the averages + # and some adaptations because they use the average and we use the sum for `m2`. # We need to estimate an upper bound to the error to consider the data constant. # Lets call: # x: true value in data