Skip to content

Commit fdf1cac

Browse files
committed
fix: deviate from scipy
1 parent b01d33d commit fdf1cac

File tree

1 file changed

+27
-12
lines changed

1 file changed

+27
-12
lines changed

pandas/core/nanops.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,13 +1274,11 @@ def nanskew(
12741274
m2 = adjusted2.sum(axis, dtype=np.float64)
12751275
m3 = adjusted3.sum(axis, dtype=np.float64)
12761276

1277-
# floating point error
1278-
#
1279-
# #18044 in _libs/windows.pyx calc_skew follow this behavior
1280-
# to fix the fperr to treat m2 <1e-14 as zero
1281-
constant_tolerance = np.finfo(m2.dtype).eps * np.abs(total)
1282-
constant_tolerance2 = constant_tolerance**2 # match order of m2
1283-
constant_tolerance3 = constant_tolerance2 * constant_tolerance # match order of m3
1277+
# floating point error. See comment in [nankurt]
1278+
max_abs = np.abs(values).max(axis)
1279+
eps = np.finfo(m2.dtype).eps
1280+
constant_tolerance2 = ((eps * max_abs) ** 2) * count
1281+
constant_tolerance3 = ((eps * max_abs) ** 3) * count
12841282
m2 = _zero_out_fperr(m2, constant_tolerance2)
12851283
m3 = _zero_out_fperr(m3, constant_tolerance3)
12861284

@@ -1366,11 +1364,28 @@ def nankurt(
13661364
m2 = adjusted2.sum(axis, dtype=np.float64)
13671365
m4 = adjusted4.sum(axis, dtype=np.float64)
13681366

1369-
# #57972: tolerance to consider the central moment equals to zero.
1370-
# We adapted the tolerance from scipy:
1371-
# https://github.com/scipy/scipy/blob/04d6d9c460b1fed83f2919ecec3d743cfa2e8317/scipy/stats/_stats_py.py#L1429
1372-
constant_tolerance2 = (np.finfo(m2.dtype).eps * total) ** 2 # match order of m2
1373-
constant_tolerance4 = constant_tolerance2**2 # match order of m4
1367+
# Several floating point errors may occur during the summation due to rounding.
1368+
# We need to estimate an upper bound to the error to consider the data constant.
1369+
# Lets call:
1370+
# x: true value in data
1371+
# y: floating point representation
1372+
# e: relative approximation error
1373+
# n: number of observations in array
1374+
#
1375+
# We have that:
1376+
# |x - y|/|x| <= e (See https://en.wikipedia.org/wiki/Machine_epsilon)
1377+
# (|x - y|/|x|)² <= e²
1378+
# Σ (|x - y|/|x|)² <= ne²
1379+
#
1380+
# Lets say that the fperr upper bound for m2 is constrained by the summation.
1381+
# |m2 - y|/|m2| <= ne²
1382+
# |m2 - y| <= n|m2|e²
1383+
#
1384+
# We will use max (x²) to estimate |m2|
1385+
max_abs = np.abs(values).max(axis)
1386+
eps = np.finfo(m2.dtype).eps
1387+
constant_tolerance2 = ((eps * max_abs) ** 2) * count
1388+
constant_tolerance4 = ((eps * max_abs) ** 4) * count
13741389
m2 = _zero_out_fperr(m2, constant_tolerance2)
13751390
m4 = _zero_out_fperr(m4, constant_tolerance4)
13761391

0 commit comments

Comments
 (0)