Skip to content

Commit e655198

Browse files
committed
Add a test for float precision issues
1 parent 5708d85 commit e655198

File tree

3 files changed

+36
-7
lines changed

3 files changed

+36
-7
lines changed

pandas/_libs/window/aggregations.pyx

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,7 +1478,7 @@ def roll_nunique(const float64_t[:] values, ndarray[int64_t] start,
14781478
"""
14791479
cdef:
14801480
Py_ssize_t i, j, s, e, N = len(start)
1481-
int64_t nobs = 0, num_unique = 0
1481+
int64_t nobs = 0
14821482
float64_t val
14831483
float64_t[::1] output
14841484
unordered_map[float64_t, int64_t] value_counts
@@ -1506,8 +1506,6 @@ def roll_nunique(const float64_t[:] values, ndarray[int64_t] start,
15061506
nobs += 1
15071507
value_counts[val] += 1
15081508

1509-
num_unique = value_counts.size()
1510-
15111509
else:
15121510
# calculate deletes
15131511
for j in range(start[i - 1], s):
@@ -1525,9 +1523,8 @@ def roll_nunique(const float64_t[:] values, ndarray[int64_t] start,
15251523
nobs += 1
15261524
value_counts[val] += 1
15271525

1528-
num_unique = value_counts.size()
15291526
if nobs >= minp:
1530-
output[i] = num_unique
1527+
output[i] = value_counts.size()
15311528
else:
15321529
output[i] = NaN
15331530

pandas/tests/window/test_expanding.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def test_rank(window, method, pct, ascending, test_data):
256256

257257

258258
@pytest.mark.parametrize("window", [1, 3, 10, 20])
259-
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
259+
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans", "precision"])
260260
def test_nunique(window, test_data):
261261
length = 20
262262
if test_data == "default":
@@ -269,6 +269,22 @@ def test_nunique(window, test_data):
269269
[1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
270270
)
271271
)
272+
elif test_data == "precision":
273+
ser = Series(
274+
data=[
275+
0.3,
276+
0.1 * 3, # Not necessarily exactly 0.3
277+
0.6,
278+
0.2 * 3, # Not necessarily exactly 0.6
279+
0.9,
280+
0.3 * 3, # Not necessarily exactly 0.9
281+
0.5,
282+
0.1 * 5, # Rounds to exactly 0.5 in IEEE 754 doubles (an exact duplicate)
283+
0.8,
284+
0.2 * 4, # Exactly 0.8: scaling by a power of two is exact in IEEE 754 doubles
285+
],
286+
dtype=np.float64,
287+
)
272288

273289
expected = ser.expanding(window).apply(lambda x: x.nunique())
274290
result = ser.expanding(window).nunique()

pandas/tests/window/test_rolling.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1587,7 +1587,7 @@ def test_rank(window, method, pct, ascending, test_data):
15871587

15881588

15891589
@pytest.mark.parametrize("window", [1, 3, 10, 20])
1590-
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
1590+
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans", "precision"])
15911591
def test_nunique(window, test_data):
15921592
length = 20
15931593
if test_data == "default":
@@ -1600,6 +1600,22 @@ def test_nunique(window, test_data):
16001600
[1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
16011601
)
16021602
)
1603+
elif test_data == "precision":
1604+
ser = Series(
1605+
data=[
1606+
0.3,
1607+
0.1 * 3, # Not necessarily exactly 0.3
1608+
0.6,
1609+
0.2 * 3, # Not necessarily exactly 0.6
1610+
0.9,
1611+
0.3 * 3, # Not necessarily exactly 0.9
1612+
0.5,
1613+
0.1 * 5, # Rounds to exactly 0.5 in IEEE 754 doubles (an exact duplicate)
1614+
0.8,
1615+
0.2 * 4, # Exactly 0.8: scaling by a power of two is exact in IEEE 754 doubles
1616+
],
1617+
dtype=np.float64,
1618+
)
16031619

16041620
expected = ser.rolling(window).apply(lambda x: x.nunique())
16051621
result = ser.rolling(window).nunique()

0 commit comments

Comments
 (0)