Skip to content

Commit f752309

Browse files
committed
Fix overflow skipping to handle NaNs
- Remove the overflow assumption from the ffill, bfill tests
- Fix fill_value for datetime64
- Replace isnan with isnull
1 parent 39d8dcf commit f752309

File tree

4 files changed

14 additions and 7 deletions

flox/aggregate_flox.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def ffill(group_idx, array, *, axis, **kwargs):
239239
(group_starts,) = flag.nonzero()
240240

241241
# https://stackoverflow.com/questions/41190852/most-efficient-way-to-forward-fill-nan-values-in-numpy-array
242-
mask = np.isnan(array)
242+
mask = isnull(array)
243243
# modified from the SO answer, just reset the index at the start of every group!
244244
mask[..., np.asarray(group_starts)] = False
245245

flox/aggregations.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,11 @@ def _get_fill_value(dtype, fill_value):
158158
return np.nan
159159
# This is madness, but npg checks that fill_value is compatible
160160
# with array dtype even if the fill_value is never used.
161-
elif np.issubdtype(dtype, np.integer):
161+
elif (
162+
np.issubdtype(dtype, np.integer)
163+
or np.issubdtype(dtype, np.timedelta64)
164+
or np.issubdtype(dtype, np.datetime64)
165+
):
162166
return dtypes.get_neg_infinity(dtype, min_for_int=True)
163167
else:
164168
return None

flox/xrdtypes.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,9 @@ def get_neg_infinity(dtype, min_for_int=False):
125125
fill_value : negative infinity value corresponding to this dtype.
126126
"""
127127

128-
if np.issubdtype(dtype, (np.timedelta64, np.datetime64)):
129-
return dtype.type(np.iinfo(np.int64).min + 1)
128+
if is_datetime_like(dtype):
129+
unit, _ = np.datetime_data(dtype)
130+
return dtype.type(np.iinfo(np.int64).min + 1, unit)
130131

131132
if issubclass(dtype.type, np.floating):
132133
return -np.inf

tests/test_properties.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import flox
1616
from flox.core import groupby_reduce, groupby_scan
17+
from flox.xrutils import notnull
1718

1819
from . import assert_equal
1920
from .strategies import by_arrays, chunked_arrays, func_st, numeric_arrays
@@ -48,6 +49,8 @@ def not_overflowing_array(array: np.ndarray[Any, Any]) -> bool:
4849
else:
4950
return True
5051

52+
array = array.ravel()
53+
array = array[notnull(array)]
5154
result = bool(np.all((array < info.max / array.size) & (array > info.min / array.size)))
5255
# note(f"returning {result}, {array.min()} vs {info.min}, {array.max()} vs {info.max}")
5356
return result
@@ -117,7 +120,8 @@ def test_groupby_reduce(data, array, func: str) -> None:
117120
func=st.sampled_from(tuple(NUMPY_SCAN_FUNCS)),
118121
)
119122
def test_scans(data, array: dask.array.Array, func: str) -> None:
120-
assume(not_overflowing_array(np.asarray(array)))
123+
if "cum" in func:
124+
assume(not_overflowing_array(np.asarray(array)))
121125

122126
by = data.draw(by_arrays(shape=(array.shape[-1],)))
123127
axis = array.ndim - 1
@@ -150,8 +154,6 @@ def test_scans(data, array: dask.array.Array, func: str) -> None:
150154

151155
@given(data=st.data(), array=chunked_arrays())
152156
def test_ffill_bfill_reverse(data, array: dask.array.Array) -> None:
153-
# TODO: test NaT and timedelta, datetime
154-
assume(not_overflowing_array(np.asarray(array)))
155157
by = data.draw(by_arrays(shape=(array.shape[-1],)))
156158

157159
def reverse(arr):

0 commit comments

Comments
 (0)