
Commit 48706b7

One more fill-value fix. (#424)
* One more fill-value fix. xref pydata/xarray#10169
* Add test
* fix docs
* fix typing
* fix more types
* fix
1 parent 3660887 commit 48706b7

File tree (6 files changed: +67 −68 lines changed)

* .readthedocs.yml
* flox/aggregate_numbagg.py
* flox/aggregations.py
* flox/xarray.py
* readthedocs.yml (deleted)
* tests/test_core.py

.readthedocs.yml

Lines changed: 10 additions & 13 deletions

```diff
@@ -1,18 +1,15 @@
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
 version: 2
 
+sphinx:
+  # Path to your Sphinx configuration file.
+  configuration: docs/source/conf.py
+
 build:
-  os: ubuntu-22.04
+  os: "ubuntu-lts-latest"
   tools:
-    python: "3.11"
-sphinx:
-  configuration: docs/conf.py
+    python: "mambaforge-latest"
+
+conda:
+  environment: ci/docs.yml
 
-python:
-  install:
-  - method: pip
-    path: .
-    extra_requirements:
-    - docs
+formats: []
```
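The docs build now resolves its dependencies from a conda environment file (ci/docs.yml) via mambaforge instead of pip-installing the package with a `docs` extra, and `formats: []` skips the extra PDF/ePub builds. The environment file itself is not part of this diff; the following is a hypothetical sketch of what such a file typically contains, not its actual contents:

```yaml
# Hypothetical sketch only -- the real ci/docs.yml is not shown in this commit.
name: flox-docs
channels:
  - conda-forge
dependencies:
  - python=3.12
  - sphinx
  - pip
  - pip:
      - .  # install flox itself into the docs environment
```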

flox/aggregate_numbagg.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -32,6 +32,7 @@
     "nanstd": {np.int_: np.float64},
     "nanfirst": {np.datetime64: np.int64, np.timedelta64: np.int64},
     "nanlast": {np.datetime64: np.int64, np.timedelta64: np.int64},
+    "nancount": {np.datetime64: np.int64, np.timedelta64: np.int64},
 }
 
 
```

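This table (`CAST_TO` in flox/aggregate_numbagg.py) maps each aggregation to dtype conversions applied before handing data to numbagg, whose kernels operate on numeric dtypes only; the new entry extends that to counting datetime64/timedelta64 values. A minimal sketch of the idea, assuming the cast amounts to an int64 view:

```python
import numpy as np

# Datetimes are stored as 64-bit integers, so a view suffices for numeric
# kernels; NaT keeps its sentinel bit pattern (np.iinfo(np.int64).min).
dt = np.array(["2001-01-01", "NaT", "2001-01-03"], dtype="datetime64[ns]")
as_ints = dt.view(np.int64)
print(as_ints.dtype)                         # int64
print(as_ints[1] == np.iinfo(np.int64).min)  # True
```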
flox/aggregations.py

Lines changed: 1 addition & 2 deletions

```diff
@@ -831,12 +831,11 @@ def _initialize_aggregation(
     )
     agg.fill_value[func] = dtypes._get_fill_value(agg.dtype["final"], agg.fill_value[func])
 
-    fv = fill_value if fill_value is not None else agg.fill_value[agg.name]
     if _is_arg_reduction(agg):
         # this allows us to unravel_index easily. we have to do that nearly every time.
         agg.fill_value["numpy"] = (0,)
     else:
-        agg.fill_value["numpy"] = (fv,)
+        agg.fill_value["numpy"] = (agg.fill_value[func],)
 
     if finalize_kwargs is not None:
         assert isinstance(finalize_kwargs, dict)
```
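The removed `fv` line is the substance of the fix (xref pydata/xarray#10169): a user-supplied `fill_value` could leak into `agg.fill_value["numpy"]`, which seeds the intermediate numpy reductions, when it should only mark empty groups in the final result. A small sketch of the intended separation:

```python
import numpy as np
from flox import groupby_reduce

# fill_value applies only to groups that received no data; non-empty groups
# are reduced with the aggregation's own identity (0 for sum) regardless.
array = np.array([1.0, 2.0, 3.0])
by = np.array([0, 0, 2])
actual, groups = groupby_reduce(
    array, by, func="sum", expected_groups=(np.array([0, 1, 2]),), fill_value=np.nan
)
print(actual)  # [ 3. nan  3.] -- only the empty group 1 picks up NaN
```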

flox/xarray.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -468,9 +468,9 @@ def wrapper(array, *by, func, skipna, core_dims, **kwargs):
         ):
             levelnames = ds_broad.indexes[name].names
             if isinstance(expect3, np.ndarray):
-                # TODO: workaoround for IntervalIndex issue.
+                # TODO: workaround for IntervalIndex issue.
                 raise NotImplementedError
-            expect3 = pd.MultiIndex.from_tuples(expect3.values, names=levelnames)
+            expect3 = pd.MultiIndex.from_tuples(expect3.values.tolist(), names=levelnames)
             actual[name] = expect3
             if Version(xr.__version__) > Version("2022.03.0"):
                 actual = actual.set_coords(levelnames)
```
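Besides fixing the typo, the second change feeds `pd.MultiIndex.from_tuples` a plain list instead of an object ndarray, presumably one of the commit's typing fixes: the pandas annotations describe an iterable of tuples. A standalone illustration:

```python
import numpy as np
import pandas as pd

# An object ndarray of tuples happens to work at runtime, but .tolist()
# produces the list of tuples that MultiIndex.from_tuples is typed to accept.
values = np.empty(2, dtype=object)
values[0] = ("a", 1)
values[1] = ("b", 2)
idx = pd.MultiIndex.from_tuples(values.tolist(), names=["letter", "number"])
print(idx)  # MultiIndex([('a', 1), ('b', 2)], names=['letter', 'number'])
```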

readthedocs.yml

Lines changed: 0 additions & 15 deletions
This file was deleted.

tests/test_core.py

Lines changed: 53 additions & 36 deletions
```diff
@@ -1749,15 +1749,15 @@ def test_validate_reindex() -> None:
 
 
 @requires_dask
-def test_1d_blockwise_sort_optimization():
+def test_1d_blockwise_sort_optimization() -> None:
     # Make sure for resampling problems sorting isn't done.
     time = pd.Series(pd.date_range("2020-09-01", "2020-12-31 23:59", freq="3h"))
     array = dask.array.ones((len(time),), chunks=(224,))
 
-    actual, _ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
+    actual, *_ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
     assert all("getitem" not in k for k in actual.dask)
 
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         array,
         time.dt.dayofyear.values[::-1],
         sort=True,
@@ -1766,7 +1766,7 @@ def test_1d_blockwise_sort_optimization():
     )
     assert any("getitem" in k for k in actual.dask.layers)
 
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         array,
         time.dt.dayofyear.values[::-1],
         sort=False,
```
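The recurring `actual, _` → `actual, *_` change in this file is another of the commit's typing fixes: `groupby_reduce` returns the reduced array followed by one group-label array per `by` variable, so its return type is a variable-length tuple that a fixed two-element unpack does not match. Sketch:

```python
import numpy as np
from flox import groupby_reduce

array = np.ones((6,))
by1 = np.array([0, 0, 1, 1, 2, 2])
by2 = np.array([0, 1, 0, 1, 0, 1])

# One group-label array comes back per grouping variable, so star-unpacking
# receives them regardless of how many there are.
result, *groups = groupby_reduce(array, by1, by2, func="sum")
print(result.shape, len(groups))  # (3, 2) 2
```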
```diff
@@ -1777,7 +1777,7 @@ def test_1d_blockwise_sort_optimization():
 
 
 @requires_dask
-def test_negative_index_factorize_race_condition():
+def test_negative_index_factorize_race_condition() -> None:
     # shape = (10, 2000)
     # chunks = ((shape[0]-1,1), 10)
     shape = (101, 174000)
@@ -1804,17 +1804,17 @@ def test_negative_index_factorize_race_condition():
 
 
 @pytest.mark.parametrize("sort", [True, False])
-def test_expected_index_conversion_passthrough_range_index(sort):
+def test_expected_index_conversion_passthrough_range_index(sort) -> None:
     index = pd.RangeIndex(100)
-    actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,))
+    actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,))  # type: ignore[call-overload]
     assert actual[0] is index
 
 
-def test_method_check_numpy():
+def test_method_check_numpy() -> None:
     bins = [-2, -1, 0, 1, 2]
     field = np.ones((5, 3))
     by = np.array([[-1.5, -1.5, 0.5, 1.5, 1.5] * 3]).reshape(5, 3)
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         field,
         by,
         expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1825,7 +1825,7 @@ def test_method_check_numpy():
     expected = np.array([6, np.nan, 3, 6])
     assert_equal(actual, expected)
 
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         field,
         by,
         expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1845,7 +1845,7 @@ def test_method_check_numpy():
 
 
 @pytest.mark.parametrize("dtype", [None, np.float64])
-def test_choose_engine(dtype):
+def test_choose_engine(dtype) -> None:
     numbagg_possible = HAS_NUMBAGG and dtype is None
     default = "numbagg" if numbagg_possible else "numpy"
     mean = _initialize_aggregation(
@@ -1887,10 +1887,10 @@ def test_choose_engine(dtype):
     assert _choose_engine(np.array([1, 1, 2, 2]), agg=argmax) == "numpy"
 
 
-def test_xarray_fill_value_behaviour():
+def test_xarray_fill_value_behaviour() -> None:
     bar = np.array([1, 2, 3, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan])
     times = np.arange(0, 20, 2)
-    actual, _ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
+    actual, *_ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
     nan = np.nan
     # fmt: off
     expected = np.array(
```
```diff
@@ -1905,7 +1905,7 @@ def test_xarray_fill_value_behaviour():
 @pytest.mark.parametrize("func", ["nanquantile", "quantile"])
 @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False])
 @pytest.mark.parametrize("by_ndim", [1, 2])
-def test_multiple_quantiles(q, chunk, func, by_ndim):
+def test_multiple_quantiles(q, chunk, func, by_ndim) -> None:
     array = np.array([[1, -1, np.nan, 3, 4, 10, 5], [1, np.nan, np.nan, 3, 4, np.nan, np.nan]])
     labels = np.array([0, 0, 0, 1, 0, 1, 1])
     if by_ndim == 2:
@@ -1916,38 +1916,37 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
     if chunk:
         array = dask.array.from_array(array, chunks=(1,) + (-1,) * by_ndim)
 
-    actual, _ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
+    actual, *_ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
     sorted_array = array[..., [0, 1, 2, 4, 3, 5, 6]]
     f = partial(getattr(np, func), q=q, axis=axis, keepdims=True)
     if chunk:
-        sorted_array = sorted_array.compute()
+        sorted_array = sorted_array.compute()  # type: ignore[attr-defined]
     expected = np.concatenate((f(sorted_array[..., :4]), f(sorted_array[..., 4:])), axis=-1)
     if by_ndim == 2:
         expected = expected.squeeze(axis=-2)
     assert_equal(expected, actual, tolerance={"atol": 1e-14})
 
 
 @pytest.mark.parametrize("dtype", ["U3", "S3"])
-def test_nanlen_string(dtype, engine):
+def test_nanlen_string(dtype, engine) -> None:
     array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
     by = np.array([0, 0, 1, 2, 1, 0])
     expected = np.array([3, 2, 1], dtype=np.intp)
     actual, *_ = groupby_reduce(array, by, func="count", engine=engine)
     assert_equal(expected, actual)
 
 
-def test_cumusm():
+def test_cumusm() -> None:
     array = np.array([1, 1, 1], dtype=np.uint64)
     by = np.array([0] * array.shape[-1])
-    kwargs = {"func": "nancumsum", "axis": -1}
     expected = np.nancumsum(array, axis=-1)
 
-    actual = groupby_scan(array, by, **kwargs)
+    actual = groupby_scan(array, by, func="nancumsum", axis=-1)
     assert_equal(expected, actual)
 
     if has_dask:
         da = dask.array.from_array(array, chunks=2)
-        actual = groupby_scan(da, by, **kwargs)
+        actual = groupby_scan(da, by, func="nancumsum", axis=-1)
         assert_equal(expected, actual)
 
 
```
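Inlining the kwargs dict in `test_cumusm` is in the same vein: a mixed-value dict literal is inferred as `dict[str, object]`, which mypy will not expand into typed keyword arguments, even though `**kwargs` works at runtime. Explicit keywords satisfy both (sketch; import path assumed):

```python
import numpy as np
from flox import groupby_scan  # top-level import path assumed

array = np.array([1, 1, 1], dtype=np.uint64)
by = np.array([0, 0, 0])

# kwargs = {"func": "nancumsum", "axis": -1} would be dict[str, object],
# and `groupby_scan(array, by, **kwargs)` then fails type checking.
actual = groupby_scan(array, by, func="nancumsum", axis=-1)
print(actual)  # [1 2 3]
```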
```diff
@@ -1962,7 +1961,7 @@ def test_cumusm():
 @pytest.mark.parametrize("size", ((1, 12), (12,), (12, 9)))
 @pytest.mark.parametrize("add_nan_by", [True, False])
 @pytest.mark.parametrize("func", ["ffill", "bfill"])
-def test_ffill_bfill(chunks, size, add_nan_by, func):
+def test_ffill_bfill(chunks, size, add_nan_by, func) -> None:
     array, by = gen_array_by(size, func)
     if chunks:
         array = dask.array.from_array(array, chunks=chunks)
```
```diff
@@ -1976,11 +1975,11 @@ def test_ffill_bfill(chunks, size, add_nan_by, func):
 
 
 @requires_dask
-def test_blockwise_nans():
+def test_blockwise_nans() -> None:
     array = dask.array.ones((1, 10), chunks=2)
     by = np.array([-1, 0, -1, 1, -1, 2, -1, 3, 4, 4])
-    actual, actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
-    expected, expected_groups = flox.groupby_reduce(
+    actual, *actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
+    expected, *expected_groups = flox.groupby_reduce(
         array.compute(), by, func="sum", expected_groups=pd.RangeIndex(0, 5)
     )
     assert_equal(expected_groups, actual_groups)
```
```diff
@@ -1989,50 +1988,68 @@ def test_blockwise_nans():
 
 @pytest.mark.parametrize("func", ["sum", "prod", "count", "nansum"])
 @pytest.mark.parametrize("engine", ["flox", "numpy"])
-def test_agg_dtypes(func, engine):
+def test_agg_dtypes(func, engine) -> None:
     # regression test for GH388
     counts = np.array([0, 2, 1, 0, 1])
     group = np.array([1, 1, 1, 2, 2])
-    actual, _ = groupby_reduce(
+    actual, *_ = groupby_reduce(
         counts, group, expected_groups=(np.array([1, 2]),), func=func, dtype="uint8", engine=engine
     )
     expected = _get_array_func(func)(counts, dtype="uint8")
     assert actual.dtype == np.uint8 == expected.dtype
 
 
 @requires_dask
-def test_blockwise_avoid_rechunk():
+def test_blockwise_avoid_rechunk() -> None:
     array = dask.array.zeros((6,), chunks=(2, 4), dtype=np.int64)
     by = np.array(["1", "1", "0", "", "0", ""], dtype="<U1")
-    actual, groups = groupby_reduce(array, by, func="first")
-    assert_equal(groups, ["", "0", "1"])
+    actual, *groups = groupby_reduce(array, by, func="first")
+    assert_equal(groups, [["", "0", "1"]])
     assert_equal(actual, np.array([0, 0, 0], dtype=np.int64))
 
 
-def test_datetime_minmax(engine):
+def test_datetime_minmax(engine) -> None:
     # GH403
     array = np.array([np.datetime64("2000-01-01"), np.datetime64("2000-01-02"), np.datetime64("2000-01-03")])
     by = np.array([0, 0, 1])
-    actual, _ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
+    actual, *_ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
     expected = array[[0, 2]]
     assert_equal(expected, actual)
 
     expected = array[[1, 2]]
-    actual, _ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
+    actual, *_ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
     assert_equal(expected, actual)
 
 
 @pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"])
-def test_datetime_timedelta_first_last(engine, func):
+def test_datetime_timedelta_first_last(engine, func) -> None:
     import flox
 
     idx = 0 if "first" in func else -1
+    idx1 = 2 if "first" in func else -1
 
+    ## datetime
     dt = pd.date_range("2001-01-01", freq="d", periods=5).values
     by = np.ones(dt.shape, dtype=int)
-    actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+    actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
     assert_equal(actual, dt[[idx]])
 
+    # missing group
+    by = np.array([0, 2, 3, 3, 3])
+    actual, *_ = flox.groupby_reduce(
+        dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+    )
+    assert_equal(actual, [dt[0], np.datetime64("NaT"), dt[1], dt[idx1]])
+
+    ## timedelta
     dt = dt - dt[0]
-    actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+    by = np.ones(dt.shape, dtype=int)
+    actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
     assert_equal(actual, dt[[idx]])
+
+    # missing group
+    by = np.array([0, 2, 3, 3, 3])
+    actual, *_ = flox.groupby_reduce(
+        dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+    )
+    assert_equal(actual, [dt[0], np.timedelta64("NaT"), dt[1], dt[idx1]])
```
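The new "missing group" assertions exercise the fill-value fix end to end: with `fill_value=dtypes.NA` (flox's sentinel for a dtype-appropriate missing value), an empty group in a datetime64 or timedelta64 reduction comes back as NaT rather than a garbage integer. A standalone version of the datetime case, with the `dtypes` import path assumed:

```python
import numpy as np
import pandas as pd
import flox
from flox import xrdtypes as dtypes  # import path assumed for the NA sentinel

dt = pd.date_range("2001-01-01", freq="D", periods=5).values
by = np.array([0, 2, 3, 3, 3])  # nothing falls into group 1

actual, *_ = flox.groupby_reduce(
    dt, by, expected_groups=([0, 1, 2, 3],), func="nanfirst", fill_value=dtypes.NA
)
print(actual)  # [dt[0], NaT, dt[1], dt[2]]
```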
