@@ -1749,15 +1749,15 @@ def test_validate_reindex() -> None:
1749
1749
1750
1750
1751
1751
@requires_dask
1752
- def test_1d_blockwise_sort_optimization ():
1752
+ def test_1d_blockwise_sort_optimization () -> None :
1753
1753
# Make sure for resampling problems sorting isn't done.
1754
1754
time = pd .Series (pd .date_range ("2020-09-01" , "2020-12-31 23:59" , freq = "3h" ))
1755
1755
array = dask .array .ones ((len (time ),), chunks = (224 ,))
1756
1756
1757
- actual , _ = groupby_reduce (array , time .dt .dayofyear .values , method = "blockwise" , func = "count" )
1757
+ actual , * _ = groupby_reduce (array , time .dt .dayofyear .values , method = "blockwise" , func = "count" )
1758
1758
assert all ("getitem" not in k for k in actual .dask )
1759
1759
1760
- actual , _ = groupby_reduce (
1760
+ actual , * _ = groupby_reduce (
1761
1761
array ,
1762
1762
time .dt .dayofyear .values [::- 1 ],
1763
1763
sort = True ,
@@ -1766,7 +1766,7 @@ def test_1d_blockwise_sort_optimization():
1766
1766
)
1767
1767
assert any ("getitem" in k for k in actual .dask .layers )
1768
1768
1769
- actual , _ = groupby_reduce (
1769
+ actual , * _ = groupby_reduce (
1770
1770
array ,
1771
1771
time .dt .dayofyear .values [::- 1 ],
1772
1772
sort = False ,
@@ -1777,7 +1777,7 @@ def test_1d_blockwise_sort_optimization():
1777
1777
1778
1778
1779
1779
@requires_dask
1780
- def test_negative_index_factorize_race_condition ():
1780
+ def test_negative_index_factorize_race_condition () -> None :
1781
1781
# shape = (10, 2000)
1782
1782
# chunks = ((shape[0]-1,1), 10)
1783
1783
shape = (101 , 174000 )
@@ -1804,17 +1804,17 @@ def test_negative_index_factorize_race_condition():
1804
1804
1805
1805
1806
1806
@pytest .mark .parametrize ("sort" , [True , False ])
1807
- def test_expected_index_conversion_passthrough_range_index (sort ):
1807
+ def test_expected_index_conversion_passthrough_range_index (sort ) -> None :
1808
1808
index = pd .RangeIndex (100 )
1809
- actual = _convert_expected_groups_to_index (expected_groups = (index ,), isbin = (False ,), sort = (sort ,))
1809
+ actual = _convert_expected_groups_to_index (expected_groups = (index ,), isbin = (False ,), sort = (sort ,)) # type: ignore[call-overload]
1810
1810
assert actual [0 ] is index
1811
1811
1812
1812
1813
- def test_method_check_numpy ():
1813
+ def test_method_check_numpy () -> None :
1814
1814
bins = [- 2 , - 1 , 0 , 1 , 2 ]
1815
1815
field = np .ones ((5 , 3 ))
1816
1816
by = np .array ([[- 1.5 , - 1.5 , 0.5 , 1.5 , 1.5 ] * 3 ]).reshape (5 , 3 )
1817
- actual , _ = groupby_reduce (
1817
+ actual , * _ = groupby_reduce (
1818
1818
field ,
1819
1819
by ,
1820
1820
expected_groups = pd .IntervalIndex .from_breaks (bins ),
@@ -1825,7 +1825,7 @@ def test_method_check_numpy():
1825
1825
expected = np .array ([6 , np .nan , 3 , 6 ])
1826
1826
assert_equal (actual , expected )
1827
1827
1828
- actual , _ = groupby_reduce (
1828
+ actual , * _ = groupby_reduce (
1829
1829
field ,
1830
1830
by ,
1831
1831
expected_groups = pd .IntervalIndex .from_breaks (bins ),
@@ -1845,7 +1845,7 @@ def test_method_check_numpy():
1845
1845
1846
1846
1847
1847
@pytest .mark .parametrize ("dtype" , [None , np .float64 ])
1848
- def test_choose_engine (dtype ):
1848
+ def test_choose_engine (dtype ) -> None :
1849
1849
numbagg_possible = HAS_NUMBAGG and dtype is None
1850
1850
default = "numbagg" if numbagg_possible else "numpy"
1851
1851
mean = _initialize_aggregation (
@@ -1887,10 +1887,10 @@ def test_choose_engine(dtype):
1887
1887
assert _choose_engine (np .array ([1 , 1 , 2 , 2 ]), agg = argmax ) == "numpy"
1888
1888
1889
1889
1890
- def test_xarray_fill_value_behaviour ():
1890
+ def test_xarray_fill_value_behaviour () -> None :
1891
1891
bar = np .array ([1 , 2 , 3 , np .nan , np .nan , np .nan , 4 , 5 , np .nan , np .nan ])
1892
1892
times = np .arange (0 , 20 , 2 )
1893
- actual , _ = groupby_reduce (bar , times , func = "nansum" , expected_groups = (np .arange (19 ),))
1893
+ actual , * _ = groupby_reduce (bar , times , func = "nansum" , expected_groups = (np .arange (19 ),))
1894
1894
nan = np .nan
1895
1895
# fmt: off
1896
1896
expected = np .array (
@@ -1905,7 +1905,7 @@ def test_xarray_fill_value_behaviour():
1905
1905
@pytest .mark .parametrize ("func" , ["nanquantile" , "quantile" ])
1906
1906
@pytest .mark .parametrize ("chunk" , [pytest .param (True , marks = requires_dask ), False ])
1907
1907
@pytest .mark .parametrize ("by_ndim" , [1 , 2 ])
1908
- def test_multiple_quantiles (q , chunk , func , by_ndim ):
1908
+ def test_multiple_quantiles (q , chunk , func , by_ndim ) -> None :
1909
1909
array = np .array ([[1 , - 1 , np .nan , 3 , 4 , 10 , 5 ], [1 , np .nan , np .nan , 3 , 4 , np .nan , np .nan ]])
1910
1910
labels = np .array ([0 , 0 , 0 , 1 , 0 , 1 , 1 ])
1911
1911
if by_ndim == 2 :
@@ -1916,38 +1916,37 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
1916
1916
if chunk :
1917
1917
array = dask .array .from_array (array , chunks = (1 ,) + (- 1 ,) * by_ndim )
1918
1918
1919
- actual , _ = groupby_reduce (array , labels , func = func , finalize_kwargs = dict (q = q ), axis = axis )
1919
+ actual , * _ = groupby_reduce (array , labels , func = func , finalize_kwargs = dict (q = q ), axis = axis )
1920
1920
sorted_array = array [..., [0 , 1 , 2 , 4 , 3 , 5 , 6 ]]
1921
1921
f = partial (getattr (np , func ), q = q , axis = axis , keepdims = True )
1922
1922
if chunk :
1923
- sorted_array = sorted_array .compute ()
1923
+ sorted_array = sorted_array .compute () # type: ignore[attr-defined]
1924
1924
expected = np .concatenate ((f (sorted_array [..., :4 ]), f (sorted_array [..., 4 :])), axis = - 1 )
1925
1925
if by_ndim == 2 :
1926
1926
expected = expected .squeeze (axis = - 2 )
1927
1927
assert_equal (expected , actual , tolerance = {"atol" : 1e-14 })
1928
1928
1929
1929
1930
1930
@pytest .mark .parametrize ("dtype" , ["U3" , "S3" ])
1931
- def test_nanlen_string (dtype , engine ):
1931
+ def test_nanlen_string (dtype , engine ) -> None :
1932
1932
array = np .array (["ABC" , "DEF" , "GHI" , "JKL" , "MNO" , "PQR" ], dtype = dtype )
1933
1933
by = np .array ([0 , 0 , 1 , 2 , 1 , 0 ])
1934
1934
expected = np .array ([3 , 2 , 1 ], dtype = np .intp )
1935
1935
actual , * _ = groupby_reduce (array , by , func = "count" , engine = engine )
1936
1936
assert_equal (expected , actual )
1937
1937
1938
1938
1939
- def test_cumusm ():
1939
+ def test_cumusm () -> None :
1940
1940
array = np .array ([1 , 1 , 1 ], dtype = np .uint64 )
1941
1941
by = np .array ([0 ] * array .shape [- 1 ])
1942
- kwargs = {"func" : "nancumsum" , "axis" : - 1 }
1943
1942
expected = np .nancumsum (array , axis = - 1 )
1944
1943
1945
- actual = groupby_scan (array , by , ** kwargs )
1944
+ actual = groupby_scan (array , by , func = "nancumsum" , axis = - 1 )
1946
1945
assert_equal (expected , actual )
1947
1946
1948
1947
if has_dask :
1949
1948
da = dask .array .from_array (array , chunks = 2 )
1950
- actual = groupby_scan (da , by , ** kwargs )
1949
+ actual = groupby_scan (da , by , func = "nancumsum" , axis = - 1 )
1951
1950
assert_equal (expected , actual )
1952
1951
1953
1952
@@ -1962,7 +1961,7 @@ def test_cumusm():
1962
1961
@pytest .mark .parametrize ("size" , ((1 , 12 ), (12 ,), (12 , 9 )))
1963
1962
@pytest .mark .parametrize ("add_nan_by" , [True , False ])
1964
1963
@pytest .mark .parametrize ("func" , ["ffill" , "bfill" ])
1965
- def test_ffill_bfill (chunks , size , add_nan_by , func ):
1964
+ def test_ffill_bfill (chunks , size , add_nan_by , func ) -> None :
1966
1965
array , by = gen_array_by (size , func )
1967
1966
if chunks :
1968
1967
array = dask .array .from_array (array , chunks = chunks )
@@ -1976,11 +1975,11 @@ def test_ffill_bfill(chunks, size, add_nan_by, func):
1976
1975
1977
1976
1978
1977
@requires_dask
1979
- def test_blockwise_nans ():
1978
+ def test_blockwise_nans () -> None :
1980
1979
array = dask .array .ones ((1 , 10 ), chunks = 2 )
1981
1980
by = np .array ([- 1 , 0 , - 1 , 1 , - 1 , 2 , - 1 , 3 , 4 , 4 ])
1982
- actual , actual_groups = flox .groupby_reduce (array , by , func = "sum" , expected_groups = pd .RangeIndex (0 , 5 ))
1983
- expected , expected_groups = flox .groupby_reduce (
1981
+ actual , * actual_groups = flox .groupby_reduce (array , by , func = "sum" , expected_groups = pd .RangeIndex (0 , 5 ))
1982
+ expected , * expected_groups = flox .groupby_reduce (
1984
1983
array .compute (), by , func = "sum" , expected_groups = pd .RangeIndex (0 , 5 )
1985
1984
)
1986
1985
assert_equal (expected_groups , actual_groups )
@@ -1989,50 +1988,68 @@ def test_blockwise_nans():
1989
1988
1990
1989
@pytest .mark .parametrize ("func" , ["sum" , "prod" , "count" , "nansum" ])
1991
1990
@pytest .mark .parametrize ("engine" , ["flox" , "numpy" ])
1992
- def test_agg_dtypes (func , engine ):
1991
+ def test_agg_dtypes (func , engine ) -> None :
1993
1992
# regression test for GH388
1994
1993
counts = np .array ([0 , 2 , 1 , 0 , 1 ])
1995
1994
group = np .array ([1 , 1 , 1 , 2 , 2 ])
1996
- actual , _ = groupby_reduce (
1995
+ actual , * _ = groupby_reduce (
1997
1996
counts , group , expected_groups = (np .array ([1 , 2 ]),), func = func , dtype = "uint8" , engine = engine
1998
1997
)
1999
1998
expected = _get_array_func (func )(counts , dtype = "uint8" )
2000
1999
assert actual .dtype == np .uint8 == expected .dtype
2001
2000
2002
2001
2003
2002
@requires_dask
2004
- def test_blockwise_avoid_rechunk ():
2003
+ def test_blockwise_avoid_rechunk () -> None :
2005
2004
array = dask .array .zeros ((6 ,), chunks = (2 , 4 ), dtype = np .int64 )
2006
2005
by = np .array (["1" , "1" , "0" , "" , "0" , "" ], dtype = "<U1" )
2007
- actual , groups = groupby_reduce (array , by , func = "first" )
2008
- assert_equal (groups , ["" , "0" , "1" ])
2006
+ actual , * groups = groupby_reduce (array , by , func = "first" )
2007
+ assert_equal (groups , [[ "" , "0" , "1" ] ])
2009
2008
assert_equal (actual , np .array ([0 , 0 , 0 ], dtype = np .int64 ))
2010
2009
2011
2010
2012
- def test_datetime_minmax (engine ):
2011
+ def test_datetime_minmax (engine ) -> None :
2013
2012
# GH403
2014
2013
array = np .array ([np .datetime64 ("2000-01-01" ), np .datetime64 ("2000-01-02" ), np .datetime64 ("2000-01-03" )])
2015
2014
by = np .array ([0 , 0 , 1 ])
2016
- actual , _ = flox .groupby_reduce (array , by , func = "nanmin" , engine = engine )
2015
+ actual , * _ = flox .groupby_reduce (array , by , func = "nanmin" , engine = engine )
2017
2016
expected = array [[0 , 2 ]]
2018
2017
assert_equal (expected , actual )
2019
2018
2020
2019
expected = array [[1 , 2 ]]
2021
- actual , _ = flox .groupby_reduce (array , by , func = "nanmax" , engine = engine )
2020
+ actual , * _ = flox .groupby_reduce (array , by , func = "nanmax" , engine = engine )
2022
2021
assert_equal (expected , actual )
2023
2022
2024
2023
2025
2024
@pytest .mark .parametrize ("func" , ["first" , "last" , "nanfirst" , "nanlast" ])
2026
- def test_datetime_timedelta_first_last (engine , func ):
2025
+ def test_datetime_timedelta_first_last (engine , func ) -> None :
2027
2026
import flox
2028
2027
2029
2028
idx = 0 if "first" in func else - 1
2029
+ idx1 = 2 if "first" in func else - 1
2030
2030
2031
+ ## datetime
2031
2032
dt = pd .date_range ("2001-01-01" , freq = "d" , periods = 5 ).values
2032
2033
by = np .ones (dt .shape , dtype = int )
2033
- actual , _ = flox .groupby_reduce (dt , by , func = func , engine = engine )
2034
+ actual , * _ = flox .groupby_reduce (dt , by , func = func , engine = engine )
2034
2035
assert_equal (actual , dt [[idx ]])
2035
2036
2037
+ # missing group
2038
+ by = np .array ([0 , 2 , 3 , 3 , 3 ])
2039
+ actual , * _ = flox .groupby_reduce (
2040
+ dt , by , expected_groups = ([0 , 1 , 2 , 3 ],), func = func , engine = engine , fill_value = dtypes .NA
2041
+ )
2042
+ assert_equal (actual , [dt [0 ], np .datetime64 ("NaT" ), dt [1 ], dt [idx1 ]])
2043
+
2044
+ ## timedelta
2036
2045
dt = dt - dt [0 ]
2037
- actual , _ = flox .groupby_reduce (dt , by , func = func , engine = engine )
2046
+ by = np .ones (dt .shape , dtype = int )
2047
+ actual , * _ = flox .groupby_reduce (dt , by , func = func , engine = engine )
2038
2048
assert_equal (actual , dt [[idx ]])
2049
+
2050
+ # missing group
2051
+ by = np .array ([0 , 2 , 3 , 3 , 3 ])
2052
+ actual , * _ = flox .groupby_reduce (
2053
+ dt , by , expected_groups = ([0 , 1 , 2 , 3 ],), func = func , engine = engine , fill_value = dtypes .NA
2054
+ )
2055
+ assert_equal (actual , [dt [0 ], np .timedelta64 ("NaT" ), dt [1 ], dt [idx1 ]])
0 commit comments