1
+ from typing import Any , Callable
2
+
1
3
import pandas as pd
2
4
import pytest
3
5
14
16
from flox .core import groupby_reduce , groupby_scan
15
17
16
18
from . import assert_equal
17
- from .strategies import all_arrays , by_arrays , chunked_arrays , func_st , numeric_arrays
19
+ from .strategies import by_arrays , chunked_arrays , func_st , numeric_arrays
18
20
19
21
dask .config .set (scheduler = "sync" )
20
22
@@ -31,14 +33,14 @@ def bfill(array, axis, dtype=None):
31
33
)[::- 1 ]
32
34
33
35
34
- NUMPY_SCAN_FUNCS = {
36
+ NUMPY_SCAN_FUNCS : dict [ str , Callable ] = {
35
37
"nancumsum" : np .nancumsum ,
36
38
"ffill" : ffill ,
37
39
"bfill" : bfill ,
38
40
} # "cumsum": np.cumsum,
39
41
40
42
41
- def not_overflowing_array (array ) -> bool :
43
+ def not_overflowing_array (array : np . ndarray [ Any , Any ] ) -> bool :
42
44
if array .dtype .kind == "f" :
43
45
info = np .finfo (array .dtype )
44
46
elif array .dtype .kind in ["i" , "u" ]:
@@ -51,9 +53,8 @@ def not_overflowing_array(array) -> bool:
51
53
return result
52
54
53
55
54
- # TODO: migrate to by_arrays but with constant value
55
56
@given (data = st .data (), array = numeric_arrays , func = func_st )
56
- def test_groupby_reduce (data , array , func ) :
57
+ def test_groupby_reduce (data , array , func : str ) -> None :
57
58
# overflow behaviour differs between bincount and sum (for example)
58
59
assume (not_overflowing_array (array ))
59
60
# TODO: fix var for complex numbers upstream
@@ -71,14 +72,14 @@ def test_groupby_reduce(data, array, func):
71
72
"min_size" : 1 ,
72
73
"max_size" : 1 ,
73
74
},
74
- shape = array .shape [- 1 ],
75
+ shape = ( array .shape [- 1 ],) ,
75
76
)
76
77
)
77
78
assert len (np .unique (by )) == 1
78
79
kwargs = {"q" : 0.8 } if "quantile" in func else {}
79
- flox_kwargs = {}
80
+ flox_kwargs : dict [ str , Any ] = {}
80
81
with np .errstate (invalid = "ignore" , divide = "ignore" ):
81
- actual , _ = groupby_reduce (
82
+ actual , * _ = groupby_reduce (
82
83
array , by , func = func , axis = axis , engine = "numpy" , ** flox_kwargs , finalize_kwargs = kwargs
83
84
)
84
85
@@ -112,10 +113,10 @@ def test_groupby_reduce(data, array, func):
112
113
113
114
@given (
114
115
data = st .data (),
115
- array = chunked_arrays (),
116
+ array = chunked_arrays (arrays = numeric_arrays ),
116
117
func = st .sampled_from (tuple (NUMPY_SCAN_FUNCS )),
117
118
)
118
- def test_scans (data , array , func ) :
119
+ def test_scans (data , array : dask . array . Array , func : str ) -> None :
119
120
assume (not_overflowing_array (np .asarray (array )))
120
121
121
122
by = data .draw (by_arrays (shape = (array .shape [- 1 ],)))
@@ -148,7 +149,7 @@ def test_scans(data, array, func):
148
149
149
150
150
151
@given (data = st .data (), array = chunked_arrays ())
151
- def test_ffill_bfill_reverse (data , array ) :
152
+ def test_ffill_bfill_reverse (data , array : dask . array . Array ) -> None :
152
153
# TODO: test NaT and timedelta, datetime
153
154
assume (not_overflowing_array (np .asarray (array )))
154
155
by = data .draw (by_arrays (shape = (array .shape [- 1 ],)))
@@ -168,10 +169,10 @@ def reverse(arr):
168
169
169
170
@given (
170
171
data = st .data (),
171
- array = chunked_arrays (arrays = all_arrays ),
172
+ array = chunked_arrays (),
172
173
func = st .sampled_from (["first" , "last" , "nanfirst" , "nanlast" ]),
173
174
)
174
- def test_first_last (data , array , func ) :
175
+ def test_first_last (data , array : dask . array . Array , func : str ) -> None :
175
176
by = data .draw (by_arrays (shape = (array .shape [- 1 ],)))
176
177
177
178
INVERSES = {"first" : "last" , "last" : "first" , "nanfirst" : "nanlast" , "nanlast" : "nanfirst" }
@@ -183,8 +184,8 @@ def test_first_last(data, array, func):
183
184
array = array .rechunk ((* array .chunks [:- 1 ], - 1 ))
184
185
185
186
for arr in [array , array .compute ()]:
186
- forward , fg = groupby_reduce (arr , by , func = func , engine = "flox" )
187
- reverse , rg = groupby_reduce (arr [..., ::- 1 ], by [..., ::- 1 ], func = inverse , engine = "flox" )
187
+ forward , * fg = groupby_reduce (arr , by , func = func , engine = "flox" )
188
+ reverse , * rg = groupby_reduce (arr [..., ::- 1 ], by [..., ::- 1 ], func = inverse , engine = "flox" )
188
189
189
190
assert forward .dtype == reverse .dtype
190
191
assert forward .dtype == arr .dtype
@@ -196,6 +197,6 @@ def test_first_last(data, array, func):
196
197
if mate in ["first" , "last" ]:
197
198
array = array .rechunk ((* array .chunks [:- 1 ], - 1 ))
198
199
199
- first , _ = groupby_reduce (array , by , func = func , engine = "flox" )
200
- second , _ = groupby_reduce (array , by , func = mate , engine = "flox" )
200
+ first , * _ = groupby_reduce (array , by , func = func , engine = "flox" )
201
+ second , * _ = groupby_reduce (array , by , func = mate , engine = "flox" )
201
202
assert_equal (first , second )
0 commit comments