 pytest.importorskip("hypothesis")
 pytest.importorskip("dask")
+pytest.importorskip("cftime")
 
+import cftime
 import dask
 import hypothesis.extra.numpy as npst
 import hypothesis.strategies as st
@@ -66,11 +68,55 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]:
     elements={"allow_subnormal": False}, shape=npst.array_shapes(), dtype=supported_dtypes()
 )
 
+calendars = st.sampled_from(
+    [
+        "standard",
+        "gregorian",
+        "proleptic_gregorian",
+        "noleap",
+        "365_day",
+        "360_day",
+        "julian",
+        "all_leap",
+        "366_day",
+    ]
+)
+
+
+@st.composite
+def units(draw, *, calendar: str):
+    choices = ["days", "hours", "minutes", "seconds", "milliseconds", "microseconds"]
+    if calendar == "360_day":
+        choices += ["months"]
+    elif calendar == "noleap":
+        choices += ["common_years"]
+    time_units = draw(st.sampled_from(choices))
+
+    dt = draw(st.datetimes())
+    year, month, day = dt.year, dt.month, dt.day
+    if calendar == "360_day":
+        day = min(day, 30)  # every month has 30 days in the 360_day calendar; keep the reference date valid
+    return f"{time_units} since {year}-{month}-{day}"
 
-def by_arrays(shape):
-    return npst.arrays(
-        dtype=npst.integer_dtypes(endianness="=") | npst.unicode_string_dtypes(endianness="="),
-        shape=shape,
+
+@st.composite
+def cftime_arrays(draw, *, shape, calendars=calendars, elements=None):
+    if elements is None:
+        elements = {"min_value": -10_000, "max_value": 10_000}
+    cal = draw(calendars)
+    values = draw(npst.arrays(dtype=np.int64, shape=shape, elements=elements))
+    unit = draw(units(calendar=cal))
+    return cftime.num2date(values, units=unit, calendar=cal)
+
+
+def by_arrays(shape, *, elements=None):
+    return st.one_of(
+        npst.arrays(
+            dtype=npst.integer_dtypes(endianness="=") | npst.unicode_string_dtypes(endianness="="),
+            shape=shape,
+            elements=elements,
+        ),
+        cftime_arrays(shape=shape, elements=elements),
     )
 
 
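A rough interactive sketch (not part of the diff) of what the new strategies generate, assuming units and by_arrays from the hunk above are in scope; the exact unit string, calendar, and label dtype vary from run to run:

unit = units(calendar="360_day").example()  # e.g. "months since 1843-7-21"
labels = by_arrays(shape=(5,)).example()    # an int/str numpy array, or an object array of cftime datetimes
print(unit, labels.dtype, labels.shape)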
@@ -87,8 +133,43 @@ def not_overflowing_array(array) -> bool:
     return result
 
 
-@given(array=numeric_arrays, dtype=by_dtype_st, func=func_st)
-def test_groupby_reduce(array, dtype, func):
+@st.composite
+def chunks(draw, *, shape: tuple[int, ...]) -> tuple[tuple[int, ...], ...]:
+    chunks = []
+    for size in shape:
+        if size > 1:
+            nchunks = draw(st.integers(min_value=1, max_value=size - 1))
+            dividers = sorted(
+                set(draw(st.integers(min_value=1, max_value=size - 1)) for _ in range(nchunks - 1))
+            )
+            chunks.append(tuple(a - b for a, b in zip(dividers + [size], [0] + dividers)))
+        else:
+            chunks.append((1,))
+    return tuple(chunks)
+
+
+@st.composite
+def chunked_arrays(draw, *, chunks=chunks, arrays=numeric_arrays, from_array=dask.array.from_array):
+    array = draw(arrays)
+    chunks = draw(chunks(shape=array.shape))
+
+    if array.dtype.kind in "cf":
+        nan_idx = draw(
+            st.lists(
+                st.integers(min_value=0, max_value=array.shape[-1] - 1),
+                max_size=array.shape[-1] - 1,
+                unique=True,
+            )
+        )
+        if nan_idx:
+            array[..., nan_idx] = np.nan
+
+    return from_array(array, chunks=chunks)
+
+
+# TODO: migrate to by_arrays but with constant value
+@given(data=st.data(), array=numeric_arrays, func=func_st)
+def test_groupby_reduce(data, array, func):
     # overflow behaviour differs between bincount and sum (for example)
     assume(not_overflowing_array(array))
     # TODO: fix var for complex numbers upstream
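The chunks strategy picks, for each axis longer than one, a sorted set of split points and converts them into per-axis chunk sizes that sum back to the axis length; chunked_arrays then optionally sprinkles NaNs into float/complex arrays before wrapping them with dask.array.from_array. A hypothetical spot check (not part of the diff), assuming the strategies above are in scope:

per_axis = chunks(shape=(7, 1)).example()  # e.g. ((2, 4, 1), (1,))
assert sum(per_axis[0]) == 7 and per_axis[1] == (1,)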
@@ -97,7 +178,19 @@ def test_groupby_reduce(array, dtype, func):
     assume("arg" not in func and not np.any(np.isnan(array).ravel()))
 
     axis = -1
-    by = np.ones((array.shape[-1],), dtype=dtype)
+    by = data.draw(
+        by_arrays(
+            elements={
+                "alphabet": st.just("a"),
+                "min_value": 1,
+                "max_value": 1,
+                "min_size": 1,
+                "max_size": 1,
+            },
+            shape=array.shape[-1],
+        )
+    )
+    assert len(np.unique(by)) == 1
     kwargs = {"q": 0.8} if "quantile" in func else {}
     flox_kwargs = {}
     with np.errstate(invalid="ignore", divide="ignore"):
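The element constraints drawn above collapse every label to a single repeated value (the integer 1, the string "a", or one cftime timestamp), so by still describes exactly one group, as np.ones(...) did before, while also exercising string and datetime group labels. A hypothetical check outside the test (not part of the diff):

by = by_arrays(
    elements={"alphabet": st.just("a"), "min_value": 1, "max_value": 1, "min_size": 1, "max_size": 1},
    shape=(4,),
).example()
assert len(np.unique(by)) == 1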
@@ -133,40 +226,6 @@ def test_groupby_reduce(array, dtype, func):
         assert_equal(expected, actual, tolerance)
 
 
-@st.composite
-def chunks(draw, *, shape: tuple[int, ...]) -> tuple[tuple[int, ...], ...]:
-    chunks = []
-    for size in shape:
-        if size > 1:
-            nchunks = draw(st.integers(min_value=1, max_value=size - 1))
-            dividers = sorted(
-                set(draw(st.integers(min_value=1, max_value=size - 1)) for _ in range(nchunks - 1))
-            )
-            chunks.append(tuple(a - b for a, b in zip(dividers + [size], [0] + dividers)))
-        else:
-            chunks.append((1,))
-    return tuple(chunks)
-
-
-@st.composite
-def chunked_arrays(draw, *, chunks=chunks, arrays=numeric_arrays, from_array=dask.array.from_array):
-    array = draw(arrays)
-    chunks = draw(chunks(shape=array.shape))
-
-    if array.dtype.kind in "cf":
-        nan_idx = draw(
-            st.lists(
-                st.integers(min_value=0, max_value=array.shape[-1] - 1),
-                max_size=array.shape[-1] - 1,
-                unique=True,
-            )
-        )
-        if nan_idx:
-            array[..., nan_idx] = np.nan
-
-    return from_array(array, chunks=chunks)
-
-
 @given(
     data=st.data(),
     array=chunked_arrays(),
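In the context lines above, the dask-backed test pairs st.data() with chunked_arrays() so the body can draw values that depend on an earlier draw, for example a by array whose length matches the chunked array's last axis. A hypothetical sketch (the real test body is not shown in this diff; the name test_chunked_example is invented):

@given(data=st.data(), array=chunked_arrays())
def test_chunked_example(data, array):
    by = data.draw(by_arrays(shape=(array.shape[-1],)))
    ...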