@@ -21,6 +21,7 @@ use arrow::util::bench_util::{
21
21
create_primitive_array, create_string_view_array_with_len,
22
22
create_string_view_array_with_max_len,
23
23
} ;
24
+ use arrow:: util:: test_util:: seedable_rng;
24
25
use arrow_schema:: DataType ;
25
26
use criterion:: measurement:: WallTime ;
26
27
use criterion:: {
@@ -29,6 +30,7 @@ use criterion::{
29
30
use datafusion_physical_plan:: aggregates:: group_values:: multi_group_by:: bytes_view:: ByteViewGroupValueBuilder ;
30
31
use datafusion_physical_plan:: aggregates:: group_values:: multi_group_by:: primitive:: PrimitiveGroupValueBuilder ;
31
32
use datafusion_physical_plan:: aggregates:: group_values:: multi_group_by:: GroupColumn ;
33
+ use rand:: distr:: { Bernoulli , Distribution } ;
32
34
use std:: sync:: Arc ;
33
35
34
36
const SIZES : [ usize ; 3 ] = [ 1_000 , 10_000 , 100_000 ] ;
@@ -87,10 +89,8 @@ fn bytes_bench(
87
89
input : & ArrayRef ,
88
90
) {
89
91
// vectorized_append
90
- let id = BenchmarkId :: new (
91
- format ! ( "{bench_prefix}_null_{null_density:.1}_size_{size}" ) ,
92
- "vectorized_append" ,
93
- ) ;
92
+ let function_name = format ! ( "{bench_prefix}_null_{null_density:.1}_size_{size}" ) ;
93
+ let id = BenchmarkId :: new ( & function_name, "vectorized_append" ) ;
94
94
group. bench_function ( id, |b| {
95
95
b. iter ( || {
96
96
let mut builder = ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ;
@@ -99,10 +99,7 @@ fn bytes_bench(
99
99
} ) ;
100
100
101
101
// append_val
102
- let id = BenchmarkId :: new (
103
- format ! ( "{bench_prefix}_null_{null_density:.1}_size_{size}" ) ,
104
- "append_val" ,
105
- ) ;
102
+ let id = BenchmarkId :: new ( & function_name, "append_val" ) ;
106
103
group. bench_function ( id, |b| {
107
104
b. iter ( || {
108
105
let mut builder = ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ;
@@ -113,18 +110,55 @@ fn bytes_bench(
113
110
} ) ;
114
111
115
112
// vectorized_equal_to
116
- let id = BenchmarkId :: new (
117
- format ! ( "{bench_prefix}_null_{null_density:.1}_size_{size}" ) ,
118
- "vectorized_equal_to" ,
113
+ vectorized_equal_to (
114
+ group,
115
+ ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ,
116
+ & function_name,
117
+ rows,
118
+ input,
119
+ "all_true" ,
120
+ vec ! [ true ; size] ,
119
121
) ;
120
- group. bench_function ( id, |b| {
121
- let mut builder = ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ;
122
- builder. vectorized_append ( input, rows) . unwrap ( ) ;
123
- let mut results = vec ! [ true ; size] ;
124
- b. iter ( || {
125
- builder. vectorized_equal_to ( rows, input, rows, & mut results) ;
126
- } ) ;
127
- } ) ;
122
+ vectorized_equal_to (
123
+ group,
124
+ ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ,
125
+ & function_name,
126
+ rows,
127
+ input,
128
+ "0.75 true" ,
129
+ {
130
+ let mut rng = seedable_rng ( ) ;
131
+ let d = Bernoulli :: new ( 0.75 ) . unwrap ( ) ;
132
+ ( 0 ..size) . map ( |_| d. sample ( & mut rng) ) . collect :: < Vec < _ > > ( )
133
+ } ,
134
+ ) ;
135
+ vectorized_equal_to (
136
+ group,
137
+ ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ,
138
+ & function_name,
139
+ rows,
140
+ input,
141
+ "0.5 true" ,
142
+ {
143
+ let mut rng = seedable_rng ( ) ;
144
+ let d = Bernoulli :: new ( 0.5 ) . unwrap ( ) ;
145
+ ( 0 ..size) . map ( |_| d. sample ( & mut rng) ) . collect :: < Vec < _ > > ( )
146
+ } ,
147
+ ) ;
148
+ vectorized_equal_to (
149
+ group,
150
+ ByteViewGroupValueBuilder :: < StringViewType > :: new ( ) ,
151
+ & function_name,
152
+ rows,
153
+ input,
154
+ "0.25 true" ,
155
+ {
156
+ let mut rng = seedable_rng ( ) ;
157
+ let d = Bernoulli :: new ( 0.25 ) . unwrap ( ) ;
158
+ ( 0 ..size) . map ( |_| d. sample ( & mut rng) ) . collect :: < Vec < _ > > ( )
159
+ } ,
160
+ ) ;
161
+ // Not adding 0 true case here as if we optimize for 0 true cases the caller should avoid calling this method at all
128
162
}
129
163
130
164
fn primitive_vectorized_append ( c : & mut Criterion ) {
@@ -184,15 +218,82 @@ fn bench_single_primitive<const NULLABLE: bool>(
184
218
} ) ;
185
219
186
220
// vectorized_equal_to
187
- let id = BenchmarkId :: new ( & function_name, "vectorized_equal_to" ) ;
221
+ vectorized_equal_to (
222
+ group,
223
+ PrimitiveGroupValueBuilder :: < Int32Type , NULLABLE > :: new ( DataType :: Int32 ) ,
224
+ & function_name,
225
+ rows,
226
+ & input,
227
+ "all_true" ,
228
+ vec ! [ true ; size] ,
229
+ ) ;
230
+ vectorized_equal_to (
231
+ group,
232
+ PrimitiveGroupValueBuilder :: < Int32Type , NULLABLE > :: new ( DataType :: Int32 ) ,
233
+ & function_name,
234
+ rows,
235
+ & input,
236
+ "0.75 true" ,
237
+ {
238
+ let mut rng = seedable_rng ( ) ;
239
+ let d = Bernoulli :: new ( 0.75 ) . unwrap ( ) ;
240
+ ( 0 ..size) . map ( |_| d. sample ( & mut rng) ) . collect :: < Vec < _ > > ( )
241
+ } ,
242
+ ) ;
243
+ vectorized_equal_to (
244
+ group,
245
+ PrimitiveGroupValueBuilder :: < Int32Type , NULLABLE > :: new ( DataType :: Int32 ) ,
246
+ & function_name,
247
+ rows,
248
+ & input,
249
+ "0.5 true" ,
250
+ {
251
+ let mut rng = seedable_rng ( ) ;
252
+ let d = Bernoulli :: new ( 0.5 ) . unwrap ( ) ;
253
+ ( 0 ..size) . map ( |_| d. sample ( & mut rng) ) . collect :: < Vec < _ > > ( )
254
+ } ,
255
+ ) ;
256
+ vectorized_equal_to (
257
+ group,
258
+ PrimitiveGroupValueBuilder :: < Int32Type , NULLABLE > :: new ( DataType :: Int32 ) ,
259
+ & function_name,
260
+ rows,
261
+ & input,
262
+ "0.25 true" ,
263
+ {
264
+ let mut rng = seedable_rng ( ) ;
265
+ let d = Bernoulli :: new ( 0.25 ) . unwrap ( ) ;
266
+ ( 0 ..size) . map ( |_| d. sample ( & mut rng) ) . collect :: < Vec < _ > > ( )
267
+ } ,
268
+ ) ;
269
+ // Not adding 0 true case here as if we optimize for 0 true cases the caller should avoid calling this method at all
270
+ }
271
+
272
+ /// Test `vectorized_equal_to` with different number of true in the initial results
273
+ fn vectorized_equal_to < GroupColumnBuilder : GroupColumn > (
274
+ group : & mut BenchmarkGroup < WallTime > ,
275
+ mut builder : GroupColumnBuilder ,
276
+ function_name : & str ,
277
+ rows : & [ usize ] ,
278
+ input : & ArrayRef ,
279
+ equal_to_result_description : & str ,
280
+ equal_to_results : Vec < bool > ,
281
+ ) {
282
+ let id = BenchmarkId :: new (
283
+ function_name,
284
+ format ! ( "vectorized_equal_to_{equal_to_result_description}" ) ,
285
+ ) ;
188
286
group. bench_function ( id, |b| {
189
- let mut builder =
190
- PrimitiveGroupValueBuilder :: < Int32Type , NULLABLE > :: new ( DataType :: Int32 ) ;
191
- builder. vectorized_append ( & input, rows) . unwrap ( ) ;
192
- let mut results = vec ! [ true ; size] ;
287
+ builder. vectorized_append ( input, rows) . unwrap ( ) ;
193
288
194
289
b. iter ( || {
195
- builder. vectorized_equal_to ( rows, & input, rows, & mut results) ;
290
+ // Cloning is a must as `vectorized_equal_to` will modify the input vec
291
+ // and without cloning all benchmarks after the first one won't be meaningful
292
+ let mut equal_to_results = equal_to_results. clone ( ) ;
293
+ builder. vectorized_equal_to ( rows, input, rows, & mut equal_to_results) ;
294
+
295
+ // Make sure that the compiler does not optimize away the call
296
+ criterion:: black_box ( equal_to_results) ;
196
297
} ) ;
197
298
} ) ;
198
299
}
0 commit comments