Skip to content

Commit 07a7eb2

Browse files
authored
bench: fix vectorized_equal_to bench mutated between iterations (#17968)
* bench: fix `vectorized_equal_to` bench mutated between iterations
* added comment
1 parent 60b37b4 commit 07a7eb2

File tree

1 file changed

+126
-25
lines changed

1 file changed

+126
-25
lines changed

datafusion/physical-plan/benches/aggregate_vectorized.rs

Lines changed: 126 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use arrow::util::bench_util::{
2121
create_primitive_array, create_string_view_array_with_len,
2222
create_string_view_array_with_max_len,
2323
};
24+
use arrow::util::test_util::seedable_rng;
2425
use arrow_schema::DataType;
2526
use criterion::measurement::WallTime;
2627
use criterion::{
@@ -29,6 +30,7 @@ use criterion::{
2930
use datafusion_physical_plan::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder;
3031
use datafusion_physical_plan::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder;
3132
use datafusion_physical_plan::aggregates::group_values::multi_group_by::GroupColumn;
33+
use rand::distr::{Bernoulli, Distribution};
3234
use std::sync::Arc;
3335

3436
const SIZES: [usize; 3] = [1_000, 10_000, 100_000];
@@ -87,10 +89,8 @@ fn bytes_bench(
8789
input: &ArrayRef,
8890
) {
8991
// vectorized_append
90-
let id = BenchmarkId::new(
91-
format!("{bench_prefix}_null_{null_density:.1}_size_{size}"),
92-
"vectorized_append",
93-
);
92+
let function_name = format!("{bench_prefix}_null_{null_density:.1}_size_{size}");
93+
let id = BenchmarkId::new(&function_name, "vectorized_append");
9494
group.bench_function(id, |b| {
9595
b.iter(|| {
9696
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
@@ -99,10 +99,7 @@ fn bytes_bench(
9999
});
100100

101101
// append_val
102-
let id = BenchmarkId::new(
103-
format!("{bench_prefix}_null_{null_density:.1}_size_{size}"),
104-
"append_val",
105-
);
102+
let id = BenchmarkId::new(&function_name, "append_val");
106103
group.bench_function(id, |b| {
107104
b.iter(|| {
108105
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
@@ -113,18 +110,55 @@ fn bytes_bench(
113110
});
114111

115112
// vectorized_equal_to
116-
let id = BenchmarkId::new(
117-
format!("{bench_prefix}_null_{null_density:.1}_size_{size}"),
118-
"vectorized_equal_to",
113+
vectorized_equal_to(
114+
group,
115+
ByteViewGroupValueBuilder::<StringViewType>::new(),
116+
&function_name,
117+
rows,
118+
input,
119+
"all_true",
120+
vec![true; size],
119121
);
120-
group.bench_function(id, |b| {
121-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
122-
builder.vectorized_append(input, rows).unwrap();
123-
let mut results = vec![true; size];
124-
b.iter(|| {
125-
builder.vectorized_equal_to(rows, input, rows, &mut results);
126-
});
127-
});
122+
vectorized_equal_to(
123+
group,
124+
ByteViewGroupValueBuilder::<StringViewType>::new(),
125+
&function_name,
126+
rows,
127+
input,
128+
"0.75 true",
129+
{
130+
let mut rng = seedable_rng();
131+
let d = Bernoulli::new(0.75).unwrap();
132+
(0..size).map(|_| d.sample(&mut rng)).collect::<Vec<_>>()
133+
},
134+
);
135+
vectorized_equal_to(
136+
group,
137+
ByteViewGroupValueBuilder::<StringViewType>::new(),
138+
&function_name,
139+
rows,
140+
input,
141+
"0.5 true",
142+
{
143+
let mut rng = seedable_rng();
144+
let d = Bernoulli::new(0.5).unwrap();
145+
(0..size).map(|_| d.sample(&mut rng)).collect::<Vec<_>>()
146+
},
147+
);
148+
vectorized_equal_to(
149+
group,
150+
ByteViewGroupValueBuilder::<StringViewType>::new(),
151+
&function_name,
152+
rows,
153+
input,
154+
"0.25 true",
155+
{
156+
let mut rng = seedable_rng();
157+
let d = Bernoulli::new(0.25).unwrap();
158+
(0..size).map(|_| d.sample(&mut rng)).collect::<Vec<_>>()
159+
},
160+
);
161+
// Not adding an all-false (0 true) case here: if that case is worth optimizing, the caller should avoid calling this method at all
128162
}
129163

130164
fn primitive_vectorized_append(c: &mut Criterion) {
@@ -184,15 +218,82 @@ fn bench_single_primitive<const NULLABLE: bool>(
184218
});
185219

186220
// vectorized_equal_to
187-
let id = BenchmarkId::new(&function_name, "vectorized_equal_to");
221+
vectorized_equal_to(
222+
group,
223+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32),
224+
&function_name,
225+
rows,
226+
&input,
227+
"all_true",
228+
vec![true; size],
229+
);
230+
vectorized_equal_to(
231+
group,
232+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32),
233+
&function_name,
234+
rows,
235+
&input,
236+
"0.75 true",
237+
{
238+
let mut rng = seedable_rng();
239+
let d = Bernoulli::new(0.75).unwrap();
240+
(0..size).map(|_| d.sample(&mut rng)).collect::<Vec<_>>()
241+
},
242+
);
243+
vectorized_equal_to(
244+
group,
245+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32),
246+
&function_name,
247+
rows,
248+
&input,
249+
"0.5 true",
250+
{
251+
let mut rng = seedable_rng();
252+
let d = Bernoulli::new(0.5).unwrap();
253+
(0..size).map(|_| d.sample(&mut rng)).collect::<Vec<_>>()
254+
},
255+
);
256+
vectorized_equal_to(
257+
group,
258+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32),
259+
&function_name,
260+
rows,
261+
&input,
262+
"0.25 true",
263+
{
264+
let mut rng = seedable_rng();
265+
let d = Bernoulli::new(0.25).unwrap();
266+
(0..size).map(|_| d.sample(&mut rng)).collect::<Vec<_>>()
267+
},
268+
);
269+
// Not adding an all-false (0 true) case here: if that case is worth optimizing, the caller should avoid calling this method at all
270+
}
271+
272+
/// Benchmark `vectorized_equal_to` with a varying number of `true` values in the initial results
273+
fn vectorized_equal_to<GroupColumnBuilder: GroupColumn>(
274+
group: &mut BenchmarkGroup<WallTime>,
275+
mut builder: GroupColumnBuilder,
276+
function_name: &str,
277+
rows: &[usize],
278+
input: &ArrayRef,
279+
equal_to_result_description: &str,
280+
equal_to_results: Vec<bool>,
281+
) {
282+
let id = BenchmarkId::new(
283+
function_name,
284+
format!("vectorized_equal_to_{equal_to_result_description}"),
285+
);
188286
group.bench_function(id, |b| {
189-
let mut builder =
190-
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32);
191-
builder.vectorized_append(&input, rows).unwrap();
192-
let mut results = vec![true; size];
287+
builder.vectorized_append(input, rows).unwrap();
193288

194289
b.iter(|| {
195-
builder.vectorized_equal_to(rows, &input, rows, &mut results);
290+
// Cloning is a must, as `vectorized_equal_to` modifies the results vec in place;
291+
// without a fresh clone, every iteration after the first would not be meaningful
292+
let mut equal_to_results = equal_to_results.clone();
293+
builder.vectorized_equal_to(rows, input, rows, &mut equal_to_results);
294+
295+
// Make sure that the compiler does not optimize away the call
296+
criterion::black_box(equal_to_results);
196297
});
197298
});
198299
}

0 commit comments

Comments
 (0)