Skip to content

Commit 4904a42

Browse files
authored
perf: add to aggregate_vectorized PrimitiveGroupValueBuilder benchmark as well (#17930)
I have a pretty major perf optimization for the primitive impl, so I am adding this benchmark before I open my PR.
1 parent 0f515dc commit 4904a42

File tree

2 files changed

+137
-123
lines changed

2 files changed

+137
-123
lines changed

datafusion/physical-plan/benches/aggregate_vectorized.rs

Lines changed: 136 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,30 @@
1616
// under the License.
1717

1818
use arrow::array::ArrayRef;
19-
use arrow::datatypes::StringViewType;
19+
use arrow::datatypes::{Int32Type, StringViewType};
2020
use arrow::util::bench_util::{
21-
create_string_view_array_with_len, create_string_view_array_with_max_len,
21+
create_primitive_array, create_string_view_array_with_len,
22+
create_string_view_array_with_max_len,
23+
};
24+
use arrow_schema::DataType;
25+
use criterion::measurement::WallTime;
26+
use criterion::{
27+
criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion,
2228
};
23-
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
2429
use datafusion_physical_plan::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder;
30+
use datafusion_physical_plan::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder;
2531
use datafusion_physical_plan::aggregates::group_values::multi_group_by::GroupColumn;
2632
use std::sync::Arc;
2733

2834
const SIZES: [usize; 3] = [1_000, 10_000, 100_000];
2935
const NULL_DENSITIES: [f32; 3] = [0.0, 0.1, 0.5];
3036

3137
fn bench_vectorized_append(c: &mut Criterion) {
38+
byte_view_vectorized_append(c);
39+
primitive_vectorized_append(c);
40+
}
41+
42+
fn byte_view_vectorized_append(c: &mut Criterion) {
3243
let mut group = c.benchmark_group("ByteViewGroupValueBuilder_vectorized_append");
3344

3445
for &size in &SIZES {
@@ -38,150 +49,153 @@ fn bench_vectorized_append(c: &mut Criterion) {
3849
let input = create_string_view_array_with_len(size, null_density, 8, false);
3950
let input: ArrayRef = Arc::new(input);
4051

41-
// vectorized_append
42-
let id = BenchmarkId::new(
43-
format!("inlined_null_{null_density:.1}_size_{size}"),
44-
"vectorized_append",
45-
);
46-
group.bench_function(id, |b| {
47-
b.iter(|| {
48-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
49-
builder.vectorized_append(&input, &rows).unwrap();
50-
});
51-
});
52-
53-
// append_val
54-
let id = BenchmarkId::new(
55-
format!("inlined_null_{null_density:.1}_size_{size}"),
56-
"append_val",
57-
);
58-
group.bench_function(id, |b| {
59-
b.iter(|| {
60-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
61-
for &i in &rows {
62-
builder.append_val(&input, i).unwrap();
63-
}
64-
});
65-
});
66-
67-
// vectorized_equal_to
68-
let id = BenchmarkId::new(
69-
format!("inlined_null_{null_density:.1}_size_{size}"),
70-
"vectorized_equal_to",
71-
);
72-
group.bench_function(id, |b| {
73-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
74-
builder.vectorized_append(&input, &rows).unwrap();
75-
let mut results = vec![true; size];
76-
b.iter(|| {
77-
builder.vectorized_equal_to(&rows, &input, &rows, &mut results);
78-
});
79-
});
52+
// "inlined" is the id prefix these benchmarks had before the body was
// extracted into `bytes_bench`; keeping it lets saved baselines line up.
bytes_bench(&mut group, "inlined", size, &rows, null_density, &input);
8053
}
8154
}
8255

8356
for &size in &SIZES {
8457
let rows: Vec<usize> = (0..size).collect();
8558

8659
for &null_density in &NULL_DENSITIES {
87-
let scenario = "mixed";
8860
let input = create_string_view_array_with_len(size, null_density, 64, true);
8961
let input: ArrayRef = Arc::new(input);
9062

91-
// vectorized_append
92-
let id = BenchmarkId::new(
93-
format!("{scenario}_null_{null_density:.1}_size_{size}"),
94-
"vectorized_append",
95-
);
96-
group.bench_function(id, |b| {
97-
b.iter(|| {
98-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
99-
builder.vectorized_append(&input, &rows).unwrap();
100-
});
101-
});
102-
103-
// append_val
104-
let id = BenchmarkId::new(
105-
format!("{scenario}_null_{null_density:.1}_size_{size}"),
106-
"append_val",
107-
);
108-
group.bench_function(id, |b| {
109-
b.iter(|| {
110-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
111-
for &i in &rows {
112-
builder.append_val(&input, i).unwrap();
113-
}
114-
});
115-
});
116-
117-
// vectorized_equal_to
118-
let id = BenchmarkId::new(
119-
format!("{scenario}_null_{null_density:.1}_size_{size}"),
120-
"vectorized_equal_to",
121-
);
122-
group.bench_function(id, |b| {
123-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
124-
builder.vectorized_append(&input, &rows).unwrap();
125-
let mut results = vec![true; size];
126-
b.iter(|| {
127-
builder.vectorized_equal_to(&rows, &input, &rows, &mut results);
128-
});
129-
});
63+
// This scenario was labelled "mixed" before the refactor; passing the
// literal "scenario" would rename every benchmark id in this loop.
bytes_bench(&mut group, "mixed", size, &rows, null_density, &input);
13064
}
13165
}
13266

13367
for &size in &SIZES {
13468
let rows: Vec<usize> = (0..size).collect();
13569

13670
for &null_density in &NULL_DENSITIES {
137-
let scenario = "random";
13871
let input = create_string_view_array_with_max_len(size, null_density, 400);
13972
let input: ArrayRef = Arc::new(input);
14073

141-
// vectorized_append
142-
let id = BenchmarkId::new(
143-
format!("{scenario}_null_{null_density:.1}_size_{size}"),
144-
"vectorized_append",
145-
);
146-
group.bench_function(id, |b| {
147-
b.iter(|| {
148-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
149-
builder.vectorized_append(&input, &rows).unwrap();
150-
});
151-
});
152-
153-
// append_val
154-
let id = BenchmarkId::new(
155-
format!("{scenario}_null_{null_density:.1}_size_{size}"),
156-
"append_val",
157-
);
158-
group.bench_function(id, |b| {
159-
b.iter(|| {
160-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
161-
for &i in &rows {
162-
builder.append_val(&input, i).unwrap();
163-
}
164-
});
165-
});
166-
167-
// vectorized_equal_to
168-
let id = BenchmarkId::new(
169-
format!("{scenario}_null_{null_density:.1}_size_{size}"),
170-
"vectorized_equal_to",
171-
);
172-
group.bench_function(id, |b| {
173-
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
174-
builder.vectorized_append(&input, &rows).unwrap();
175-
let mut results = vec![true; size];
176-
b.iter(|| {
177-
builder.vectorized_equal_to(&rows, &input, &rows, &mut results);
178-
});
179-
});
74+
bytes_bench(&mut group, "random", size, &rows, null_density, &input);
75+
}
76+
}
77+
78+
group.finish();
79+
}
80+
81+
fn bytes_bench(
82+
group: &mut BenchmarkGroup<WallTime>,
83+
bench_prefix: &str,
84+
size: usize,
85+
rows: &Vec<usize>,
86+
null_density: f32,
87+
input: &ArrayRef,
88+
) {
89+
// vectorized_append
90+
let id = BenchmarkId::new(
91+
format!("{bench_prefix}_null_{null_density:.1}_size_{size}"),
92+
"vectorized_append",
93+
);
94+
group.bench_function(id, |b| {
95+
b.iter(|| {
96+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
97+
builder.vectorized_append(input, rows).unwrap();
98+
});
99+
});
100+
101+
// append_val
102+
let id = BenchmarkId::new(
103+
format!("{bench_prefix}_null_{null_density:.1}_size_{size}"),
104+
"append_val",
105+
);
106+
group.bench_function(id, |b| {
107+
b.iter(|| {
108+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
109+
for &i in rows {
110+
builder.append_val(input, i).unwrap();
111+
}
112+
});
113+
});
114+
115+
// vectorized_equal_to
116+
let id = BenchmarkId::new(
117+
format!("{bench_prefix}_null_{null_density:.1}_size_{size}"),
118+
"vectorized_equal_to",
119+
);
120+
group.bench_function(id, |b| {
121+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
122+
builder.vectorized_append(input, rows).unwrap();
123+
let mut results = vec![true; size];
124+
b.iter(|| {
125+
builder.vectorized_equal_to(rows, input, rows, &mut results);
126+
});
127+
});
128+
}
129+
130+
fn primitive_vectorized_append(c: &mut Criterion) {
131+
let mut group = c.benchmark_group("PrimitiveGroupValueBuilder_vectorized_append");
132+
133+
for &size in &SIZES {
134+
let rows: Vec<usize> = (0..size).collect();
135+
136+
for &null_density in &NULL_DENSITIES {
137+
if null_density == 0.0 {
138+
bench_single_primitive::<false>(&mut group, size, &rows, null_density)
139+
}
140+
bench_single_primitive::<true>(&mut group, size, &rows, null_density);
180141
}
181142
}
182143

183144
group.finish();
184145
}
185146

147+
fn bench_single_primitive<const NULLABLE: bool>(
148+
group: &mut BenchmarkGroup<WallTime>,
149+
size: usize,
150+
rows: &Vec<usize>,
151+
null_density: f32,
152+
) {
153+
if !NULLABLE {
154+
assert_eq!(
155+
null_density, 0.0,
156+
"non-nullable case must have null_density 0"
157+
);
158+
}
159+
160+
let input = create_primitive_array::<Int32Type>(size, null_density);
161+
let input: ArrayRef = Arc::new(input);
162+
let function_name = format!("null_{null_density:.1}_nullable_{NULLABLE}_size_{size}");
163+
164+
// vectorized_append
165+
let id = BenchmarkId::new(&function_name, "vectorized_append");
166+
group.bench_function(id, |b| {
167+
b.iter(|| {
168+
let mut builder =
169+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32);
170+
builder.vectorized_append(&input, rows).unwrap();
171+
});
172+
});
173+
174+
// append_val
175+
let id = BenchmarkId::new(&function_name, "append_val");
176+
group.bench_function(id, |b| {
177+
b.iter(|| {
178+
let mut builder =
179+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32);
180+
for &i in rows {
181+
builder.append_val(&input, i).unwrap();
182+
}
183+
});
184+
});
185+
186+
// vectorized_equal_to
187+
let id = BenchmarkId::new(&function_name, "vectorized_equal_to");
188+
group.bench_function(id, |b| {
189+
let mut builder =
190+
PrimitiveGroupValueBuilder::<Int32Type, NULLABLE>::new(DataType::Int32);
191+
builder.vectorized_append(&input, rows).unwrap();
192+
let mut results = vec![true; size];
193+
194+
b.iter(|| {
195+
builder.vectorized_equal_to(rows, &input, rows, &mut results);
196+
});
197+
});
198+
}
199+
186200
criterion_group!(benches, bench_vectorized_append);
187201
criterion_main!(benches);

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
mod boolean;
2121
mod bytes;
2222
pub mod bytes_view;
23-
mod primitive;
23+
pub mod primitive;
2424

2525
use std::mem::{self, size_of};
2626

0 commit comments

Comments
 (0)