Skip to content

Commit 6076181

Browse files
authored
Add benchmarks for StructArray scalar_at and take (#4737)
Some of these code paths are known to be slow, and this will make it easier to benchmark and improve directly without having to pull other repos or create one-off demos. --------- Signed-off-by: Adam Gutglick <[email protected]>
1 parent b89c1f8 commit 6076181

File tree

3 files changed

+212
-0
lines changed

3 files changed

+212
-0
lines changed

vortex-array/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,11 @@ harness = false
109109
[[bench]]
110110
name = "chunk_array_builder"
111111
harness = false
112+
113+
[[bench]]
114+
name = "take_struct"
115+
harness = false
116+
117+
[[bench]]
118+
name = "scalar_at_struct"
119+
harness = false
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![allow(clippy::unwrap_used)]
5+
6+
use divan::Bencher;
7+
use rand::distr::Uniform;
8+
use rand::rngs::StdRng;
9+
use rand::{Rng, SeedableRng};
10+
use vortex_array::IntoArray;
11+
use vortex_array::arrays::StructArray;
12+
use vortex_array::validity::Validity;
13+
use vortex_buffer::Buffer;
14+
use vortex_dtype::FieldNames;
15+
16+
/// Benchmark binary entry point: hands control straight to `divan::main()`.
fn main() {
17+
divan::main();
18+
}
19+
20+
/// Number of rows in every struct array built by the benchmarks in this file.
const ARRAY_SIZE: usize = 100_000;
21+
/// Number of random `scalar_at` lookups performed per benchmark iteration.
const NUM_ACCESSES: usize = 1000;
22+
23+
#[divan::bench]
24+
fn scalar_at_struct_simple(bencher: Bencher) {
25+
let mut rng = StdRng::seed_from_u64(0);
26+
let range = Uniform::new(0i64, 100_000_000).unwrap();
27+
28+
// Create single field for the struct
29+
let field = (0..ARRAY_SIZE)
30+
.map(|_| rng.sample(range))
31+
.collect::<Buffer<i64>>()
32+
.into_array();
33+
34+
let struct_array = StructArray::try_new(
35+
FieldNames::from(["value"]),
36+
vec![field],
37+
ARRAY_SIZE,
38+
Validity::NonNullable,
39+
)
40+
.unwrap();
41+
42+
let indices: Vec<usize> = (0..NUM_ACCESSES)
43+
.map(|_| rng.random_range(0..ARRAY_SIZE))
44+
.collect();
45+
46+
bencher
47+
.with_inputs(|| (&struct_array, &indices))
48+
.bench_refs(|(array, indices)| {
49+
for &idx in indices.iter() {
50+
divan::black_box(array.scalar_at(idx));
51+
}
52+
});
53+
}
54+
55+
#[divan::bench]
56+
fn scalar_at_struct_wide(bencher: Bencher) {
57+
let mut rng = StdRng::seed_from_u64(0);
58+
let range = Uniform::new(0i64, 100_000_000).unwrap();
59+
60+
// Create a struct with many fields (8 fields)
61+
let fields: Vec<_> = (0..8)
62+
.map(|_| {
63+
(0..ARRAY_SIZE)
64+
.map(|_| rng.sample(range))
65+
.collect::<Buffer<i64>>()
66+
.into_array()
67+
})
68+
.collect();
69+
70+
let field_names = FieldNames::from([
71+
"field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8",
72+
]);
73+
74+
let struct_array =
75+
StructArray::try_new(field_names, fields, ARRAY_SIZE, Validity::NonNullable).unwrap();
76+
77+
let indices: Vec<usize> = (0..NUM_ACCESSES)
78+
.map(|_| rng.random_range(0..ARRAY_SIZE))
79+
.collect();
80+
81+
bencher
82+
.with_inputs(|| (&struct_array, &indices))
83+
.bench_refs(|(array, indices)| {
84+
for &idx in indices.iter() {
85+
divan::black_box(array.scalar_at(idx));
86+
}
87+
});
88+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#![allow(clippy::unwrap_used)]
5+
6+
use divan::Bencher;
7+
use rand::distr::Uniform;
8+
use rand::rngs::StdRng;
9+
use rand::{Rng, SeedableRng};
10+
use vortex_array::IntoArray;
11+
use vortex_array::arrays::StructArray;
12+
use vortex_array::compute::take;
13+
use vortex_array::validity::Validity;
14+
use vortex_buffer::Buffer;
15+
use vortex_dtype::FieldNames;
16+
17+
/// Benchmark binary entry point: hands control straight to `divan::main()`.
fn main() {
18+
divan::main();
19+
}
20+
21+
/// Number of rows in every struct array built by the benchmarks in this file.
const ARRAY_SIZE: usize = 100_000;
22+
/// Number of indices passed to each `take` call.
const TAKE_SIZE: usize = 1000;
23+
24+
#[divan::bench]
25+
fn take_struct_simple(bencher: Bencher) {
26+
let mut rng = StdRng::seed_from_u64(0);
27+
let range = Uniform::new(0i64, 100_000_000).unwrap();
28+
29+
// Create single field for the struct
30+
let field = (0..ARRAY_SIZE)
31+
.map(|_| rng.sample(range))
32+
.collect::<Buffer<i64>>()
33+
.into_array();
34+
35+
let struct_array = StructArray::try_new(
36+
FieldNames::from(["value"]),
37+
vec![field],
38+
ARRAY_SIZE,
39+
Validity::NonNullable,
40+
)
41+
.unwrap();
42+
43+
let indices: Buffer<u64> = (0..TAKE_SIZE)
44+
.map(|_| rng.random_range(0..ARRAY_SIZE) as u64)
45+
.collect();
46+
let indices_array = indices.into_array();
47+
48+
bencher
49+
.with_inputs(|| (&struct_array, &indices_array))
50+
.bench_refs(|(array, indices)| {
51+
divan::black_box(take(array.as_ref(), indices.as_ref()).unwrap());
52+
});
53+
}
54+
55+
#[divan::bench(args = [8])]
56+
fn take_struct_wide(bencher: Bencher, width: usize) {
57+
let mut rng = StdRng::seed_from_u64(0);
58+
let range = Uniform::new(0i64, 100_000_000).unwrap();
59+
60+
let fields: Vec<_> = (0..width)
61+
.map(|_| {
62+
(0..ARRAY_SIZE)
63+
.map(|_| rng.sample(range))
64+
.collect::<Buffer<i64>>()
65+
.into_array()
66+
})
67+
.collect();
68+
69+
let field_names = FieldNames::from([
70+
"field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8",
71+
]);
72+
73+
let struct_array =
74+
StructArray::try_new(field_names, fields, ARRAY_SIZE, Validity::NonNullable).unwrap();
75+
76+
let indices: Buffer<u64> = (0..TAKE_SIZE)
77+
.map(|_| rng.random_range(0..ARRAY_SIZE) as u64)
78+
.collect();
79+
let indices_array = indices.into_array();
80+
81+
bencher
82+
.with_inputs(|| (&struct_array, &indices_array))
83+
.bench_refs(|(array, indices)| {
84+
divan::black_box(take(array.as_ref(), indices.as_ref()).unwrap());
85+
});
86+
}
87+
88+
#[divan::bench]
89+
fn take_struct_sequential_indices(bencher: Bencher) {
90+
let mut rng = StdRng::seed_from_u64(0);
91+
let range = Uniform::new(0i64, 100_000_000).unwrap();
92+
93+
// Create single field for the struct
94+
let field = (0..ARRAY_SIZE)
95+
.map(|_| rng.sample(range))
96+
.collect::<Buffer<i64>>()
97+
.into_array();
98+
99+
let struct_array = StructArray::try_new(
100+
FieldNames::from(["value"]),
101+
vec![field],
102+
ARRAY_SIZE,
103+
Validity::NonNullable,
104+
)
105+
.unwrap();
106+
107+
// Sequential indices for better cache performance
108+
let indices: Buffer<u64> = (0..TAKE_SIZE as u64).collect();
109+
let indices_array = indices.into_array();
110+
111+
bencher
112+
.with_inputs(|| (&struct_array, &indices_array))
113+
.bench_refs(|(array, indices)| {
114+
divan::black_box(take(array.as_ref(), indices.as_ref()).unwrap());
115+
});
116+
}

0 commit comments

Comments
 (0)