Skip to content

Commit b340645

Browse files
committed
add benchmark comparing new vs old take
Signed-off-by: Connor Tsui <[email protected]>
1 parent 07f98c2 commit b340645

File tree

3 files changed

+110
-0
lines changed

3 files changed

+110
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-array/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ arrow-cast = { workspace = true }
9797
divan = { workspace = true }
9898
futures = { workspace = true, features = ["executor"] }
9999
insta = { workspace = true }
100+
rand_distr = { workspace = true }
100101
rstest = { workspace = true }
101102
vortex-array = { path = ".", features = ["test-harness", "table-display"] }
102103

@@ -167,3 +168,7 @@ harness = false
167168
[[bench]]
168169
name = "varbinview_zip"
169170
harness = false
171+
172+
[[bench]]
173+
name = "take_primitive"
174+
harness = false
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks comparing [`PVector`] take vs [`DictArray`] canonicalization.
5+
//!
6+
//! Both sets of benchmarks are parameterized by the number of indices/codes so their results can be compared directly.
7+
8+
#![allow(clippy::cast_possible_truncation)]
9+
#![allow(clippy::unwrap_used)]
10+
11+
use divan::Bencher;
12+
use rand::distr::Uniform;
13+
use rand::prelude::*;
14+
use rand_distr::Zipf;
15+
use vortex_array::IntoArray;
16+
use vortex_array::arrays::DictArray;
17+
use vortex_array::arrays::PrimitiveArray;
18+
use vortex_buffer::Buffer;
19+
use vortex_compute::take::Take;
20+
use vortex_mask::Mask;
21+
use vortex_vector::primitive::PVector;
22+
23+
fn main() {
24+
divan::main();
25+
}
26+
27+
/// Numbers of indices/codes to process; each benchmark runs once per listed size.
28+
const NUM_INDICES: &[usize] = &[1_000, 10_000, 100_000];
29+
30+
// --- PVector take benchmarks ---
31+
// The source vector holds one tenth as many elements as there are indices (the same ratio as dictionary values to codes).
32+
33+
#[divan::bench(args = NUM_INDICES, sample_count = 100_000)]
34+
fn pvector_take_uniform(bencher: Bencher, num_indices: usize) {
35+
let vector_size = num_indices / 10;
36+
let data: Buffer<u32> = (0..vector_size as u32).collect();
37+
let pvector = PVector::new(data, Mask::AllTrue(vector_size));
38+
39+
let rng = StdRng::seed_from_u64(0);
40+
let range = Uniform::new(0u32, vector_size as u32).unwrap();
41+
let indices: Vec<u32> = rng.sample_iter(range).take(num_indices).collect();
42+
43+
bencher
44+
.with_inputs(|| (&pvector, indices.as_slice()))
45+
.bench_refs(|(pv, idx)| pv.take(*idx));
46+
}
47+
48+
#[divan::bench(args = NUM_INDICES, sample_count = 100_000)]
49+
fn pvector_take_zipfian(bencher: Bencher, num_indices: usize) {
50+
let vector_size = num_indices / 10;
51+
let data: Buffer<u32> = (0..vector_size as u32).collect();
52+
let pvector = PVector::new(data, Mask::AllTrue(vector_size));
53+
54+
let rng = StdRng::seed_from_u64(0);
55+
let zipf = Zipf::new(vector_size as f64, 1.0).unwrap();
56+
let indices: Vec<u32> = rng
57+
.sample_iter(&zipf)
58+
.take(num_indices)
59+
.map(|i: f64| (i as u32 - 1).min(vector_size as u32 - 1))
60+
.collect();
61+
62+
bencher
63+
.with_inputs(|| (&pvector, indices.as_slice()))
64+
.bench_refs(|(pv, idx)| pv.take(*idx));
65+
}
66+
67+
// --- DictArray canonicalization benchmarks ---
68+
// Each dictionary has `num_indices / 10` unique values and `num_indices` codes.
69+
70+
#[divan::bench(args = NUM_INDICES, sample_count = 100_000)]
71+
fn dict_canonicalize_uniform(bencher: Bencher, num_indices: usize) {
72+
let num_values = num_indices / 10;
73+
let values = PrimitiveArray::from_iter(0..num_values as u32);
74+
75+
let rng = StdRng::seed_from_u64(0);
76+
let range = Uniform::new(0u32, num_values as u32).unwrap();
77+
let codes = PrimitiveArray::from_iter(rng.sample_iter(range).take(num_indices));
78+
79+
let dict = DictArray::try_new(codes.into_array(), values.into_array()).unwrap();
80+
81+
bencher
82+
.with_inputs(|| &dict)
83+
.bench_refs(|dict| dict.to_canonical());
84+
}
85+
86+
#[divan::bench(args = NUM_INDICES, sample_count = 100_000)]
87+
fn dict_canonicalize_zipfian(bencher: Bencher, num_indices: usize) {
88+
let num_values = num_indices / 10;
89+
let values = PrimitiveArray::from_iter(0..num_values as u32);
90+
91+
let rng = StdRng::seed_from_u64(0);
92+
let zipf = Zipf::new(num_values as f64, 1.0).unwrap();
93+
let codes = PrimitiveArray::from_iter(
94+
rng.sample_iter(&zipf)
95+
.take(num_indices)
96+
.map(|i: f64| (i as u32 - 1).min(num_values as u32 - 1)),
97+
);
98+
99+
let dict = DictArray::try_new(codes.into_array(), values.into_array()).unwrap();
100+
101+
bencher
102+
.with_inputs(|| &dict)
103+
.bench_refs(|dict| dict.to_canonical());
104+
}

0 commit comments

Comments
 (0)