Skip to content

Commit 2171765

Browse files
authored
Merge branch 'develop' into ct/optimize-ci
2 parents 14062e7 + 1174399 commit 2171765

File tree

27 files changed

+1455
-281
lines changed

27 files changed

+1455
-281
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-array/Cargo.toml

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ arrow-cast = { workspace = true }
9797
divan = { workspace = true }
9898
futures = { workspace = true, features = ["executor"] }
9999
insta = { workspace = true }
100+
rand_distr = { workspace = true }
100101
rstest = { workspace = true }
101102
vortex-array = { path = ".", features = ["test-harness", "table-display"] }
102103

@@ -112,10 +113,6 @@ harness = false
112113
name = "compare"
113114
harness = false
114115

115-
[[bench]]
116-
name = "take_strings"
117-
harness = false
118-
119116
[[bench]]
120117
name = "take_patches"
121118
harness = false
@@ -124,10 +121,6 @@ harness = false
124121
name = "chunk_array_builder"
125122
harness = false
126123

127-
[[bench]]
128-
name = "take_struct"
129-
harness = false
130-
131124
[[bench]]
132125
name = "scalar_at_struct"
133126
harness = false
@@ -167,3 +160,7 @@ harness = false
167160
[[bench]]
168161
name = "varbinview_zip"
169162
harness = false
163+
164+
[[bench]]
165+
name = "take_primitive"
166+
harness = false
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks comparing [`PVector`] take vs [`DictArray`] canonicalization.
5+
//!
6+
//! Both are parameterized by the number of indices/codes so that their results can be compared directly.
7+
8+
#![allow(clippy::cast_possible_truncation)]
9+
#![allow(clippy::unwrap_used)]
10+
11+
use divan::Bencher;
12+
use rand::distr::Uniform;
13+
use rand::prelude::*;
14+
use rand_distr::Zipf;
15+
use vortex_array::IntoArray;
16+
use vortex_array::arrays::DictArray;
17+
use vortex_array::arrays::PrimitiveArray;
18+
use vortex_buffer::Buffer;
19+
use vortex_compute::take::Take;
20+
use vortex_mask::Mask;
21+
use vortex_vector::primitive::PVector;
22+
23+
// Entry point of the benchmark binary: hands control to `divan::main()`.
fn main() {
24+
divan::main();
25+
}
26+
27+
/// Number of indices to take.
28+
const NUM_INDICES: &[usize] = &[1_000, 10_000, 100_000];
29+
30+
/// Size of the source vector / dictionary values.
31+
const VECTOR_SIZE: &[usize] = &[16, 256, 2048, 8192];
32+
33+
// --- PVector take benchmarks ---
34+
35+
/// Benchmarks `PVector::take` with indices drawn from a uniform distribution.
///
/// `VECTOR_SIZE` (const generic, supplied via `consts = VECTOR_SIZE`) is the length of the
/// source vector; `num_indices` (supplied via `args = NUM_INDICES`) is how many indices are taken.
#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)]
36+
fn pvector_take_uniform<const VECTOR_SIZE: usize>(bencher: Bencher, num_indices: usize) {
37+
// Source vector holding the values 0..VECTOR_SIZE, paired with an `AllTrue` mask.
let data: Buffer<u32> = (0..VECTOR_SIZE as u32).collect();
38+
let pvector = PVector::new(data, Mask::AllTrue(VECTOR_SIZE));
39+
40+
// Fixed seed so every run takes the same indices.
let rng = StdRng::seed_from_u64(0);
41+
let range = Uniform::new(0u32, VECTOR_SIZE as u32).unwrap();
42+
let indices: Vec<u32> = rng.sample_iter(range).take(num_indices).collect();
43+
44+
// The vector and indices are built once above; only `take` sits in the benchmarked closure.
bencher
45+
.with_inputs(|| (&pvector, indices.as_slice()))
46+
.bench_refs(|(pv, idx)| pv.take(*idx));
47+
}
48+
49+
/// Benchmarks `PVector::take` with indices drawn from a Zipf distribution (exponent 1.0).
///
/// `VECTOR_SIZE` (const generic, supplied via `consts = VECTOR_SIZE`) is the length of the
/// source vector; `num_indices` (supplied via `args = NUM_INDICES`) is how many indices are taken.
#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)]
50+
fn pvector_take_zipfian<const VECTOR_SIZE: usize>(bencher: Bencher, num_indices: usize) {
51+
// Source vector holding the values 0..VECTOR_SIZE, paired with an `AllTrue` mask.
let data: Buffer<u32> = (0..VECTOR_SIZE as u32).collect();
52+
let pvector = PVector::new(data, Mask::AllTrue(VECTOR_SIZE));
53+
54+
// Fixed seed so every run takes the same indices.
let rng = StdRng::seed_from_u64(0);
55+
let zipf = Zipf::new(VECTOR_SIZE as f64, 1.0).unwrap();
56+
let indices: Vec<u32> = rng
57+
.sample_iter(&zipf)
58+
.take(num_indices)
59+
// Shift each sample down by one and clamp it to the last valid index.
// NOTE(review): the `- 1` assumes samples are always >= 1 (1-based Zipf) — confirm
// against rand_distr, otherwise the u32 subtraction could underflow.
.map(|i: f64| (i as u32 - 1).min(VECTOR_SIZE as u32 - 1))
60+
.collect();
61+
62+
// The vector and indices are built once above; only `take` sits in the benchmarked closure.
bencher
63+
.with_inputs(|| (&pvector, indices.as_slice()))
64+
.bench_refs(|(pv, idx)| pv.take(*idx));
65+
}
66+
67+
// --- DictArray canonicalization benchmarks ---
68+
69+
/// Benchmarks `to_canonical` on a `DictArray` whose codes are drawn from a uniform distribution.
///
/// `NUM_VALUES` is the number of dictionary values. Note that it is fed from the same
/// `VECTOR_SIZE` constant list as the `PVector` benchmarks (`consts = VECTOR_SIZE`).
/// `num_indices` (supplied via `args = NUM_INDICES`) is the number of codes.
#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)]
70+
fn dict_canonicalize_uniform<const NUM_VALUES: usize>(bencher: Bencher, num_indices: usize) {
71+
// Dictionary values are simply 0..NUM_VALUES.
let values = PrimitiveArray::from_iter(0..NUM_VALUES as u32);
72+
73+
// Fixed seed so every run uses the same codes.
let rng = StdRng::seed_from_u64(0);
74+
let range = Uniform::new(0u32, NUM_VALUES as u32).unwrap();
75+
let codes = PrimitiveArray::from_iter(rng.sample_iter(range).take(num_indices));
76+
77+
let dict = DictArray::try_new(codes.into_array(), values.into_array()).unwrap();
78+
79+
// The dictionary is built once above; only `to_canonical` sits in the benchmarked closure.
bencher
80+
.with_inputs(|| &dict)
81+
.bench_refs(|dict| dict.to_canonical());
82+
}
83+
84+
/// Benchmarks `to_canonical` on a `DictArray` whose codes are drawn from a Zipf distribution
/// (exponent 1.0).
///
/// `NUM_VALUES` is the number of dictionary values. Note that it is fed from the same
/// `VECTOR_SIZE` constant list as the `PVector` benchmarks (`consts = VECTOR_SIZE`).
/// `num_indices` (supplied via `args = NUM_INDICES`) is the number of codes.
#[divan::bench(args = NUM_INDICES, consts = VECTOR_SIZE, sample_count = 100_000)]
85+
fn dict_canonicalize_zipfian<const NUM_VALUES: usize>(bencher: Bencher, num_indices: usize) {
86+
// Dictionary values are simply 0..NUM_VALUES.
let values = PrimitiveArray::from_iter(0..NUM_VALUES as u32);
87+
88+
// Fixed seed so every run uses the same codes.
let rng = StdRng::seed_from_u64(0);
89+
let zipf = Zipf::new(NUM_VALUES as f64, 1.0).unwrap();
90+
let codes = PrimitiveArray::from_iter(
91+
rng.sample_iter(&zipf)
92+
.take(num_indices)
93+
// Shift each sample down by one and clamp it to the last valid code.
// NOTE(review): the `- 1` assumes samples are always >= 1 (1-based Zipf) — confirm
// against rand_distr, otherwise the u32 subtraction could underflow.
.map(|i: f64| (i as u32 - 1).min(NUM_VALUES as u32 - 1)),
94+
);
95+
96+
let dict = DictArray::try_new(codes.into_array(), values.into_array()).unwrap();
97+
98+
// The dictionary is built once above; only `to_canonical` sits in the benchmarked closure.
bencher
99+
.with_inputs(|| &dict)
100+
.bench_refs(|dict| dict.to_canonical());
101+
}

vortex-array/benches/take_strings.rs

Lines changed: 0 additions & 87 deletions
This file was deleted.

vortex-array/benches/take_struct.rs

Lines changed: 0 additions & 111 deletions
This file was deleted.

vortex-array/src/arrays/scalar_fn/vtable/validity.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,6 @@ impl ValidityVTable<ScalarFnVTable> for ScalarFnVTable {
4646
let vector = array
4747
.execute(&SCALAR_FN_SESSION)
4848
.vortex_expect("Validity mask computation should be fallible");
49-
Mask::from_buffer(vector.into_bool().into_bits())
49+
Mask::from_buffer(vector.into_bool().into_parts().0)
5050
}
5151
}

vortex-array/src/scalar_fns/mask/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ impl VTable for MaskFn {
6363
}
6464
(Datum::Scalar(input), BoolDatum::Vector(mask)) => {
6565
let mut result = input.repeat(args.row_count()).freeze();
66-
result.mask_validity(&Mask::from(mask.into_bits()));
66+
result.mask_validity(&Mask::from(mask.into_parts().0));
6767
Ok(Datum::Vector(result))
6868
}
6969
(Datum::Vector(input_array), BoolDatum::Scalar(mask)) => {
@@ -76,7 +76,7 @@ impl VTable for MaskFn {
7676
}
7777
(Datum::Vector(input_array), BoolDatum::Vector(mask)) => {
7878
let mut result = input_array;
79-
result.mask_validity(&Mask::from(mask.into_bits()));
79+
result.mask_validity(&Mask::from(mask.into_parts().0));
8080
Ok(Datum::Vector(result))
8181
}
8282
}

0 commit comments

Comments
 (0)