Skip to content

Commit 5c5f7d1

Browse files
authored
Perf: Optimized bool take (#5701)
Adds an optimization for `BoolVector::take` which is similar to the optimization we have for `BoolArray`'s canonicalize function, but does an additional check for zero or one `false`s (instead of just zero or one `true`s). That code is located at https://github.com/vortex-data/vortex/blob/develop/vortex-array/src/arrays/dict/vtable/canonical.rs. The difference here is that I use a heuristic check on the default take implementation on `BoolVector` (instead of only using this optimization for dictionary decompression) because I don't think there is any reason not to utilize this in general. I still need to add some benchmarks. --------- Signed-off-by: Connor Tsui <[email protected]>
1 parent 7738f09 commit 5c5f7d1

File tree

5 files changed

+371
-12
lines changed

5 files changed

+371
-12
lines changed

vortex-compute/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,7 @@ harness = false
4949
[[bench]]
5050
name = "expand_buffer"
5151
harness = false
52+
53+
[[bench]]
54+
name = "bool_take"
55+
harness = false
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Benchmarks comparing optimized vs default bool take.
5+
//!
6+
//! The optimized take has special fast paths for:
7+
//! - All true values → broadcast true
8+
//! - All false values → broadcast false
9+
//! - Single true value → comparison against that index
10+
//! - Single false value → comparison and negate
11+
//! - All null values → broadcast null
12+
//! - Multiple true/false → fallback to default
13+
14+
use std::fmt;
15+
use std::sync::LazyLock;
16+
17+
use itertools::Itertools;
18+
use vortex_compute::take::Take;
19+
use vortex_compute::take::default_take;
20+
use vortex_compute::take::optimized_take;
21+
use vortex_vector::VectorMutOps;
22+
use vortex_vector::VectorOps;
23+
use vortex_vector::bool::BoolVector;
24+
use vortex_vector::bool::BoolVectorMut;
25+
26+
/// Benchmark binary entry point; simply delegates to `divan::main()`.
fn main() {
27+
divan::main();
28+
}
29+
30+
/// Value array patterns that exercise different optimization paths.
///
/// Each variant names a small, fixed values vector; the concrete contents are
/// built by `ValuePattern::create_values`.
31+
#[derive(Clone, Copy, Debug)]
32+
enum ValuePattern {
33+
/// All values are true: [true, true] → optimized broadcasts true.
34+
AllTrue,
35+
/// All values are false: [false, false] → optimized broadcasts false.
36+
AllFalse,
37+
/// Single true among falses: [true, false, false, false] → optimized uses comparison.
38+
SingleTrue,
39+
/// Single false among trues: [false, true, true, true] → optimized uses comparison + negate.
40+
SingleFalse,
41+
/// Multiple true and false: [true, false, true, false] → falls back to default.
42+
Mixed,
43+
/// All values are null: [null, null] → optimized broadcasts null.
44+
AllNull,
45+
/// Single null with true: [null, true] → optimized broadcasts true (null skipped).
46+
SingleNullWithTrue,
47+
/// Single null with false: [null, false] → optimized broadcasts false (null skipped).
48+
SingleNullWithFalse,
49+
/// Mixed values with some nulls: [true, null, false, null].
50+
MixedWithNulls,
51+
}
52+
53+
impl ValuePattern {
54+
const ALL: &[Self] = &[
55+
Self::AllTrue,
56+
Self::AllFalse,
57+
Self::SingleTrue,
58+
Self::SingleFalse,
59+
Self::Mixed,
60+
Self::AllNull,
61+
Self::SingleNullWithTrue,
62+
Self::SingleNullWithFalse,
63+
Self::MixedWithNulls,
64+
];
65+
66+
fn create_values(self) -> BoolVector {
67+
match self {
68+
Self::AllTrue => BoolVectorMut::from_iter([Some(true), Some(true)]).freeze(),
69+
Self::AllFalse => BoolVectorMut::from_iter([Some(false), Some(false)]).freeze(),
70+
Self::SingleTrue => {
71+
// One true among multiple falses.
72+
BoolVectorMut::from_iter([Some(true), Some(false), Some(false), Some(false)])
73+
.freeze()
74+
}
75+
Self::SingleFalse => {
76+
// One false among multiple trues.
77+
BoolVectorMut::from_iter([Some(false), Some(true), Some(true), Some(true)]).freeze()
78+
}
79+
Self::Mixed => {
80+
BoolVectorMut::from_iter([Some(true), Some(false), Some(true), Some(false)])
81+
.freeze()
82+
}
83+
Self::AllNull => BoolVectorMut::from_iter([None, None]).freeze(),
84+
Self::SingleNullWithTrue => BoolVectorMut::from_iter([None, Some(true)]).freeze(),
85+
Self::SingleNullWithFalse => BoolVectorMut::from_iter([None, Some(false)]).freeze(),
86+
Self::MixedWithNulls => {
87+
BoolVectorMut::from_iter([Some(true), None, Some(false), None]).freeze()
88+
}
89+
}
90+
}
91+
92+
fn max_index(self) -> usize {
93+
match self {
94+
Self::SingleTrue | Self::SingleFalse | Self::Mixed | Self::MixedWithNulls => 4,
95+
_ => 2,
96+
}
97+
}
98+
99+
const fn name(self) -> &'static str {
100+
match self {
101+
Self::AllTrue => "all_true",
102+
Self::AllFalse => "all_false",
103+
Self::SingleTrue => "single_true",
104+
Self::SingleFalse => "single_false",
105+
Self::Mixed => "mixed",
106+
Self::AllNull => "all_null",
107+
Self::SingleNullWithTrue => "null_with_true",
108+
Self::SingleNullWithFalse => "null_with_false",
109+
Self::MixedWithNulls => "mixed_nulls",
110+
}
111+
}
112+
}
113+
114+
/// Index-array lengths to benchmark; each one is paired with every `ValuePattern`.
const INDICES_SIZES: &[usize] = &[1_000, 10_000, 100_000];
115+
116+
/// One benchmark case: an index count paired with a value pattern.
///
/// Wrapped in a struct so that it can implement `Display`, which renders the case as
/// `<indices_len>_<pattern name>`.
117+
#[derive(Clone, Copy)]
118+
struct Params {
119+
/// Number of indices generated for the take.
indices_len: usize,
120+
/// Which values vector the indices are taken from.
pattern: ValuePattern,
121+
}
122+
123+
impl fmt::Display for Params {
124+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125+
write!(f, "{}_{}", self.indices_len, self.pattern.name())
126+
}
127+
}
128+
129+
static PARAMS: LazyLock<Vec<Params>> = LazyLock::new(|| {
130+
INDICES_SIZES
131+
.iter()
132+
.cartesian_product(ValuePattern::ALL.iter())
133+
.map(|(&indices_len, &pattern)| Params {
134+
indices_len,
135+
pattern,
136+
})
137+
.collect()
138+
});
139+
140+
/// Creates `len` indices that cycle through `0..max_index`, i.e. `0, 1, …, max_index - 1, 0, 1, …`.
///
/// # Panics
///
/// Panics if `len > 0` and `max_index == 0` (remainder by zero), or if a generated
/// index does not fit in a `u32`.
fn create_indices(len: usize, max_index: usize) -> Vec<u32> {
    (0..len)
        // Checked conversion: fail loudly instead of silently wrapping an index.
        .map(|i| u32::try_from(i % max_index).expect("take index must fit in u32"))
        .collect()
}
145+
146+
#[divan::bench(args = &*PARAMS, sample_count = 1000)]
147+
fn default(bencher: divan::Bencher, params: &Params) {
148+
bencher
149+
.with_inputs(|| {
150+
let values = params.pattern.create_values();
151+
let indices = create_indices(params.indices_len, params.pattern.max_index());
152+
(values, indices)
153+
})
154+
.bench_refs(|(values, indices)| default_take(values, indices.as_slice()));
155+
}
156+
157+
#[divan::bench(args = &*PARAMS, sample_count = 1000)]
158+
fn optimized(bencher: divan::Bencher, params: &Params) {
159+
bencher
160+
.with_inputs(|| {
161+
let values = params.pattern.create_values();
162+
let indices = create_indices(params.indices_len, params.pattern.max_index());
163+
(values, indices)
164+
})
165+
.bench_refs(|(values, indices)| {
166+
optimized_take(values, indices.as_slice(), || {
167+
values.validity().take(indices.as_slice())
168+
})
169+
});
170+
}

vortex-compute/src/take/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ mod mask;
99
pub mod slice;
1010
mod vector;
1111

12+
pub use vector::default_take;
13+
pub use vector::optimized_take;
14+
1215
/// The size of a page in Linux.
1316
const LINUX_PAGE_SIZE: usize = 4096;
1417

0 commit comments

Comments
 (0)