Skip to content

Commit 570b252

Browse files
committed
feat: impl expand for Buffer
Signed-off-by: Alexander Droste <[email protected]>
1 parent f231932 commit 570b252

File tree

5 files changed

+273
-0
lines changed

5 files changed

+273
-0
lines changed

vortex-compute/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,7 @@ divan = { workspace = true }
4141
[[bench]]
4242
name = "filter_buffer_mut"
4343
harness = false
44+
45+
[[bench]]
46+
name = "expand_buffer"
47+
harness = false
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Expand benchmarks for `Buffer`.
5+
6+
use divan::Bencher;
7+
use vortex_buffer::Buffer;
8+
use vortex_compute::expand::Expand;
9+
use vortex_mask::Mask;
10+
11+
// Benchmark entry point. The bench target is declared with `harness = false`,
// so this `main` hands control to divan's own runner.
fn main() {
12+
divan::main();
13+
}
14+
15+
/// Length of the mask used by every benchmark case. The input buffer itself is
/// shorter: it holds one element per `true` position of that mask.
const BUFFER_SIZE: usize = 1024;
16+
17+
/// Fractions of mask positions that are set to `true`; each value is passed to
/// the benchmark as its `selectivity` argument.
const SELECTIVITIES: &[f64] = &[
18+
0.01, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.99,
19+
];
20+
21+
/// Builds a `Buffer<T>` with `size` elements whose values cycle through
/// `0..=255`: element `i` is `T::from((i % 256) as u8)`.
///
/// Only the `From<u8>` bound is used directly by the body shown here; the
/// other bounds are presumably needed by `Buffer::from` or mirror the bounds of
/// the benchmark function — TODO confirm.
fn create_test_buffer<T>(size: usize) -> Buffer<T>
22+
where
23+
T: Copy + Default + From<u8> + Send + 'static,
24+
{
25+
let mut data = Vec::with_capacity(size);
26+
for i in 0..size {
27+
// `i % 256` always fits in a `u8`, so this cast never loses information.
#[expect(clippy::cast_possible_truncation)]
28+
data.push(T::from((i % 256) as u8));
29+
}
30+
Buffer::from(data)
31+
}
32+
33+
/// Builds a reproducible `Mask` of length `len` in which
/// `round(len * selectivity)` positions are `true`.
///
/// The positions come from a fixed, non-random permutation of `0..len`, so two
/// calls with the same arguments produce the same mask.
///
/// # Panics
///
/// Panics on an out-of-bounds index if `selectivity` is large enough for the
/// rounded count to exceed `len` (i.e. noticeably above `1.0`).
fn generate_mask(len: usize, selectivity: f64) -> Mask {
34+
let mut selection = vec![false; len];
35+
let mut indices: Vec<usize> = (0..len).collect();
36+
37+
// Shuffle indices deterministically: walking from the back, each slot is
// swapped with a partner in `0..=idx` computed from the slot's own index, so
// no random number generator is involved.
38+
const SHUFFLE_MULTIPLIER: usize = 13;
39+
for idx in (1..len).rev() {
40+
indices.swap(idx, (idx * SHUFFLE_MULTIPLIER) % (idx + 1));
41+
}
42+
43+
// Number of positions to mark as `true`.
#[expect(clippy::cast_possible_truncation)]
44+
let num_selected = ((len as f64) * selectivity).round() as usize;
45+
// `indices` is still a permutation of `0..len` (it was only modified by
// swaps), so this marks `num_selected` distinct positions.
for i in 0..num_selected {
46+
selection[indices[i]] = true;
47+
}
48+
49+
Mask::from_iter(selection)
50+
}
51+
52+
/// Benchmarks `Expand::expand` on a `Buffer<T>` for each element type listed in
/// `types` and for each value in `SELECTIVITIES`.
///
/// Each input pair is a mask of `BUFFER_SIZE` positions and a buffer holding
/// exactly `mask.true_count()` elements, which is the precondition that
/// `expand` asserts.
#[divan::bench(types = [u8, u32, u64], args = SELECTIVITIES, sample_count = 1000)]
53+
fn expand_selectivity<T: Copy + Default + From<u8> + Send + 'static>(
54+
bencher: Bencher,
55+
selectivity: f64,
56+
) {
57+
bencher
58+
// Inputs are produced here rather than inside the benchmarked closure
// (per divan's `with_inputs` contract — see the divan documentation).
.with_inputs(|| {
59+
let mask = generate_mask(BUFFER_SIZE, selectivity);
60+
let true_count = mask.true_count();
61+
// Size the buffer to the true count so the length assertion in `expand` holds.
let buffer = create_test_buffer::<T>(true_count);
62+
(buffer, mask)
63+
})
64+
.bench_values(|(buffer, mask)| {
65+
// The buffer is passed by value, so this exercises the owning
// `Buffer<T>` implementation rather than the `&Buffer<T>` one.
let result = buffer.expand(&mask);
66+
// Keep the result observable so the call cannot be optimized away.
divan::black_box(result);
67+
});
68+
}
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_buffer::Buffer;
5+
use vortex_mask::{Mask, MaskValues};
6+
7+
use crate::expand::Expand;
8+
9+
/// Owning expansion: consumes the buffer and scatters its elements to the
/// `true` positions of the mask.
impl<T: Copy> Expand for Buffer<T> {
10+
type Output = Buffer<T>;
11+
12+
/// Expands `self` according to `mask`.
///
/// # Panics
///
/// Panics if `mask.true_count()` differs from `self.len()`.
fn expand(self, mask: &Mask) -> Self::Output {
13+
assert_eq!(
14+
mask.true_count(),
15+
self.len(),
16+
"Expand mask true count must equal the buffer length"
17+
);
18+
19+
match mask {
20+
// Every position is selected, so the input already is the result.
Mask::AllTrue(_) => self,
21+
// The assertion above guarantees that `self` is empty here.
// NOTE(review): this returns a zero-length buffer even when
// `mask.len() > 0`, while the `Expand` trait docs state that the result
// has the length of the mask — confirm which behavior is intended.
Mask::AllFalse(_) => Buffer::empty(),
22+
// Mixed mask: scatter the elements one by one.
Mask::Values(mask_values) => expand_indices(self, mask_values),
23+
}
24+
}
25+
}
26+
27+
/// Borrowing expansion: leaves the caller's buffer untouched and returns a new
/// `Buffer<T>` with the elements scattered to the `true` positions of the mask.
impl<T: Copy> Expand for &Buffer<T> {
28+
type Output = Buffer<T>;
29+
30+
/// Expands a clone of `self` according to `mask`.
///
/// # Panics
///
/// Panics if `mask.true_count()` differs from `self.len()`.
fn expand(self, mask: &Mask) -> Self::Output {
31+
assert_eq!(
32+
mask.true_count(),
33+
self.len(),
34+
"Expand mask true count must equal the buffer length"
35+
);
36+
37+
match mask {
38+
// Every position is selected, so a clone of the input is the result.
Mask::AllTrue(_) => self.clone(),
39+
// The assertion above guarantees that `self` is empty here.
// NOTE(review): this returns a zero-length buffer even when
// `mask.len() > 0`, while the `Expand` trait docs state that the result
// has the length of the mask — confirm which behavior is intended.
Mask::AllFalse(_) => Buffer::empty(),
40+
// `expand_indices` takes ownership, hence the clone. Whether cloning a
// `Buffer` copies the data or only shares it is not visible here —
// TODO confirm the cost.
Mask::Values(mask_values) => expand_indices(self.clone(), mask_values),
41+
}
42+
}
43+
}
44+
45+
/// Expands a buffer by placing its elements at positions marked as `true` in the mask.
46+
///
47+
/// The buffer is grown to the mask length and rearranged in place, back to
/// front. Positions marked `false` receive no meaningful value: beyond the
/// original length they are filled with a copy of the first input element,
/// and below it they keep whatever element was left there.
///
/// # Arguments
48+
///
49+
/// * `buf` - The buffer containing elements to scatter
50+
/// * `mask_values` - The mask indicating where elements should be placed
51+
///
52+
/// # Panics
53+
///
54+
/// Panics if the number of `true` values in the mask does not equal the buffer length.
55+
fn expand_indices<T: Copy>(buf: Buffer<T>, mask_values: &MaskValues) -> Buffer<T> {
56+
let buf_len = buf.len();
57+
58+
assert_eq!(
59+
mask_values.true_count(),
60+
buf_len,
61+
"Mask true count must equal buffer length"
62+
);
63+
64+
// An empty input has no first element to use as filler below, so bail out
// early. NOTE(review): like the `AllFalse` arm of the callers, this returns
// a zero-length buffer regardless of the mask length.
if buf.is_empty() {
65+
return Buffer::empty();
66+
}
67+
68+
let mut buf_mut = buf.into_mut();
69+
let mask_len = mask_values.len();
70+
// The subtraction relies on the true count (== `buf_len`, asserted above)
// not exceeding the mask length.
buf_mut.reserve(mask_len - buf_len);
71+
72+
// Expand to the new buffer size, which equals the length of the mask.
// SAFETY (as far as this function shows): `reserve` was just asked for the
// missing `mask_len - buf_len` slots, and `T: Copy` rules out destructors.
// The new slots hold no meaningful values until the first loop below has
// written every index in `buf_len..mask_len`.
// NOTE(review): `as_mut_slice` is taken before those writes happen —
// confirm that this is permitted for not-yet-written elements.
73+
unsafe {
74+
buf_mut.set_len(mask_len);
75+
}
76+
77+
let buf_slice = buf_mut.as_mut_slice();
78+
// Index one past the next source element to move; counts down to zero.
let mut element_idx = buf_len;
79+
80+
// Pick the first value as a default value. The buffer is not empty, and we
81+
// know that the first value is guaranteed to be initialized. By doing this,
82+
// `T` is not required to implement `Default`.
83+
let pseudo_default_value = buf_slice[0];
84+
85+
// Iterate backwards through the mask to avoid overwriting unprocessed elements.
// This first pass covers the newly added tail, where every slot must be written.
86+
for mask_idx in (buf_len..mask_len).rev() {
87+
if mask_values.value(mask_idx) {
88+
element_idx -= 1;
89+
buf_slice[mask_idx] = buf_slice[element_idx];
90+
} else {
91+
// Initialize with a pseudo-default value.
92+
buf_slice[mask_idx] = pseudo_default_value;
93+
}
94+
}
95+
96+
// Second pass: the part that overlaps the original elements. The source
// index never runs ahead of the destination index, so an element is never
// overwritten before it has been moved.
for mask_idx in (0..buf_len).rev() {
97+
if mask_values.value(mask_idx) {
98+
element_idx -= 1;
99+
buf_slice[mask_idx] = buf_slice[element_idx];
100+
}
101+
// No `else` branch: positions below the original length already hold an
// input element, so `false` positions simply keep whatever value is there.
102+
}
103+
104+
buf_mut.freeze()
105+
}
106+
107+
// Unit tests for `Expand` on `Buffer`. They assert the result length and the
// values at the `true` positions; values at `false` positions are never checked.
#[cfg(test)]
108+
mod tests {
109+
use vortex_buffer::buffer;
110+
use vortex_mask::Mask;
111+
112+
use super::*;
113+
114+
// Elements must land on the alternating `true` positions, in order.
#[test]
115+
fn test_expand_scattered() {
116+
let buf = buffer![100u32, 200, 300];
117+
// Mask with scattered true values: [T, F, T, F, T]
118+
let mask = Mask::from_iter([true, false, true, false, true]);
119+
120+
let result = buf.expand(&mask);
121+
assert_eq!(result.len(), 5);
122+
assert_eq!(result.as_slice()[0], 100);
123+
assert_eq!(result.as_slice()[2], 200);
124+
assert_eq!(result.as_slice()[4], 300);
125+
}
126+
127+
// With an all-true mask the buffer must come back unchanged.
#[test]
128+
fn test_expand_all_true() {
129+
let buf = buffer![10u32, 20, 30];
130+
let mask = Mask::new_true(3);
131+
132+
let result = buf.expand(&mask);
133+
assert_eq!(result, buffer![10u32, 20, 30]);
134+
}
135+
136+
// Empty buffer with a zero-length mask.
// NOTE(review): an all-false mask of non-zero length is not covered here.
#[test]
137+
fn test_expand_all_false() {
138+
let buf: Buffer<u32> = Buffer::empty();
139+
let mask = Mask::new_false(0);
140+
141+
let result = buf.expand(&mask);
142+
assert!(result.is_empty());
143+
}
144+
145+
// All `true` positions at the front: the elements stay where they are and
// the buffer only grows. The three trailing slots are not asserted.
#[test]
146+
fn test_expand_contiguous_start() {
147+
let buf = buffer![10u32, 20, 30, 40];
148+
// Mask with true values at start: [T, T, T, T, F, F, F]
149+
let mask = Mask::from_iter([true, true, true, true, false, false, false]);
150+
151+
let result = buf.expand(&mask);
152+
assert_eq!(result.len(), 7);
153+
assert_eq!(result.as_slice()[0..4], [10u32, 20, 30, 40]);
154+
}
155+
156+
// All `true` positions at the back: every element moves into the new tail.
// The four leading slots are not asserted.
#[test]
157+
fn test_expand_contiguous_end() {
158+
let buf = buffer![100u32, 200, 300];
159+
// Mask with true values at end: [F, F, F, F, T, T, T]
160+
let mask = Mask::from_iter([false, false, false, false, true, true, true]);
161+
162+
let result = buf.expand(&mask);
163+
assert_eq!(result.len(), 7);
164+
assert_eq!(result.as_slice()[4..7], [100u32, 200, 300]);
165+
}
166+
167+
// The expected message is the one used by the assertion in `expand`.
#[test]
168+
#[should_panic(expected = "Expand mask true count must equal the buffer length")]
169+
fn test_expand_mismatch_true_count() {
170+
let buf = buffer![10u32, 20];
171+
// Mask has 3 true values but buffer has only 2 elements
172+
let mask = Mask::from_iter([true, true, true, false]);
173+
buf.expand(&mask);
174+
}
175+
}

vortex-compute/src/expand/mod.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Expand function.
5+
6+
mod buffer;
7+
8+
use vortex_mask::Mask;
9+
10+
/// Trait for expanding the values of `self` to the true positions of a mask.
11+
pub trait Expand {
12+
/// The result type after expansion.
13+
type Output;
14+
15+
/// Expands `self` using the provided mask.
16+
///
17+
///
18+
/// The result will have length equal to the mask. All values of `self` are
19+
/// then scattered to the true positions of the mask.
20+
///
21+
/// What the false positions of the result contain is not specified here.
///
/// # Panics
22+
///
23+
/// Panics if the true count of the mask does not equal the length of `self`.
24+
fn expand(self, mask: &Mask) -> Self::Output;
25+
}

vortex-compute/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub mod arithmetic;
1111
#[cfg(feature = "arrow")]
1212
pub mod arrow;
1313
pub mod comparison;
14+
pub mod expand;
1415
pub mod filter;
1516
pub mod logical;
1617
pub mod mask;

0 commit comments

Comments
 (0)