Skip to content

Commit 7331a28

Browse files
committed
perf: optimize expand
Signed-off-by: Alexander Droste <[email protected]>
1 parent 374e089 commit 7331a28

File tree

2 files changed

+89
-43
lines changed

2 files changed

+89
-43
lines changed

vortex-compute/benches/expand_buffer.rs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//! Expand benchmarks for `Buffer`.
55
66
use divan::Bencher;
7-
use vortex_buffer::Buffer;
7+
use vortex_buffer::{Buffer, BufferMut};
88
use vortex_compute::expand::Expand;
99
use vortex_mask::Mask;
1010

@@ -30,6 +30,18 @@ where
3030
Buffer::from(data)
3131
}
3232

33+
fn create_test_buffer_mut<T>(size: usize) -> BufferMut<T>
34+
where
35+
T: Copy + Default + From<u8> + Send + 'static,
36+
{
37+
let mut data = Vec::with_capacity(size);
38+
for i in 0..size {
39+
#[expect(clippy::cast_possible_truncation)]
40+
data.push(T::from((i % 256) as u8));
41+
}
42+
Buffer::from(data).into_mut()
43+
}
44+
3345
fn generate_mask(len: usize, selectivity: f64) -> Mask {
3446
let mut selection = vec![false; len];
3547
let mut indices: Vec<usize> = (0..len).collect();
@@ -50,18 +62,33 @@ fn generate_mask(len: usize, selectivity: f64) -> Mask {
5062
}
5163

5264
#[divan::bench(types = [u8, u32, u64], args = SELECTIVITIES, sample_count = 1000)]
53-
fn expand_selectivity<T: Copy + Default + From<u8> + Send + 'static>(
65+
fn expand_copy<T: Copy + Default + From<u8> + Send + 'static>(bencher: Bencher, selectivity: f64) {
66+
bencher
67+
.with_inputs(|| {
68+
let mask = generate_mask(BUFFER_SIZE, selectivity);
69+
let true_count = mask.true_count();
70+
let buffer = create_test_buffer::<T>(true_count);
71+
(buffer, mask)
72+
})
73+
.bench_values(|(buffer, mask)| {
74+
let result = buffer.expand(&mask);
75+
divan::black_box(result);
76+
});
77+
}
78+
79+
#[divan::bench(types = [u8, u32, u64], args = SELECTIVITIES, sample_count = 1000)]
80+
fn expand_inplace<T: Copy + Default + From<u8> + Send + 'static>(
5481
bencher: Bencher,
5582
selectivity: f64,
5683
) {
5784
bencher
5885
.with_inputs(|| {
5986
let mask = generate_mask(BUFFER_SIZE, selectivity);
6087
let true_count = mask.true_count();
61-
let buffer = create_test_buffer::<T>(true_count);
88+
let buffer = create_test_buffer_mut::<T>(true_count);
6289
(buffer, mask)
6390
})
64-
.bench_values(|(buffer, mask)| {
91+
.bench_values(|(mut buffer, mask)| {
6592
let result = buffer.expand(&mask);
6693
divan::black_box(result);
6794
});

vortex-compute/src/expand/buffer.rs

Lines changed: 58 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ fn expand_inplace<T: Copy>(buf_mut: &mut BufferMut<T>, mask_values: &MaskValues)
8888

8989
// SAFETY: Sufficient capacity has been reserved.
9090
unsafe { buf_mut.set_len(mask_len) };
91-
9291
let buf_slice = buf_mut.as_mut_slice();
9392

9493
// Pick the first value as a default value. The buffer is not empty, and we
@@ -99,21 +98,32 @@ fn expand_inplace<T: Copy>(buf_mut: &mut BufferMut<T>, mask_values: &MaskValues)
9998
let mut element_idx = buf_len;
10099

101100
// Iterate backwards through the mask to avoid overwriting unprocessed elements.
102-
for mask_idx in (buf_len..mask_len).rev() {
103-
// NOTE(0ax1): .value is slow => optimize
104-
if mask_values.value(mask_idx) {
101+
for (mask_idx, is_valid) in mask_values
102+
.bit_buffer()
103+
.slice(buf_len..)
104+
.iter()
105+
.rev()
106+
.enumerate()
107+
{
108+
if is_valid {
105109
element_idx -= 1;
106-
buf_slice[mask_idx] = buf_slice[element_idx];
110+
unsafe { *buf_slice.get_unchecked_mut(mask_idx) = buf_slice[element_idx] };
107111
} else {
108112
// Initialize with a pseudo-default value.
109-
buf_slice[mask_idx] = pseudo_default_value;
113+
unsafe { *buf_slice.get_unchecked_mut(mask_idx) = pseudo_default_value };
110114
}
111115
}
112116

113-
for mask_idx in (0..buf_len).rev() {
114-
if mask_values.value(mask_idx) {
117+
for (mask_idx, is_valid) in mask_values
118+
.bit_buffer()
119+
.slice(..buf_len)
120+
.iter()
121+
.rev()
122+
.enumerate()
123+
{
124+
if is_valid {
115125
element_idx -= 1;
116-
buf_slice[mask_idx] = buf_slice[element_idx];
126+
unsafe { *buf_slice.get_unchecked_mut(mask_idx) = buf_slice[element_idx] };
117127
}
118128
// For the range up to buffer length, all positions are already initialized.
119129
}
@@ -139,20 +149,24 @@ fn expand_copy<T: Copy>(src: &[T], mask_values: &MaskValues) -> Buffer<T> {
139149

140150
// Pick the first value as a default value.
141151
let pseudo_default_value = src[0];
142-
143-
let src_len = src.len();
144-
let mut element_idx = src_len;
145-
146-
// Iterate backwards through the mask to avoid any issues.
147-
for mask_idx in (0..mask_len).rev() {
148-
// NOTE(0ax1): .value is slow => optimize
149-
if mask_values.value(mask_idx) {
150-
element_idx -= 1;
151-
target_slice[mask_idx].write(src[element_idx]);
152+
let mut element_idx = 0;
153+
154+
for (mask_idx, is_valid) in mask_values.bit_buffer().iter().enumerate() {
155+
if is_valid {
156+
unsafe {
157+
target_slice
158+
.get_unchecked_mut(mask_idx)
159+
.write(src[element_idx])
160+
};
161+
element_idx += 1;
152162
} else {
153-
// Initialize with a pseudo-default value. In case we expand into a
154-
// new buffer all false positions need to be initialized.
155-
target_slice[mask_idx].write(pseudo_default_value);
163+
// Initialize with a pseudo-default value. In case we expand
164+
// into a new buffer all false positions need to be initialized.
165+
unsafe {
166+
target_slice
167+
.get_unchecked_mut(mask_idx)
168+
.write(pseudo_default_value)
169+
};
156170
}
157171
}
158172

@@ -305,22 +319,6 @@ mod tests {
305319
assert_eq!(buf.as_slice()[4..7], [100u32, 200, 300]);
306320
}
307321

308-
#[test]
309-
fn test_expand_mut_dense() {
310-
let mut buf = buffer_mut![1u32, 2, 3, 4, 5];
311-
let mask = Mask::from_iter([
312-
true, false, true, true, false, true, true, false, false, false,
313-
]);
314-
315-
(&mut buf).expand(&mask);
316-
assert_eq!(buf.len(), 10);
317-
assert_eq!(buf.as_slice()[0], 1);
318-
assert_eq!(buf.as_slice()[2], 2);
319-
assert_eq!(buf.as_slice()[3], 3);
320-
assert_eq!(buf.as_slice()[5], 4);
321-
assert_eq!(buf.as_slice()[6], 5);
322-
}
323-
324322
#[test]
325323
#[should_panic(expected = "Expand mask true count must equal the buffer length")]
326324
fn test_expand_mut_mismatch_true_count() {
@@ -330,7 +328,28 @@ mod tests {
330328
}
331329

332330
#[test]
333-
fn test_expand_into_new_buffer() {
331+
fn test_expand_inplace() {
332+
let mut buf = BufferMut::from_iter([10u32, 20, 30, 40, 50]);
333+
// Alternating pattern with gaps: [T, F, T, F, T, F, T, F, T, F]
334+
let mask = Mask::from_iter([
335+
true, false, true, false, true, false, true, false, true, false,
336+
]);
337+
338+
let Mask::Values(mask_values) = mask else {
339+
panic!("Expected Mask::Values");
340+
};
341+
342+
expand_inplace(&mut buf, &mask_values);
343+
assert_eq!(buf.len(), 10);
344+
assert_eq!(buf.as_slice()[0], 10);
345+
assert_eq!(buf.as_slice()[2], 20);
346+
assert_eq!(buf.as_slice()[4], 30);
347+
assert_eq!(buf.as_slice()[6], 40);
348+
assert_eq!(buf.as_slice()[8], 50);
349+
}
350+
351+
#[test]
352+
fn test_expand_copy() {
334353
let src = [10u32, 20, 30, 40, 50];
335354
// Alternating pattern with gaps: [T, F, T, F, T, F, T, F, T, F]
336355
let mask = Mask::from_iter([

0 commit comments

Comments
 (0)