Skip to content

Commit 5783a6b

Browse files
committed
optimize take_scalar
Signed-off-by: Connor Tsui <[email protected]>
1 parent 5c5f7d1 commit 5783a6b

File tree

1 file changed

+17
-2
lines changed
  • vortex-compute/src/take/slice

1 file changed

+17
-2
lines changed

vortex-compute/src/take/slice/mod.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
//! Take function implementations on slices.
55
66
use vortex_buffer::Buffer;
7+
use vortex_buffer::BufferMut;
78
use vortex_dtype::UnsignedPType;
89

910
use crate::take::Take;
@@ -43,7 +44,21 @@ impl<T: Copy, I: UnsignedPType> Take<[I]> for &[T] {
4344
unused,
4445
reason = "Compiler may see this as unused based on enabled features"
4546
)]
46-
#[inline]
4747
fn take_scalar<T: Copy, I: UnsignedPType>(buffer: &[T], indices: &[I]) -> Buffer<T> {
48-
indices.iter().map(|idx| buffer[idx.as_()]).collect()
48+
// NB: The simpler `indices.iter().map(|idx| buffer[idx.as_()]).collect()` generates suboptimal
49+
// assembly where the buffer length is repeatedly loaded from the stack on each iteration.
50+
51+
let mut result = BufferMut::with_capacity(indices.len());
52+
let ptr = result.spare_capacity_mut().as_mut_ptr().cast::<T>();
53+
54+
// This explicit loop with pointer writes keeps the length in a register and avoids per-element
55+
// capacity checks from `push()`.
56+
for (i, idx) in indices.iter().enumerate() {
57+
// SAFETY: We reserved `indices.len()` capacity, so `ptr.add(i)` is valid.
58+
unsafe { ptr.add(i).write(buffer[idx.as_()]) };
59+
}
60+
61+
// SAFETY: We just wrote exactly `indices.len()` elements.
62+
unsafe { result.set_len(indices.len()) };
63+
result.freeze()
4964
}

0 commit comments

Comments
 (0)