File tree Expand file tree Collapse file tree 1 file changed +17
-2
lines changed
vortex-compute/src/take/slice Expand file tree Collapse file tree 1 file changed +17
-2
lines changed Original file line number Diff line number Diff line change 44//! Take function implementations on slices.
55
66use vortex_buffer:: Buffer ;
7+ use vortex_buffer:: BufferMut ;
78use vortex_dtype:: UnsignedPType ;
89
910use crate :: take:: Take ;
@@ -43,7 +44,21 @@ impl<T: Copy, I: UnsignedPType> Take<[I]> for &[T] {
4344 unused,
4445 reason = "Compiler may see this as unused based on enabled features"
4546) ]
46- #[ inline]
4747fn take_scalar < T : Copy , I : UnsignedPType > ( buffer : & [ T ] , indices : & [ I ] ) -> Buffer < T > {
48- indices. iter ( ) . map ( |idx| buffer[ idx. as_ ( ) ] ) . collect ( )
48+ // NB: The simpler `indices.iter().map(|idx| buffer[idx.as_()]).collect()` generates suboptimal
49+ // assembly where the buffer length is repeatedly loaded from the stack on each iteration.
50+
51+ let mut result = BufferMut :: with_capacity ( indices. len ( ) ) ;
52+ let ptr = result. spare_capacity_mut ( ) . as_mut_ptr ( ) . cast :: < T > ( ) ;
53+
54+ // This explicit loop with pointer writes keeps the length in a register and avoids per-element
55+ // capacity checks from `push()`.
56+ for ( i, idx) in indices. iter ( ) . enumerate ( ) {
57+ // SAFETY: We reserved `indices.len()` capacity, so `ptr.add(i)` is valid.
58+ unsafe { ptr. add ( i) . write ( buffer[ idx. as_ ( ) ] ) } ;
59+ }
60+
61+ // SAFETY: We just wrote exactly `indices.len()` elements.
62+ unsafe { result. set_len ( indices. len ( ) ) } ;
63+ result. freeze ( )
4964}
You can’t perform that action at this time.
0 commit comments