File tree Expand file tree Collapse file tree 2 files changed +42
-0
lines changed
Expand file tree Collapse file tree 2 files changed +42
-0
lines changed Original file line number Diff line number Diff line change 1+ use core:: ptr:: NonNull ;
12use test:: black_box;
23use test:: Bencher ;
34
@@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) {
162163 black_box ( slice. fill ( black_box ( NewType ( 42 ) ) ) ) ;
163164 } ) ;
164165}
166+
167+ // Tests the ability of the compiler to recognize that only the last slice item is needed
168+ // based on issue #106288
169+ #[ bench]
170+ fn fold_to_last ( b : & mut Bencher ) {
171+ let slice: & [ i32 ] = & [ 0 ; 1024 ] ;
172+ b. iter ( || black_box ( slice) . iter ( ) . fold ( None , |_, r| Some ( NonNull :: from ( r) ) ) ) ;
173+ }
Original file line number Diff line number Diff line change @@ -191,6 +191,39 @@ macro_rules! iterator {
191191 self . next_back( )
192192 }
193193
// Specialized `fold` for the slice iterators generated by this macro.
//
// Consumes the iterator (`self` is taken by value) and threads an accumulator
// through `f`, visiting the elements at offsets `0..len` from `self.ptr` in
// ascending order. Returns `init` unchanged when the iterator is empty,
// otherwise the accumulator produced by the final call to `f`.
//
// NOTE(review): `is_empty!` and `len!` are helper macros defined outside this
// view; they are presumed to report emptiness / the remaining element count of
// the iterator -- confirm against their definitions.
194+ #[ inline]
195+ fn fold<B , F >( self , init: B , mut f: F ) -> B
196+ where
197+ F : FnMut ( B , Self :: Item ) -> B ,
198+ {
199+ // this implementation consists of the following optimizations compared to the
200+ // default implementation:
201+ // - do-while loop, as is LLVM's preferred loop shape,
202+ // see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
203+ // - bumps an index instead of a pointer since the latter case inhibits
204+ // some optimizations, see #111603
205+ // - avoids Option wrapping/matching
// The emptiness check must come first: the loop below runs its body before
// testing `i == len`, so entering it with no elements would dereference out of
// bounds (and `i` would never equal `len`).
206+ if is_empty!( self ) {
207+ return init;
208+ }
209+ let mut acc = init;
210+ let mut i = 0 ;
211+ let len = len!( self ) ;
212+ loop {
213+ // SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
214+ // the slice allocation
// `$( $mut_ )?` is an optional macro fragment: when the macro invocation
// supplies it the element is borrowed as `&mut`, otherwise as `&`.
215+ acc = f( acc, unsafe { & $( $mut_ ) ? * self . ptr. add( i) . as_ptr( ) } ) ;
216+ // SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
217+ // slice had that length, in which case we'll break out of the loop
218+ // after the increment
219+ i = unsafe { i. unchecked_add( 1 ) } ;
// Exit test at the bottom of the loop: this is what gives the do-while shape.
220+ if i == len {
221+ break ;
222+ }
223+ }
224+ acc
225+ }
226+
194227 // We override the default implementation, which uses `try_fold`,
195228 // because this simple implementation generates less LLVM IR and is
196229 // faster to compile.
You can’t perform that action at this time.
0 commit comments