-
Notifications
You must be signed in to change notification settings - Fork 14.7k
Open
Description
Consider this rust function for indexing into a slice:
https://godbolt.org/z/fsfdxPs8e
/// Returns the sub-slice of `slice` selected by `range`, or `None` when the
/// range is rejected (`range.start > range.end` or `range.end > slice.len()`).
// NOTE(review): `#[inline(never)]` is presumably here so the function is
// emitted as a standalone symbol whose codegen can be inspected.
#[inline(never)]
pub fn get(slice: &[u8], range: std::ops::Range<usize>) -> Option<&[u8]> {
// Checked lookup: both bounds checks happen inside `<[u8]>::get`.
slice.get(range)
}
on AArch64, this produces
;; parameters:
;; x0: slice.ptr
;; x1: slice.len
;; x2: range.start
;; x3: range.end
;;
;; return:
;; x0: ret.ptr (null encodes None)
;; x1: ret.len
example::get::h40fd170020d218c4:
cmp x3, x1 ;; range.end <=> slice.len
add x8, x0, x2 ;; new_ptr = slice.ptr + range.start
sub x1, x3, x2 ;; ret.len = range.end - range.start (flags untouched; slice.len in x1 was already consumed by the cmp)
ccmp x3, x2, #0, ls ;; if range.end <= slice.len: range.end <=> range.start; otherwise force NZCV = 0000, which reads as `lo`
csel x0, xzr, x8, lo ;; ret.ptr = (range.start > range.end || range.end > slice.len) ? null : new_ptr
ret
Notice that the comparison between range.end
and range.start
is performed twice:
- In
sub x1, x3, x2
, we calculaterange.end - range.start
(without setting flags) - In
ccmp x3, x2, #0, ls
, we calculaterange.end - range.start
again, this time only to modify the flags
We could avoid the redundant calculation by performing the subtraction and setting the flags in the same instruction (subs
) and using ccmp
for comparing range.end
with slice.len
:
;; Proposed sequence: one flag-setting subtraction produces both the result
;; length and the start/end ordering check; the conditional compare then only
;; has to check range.end against slice.len.
;;
;; x0: slice.ptr
;; x1: slice.len
;; x2: range.start
;; x3: range.end
;;
;; return:
;; x0: ret.ptr (null encodes None)
;; x1: ret.len
add x8, x0, x2 ;; new_ptr = slice.ptr + range.start
subs x9, x3, x2 ;; new_len = range.end - range.start and set flags (`hs` <=> range.end >= range.start)
ccmp x3, x1, #2, hs ;; if range.end >= range.start: range.end <=> slice.len; otherwise force NZCV = 0010 (C=1, Z=0), which reads as `hi`
csel x0, xzr, x8, hi ;; ret.ptr = (range.start > range.end || range.end > slice.len) ? null : new_ptr
mov x1, x9 ;; ret.len = new_len (x9 is needed because x1 still holds slice.len for the ccmp)
ret