-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Closed
Description
Consider this Rust function for indexing into a slice:
https://godbolt.org/z/fsfdxPs8e
#[inline(never)]
pub fn get(slice: &[u8], range: std::ops::Range<usize>) -> Option<&[u8]> {
slice.get(range)
}
On AArch64, this produces:
;; parameters:
;; x0: slice.ptr
;; x1: slice.len
;; x2: range.start
;; x3: range.end
;;
;; return:
;; x0: ret.ptr
;; x1: ret.len
example::get::h40fd170020d218c4:
cmp x3, x1 ;; range.end <=> slice.len
add x8, x0, x2 ;; new_ptr = slice.ptr + range.start
sub x1, x3, x2 ;; ret.len = range.end - range.start
ccmp x3, x2, #0, ls ;; range.end <=> range.start
csel x0, xzr, x8, lo ;; ret.ptr = (range.start > range.end || range.end > slice.len) ? null : new_ptr
ret
Notice that the comparison between range.end and range.start is performed twice:
- In `sub x1, x3, x2`, we calculate `range.end - range.start` (without setting flags)
- In `ccmp x3, x2, #0, ls`, we calculate `range.end - range.start` again, this time only to modify the flags
We could avoid the redundant calculation by performing the subtraction and setting the flags in the same instruction (subs) and using ccmp for comparing range.end with slice.len:
;; x0: slice.ptr
;; x1: slice.len
;; x2: range.start
;; x3: range.end
;;
;; return:
;; x0: ret.ptr
;; x1: ret.len
add x8, x0, x2 ;; new_ptr = slice.ptr + range.start
subs x9, x3, x2 ;; new_len = range.end - range.start and set flags
ccmp x3, x1, #2, hs ;; if range.end >= range.start: range.end <=> slice.len; otherwise force "hi" (C=1, Z=0)
csel x0, xzr, x8, hi ;; ret.ptr = (range.start > range.end || range.end > slice.len) ? null : new_ptr
mov x1, x9 ;; ret.len = new_len
ret