-
Notifications
You must be signed in to change notification settings - Fork 300
Open
Description
Consider the following code:
#![allow(improper_ctypes_definitions)]
use core::arch::aarch64::*;
/// Minimal reproducer: reinterprets the 128-bit vector `x` from sixteen `u8`
/// lanes to eight `u16` lanes by calling `vreinterpretq_u16_u8`.
///
/// This function is the symbol `f` that the report disassembles.
// Kept out of line and unmangled so it appears as a standalone `f` symbol.
#[inline(never)]
#[unsafe(no_mangle)]
#[target_feature(enable = "neon")]
extern "C" fn f(x: uint8x16_t) -> uint16x8_t {
// The only operation under test; the report expects it to emit no instructions.
vreinterpretq_u16_u8(x)
}
/// Driver for the reproducer: builds a vector from the bytes 0..=15, passes it
/// through `f`, and prints the bytes of the result with `dbg!`.
fn main() {
// SAFETY (as far as this snippet shows): both transmutes convert between a
// `[u8; 16]` and a 128-bit NEON vector type.
// NOTE(review): calling `f` presumably relies on NEON being available on the
// target, since `f` is declared with `#[target_feature(enable = "neon")]` — confirm.
unsafe {
// Input bytes are their own indices, so any reordering is visible in the output.
let src = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
let x: uint8x16_t = core::mem::transmute(src);
// View the returned `uint16x8_t` as raw bytes again to compare against `src`.
let y: [u8; 16] = std::mem::transmute(f(x));
dbg!(y);
}
}
Output of `cargo +nightly run --target aarch64_be-unknown-linux-gnu -Zbuild-std --release`:
[src/main.rs:17:9] y = [
1,
0,
3,
2,
5,
4,
7,
6,
9,
8,
11,
10,
13,
12,
15,
14,
]
Output of `llvm-objdump -d`:
0000000000028870 <f>:
28870: 4e200800 rev64 v0.16b, v0.16b
28874: 6e004000 ext v0.16b, v0.16b, v0.16b, #0x8
28878: 4e201800 rev16 v0.16b, v0.16b
2887c: 4e200800 rev64 v0.16b, v0.16b
28880: 6e004000 ext v0.16b, v0.16b, v0.16b, #0x8
28884: d65f03c0 ret
`vreinterpret` should not generate any instructions, but it does, and it also swaps the byte order within each element.
Clang does not generate any instructions with `--target=aarch64_be-linux-gnu`: https://godbolt.org/z/Erse7GnEK
Metadata
Metadata
Assignees
Labels
No labels