diff --git a/example/neon.rs b/example/neon.rs
index 00e437d7e..69ce17d3d 100644
--- a/example/neon.rs
+++ b/example/neon.rs
@@ -202,6 +202,44 @@ unsafe fn test_vqadd_u8() {
     assert_eq!(r, e);
 }
 
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vmaxq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmax.v4f32
+    let a = f32x4::from([0., -1., 2., -3.]);
+    let b = f32x4::from([-4., 5., -6., 7.]);
+    let e = f32x4::from([0., 5., 2., 7.]);
+    let r: f32x4 = transmute(vmaxq_f32(transmute(a), transmute(b)));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vminq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmin.v4f32
+    let a = f32x4::from([0., -1., 2., -3.]);
+    let b = f32x4::from([-4., 5., -6., 7.]);
+    let e = f32x4::from([-4., -1., -6., -3.]);
+    let r: f32x4 = transmute(vminq_f32(transmute(a), transmute(b)));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vaddvq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.faddv.f32.v4f32
+    let a = f32x4::from([0., 1., 2., 3.]);
+    let e = 6f32;
+    let r = vaddvq_f32(transmute(a));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vrndnq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.frintn.v4f32
+    let a = f32x4::from([0.1, -1.9, 4.5, 5.5]);
+    let e = f32x4::from([0., -2., 4., 6.]);
+    let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+    assert_eq!(r, e);
+}
+
 #[cfg(target_arch = "aarch64")]
 fn main() {
     unsafe {
@@ -229,6 +267,11 @@ fn main() {
 
         test_vqsub_u8();
         test_vqadd_u8();
+
+        test_vmaxq_f32();
+        test_vminq_f32();
+        test_vaddvq_f32();
+        test_vrndnq_f32();
     }
 }
 
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs
index f0fb18608..39f6763d9 100644
--- a/src/intrinsics/llvm_aarch64.rs
+++ b/src/intrinsics/llvm_aarch64.rs
@@ -91,6 +91,44 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        _ if intrinsic.starts_with("llvm.aarch64.neon.fmax.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.fmin.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().fadd(a, b));
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.frintn.v") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_for_each_lane(fx, v, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().nearest(lane)
+            });
+        }
+
         _ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
             intrinsic_args!(fx, args => (v); intrinsic);