Skip to content

Commit 67e66e3

Browse files
committed
support aarch64 neon intrinsics: vmaxq_f32, vminq_f32, vaddvq_f32, vrndnq_f32
1 parent 5349365 commit 67e66e3

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

example/neon.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,43 @@ unsafe fn test_vqadd_u8() {
202202
assert_eq!(r, e);
203203
}
204204

205+
/// Exercises `vmaxq_f32` on two four-lane vectors and checks that every
/// output lane holds the larger of the two corresponding input lanes.
///
/// The function is `unsafe` because its body calls `transmute` directly.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vmaxq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmax.v4f32
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);
    let actual: f32x4 = transmute(vmaxq_f32(transmute(lhs), transmute(rhs)));

    // Lane-wise maximum of `lhs` and `rhs`.
    let expected = f32x4::from([0., 5., 2., 7.]);
    assert_eq!(actual, expected);
}
214+
215+
/// Exercises `vminq_f32` on two four-lane vectors and checks that every
/// output lane holds the smaller of the two corresponding input lanes.
///
/// The function is `unsafe` because its body calls `transmute` directly.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vminq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmin.v4f32
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);
    let actual: f32x4 = transmute(vminq_f32(transmute(lhs), transmute(rhs)));

    // Lane-wise minimum of `lhs` and `rhs`.
    let expected = f32x4::from([-4., -1., -6., -3.]);
    assert_eq!(actual, expected);
}
224+
225+
/// Exercises `vaddvq_f32`, which reduces a four-lane vector to a single
/// `f32`, and checks that the result equals the sum of the input lanes.
///
/// The function is `unsafe` because its body calls `transmute` directly.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vaddvq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.faddv.f32.v4f32
    let lanes = f32x4::from([0., 1., 2., 3.]);
    let sum = vaddvq_f32(transmute(lanes));

    // 0 + 1 + 2 + 3
    let expected = 6.0_f32;
    assert_eq!(sum, expected);
}
233+
234+
/// Exercises `vrndnq_f32` on a four-lane vector and checks that every lane is
/// rounded to the nearest integral value, with halfway cases going to the even
/// neighbour (`4.5 -> 4.0`, `5.5 -> 6.0`).
///
/// Gated on `target_arch = "aarch64"` like the other NEON tests in this file
/// and the `main` that calls it, so the function is not compiled on targets
/// where the intrinsic does not exist.
///
/// The function is `unsafe` because its body calls `transmute` directly.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vrndnq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.frintn.v4f32
    let a = f32x4::from([0.1, -1.9, 4.5, 5.5]);
    // The two `.5` inputs pin down the ties-to-even behaviour.
    let e = f32x4::from([0., -2., 4., 6.]);
    let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
    assert_eq!(r, e);
}
241+
205242
#[cfg(target_arch = "aarch64")]
206243
fn main() {
207244
unsafe {
@@ -229,6 +266,11 @@ fn main() {
229266

230267
test_vqsub_u8();
231268
test_vqadd_u8();
269+
270+
test_vmaxq_f32();
271+
test_vminq_f32();
272+
test_vaddvq_f32();
273+
test_vrndnq_f32();
232274
}
233275
}
234276

src/intrinsics/llvm_aarch64.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,50 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
9191
);
9292
}
9393

94+
_ if intrinsic.starts_with("llvm.aarch64.neon.fmax.v") => {
95+
intrinsic_args!(fx, args => (x, y); intrinsic);
96+
97+
simd_pair_for_each_lane(
98+
fx,
99+
x,
100+
y,
101+
ret,
102+
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
103+
fx.bcx.ins().fmax(x_lane, y_lane)
104+
}
105+
);
106+
}
107+
108+
_ if intrinsic.starts_with("llvm.aarch64.neon.fmin.v") => {
109+
intrinsic_args!(fx, args => (x, y); intrinsic);
110+
111+
simd_pair_for_each_lane(
112+
fx,
113+
x,
114+
y,
115+
ret,
116+
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
117+
fx.bcx.ins().fmin(x_lane, y_lane)
118+
}
119+
);
120+
}
121+
122+
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
123+
intrinsic_args!(fx, args => (v); intrinsic);
124+
125+
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
126+
fx.bcx.ins().fadd(a, b)
127+
});
128+
}
129+
130+
_ if intrinsic.starts_with("llvm.aarch64.neon.frintn.v") => {
131+
intrinsic_args!(fx, args => (v); intrinsic);
132+
133+
simd_for_each_lane(fx, v, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
134+
fx.bcx.ins().nearest(lane)
135+
});
136+
}
137+
94138
_ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
95139
intrinsic_args!(fx, args => (v); intrinsic);
96140

0 commit comments

Comments
 (0)