Skip to content

Commit 8e0aba5

Browse files
[AArch64] Implement NEON vamin/vamax intrinsics
This patch implements the intrinsics of the form floatNxM_t vamin[q]_fN(floatNxM_t vn, floatNxM_t vm); floatNxM_t vamax[q]_fN(floatNxM_t vn, floatNxM_t vm); as defined in ARM-software/acle#324 Co-authored-by: Hassnaa Hamdi <[email protected]>
1 parent 4ed0f84 commit 8e0aba5

File tree

7 files changed

+255
-2
lines changed

7 files changed

+255
-2
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,3 +2096,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r
20962096
def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
20972097
def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
20982098
}
2099+
2100+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "faminmax" in {
2101+
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
2102+
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
2103+
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13398,6 +13398,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1339813398
Int = Intrinsic::aarch64_neon_suqadd;
1339913399
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
1340013400
}
13401+
13402+
case NEON::BI__builtin_neon_vamin_f16:
13403+
case NEON::BI__builtin_neon_vaminq_f16:
13404+
case NEON::BI__builtin_neon_vamin_f32:
13405+
case NEON::BI__builtin_neon_vaminq_f32:
13406+
case NEON::BI__builtin_neon_vaminq_f64: {
13407+
Int = Intrinsic::aarch64_neon_famin;
13408+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
13409+
}
13410+
case NEON::BI__builtin_neon_vamax_f16:
13411+
case NEON::BI__builtin_neon_vamaxq_f16:
13412+
case NEON::BI__builtin_neon_vamax_f32:
13413+
case NEON::BI__builtin_neon_vamaxq_f32:
13414+
case NEON::BI__builtin_neon_vamaxq_f64: {
13415+
Int = Intrinsic::aarch64_neon_famax;
13416+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
13417+
}
1340113418
}
1340213419
}
1340313420

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
#include <arm_neon.h>
3+
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s
6+
7+
// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16(
8+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
11+
// CHECK-NEXT: ret <4 x half> [[FAMIN2_I]]
12+
//
13+
float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) {
14+
return vamin_f16(vn, vm);
15+
}
16+
17+
// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16(
18+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
19+
// CHECK-NEXT: [[ENTRY:.*:]]
20+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
21+
// CHECK-NEXT: ret <8 x half> [[FAMIN2_I]]
22+
//
23+
float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) {
24+
return vaminq_f16(vn, vm);
25+
26+
}
27+
28+
// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32(
29+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
30+
// CHECK-NEXT: [[ENTRY:.*:]]
31+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
32+
// CHECK-NEXT: ret <2 x float> [[FAMIN2_I]]
33+
//
34+
float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) {
35+
return vamin_f32(vn, vm);
36+
37+
}
38+
39+
// CHECK-LABEL: define dso_local <4 x float> @test_vaminq_f32(
40+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
41+
// CHECK-NEXT: [[ENTRY:.*:]]
42+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
43+
// CHECK-NEXT: ret <4 x float> [[FAMIN2_I]]
44+
//
45+
float32x4_t test_vaminq_f32(float32x4_t vn, float32x4_t vm) {
46+
return vaminq_f32(vn, vm);
47+
48+
}
49+
50+
// CHECK-LABEL: define dso_local <2 x double> @test_vaminq_f64(
51+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
52+
// CHECK-NEXT: [[ENTRY:.*:]]
53+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
54+
// CHECK-NEXT: ret <2 x double> [[FAMIN2_I]]
55+
//
56+
float64x2_t test_vaminq_f64(float64x2_t vn, float64x2_t vm) {
57+
return vaminq_f64(vn, vm);
58+
}
59+
60+
61+
// CHECK-LABEL: define dso_local <4 x half> @test_vamax_f16(
62+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
63+
// CHECK-NEXT: [[ENTRY:.*:]]
64+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
65+
// CHECK-NEXT: ret <4 x half> [[FAMAX2_I]]
66+
//
67+
float16x4_t test_vamax_f16(float16x4_t vn, float16x4_t vm) {
68+
return vamax_f16(vn, vm);
69+
}
70+
71+
// CHECK-LABEL: define dso_local <8 x half> @test_vamaxq_f16(
72+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
73+
// CHECK-NEXT: [[ENTRY:.*:]]
74+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
75+
// CHECK-NEXT: ret <8 x half> [[FAMAX2_I]]
76+
//
77+
float16x8_t test_vamaxq_f16(float16x8_t vn, float16x8_t vm) {
78+
return vamaxq_f16(vn, vm);
79+
80+
}
81+
82+
// CHECK-LABEL: define dso_local <2 x float> @test_vamax_f32(
83+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
84+
// CHECK-NEXT: [[ENTRY:.*:]]
85+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
86+
// CHECK-NEXT: ret <2 x float> [[FAMAX2_I]]
87+
//
88+
float32x2_t test_vamax_f32(float32x2_t vn, float32x2_t vm) {
89+
return vamax_f32(vn, vm);
90+
91+
}
92+
93+
// CHECK-LABEL: define dso_local <4 x float> @test_vamaxq_f32(
94+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
95+
// CHECK-NEXT: [[ENTRY:.*:]]
96+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
97+
// CHECK-NEXT: ret <4 x float> [[FAMAX2_I]]
98+
//
99+
float32x4_t test_vamaxq_f32(float32x4_t vn, float32x4_t vm) {
100+
return vamaxq_f32(vn, vm);
101+
102+
}
103+
104+
// CHECK-LABEL: define dso_local <2 x double> @test_vamaxq_f64(
105+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
106+
// CHECK-NEXT: [[ENTRY:.*:]]
107+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
108+
// CHECK-NEXT: ret <2 x double> [[FAMAX2_I]]
109+
//
110+
float64x2_t test_vamaxq_f64(float64x2_t vn, float64x2_t vm) {
111+
return vamaxq_f64(vn, vm);
112+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3730,3 +3730,6 @@ def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic
37303730
def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
37313731
def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;
37323732

3733+
// Neon absolute maximum and minimum
3734+
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
3735+
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5985,6 +5985,26 @@ multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
59855985
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
59865986
}
59875987

5988+
let mayRaiseFPException = 1, Uses = [FPCR] in
5989+
multiclass SIMDThreeVectorFP<bit U, bit S, bits<3> opc,
5990+
string asm, SDPatternOperator OpNode> {
5991+
def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
5992+
asm, ".4h",
5993+
[(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4i16 V64:$Rm)))]>;
5994+
def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
5995+
asm, ".8h",
5996+
[(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8i16 V128:$Rm)))]>;
5997+
def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
5998+
asm, ".2s",
5999+
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2i32 V64:$Rm)))]>;
6000+
def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
6001+
asm, ".4s",
6002+
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4i32 V128:$Rm)))]>;
6003+
def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
6004+
asm, ".2d",
6005+
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2i64 V128:$Rm)))]>;
6006+
}
6007+
59886008
let mayRaiseFPException = 1, Uses = [FPCR] in
59896009
multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
59906010
string asm,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10016,8 +10016,8 @@ let Predicates = [HasFP8] in {
1001610016
} // End let Predicates = [HasFP8]
1001710017

1001810018
let Predicates = [HasFAMINMAX] in {
10019-
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
10020-
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
10019+
defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", int_aarch64_neon_famax>;
10020+
defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", int_aarch64_neon_famin>;
1002110021
} // End let Predicates = [HasFAMAXMIN]
1002210022

1002310023
let Predicates = [HasFP8FMA] in {
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
define <4 x half> @test_famin_f16(<4 x half> %vn, <4 x half> %vm) #0 {
7+
; CHECK-LABEL: test_famin_f16:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
10+
; CHECK-NEXT: ret
11+
%res = call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> %vn, <4 x half> %vm)
12+
ret <4 x half> %res
13+
}
14+
15+
define <8 x half> @test_famin2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
16+
; CHECK-LABEL: test_famin2_f16:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
19+
; CHECK-NEXT: ret
20+
%res = call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> %vn, <8 x half> %vm)
21+
ret <8 x half> %res
22+
}
23+
24+
define <2 x float> @test_famin_f32(<2 x float> %vn, <2 x float> %vm) #0 {
25+
; CHECK-LABEL: test_famin_f32:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
28+
; CHECK-NEXT: ret
29+
%res = call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> %vn, <2 x float> %vm)
30+
ret <2 x float> %res
31+
}
32+
33+
define <4 x float> @test_famin2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
34+
; CHECK-LABEL: test_famin2_f32:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
37+
; CHECK-NEXT: ret
38+
%res = call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> %vn, <4 x float> %vm)
39+
ret <4 x float> %res
40+
}
41+
42+
define <2 x double> @test_famin_f64(<2 x double> %vn, <2 x double> %vm) #0 {
43+
; CHECK-LABEL: test_famin_f64:
44+
; CHECK: // %bb.0:
45+
; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
46+
; CHECK-NEXT: ret
47+
%res = call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> %vn, <2 x double> %vm)
48+
ret <2 x double> %res
49+
}
50+
51+
define <4 x half> @test_famax_f16(<4 x half> %vn, <4 x half> %vm) #0 {
52+
; CHECK-LABEL: test_famax_f16:
53+
; CHECK: // %bb.0:
54+
; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
55+
; CHECK-NEXT: ret
56+
%res = call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> %vn, <4 x half> %vm)
57+
ret <4 x half> %res
58+
}
59+
60+
define <8 x half> @test_famax2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
61+
; CHECK-LABEL: test_famax2_f16:
62+
; CHECK: // %bb.0:
63+
; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
64+
; CHECK-NEXT: ret
65+
%res = call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> %vn, <8 x half> %vm)
66+
ret <8 x half> %res
67+
}
68+
69+
define <2 x float> @test_famax_f32(<2 x float> %vn, <2 x float> %vm) #0 {
70+
; CHECK-LABEL: test_famax_f32:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
73+
; CHECK-NEXT: ret
74+
%res = call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> %vn, <2 x float> %vm)
75+
ret <2 x float> %res
76+
}
77+
78+
define <4 x float> @test_famax2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
79+
; CHECK-LABEL: test_famax2_f32:
80+
; CHECK: // %bb.0:
81+
; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
82+
; CHECK-NEXT: ret
83+
%res = call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> %vn, <4 x float> %vm)
84+
ret <4 x float> %res
85+
}
86+
87+
define <2 x double> @test_famax_f64(<2 x double> %vn, <2 x double> %vm) #0 {
88+
; CHECK-LABEL: test_famax_f64:
89+
; CHECK: // %bb.0:
90+
; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
91+
; CHECK-NEXT: ret
92+
%res = call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> %vn, <2 x double> %vm)
93+
ret <2 x double> %res
94+
}
95+
96+
attributes #0 = { "target-features"="+neon,+faminmax" }

0 commit comments

Comments
 (0)