Skip to content

Commit b2cea57

Browse files
author
Simon Moll
committed
[VE] FADD,FSUB,FMUL,FDIV v256f32|f64 isel and tests
Depends on D115940, which provides the `Binary_rv_vr_vv` isel pattern class used here for the division patterns. Reviewed By: kaz7. Differential Revision: https://reviews.llvm.org/D116035
1 parent 2926d6d commit b2cea57

File tree

7 files changed

+366
-0
lines changed

7 files changed

+366
-0
lines changed

llvm/lib/Target/VE/VVPInstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
2929
IsVLVT<4>
3030
]>;
3131

32+
// BinaryFPOp(x, y, mask, vl): x, y and the result share one FP type; mask is an integer type with the same element count.
33+
def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
34+
SDTCisSameAs<0, 1>,
35+
SDTCisSameAs<0, 2>,
36+
SDTCisFP<0>,
37+
SDTCisInt<3>,
38+
SDTCisSameNumEltsAs<0, 3>,
39+
IsVLVT<4>
40+
]>;
41+
3242
// Binary operator commutative pattern.
3343
class vvp_commutative<SDNode RootOp> :
3444
PatFrags<
@@ -61,4 +71,11 @@ def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
6171
def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
6272
def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
6373

74+
def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
75+
def c_vvp_fadd : vvp_commutative<vvp_fadd>;
76+
def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
77+
def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
78+
def c_vvp_fmul : vvp_commutative<vvp_fmul>;
79+
def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
80+
6481
// } Binary Operators

llvm/lib/Target/VE/VVPInstrPatternsVec.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,16 @@ defm : Binary_vr_vv_ShortLong<vvp_sra,
178178
defm : Binary_vr_vv_ShortLong<vvp_srl,
179179
i64, v256i64, "VSRL",
180180
i32, v256i32, "PVSRLLO">;
181+
182+
defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
183+
f64, v256f64, "VFADDD",
184+
f32, v256f32, "PVFADDUP">;
185+
defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
186+
f64, v256f64, "VFMULD",
187+
f32, v256f32, "PVFMULUP">;
188+
defm : Binary_rv_vv_ShortLong<vvp_fsub,
189+
f64, v256f64, "VFSUBD",
190+
f32, v256f32, "PVFSUBUP">;
191+
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
192+
f64, v256f64, "VFDIVD",
193+
f32, v256f32, "VFDIVS">;

llvm/lib/Target/VE/VVPNodes.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ ADD_BINARY_VVP_OP_COMPACT(AND)
5353
ADD_BINARY_VVP_OP_COMPACT(OR)
5454
ADD_BINARY_VVP_OP_COMPACT(XOR)
5555

56+
// FP arithmetic.
57+
ADD_BINARY_VVP_OP_COMPACT(FADD)
58+
ADD_BINARY_VVP_OP_COMPACT(FSUB)
59+
ADD_BINARY_VVP_OP_COMPACT(FMUL)
60+
ADD_BINARY_VVP_OP_COMPACT(FDIV)
61+
5662
#undef ADD_BINARY_VVP_OP
5763
#undef ADD_BINARY_VVP_OP_COMPACT
5864
#undef ADD_VVP_OP
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fadd_v256f32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: pvfadd.up %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
13+
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
14+
ret <256 x float> %r0
15+
}
16+
17+
define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fadd_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fadd_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x float> undef, float %s1, i32 0
38+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x float> %r0
41+
}
42+
43+
44+
declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
45+
46+
define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_fadd_v256f64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vfadd.d %v0, %v0, %v1, %vm1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x double> %r0
55+
}
56+
57+
define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_fadd_v256f64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x double> undef, double %s0, i32 0
65+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x double> %r0
68+
}
69+
70+
define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_fadd_v256f64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x double> undef, double %s1, i32 0
78+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x double> %r0
81+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fdiv_v256f32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
13+
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
14+
ret <256 x float> %r0
15+
}
16+
17+
define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fdiv_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fdiv_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x float> undef, float %s1, i32 0
38+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x float> %r0
41+
}
42+
43+
44+
declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
45+
46+
define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_fdiv_v256f64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x double> %r0
55+
}
56+
57+
define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_fdiv_v256f64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x double> undef, double %s0, i32 0
65+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x double> %r0
68+
}
69+
70+
define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_fdiv_v256f64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x double> undef, double %s1, i32 0
78+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x double> %r0
81+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fmul_v256f32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: pvfmul.up %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
13+
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
14+
ret <256 x float> %r0
15+
}
16+
17+
define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fmul_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fmul_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x float> undef, float %s1, i32 0
38+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x float> %r0
41+
}
42+
43+
44+
declare <256 x double> @llvm.vp.fmul.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
45+
46+
define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_fmul_v256f64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vfmul.d %v0, %v0, %v1, %vm1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x double> %r0
55+
}
56+
57+
define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_fmul_v256f64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x double> undef, double %s0, i32 0
65+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x double> %r0
68+
}
69+
70+
define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_fmul_v256f64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x double> undef, double %s1, i32 0
78+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x double> %r0
81+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x float> @llvm.vp.fsub.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fsub_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fsub_v256f32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
13+
%r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
14+
ret <256 x float> %r0
15+
}
16+
17+
define fastcc <256 x float> @test_vp_fsub_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fsub_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: pvfsub.up %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fsub_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fsub_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lea %s2, 256
35+
; CHECK-NEXT: lvl %s2
36+
; CHECK-NEXT: vbrd %v1, %s0
37+
; CHECK-NEXT: lvl %s1
38+
; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1
39+
; CHECK-NEXT: b.l.t (, %s10)
40+
%yins = insertelement <256 x float> undef, float %s1, i32 0
41+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
42+
%r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
43+
ret <256 x float> %r0
44+
}
45+
46+
47+
declare <256 x double> @llvm.vp.fsub.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
48+
49+
define fastcc <256 x double> @test_vp_fsub_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
50+
; CHECK-LABEL: test_vp_fsub_v256f64_vv:
51+
; CHECK: # %bb.0:
52+
; CHECK-NEXT: and %s0, %s0, (32)0
53+
; CHECK-NEXT: lvl %s0
54+
; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
55+
; CHECK-NEXT: b.l.t (, %s10)
56+
%r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
57+
ret <256 x double> %r0
58+
}
59+
60+
define fastcc <256 x double> @test_vp_fsub_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
61+
; CHECK-LABEL: test_vp_fsub_v256f64_rv:
62+
; CHECK: # %bb.0:
63+
; CHECK-NEXT: and %s1, %s1, (32)0
64+
; CHECK-NEXT: lvl %s1
65+
; CHECK-NEXT: vfsub.d %v0, %s0, %v0, %vm1
66+
; CHECK-NEXT: b.l.t (, %s10)
67+
%xins = insertelement <256 x double> undef, double %s0, i32 0
68+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
69+
%r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
70+
ret <256 x double> %r0
71+
}
72+
73+
define fastcc <256 x double> @test_vp_fsub_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
74+
; CHECK-LABEL: test_vp_fsub_v256f64_vr:
75+
; CHECK: # %bb.0:
76+
; CHECK-NEXT: and %s1, %s1, (32)0
77+
; CHECK-NEXT: lea %s2, 256
78+
; CHECK-NEXT: lvl %s2
79+
; CHECK-NEXT: vbrd %v1, %s0
80+
; CHECK-NEXT: lvl %s1
81+
; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1
82+
; CHECK-NEXT: b.l.t (, %s10)
83+
%yins = insertelement <256 x double> undef, double %s1, i32 0
84+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
85+
%r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
86+
ret <256 x double> %r0
87+
}

0 commit comments

Comments
 (0)