Skip to content

Commit 50cc58c

Browse files
committed
[SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}
This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while.
1 parent a565f9e commit 50cc58c

File tree

5 files changed

+535
-15
lines changed

5 files changed

+535
-15
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2913,31 +2913,34 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
29132913
}
29142914

29152915
SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
2916-
MVT VecVT = Node->getOperand(1).getSimpleValueType();
2916+
bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode());
2917+
MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType()
2918+
: Node->getOperand(0).getSimpleValueType();
29172919
MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
29182920
MVT ScalarVT = Node->getSimpleValueType(0);
29192921
MVT NewScalarVT = NewVecVT.getVectorElementType();
29202922

29212923
SDLoc DL(Node);
29222924
SmallVector<SDValue, 4> Operands(Node->getNumOperands());
29232925

2924-
// promote the initial value.
29252926
// FIXME: Support integer.
29262927
assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
29272928
"Only FP promotion is supported");
2928-
Operands[0] =
2929-
DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
29302929

2931-
for (unsigned j = 1; j != Node->getNumOperands(); ++j)
2930+
for (unsigned j = 0; j != Node->getNumOperands(); ++j)
29322931
if (Node->getOperand(j).getValueType().isVector() &&
2933-
!(ISD::isVPOpcode(Node->getOpcode()) &&
2932+
!(IsVPOpcode &&
29342933
ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
29352934
// promote the vector operand.
29362935
// FIXME: Support integer.
29372936
assert(Node->getOperand(j).getValueType().isFloatingPoint() &&
29382937
"Only FP promotion is supported");
29392938
Operands[j] =
29402939
DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
2940+
} else if (Node->getOperand(j).getValueType().isFloatingPoint()) {
2941+
// promote the initial value.
2942+
Operands[j] =
2943+
DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
29412944
} else {
29422945
Operands[j] = Node->getOperand(j); // Skip VL operand.
29432946
}
@@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
50495052
Node->getOpcode() == ISD::SINT_TO_FP ||
50505053
Node->getOpcode() == ISD::SETCC ||
50515054
Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
5052-
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
5055+
Node->getOpcode() == ISD::INSERT_VECTOR_ELT ||
5056+
Node->getOpcode() == ISD::VECREDUCE_FMAX ||
5057+
Node->getOpcode() == ISD::VECREDUCE_FMIN ||
5058+
Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM ||
5059+
Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) {
50535060
OVT = Node->getOperand(0).getSimpleValueType();
50545061
}
50555062
if (Node->getOpcode() == ISD::ATOMIC_STORE ||
@@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
57965803
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
57975804
break;
57985805
}
5806+
case ISD::VECREDUCE_FMAX:
5807+
case ISD::VECREDUCE_FMIN:
5808+
case ISD::VECREDUCE_FMAXIMUM:
5809+
case ISD::VECREDUCE_FMINIMUM:
57995810
case ISD::VP_REDUCE_FMAX:
58005811
case ISD::VP_REDUCE_FMIN:
58015812
case ISD::VP_REDUCE_FMAXIMUM:

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
503503
case ISD::VECREDUCE_UMIN:
504504
case ISD::VECREDUCE_FADD:
505505
case ISD::VECREDUCE_FMUL:
506+
case ISD::VECTOR_FIND_LAST_ACTIVE:
507+
Action = TLI.getOperationAction(Node->getOpcode(),
508+
Node->getOperand(0).getValueType());
509+
break;
506510
case ISD::VECREDUCE_FMAX:
507511
case ISD::VECREDUCE_FMIN:
508512
case ISD::VECREDUCE_FMAXIMUM:
509513
case ISD::VECREDUCE_FMINIMUM:
510-
case ISD::VECTOR_FIND_LAST_ACTIVE:
511514
Action = TLI.getOperationAction(Node->getOpcode(),
512515
Node->getOperand(0).getValueType());
516+
// Defer non-vector results to LegalizeDAG.
517+
if (Action == TargetLowering::Promote)
518+
Action = TargetLowering::Legal;
513519
break;
514520
case ISD::VECREDUCE_SEQ_FADD:
515521
case ISD::VECREDUCE_SEQ_FMUL:

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
959959

960960
// TODO: support more ops.
961961
static const unsigned ZvfhminZvfbfminPromoteOps[] = {
962-
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
963-
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
964-
ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
965-
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
966-
ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
967-
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
968-
ISD::STRICT_FMA};
962+
ISD::FMINNUM,
963+
ISD::FMAXNUM,
964+
ISD::FADD,
965+
ISD::FSUB,
966+
ISD::FMUL,
967+
ISD::FMA,
968+
ISD::FDIV,
969+
ISD::FSQRT,
970+
ISD::FCEIL,
971+
ISD::FTRUNC,
972+
ISD::FFLOOR,
973+
ISD::FROUND,
974+
ISD::FROUNDEVEN,
975+
ISD::FRINT,
976+
ISD::FNEARBYINT,
977+
ISD::IS_FPCLASS,
978+
ISD::SETCC,
979+
ISD::FMAXIMUM,
980+
ISD::FMINIMUM,
981+
ISD::STRICT_FADD,
982+
ISD::STRICT_FSUB,
983+
ISD::STRICT_FMUL,
984+
ISD::STRICT_FDIV,
985+
ISD::STRICT_FSQRT,
986+
ISD::STRICT_FMA,
987+
ISD::VECREDUCE_FMIN,
988+
ISD::VECREDUCE_FMAX,
989+
ISD::VECREDUCE_FMINIMUM,
990+
ISD::VECREDUCE_FMAXIMUM};
969991

970992
// TODO: support more vp ops.
971993
static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
3+
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
4+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
6+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
7+
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
8+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
9+
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
10+
11+
define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %val) {
12+
; ZVFH-LABEL: vreduce_fmin_nxv4f16:
13+
; ZVFH: # %bb.0:
14+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
15+
; ZVFH-NEXT: vfredmin.vs v8, v8, v8
16+
; ZVFH-NEXT: vfmv.f.s fa0, v8
17+
; ZVFH-NEXT: ret
18+
;
19+
; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16:
20+
; ZVFHMIN: # %bb.0:
21+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
22+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
23+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
24+
; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
25+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
26+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
27+
; ZVFHMIN-NEXT: ret
28+
%s = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
29+
ret half %s
30+
}
31+
32+
define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %val) {
33+
; ZVFH-LABEL: vreduce_fmax_nxv4f16:
34+
; ZVFH: # %bb.0:
35+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
36+
; ZVFH-NEXT: vfredmax.vs v8, v8, v8
37+
; ZVFH-NEXT: vfmv.f.s fa0, v8
38+
; ZVFH-NEXT: ret
39+
;
40+
; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16:
41+
; ZVFHMIN: # %bb.0:
42+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
43+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
44+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
45+
; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
46+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
47+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
48+
; ZVFHMIN-NEXT: ret
49+
%s = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
50+
ret half %s
51+
}
52+
53+
define half @vreduce_fmin_nnan_nxv4f16(<vscale x 4 x half> %val) {
54+
; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16:
55+
; ZVFH: # %bb.0:
56+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
57+
; ZVFH-NEXT: vfredmin.vs v8, v8, v8
58+
; ZVFH-NEXT: vfmv.f.s fa0, v8
59+
; ZVFH-NEXT: ret
60+
;
61+
; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16:
62+
; ZVFHMIN: # %bb.0:
63+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
64+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
65+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
66+
; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
67+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
68+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
69+
; ZVFHMIN-NEXT: ret
70+
%s = call nnan half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
71+
ret half %s
72+
}
73+
74+
define half @vreduce_fmax_nnan_nxv4f16(<vscale x 4 x half> %val) {
75+
; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16:
76+
; ZVFH: # %bb.0:
77+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
78+
; ZVFH-NEXT: vfredmax.vs v8, v8, v8
79+
; ZVFH-NEXT: vfmv.f.s fa0, v8
80+
; ZVFH-NEXT: ret
81+
;
82+
; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16:
83+
; ZVFHMIN: # %bb.0:
84+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
85+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
86+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
87+
; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
88+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
89+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
90+
; ZVFHMIN-NEXT: ret
91+
%s = call nnan half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
92+
ret half %s
93+
}
94+
95+
define half @vreduce_fminimum_nxv4f16(<vscale x 4 x half> %val) {
96+
; ZVFH-LABEL: vreduce_fminimum_nxv4f16:
97+
; ZVFH: # %bb.0:
98+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
99+
; ZVFH-NEXT: vmfne.vv v9, v8, v8
100+
; ZVFH-NEXT: vcpop.m a0, v9
101+
; ZVFH-NEXT: beqz a0, .LBB4_2
102+
; ZVFH-NEXT: # %bb.1:
103+
; ZVFH-NEXT: lui a0, %hi(.LCPI4_0)
104+
; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0)
105+
; ZVFH-NEXT: ret
106+
; ZVFH-NEXT: .LBB4_2:
107+
; ZVFH-NEXT: vfredmin.vs v8, v8, v8
108+
; ZVFH-NEXT: vfmv.f.s fa0, v8
109+
; ZVFH-NEXT: ret
110+
;
111+
; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16:
112+
; ZVFHMIN: # %bb.0:
113+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
114+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
115+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
116+
; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
117+
; ZVFHMIN-NEXT: vcpop.m a0, v8
118+
; ZVFHMIN-NEXT: beqz a0, .LBB4_2
119+
; ZVFHMIN-NEXT: # %bb.1:
120+
; ZVFHMIN-NEXT: lui a0, 523264
121+
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
122+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
123+
; ZVFHMIN-NEXT: ret
124+
; ZVFHMIN-NEXT: .LBB4_2:
125+
; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
126+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
127+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
128+
; ZVFHMIN-NEXT: ret
129+
%s = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
130+
ret half %s
131+
}
132+
133+
define half @vreduce_fmaximum_nxv4f16(<vscale x 4 x half> %val) {
134+
; ZVFH-LABEL: vreduce_fmaximum_nxv4f16:
135+
; ZVFH: # %bb.0:
136+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
137+
; ZVFH-NEXT: vmfne.vv v9, v8, v8
138+
; ZVFH-NEXT: vcpop.m a0, v9
139+
; ZVFH-NEXT: beqz a0, .LBB5_2
140+
; ZVFH-NEXT: # %bb.1:
141+
; ZVFH-NEXT: lui a0, %hi(.LCPI5_0)
142+
; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0)
143+
; ZVFH-NEXT: ret
144+
; ZVFH-NEXT: .LBB5_2:
145+
; ZVFH-NEXT: vfredmax.vs v8, v8, v8
146+
; ZVFH-NEXT: vfmv.f.s fa0, v8
147+
; ZVFH-NEXT: ret
148+
;
149+
; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16:
150+
; ZVFHMIN: # %bb.0:
151+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
152+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
153+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
154+
; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10
155+
; ZVFHMIN-NEXT: vcpop.m a0, v8
156+
; ZVFHMIN-NEXT: beqz a0, .LBB5_2
157+
; ZVFHMIN-NEXT: # %bb.1:
158+
; ZVFHMIN-NEXT: lui a0, 523264
159+
; ZVFHMIN-NEXT: fmv.w.x fa5, a0
160+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
161+
; ZVFHMIN-NEXT: ret
162+
; ZVFHMIN-NEXT: .LBB5_2:
163+
; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
164+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
165+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
166+
; ZVFHMIN-NEXT: ret
167+
%s = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
168+
ret half %s
169+
}
170+
171+
define half @vreduce_fminimum_nnan_nxv4f16(<vscale x 4 x half> %val) {
172+
; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16:
173+
; ZVFH: # %bb.0:
174+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
175+
; ZVFH-NEXT: vfredmin.vs v8, v8, v8
176+
; ZVFH-NEXT: vfmv.f.s fa0, v8
177+
; ZVFH-NEXT: ret
178+
;
179+
; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16:
180+
; ZVFHMIN: # %bb.0:
181+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
182+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
183+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
184+
; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10
185+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
186+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
187+
; ZVFHMIN-NEXT: ret
188+
%s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
189+
ret half %s
190+
}
191+
192+
define half @vreduce_fmaximum_nnan_nxv4f16(<vscale x 4 x half> %val) {
193+
; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16:
194+
; ZVFH: # %bb.0:
195+
; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
196+
; ZVFH-NEXT: vfredmax.vs v8, v8, v8
197+
; ZVFH-NEXT: vfmv.f.s fa0, v8
198+
; ZVFH-NEXT: ret
199+
;
200+
; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16:
201+
; ZVFHMIN: # %bb.0:
202+
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
203+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
204+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
205+
; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10
206+
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
207+
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
208+
; ZVFHMIN-NEXT: ret
209+
%s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
210+
ret half %s
211+
}
212+

0 commit comments

Comments
 (0)