Skip to content

Commit fd1dce3

Browse files
committed
[LegalizeTypes][VP] Add splitting support for vp.reduce.*
Split vp.reduce.* intrinsics by splitting the vector to reduce into two halves, performing the reduction operation on each of them, and accumulating the results of both operations (the result of the reduction over the low half is passed as the first operand of the reduction over the high half). Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117469
1 parent c154f39 commit fd1dce3

9 files changed

+387
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
818818
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
819819
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
820820

821+
/// Split the mask operand of a VP intrinsic into its low and high halves.
822+
std::pair<SDValue, SDValue> SplitMask(SDValue Mask);
823+
821824
// Helper function for incrementing the pointer when splitting
822825
// memory operations
823826
void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
@@ -864,6 +867,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
864867
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
865868
SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
866869
SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N);
870+
SDValue SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo);
867871
SDValue SplitVecOp_UnaryOp(SDNode *N);
868872
SDValue SplitVecOp_TruncateHelper(SDNode *N);
869873

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,16 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
11171117
}
11181118
}
11191119

1120+
/// Split \p Mask into two halves and return them as a (MaskLo, MaskHi) pair.
/// The halves are taken from GetSplitVector when the legalizer's type action
/// for the mask type is TypeSplitVector, and are produced by DAG.SplitVector
/// otherwise.
std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
1121+
SDValue MaskLo, MaskHi;
1122+
EVT MaskVT = Mask.getValueType();
1123+
if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
1124+
GetSplitVector(Mask, MaskLo, MaskHi); // Halves obtained through the legalizer's GetSplitVector.
1125+
else
1126+
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask)); // Any other type action: split via the DAG.
1127+
return std::make_pair(MaskLo, MaskHi);
1128+
}
1129+
11201130
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
11211131
SDValue LHSLo, LHSHi;
11221132
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
@@ -1135,14 +1145,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
11351145
assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
11361146
assert(N->isVPOpcode() && "Expected VP opcode");
11371147

1138-
// Split the mask.
11391148
SDValue MaskLo, MaskHi;
1140-
SDValue Mask = N->getOperand(2);
1141-
EVT MaskVT = Mask.getValueType();
1142-
if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
1143-
GetSplitVector(Mask, MaskLo, MaskHi);
1144-
else
1145-
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
1149+
std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
11461150

11471151
SDValue EVLLo, EVLHi;
11481152
std::tie(EVLLo, EVLHi) =
@@ -2342,6 +2346,23 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
23422346
case ISD::VECREDUCE_SEQ_FMUL:
23432347
Res = SplitVecOp_VECREDUCE_SEQ(N);
23442348
break;
2349+
case ISD::VP_REDUCE_FADD:
2350+
case ISD::VP_REDUCE_SEQ_FADD:
2351+
case ISD::VP_REDUCE_FMUL:
2352+
case ISD::VP_REDUCE_SEQ_FMUL:
2353+
case ISD::VP_REDUCE_ADD:
2354+
case ISD::VP_REDUCE_MUL:
2355+
case ISD::VP_REDUCE_AND:
2356+
case ISD::VP_REDUCE_OR:
2357+
case ISD::VP_REDUCE_XOR:
2358+
case ISD::VP_REDUCE_SMAX:
2359+
case ISD::VP_REDUCE_SMIN:
2360+
case ISD::VP_REDUCE_UMAX:
2361+
case ISD::VP_REDUCE_UMIN:
2362+
case ISD::VP_REDUCE_FMAX:
2363+
case ISD::VP_REDUCE_FMIN:
2364+
Res = SplitVecOp_VP_REDUCE(N, OpNo);
2365+
break;
23452366
}
23462367

23472368
// If the result is null, the sub-method took care of registering results etc.
@@ -2438,6 +2459,33 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
24382459
return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
24392460
}
24402461

2462+
/// Split the vector operand of a VP reduction node \p N.
///
/// \p OpNo must be 1 (the vector operand). The vector (operand 1), the mask
/// (operand 2) and the EVL (operand 3) are each split into a low and a high
/// part. Two nodes with N's opcode, result type and flags are then chained:
/// the first reduces the low part using N's operand 0, and the second reduces
/// the high part using the first node's result as its operand 0.
///
/// \returns the second (high-part) reduction node.
SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
2463+
assert(N->isVPOpcode() && "Expected VP opcode");
2464+
assert(OpNo == 1 && "Can only split reduce vector operand");
2465+
2466+
unsigned Opc = N->getOpcode();
2467+
EVT ResVT = N->getValueType(0);
2468+
SDValue Lo, Hi;
2469+
SDLoc dl(N);
2470+
2471+
SDValue VecOp = N->getOperand(OpNo);
2472+
EVT VecVT = VecOp.getValueType();
2473+
assert(VecVT.isVector() && "Can only split reduce vector operand");
2474+
GetSplitVector(VecOp, Lo, Hi); // Fetch the two halves of the vector being reduced.
2475+
2476+
SDValue MaskLo, MaskHi;
2477+
std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2)); // Operand 2 is the mask.
2478+
2479+
SDValue EVLLo, EVLHi;
2480+
std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl); // Operand 3 is the EVL, split relative to VecVT.
2481+
2482+
const SDNodeFlags Flags = N->getFlags(); // The same flags are passed to both new nodes.
2483+
2484+
SDValue ResLo =
2485+
DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags); // Low part, seeded with N's operand 0.
2486+
return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags); // High part, seeded with the low-part result.
2487+
}
2488+
24412489
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
24422490
// The result has a legal vector type, but the input needs splitting.
24432491
EVT ResVT = N->getValueType(0);

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,31 +373,46 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
373373
llvm_unreachable("Expected VECREDUCE opcode");
374374
case ISD::VECREDUCE_FADD:
375375
case ISD::VECREDUCE_SEQ_FADD:
376+
case ISD::VP_REDUCE_FADD:
377+
case ISD::VP_REDUCE_SEQ_FADD:
376378
return ISD::FADD;
377379
case ISD::VECREDUCE_FMUL:
378380
case ISD::VECREDUCE_SEQ_FMUL:
381+
case ISD::VP_REDUCE_FMUL:
382+
case ISD::VP_REDUCE_SEQ_FMUL:
379383
return ISD::FMUL;
380384
case ISD::VECREDUCE_ADD:
385+
case ISD::VP_REDUCE_ADD:
381386
return ISD::ADD;
382387
case ISD::VECREDUCE_MUL:
388+
case ISD::VP_REDUCE_MUL:
383389
return ISD::MUL;
384390
case ISD::VECREDUCE_AND:
391+
case ISD::VP_REDUCE_AND:
385392
return ISD::AND;
386393
case ISD::VECREDUCE_OR:
394+
case ISD::VP_REDUCE_OR:
387395
return ISD::OR;
388396
case ISD::VECREDUCE_XOR:
397+
case ISD::VP_REDUCE_XOR:
389398
return ISD::XOR;
390399
case ISD::VECREDUCE_SMAX:
400+
case ISD::VP_REDUCE_SMAX:
391401
return ISD::SMAX;
392402
case ISD::VECREDUCE_SMIN:
403+
case ISD::VP_REDUCE_SMIN:
393404
return ISD::SMIN;
394405
case ISD::VECREDUCE_UMAX:
406+
case ISD::VP_REDUCE_UMAX:
395407
return ISD::UMAX;
396408
case ISD::VECREDUCE_UMIN:
409+
case ISD::VP_REDUCE_UMIN:
397410
return ISD::UMIN;
398411
case ISD::VECREDUCE_FMAX:
412+
case ISD::VP_REDUCE_FMAX:
399413
return ISD::FMAXNUM;
400414
case ISD::VECREDUCE_FMIN:
415+
case ISD::VP_REDUCE_FMIN:
401416
return ISD::FMINNUM;
402417
}
403418
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,72 @@ define float @vpreduce_ord_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32
116116
ret float %r
117117
}
118118

119+
declare float @llvm.vp.reduce.fadd.v64f32(float, <64 x float>, <64 x i1>, i32)
120+
121+
define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
122+
; CHECK-LABEL: vpreduce_fadd_v64f32:
123+
; CHECK: # %bb.0:
124+
; CHECK-NEXT: addi a2, a0, -32
125+
; CHECK-NEXT: li a1, 0
126+
; CHECK-NEXT: bltu a0, a2, .LBB8_2
127+
; CHECK-NEXT: # %bb.1:
128+
; CHECK-NEXT: mv a1, a2
129+
; CHECK-NEXT: .LBB8_2:
130+
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
131+
; CHECK-NEXT: li a2, 32
132+
; CHECK-NEXT: vslidedown.vi v24, v0, 4
133+
; CHECK-NEXT: bltu a0, a2, .LBB8_4
134+
; CHECK-NEXT: # %bb.3:
135+
; CHECK-NEXT: li a0, 32
136+
; CHECK-NEXT: .LBB8_4:
137+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
138+
; CHECK-NEXT: vfmv.s.f v25, fa0
139+
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu
140+
; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
141+
; CHECK-NEXT: vfmv.f.s ft0, v25
142+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
143+
; CHECK-NEXT: vfmv.s.f v8, ft0
144+
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu
145+
; CHECK-NEXT: vmv1r.v v0, v24
146+
; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
147+
; CHECK-NEXT: vfmv.f.s fa0, v8
148+
; CHECK-NEXT: ret
149+
%r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
150+
ret float %r
151+
}
152+
153+
define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
154+
; CHECK-LABEL: vpreduce_ord_fadd_v64f32:
155+
; CHECK: # %bb.0:
156+
; CHECK-NEXT: addi a2, a0, -32
157+
; CHECK-NEXT: li a1, 0
158+
; CHECK-NEXT: bltu a0, a2, .LBB9_2
159+
; CHECK-NEXT: # %bb.1:
160+
; CHECK-NEXT: mv a1, a2
161+
; CHECK-NEXT: .LBB9_2:
162+
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
163+
; CHECK-NEXT: li a2, 32
164+
; CHECK-NEXT: vslidedown.vi v24, v0, 4
165+
; CHECK-NEXT: bltu a0, a2, .LBB9_4
166+
; CHECK-NEXT: # %bb.3:
167+
; CHECK-NEXT: li a0, 32
168+
; CHECK-NEXT: .LBB9_4:
169+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
170+
; CHECK-NEXT: vfmv.s.f v25, fa0
171+
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu
172+
; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
173+
; CHECK-NEXT: vfmv.f.s ft0, v25
174+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
175+
; CHECK-NEXT: vfmv.s.f v8, ft0
176+
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu
177+
; CHECK-NEXT: vmv1r.v v0, v24
178+
; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
179+
; CHECK-NEXT: vfmv.f.s fa0, v8
180+
; CHECK-NEXT: ret
181+
%r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
182+
ret float %r
183+
}
184+
119185
declare double @llvm.vp.reduce.fadd.v2f64(double, <2 x double>, <2 x i1>, i32)
120186

121187
define double @vpreduce_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,40 @@ define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m
824824
ret i32 %r
825825
}
826826

827+
declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32)
828+
829+
define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
830+
; CHECK-LABEL: vpreduce_xor_v64i32:
831+
; CHECK: # %bb.0:
832+
; CHECK-NEXT: addi a3, a1, -32
833+
; CHECK-NEXT: li a2, 0
834+
; CHECK-NEXT: bltu a1, a3, .LBB48_2
835+
; CHECK-NEXT: # %bb.1:
836+
; CHECK-NEXT: mv a2, a3
837+
; CHECK-NEXT: .LBB48_2:
838+
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
839+
; CHECK-NEXT: li a3, 32
840+
; CHECK-NEXT: vslidedown.vi v24, v0, 4
841+
; CHECK-NEXT: bltu a1, a3, .LBB48_4
842+
; CHECK-NEXT: # %bb.3:
843+
; CHECK-NEXT: li a1, 32
844+
; CHECK-NEXT: .LBB48_4:
845+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
846+
; CHECK-NEXT: vmv.s.x v25, a0
847+
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu
848+
; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t
849+
; CHECK-NEXT: vmv.x.s a0, v25
850+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
851+
; CHECK-NEXT: vmv.s.x v8, a0
852+
; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, mu
853+
; CHECK-NEXT: vmv1r.v v0, v24
854+
; CHECK-NEXT: vredxor.vs v8, v16, v8, v0.t
855+
; CHECK-NEXT: vmv.x.s a0, v8
856+
; CHECK-NEXT: ret
857+
%r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
858+
ret i32 %r
859+
}
860+
827861
declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)
828862

829863
define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,41 @@ define signext i1 @vpreduce_and_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m,
229229
ret i1 %r
230230
}
231231

232+
declare i1 @llvm.vp.reduce.and.v256i1(i1, <256 x i1>, <256 x i1>, i32)
233+
234+
define signext i1 @vpreduce_and_v256i1(i1 signext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) {
235+
; CHECK-LABEL: vpreduce_and_v256i1:
236+
; CHECK: # %bb.0:
237+
; CHECK-NEXT: addi a2, a1, -128
238+
; CHECK-NEXT: vmv1r.v v11, v0
239+
; CHECK-NEXT: li a3, 0
240+
; CHECK-NEXT: bltu a1, a2, .LBB13_2
241+
; CHECK-NEXT: # %bb.1:
242+
; CHECK-NEXT: mv a3, a2
243+
; CHECK-NEXT: .LBB13_2:
244+
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
245+
; CHECK-NEXT: vmnand.mm v8, v8, v8
246+
; CHECK-NEXT: vmv1r.v v0, v10
247+
; CHECK-NEXT: vcpop.m a2, v8, v0.t
248+
; CHECK-NEXT: li a3, 128
249+
; CHECK-NEXT: seqz a2, a2
250+
; CHECK-NEXT: bltu a1, a3, .LBB13_4
251+
; CHECK-NEXT: # %bb.3:
252+
; CHECK-NEXT: li a1, 128
253+
; CHECK-NEXT: .LBB13_4:
254+
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
255+
; CHECK-NEXT: vmnand.mm v8, v11, v11
256+
; CHECK-NEXT: vmv1r.v v0, v9
257+
; CHECK-NEXT: vcpop.m a1, v8, v0.t
258+
; CHECK-NEXT: seqz a1, a1
259+
; CHECK-NEXT: and a0, a1, a0
260+
; CHECK-NEXT: and a0, a2, a0
261+
; CHECK-NEXT: neg a0, a0
262+
; CHECK-NEXT: ret
263+
%r = call i1 @llvm.vp.reduce.and.v256i1(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %evl)
264+
ret i1 %r
265+
}
266+
232267
declare i1 @llvm.vp.reduce.or.v16i1(i1, <16 x i1>, <16 x i1>, i32)
233268

234269
define signext i1 @vpreduce_or_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {

llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,78 @@ define half @vpreduce_ord_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale
8888
ret half %r
8989
}
9090

91+
declare half @llvm.vp.reduce.fadd.nxv64f16(half, <vscale x 64 x half>, <vscale x 64 x i1>, i32)
92+
93+
define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
94+
; CHECK-LABEL: vpreduce_fadd_nxv64f16:
95+
; CHECK: # %bb.0:
96+
; CHECK-NEXT: csrr a2, vlenb
97+
; CHECK-NEXT: srli a1, a2, 1
98+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
99+
; CHECK-NEXT: slli a2, a2, 2
100+
; CHECK-NEXT: vfmv.s.f v25, fa0
101+
; CHECK-NEXT: mv a3, a0
102+
; CHECK-NEXT: bltu a0, a2, .LBB6_2
103+
; CHECK-NEXT: # %bb.1:
104+
; CHECK-NEXT: mv a3, a2
105+
; CHECK-NEXT: .LBB6_2:
106+
; CHECK-NEXT: li a4, 0
107+
; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, mu
108+
; CHECK-NEXT: vslidedown.vx v24, v0, a1
109+
; CHECK-NEXT: vsetvli zero, a3, e16, m8, tu, mu
110+
; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
111+
; CHECK-NEXT: vfmv.f.s ft0, v25
112+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
113+
; CHECK-NEXT: sub a1, a0, a2
114+
; CHECK-NEXT: vfmv.s.f v8, ft0
115+
; CHECK-NEXT: bltu a0, a1, .LBB6_4
116+
; CHECK-NEXT: # %bb.3:
117+
; CHECK-NEXT: mv a4, a1
118+
; CHECK-NEXT: .LBB6_4:
119+
; CHECK-NEXT: vsetvli zero, a4, e16, m8, tu, mu
120+
; CHECK-NEXT: vmv1r.v v0, v24
121+
; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
122+
; CHECK-NEXT: vfmv.f.s fa0, v8
123+
; CHECK-NEXT: ret
124+
%r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
125+
ret half %r
126+
}
127+
128+
define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
129+
; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
130+
; CHECK: # %bb.0:
131+
; CHECK-NEXT: csrr a2, vlenb
132+
; CHECK-NEXT: srli a1, a2, 1
133+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
134+
; CHECK-NEXT: slli a2, a2, 2
135+
; CHECK-NEXT: vfmv.s.f v25, fa0
136+
; CHECK-NEXT: mv a3, a0
137+
; CHECK-NEXT: bltu a0, a2, .LBB7_2
138+
; CHECK-NEXT: # %bb.1:
139+
; CHECK-NEXT: mv a3, a2
140+
; CHECK-NEXT: .LBB7_2:
141+
; CHECK-NEXT: li a4, 0
142+
; CHECK-NEXT: vsetvli a5, zero, e8, m1, ta, mu
143+
; CHECK-NEXT: vslidedown.vx v24, v0, a1
144+
; CHECK-NEXT: vsetvli zero, a3, e16, m8, tu, mu
145+
; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
146+
; CHECK-NEXT: vfmv.f.s ft0, v25
147+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
148+
; CHECK-NEXT: sub a1, a0, a2
149+
; CHECK-NEXT: vfmv.s.f v8, ft0
150+
; CHECK-NEXT: bltu a0, a1, .LBB7_4
151+
; CHECK-NEXT: # %bb.3:
152+
; CHECK-NEXT: mv a4, a1
153+
; CHECK-NEXT: .LBB7_4:
154+
; CHECK-NEXT: vsetvli zero, a4, e16, m8, tu, mu
155+
; CHECK-NEXT: vmv1r.v v0, v24
156+
; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
157+
; CHECK-NEXT: vfmv.f.s fa0, v8
158+
; CHECK-NEXT: ret
159+
%r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
160+
ret half %r
161+
}
162+
91163
declare float @llvm.vp.reduce.fadd.nxv1f32(float, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
92164

93165
define float @vpreduce_fadd_nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {

0 commit comments

Comments
 (0)