Skip to content

Commit e6f2468

Browse files
fixup! refactor to address preames comments
1 parent b8c3411 commit e6f2468

File tree

7 files changed

+144
-112
lines changed

7 files changed

+144
-112
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 88 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
5050
StringRef getPassName() const override { return PASS_NAME; }
5151

5252
private:
53+
std::optional<const MachineOperand> getVLForUser(MachineOperand &UserOp);
5354
/// Returns the largest common VL MachineOperand that may be used to optimize
5455
/// MI. Returns std::nullopt if it failed to find a suitable VL.
5556
std::optional<const MachineOperand> checkUsers(MachineInstr &MI);
@@ -97,6 +98,8 @@ struct OperandInfo {
9798
OperandInfo(std::pair<unsigned, bool> EMUL, unsigned Log2EEW)
9899
: S(State::Known), EMUL(EMUL), Log2EEW(Log2EEW) {}
99100

101+
OperandInfo(unsigned Log2EEW) : S(State::Known), Log2EEW(Log2EEW) {}
102+
100103
OperandInfo() : S(State::Unknown) {}
101104

102105
bool isUnknown() const { return S == State::Unknown; }
@@ -109,6 +112,11 @@ struct OperandInfo {
109112
A.EMUL->second == B.EMUL->second;
110113
}
111114

115+
static bool EEWAreEqual(const OperandInfo &A, const OperandInfo &B) {
116+
assert(A.isKnown() && B.isKnown() && "Both operands must be known");
117+
return A.Log2EEW == B.Log2EEW;
118+
}
119+
112120
void print(raw_ostream &OS) const {
113121
if (isUnknown()) {
114122
OS << "Unknown";
@@ -720,8 +728,8 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
720728

721729
// Vector Reduction Operations
722730
// Vector Single-Width Integer Reduction Instructions
723-
// The Dest and VS1 only read element 0 of the vector register. Return unknown
724-
// for these. VS2 has EEW=SEW and EMUL=LMUL.
731+
// The Dest and VS1 only read element 0 of the vector register. Return just
732+
// the EEW for these. VS2 has EEW=SEW and EMUL=LMUL.
725733
case RISCV::VREDAND_VS:
726734
case RISCV::VREDMAX_VS:
727735
case RISCV::VREDMAXU_VS:
@@ -732,7 +740,7 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
732740
case RISCV::VREDXOR_VS: {
733741
if (MO.getOperandNo() == 2)
734742
return OperandInfo(MIVLMul, MILog2SEW);
735-
return {};
743+
return OperandInfo(MILog2SEW);
736744
}
737745

738746
default:
@@ -1047,48 +1055,64 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
10471055
return true;
10481056
}
10491057

1058+
std::optional<const MachineOperand>
1059+
RISCVVLOptimizer::getVLForUser(MachineOperand &UserOp) {
1060+
const MachineInstr &UserMI = *UserOp.getParent();
1061+
const MCInstrDesc &Desc = UserMI.getDesc();
1062+
1063+
// Instructions like reductions may use a vector register as a scalar
1064+
// register. In this case, we should treat it like a scalar register which
1065+
// does not impact the decision on whether to optimize VL. But if there is
1066+
// another user of MI and it may have VL=0, we need to be sure not to reduce
1067+
// the VL of MI to zero when the VLOp of UserOp may be non-zero. The most
1068+
// we can reduce it to is one.
1069+
if (isVectorOpUsedAsScalarOp(UserOp)) {
1070+
[[maybe_unused]] Register R = UserOp.getReg();
1071+
[[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
1072+
assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
1073+
"Expect LMUL 1 register class for vector as scalar operands!");
1074+
LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1075+
// VMV_X_S and VFMV_F_S do not have a VL operand, which would cause an
1076+
// assert failure if we called getVLOpNum. Therefore, we return a VL of 1
1077+
// in that case, even if it could have been 0.
1078+
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
1079+
return MachineOperand::CreateImm(1);
1080+
1081+
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1082+
const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1083+
if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0))
1084+
return MachineOperand::CreateImm(1);
1085+
LLVM_DEBUG(dbgs() << " Abort because could not determine VL of vector "
1086+
"operand used as scalar operand\n");
1087+
1088+
return std::nullopt;
1089+
}
1090+
1091+
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1092+
LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
1093+
" use VLMAX\n");
1094+
return std::nullopt;
1095+
}
1096+
1097+
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1098+
const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1099+
// Looking for an immediate or a register VL that isn't X0.
1100+
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1101+
"Did not expect X0 VL");
1102+
return VLOp;
1103+
}
1104+
10501105
std::optional<const MachineOperand>
10511106
RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
10521107
// FIXME: Avoid visiting each user for each time we visit something on the
10531108
// worklist, combined with an extra visit from the outer loop. Restructure
10541109
// along lines of an instcombine style worklist which integrates the outer
10551110
// pass.
10561111
bool CanReduceVL = true;
1057-
const MachineOperand *CommonVL = nullptr;
1058-
const MachineOperand One = MachineOperand::CreateImm(1);
1112+
std::optional<const MachineOperand> CommonVL;
10591113
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
10601114
const MachineInstr &UserMI = *UserOp.getParent();
10611115
LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
1062-
1063-
// Instructions like reductions may use a vector register as a scalar
1064-
// register. In this case, we should treat it like a scalar register which
1065-
// does not impact the decision on whether to optimize VL. But if there is
1066-
// another user of MI and it may have VL=0, we need to be sure not to reduce
1067-
// the VL of MI to zero when the VLOp of UserOp is may be non-zero. The most
1068-
// we can reduce it to is one.
1069-
if (isVectorOpUsedAsScalarOp(UserOp)) {
1070-
[[maybe_unused]] Register R = UserOp.getReg();
1071-
[[maybe_unused]] const TargetRegisterClass *RC = MRI->getRegClass(R);
1072-
assert(RISCV::VRRegClass.hasSubClassEq(RC) &&
1073-
"Expect LMUL 1 register class for vector as scalar operands!");
1074-
LLVM_DEBUG(dbgs() << " Used this operand as a scalar operand\n");
1075-
const MCInstrDesc &Desc = UserMI.getDesc();
1076-
// VMV_X_S and VFMV_F_S do not have a VL opt which would cause an assert
1077-
// assert failure if we called getVLOpNum. Therefore, we will set the
1078-
// CommonVL in that case as 1, even if it could have been set to 0.
1079-
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1080-
CommonVL = &One;
1081-
continue;
1082-
}
1083-
1084-
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1085-
const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1086-
if (VLOp.isReg() || (VLOp.isImm() && VLOp.getImm() != 0)) {
1087-
CommonVL = &One;
1088-
continue;
1089-
}
1090-
}
1091-
10921116
if (mayReadPastVL(UserMI)) {
10931117
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
10941118
CanReduceVL = false;
@@ -1102,45 +1126,55 @@ RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
11021126
break;
11031127
}
11041128

1105-
const MCInstrDesc &Desc = UserMI.getDesc();
1106-
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
1107-
LLVM_DEBUG(dbgs() << " Abort due to lack of VL or SEW, assume that"
1108-
" use VLMAX\n");
1129+
auto VLOp = getVLForUser(UserOp);
1130+
if (!VLOp) {
11091131
CanReduceVL = false;
11101132
break;
11111133
}
11121134

1113-
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
1114-
const MachineOperand &VLOp = UserMI.getOperand(VLOpNum);
1115-
1116-
// Looking for an immediate or a register VL that isn't X0.
1117-
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
1118-
"Did not expect X0 VL");
1119-
11201135
// Use the largest VL among all the users. If we cannot determine this
11211136
// statically, then we cannot optimize the VL.
1122-
if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, VLOp)) {
1123-
CommonVL = &VLOp;
1137+
if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {
1138+
CommonVL.emplace(*VLOp);
11241139
LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n");
1125-
} else if (!RISCV::isVLKnownLE(VLOp, *CommonVL)) {
1140+
} else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
11261141
LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n");
11271142
CanReduceVL = false;
11281143
break;
11291144
}
11301145

1131-
// The SEW and LMUL of destination and source registers need to match.
1146+
if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
1147+
LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
1148+
CanReduceVL = false;
1149+
break;
1150+
}
1151+
11321152
OperandInfo ConsumerInfo = getOperandInfo(UserOp, MRI);
11331153
OperandInfo ProducerInfo = getOperandInfo(MI.getOperand(0), MRI);
1134-
if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown() ||
1135-
!OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo)) {
1136-
LLVM_DEBUG(dbgs() << " Abort due to incompatible or unknown "
1137-
"information for EMUL or EEW.\n");
1154+
if (ConsumerInfo.isUnknown() || ProducerInfo.isUnknown()) {
1155+
LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
1156+
LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
1157+
LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
1158+
CanReduceVL = false;
1159+
break;
1160+
}
1161+
1162+
// If the operand is used as a scalar operand, then the EEW must be
1163+
// compatible. Otherwise, the EMUL *and* EEW must be compatible.
1164+
if ((isVectorOpUsedAsScalarOp(UserOp) &&
1165+
!OperandInfo::EEWAreEqual(ConsumerInfo, ProducerInfo)) ||
1166+
(!isVectorOpUsedAsScalarOp(UserOp) &&
1167+
!OperandInfo::EMULAndEEWAreEqual(ConsumerInfo, ProducerInfo))) {
1168+
LLVM_DEBUG(
1169+
dbgs()
1170+
<< " Abort due to incompatible information for EMUL or EEW.\n");
11381171
LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
11391172
LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
11401173
CanReduceVL = false;
11411174
break;
11421175
}
11431176
}
1177+
11441178
return CanReduceVL && CommonVL
11451179
? std::make_optional<const MachineOperand>(*CommonVL)
11461180
: std::nullopt;

llvm/test/CodeGen/RISCV/double_reduct.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,8 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
171171
define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
172172
; CHECK-LABEL: and_i32:
173173
; CHECK: # %bb.0:
174-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
175-
; CHECK-NEXT: vand.vv v8, v8, v9
176174
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
175+
; CHECK-NEXT: vand.vv v8, v8, v9
177176
; CHECK-NEXT: vredand.vs v8, v8, v8
178177
; CHECK-NEXT: vmv.x.s a0, v8
179178
; CHECK-NEXT: ret
@@ -186,9 +185,8 @@ define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
186185
define i32 @or_i32(<4 x i32> %a, <4 x i32> %b) {
187186
; CHECK-LABEL: or_i32:
188187
; CHECK: # %bb.0:
189-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
190-
; CHECK-NEXT: vor.vv v8, v8, v9
191188
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
189+
; CHECK-NEXT: vor.vv v8, v8, v9
192190
; CHECK-NEXT: vredor.vs v8, v8, v8
193191
; CHECK-NEXT: vmv.x.s a0, v8
194192
; CHECK-NEXT: ret
@@ -216,9 +214,8 @@ define i32 @xor_i32(<4 x i32> %a, <4 x i32> %b) {
216214
define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
217215
; CHECK-LABEL: umin_i32:
218216
; CHECK: # %bb.0:
219-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
220-
; CHECK-NEXT: vminu.vv v8, v8, v9
221217
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
218+
; CHECK-NEXT: vminu.vv v8, v8, v9
222219
; CHECK-NEXT: vredminu.vs v8, v8, v8
223220
; CHECK-NEXT: vmv.x.s a0, v8
224221
; CHECK-NEXT: ret
@@ -231,9 +228,8 @@ define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
231228
define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
232229
; CHECK-LABEL: umax_i32:
233230
; CHECK: # %bb.0:
234-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
235-
; CHECK-NEXT: vmaxu.vv v8, v8, v9
236231
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
232+
; CHECK-NEXT: vmaxu.vv v8, v8, v9
237233
; CHECK-NEXT: vredmaxu.vs v8, v8, v8
238234
; CHECK-NEXT: vmv.x.s a0, v8
239235
; CHECK-NEXT: ret
@@ -246,9 +242,8 @@ define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
246242
define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
247243
; CHECK-LABEL: smin_i32:
248244
; CHECK: # %bb.0:
249-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
250-
; CHECK-NEXT: vmin.vv v8, v8, v9
251245
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
246+
; CHECK-NEXT: vmin.vv v8, v8, v9
252247
; CHECK-NEXT: vredmin.vs v8, v8, v8
253248
; CHECK-NEXT: vmv.x.s a0, v8
254249
; CHECK-NEXT: ret
@@ -261,9 +256,8 @@ define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
261256
define i32 @smax_i32(<4 x i32> %a, <4 x i32> %b) {
262257
; CHECK-LABEL: smax_i32:
263258
; CHECK: # %bb.0:
264-
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
265-
; CHECK-NEXT: vmax.vv v8, v8, v9
266259
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
260+
; CHECK-NEXT: vmax.vv v8, v8, v9
267261
; CHECK-NEXT: vredmax.vs v8, v8, v8
268262
; CHECK-NEXT: vmv.x.s a0, v8
269263
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,9 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
1212
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1313
; RV32-NEXT: vmv.v.i v8, 0
1414
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
15-
; RV32-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1615
; RV32-NEXT: vid.v v9
1716
; RV32-NEXT: vrsub.vi v9, v9, 4
1817
; RV32-NEXT: vand.vv v8, v8, v9
19-
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
2018
; RV32-NEXT: vredmaxu.vs v8, v8, v8
2119
; RV32-NEXT: vmv.x.s a0, v8
2220
; RV32-NEXT: li a1, 4
@@ -31,11 +29,9 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
3129
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
3230
; RV64-NEXT: vmv.v.i v8, 0
3331
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
34-
; RV64-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3532
; RV64-NEXT: vid.v v9
3633
; RV64-NEXT: vrsub.vi v9, v9, 4
3734
; RV64-NEXT: vand.vv v8, v8, v9
38-
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
3935
; RV64-NEXT: vredmaxu.vs v8, v8, v8
4036
; RV64-NEXT: vmv.x.s a0, v8
4137
; RV64-NEXT: li a1, 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ declare <2 x i32> @llvm.masked.load.v2i32(ptr, i32, <2 x i1>, <2 x i32>)
481481
define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwind {
482482
; RV32-SLOW-LABEL: masked_load_v2i32_align1:
483483
; RV32-SLOW: # %bb.0:
484-
; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
484+
; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
485485
; RV32-SLOW-NEXT: vmseq.vi v8, v8, 0
486486
; RV32-SLOW-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
487487
; RV32-SLOW-NEXT: vmv.x.s a2, v8
@@ -499,7 +499,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
499499
; RV32-SLOW-NEXT: slli a6, a6, 24
500500
; RV32-SLOW-NEXT: or a4, a6, a5
501501
; RV32-SLOW-NEXT: or a3, a4, a3
502-
; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
502+
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
503503
; RV32-SLOW-NEXT: vmv.v.x v8, a3
504504
; RV32-SLOW-NEXT: .LBB8_2: # %else
505505
; RV32-SLOW-NEXT: andi a2, a2, 2
@@ -515,19 +515,17 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
515515
; RV32-SLOW-NEXT: slli a0, a0, 24
516516
; RV32-SLOW-NEXT: or a0, a0, a4
517517
; RV32-SLOW-NEXT: or a0, a0, a2
518-
; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
518+
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
519519
; RV32-SLOW-NEXT: vmv.s.x v9, a0
520520
; RV32-SLOW-NEXT: vslideup.vi v8, v9, 1
521-
; RV32-SLOW-NEXT: vse32.v v8, (a1)
522-
; RV32-SLOW-NEXT: ret
523-
; RV32-SLOW-NEXT: .LBB8_4:
524-
; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
521+
; RV32-SLOW-NEXT: .LBB8_4: # %else2
522+
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
525523
; RV32-SLOW-NEXT: vse32.v v8, (a1)
526524
; RV32-SLOW-NEXT: ret
527525
;
528526
; RV64-SLOW-LABEL: masked_load_v2i32_align1:
529527
; RV64-SLOW: # %bb.0:
530-
; RV64-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
528+
; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
531529
; RV64-SLOW-NEXT: vmseq.vi v8, v8, 0
532530
; RV64-SLOW-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
533531
; RV64-SLOW-NEXT: vmv.x.s a2, v8
@@ -545,7 +543,7 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
545543
; RV64-SLOW-NEXT: slli a6, a6, 24
546544
; RV64-SLOW-NEXT: or a4, a6, a5
547545
; RV64-SLOW-NEXT: or a3, a4, a3
548-
; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
546+
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
549547
; RV64-SLOW-NEXT: vmv.v.x v8, a3
550548
; RV64-SLOW-NEXT: .LBB8_2: # %else
551549
; RV64-SLOW-NEXT: andi a2, a2, 2
@@ -561,13 +559,11 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
561559
; RV64-SLOW-NEXT: slli a0, a0, 24
562560
; RV64-SLOW-NEXT: or a0, a0, a4
563561
; RV64-SLOW-NEXT: or a0, a0, a2
564-
; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
562+
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
565563
; RV64-SLOW-NEXT: vmv.s.x v9, a0
566564
; RV64-SLOW-NEXT: vslideup.vi v8, v9, 1
567-
; RV64-SLOW-NEXT: vse32.v v8, (a1)
568-
; RV64-SLOW-NEXT: ret
569-
; RV64-SLOW-NEXT: .LBB8_4:
570-
; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
565+
; RV64-SLOW-NEXT: .LBB8_4: # %else2
566+
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
571567
; RV64-SLOW-NEXT: vse32.v v8, (a1)
572568
; RV64-SLOW-NEXT: ret
573569
;
@@ -589,7 +585,7 @@ declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
589585
define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nounwind {
590586
; SLOW-LABEL: masked_store_v2i32_align2:
591587
; SLOW: # %bb.0:
592-
; SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
588+
; SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
593589
; SLOW-NEXT: vmseq.vi v9, v9, 0
594590
; SLOW-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
595591
; SLOW-NEXT: vmv.x.s a1, v9

0 commit comments

Comments
 (0)