Skip to content

Commit d5117bc

Browse files
committed
[RISCV][VLOPT] Remove unnecessary passthru restriction
We currently check for passthrus in two places: on the instruction whose VL we want to reduce, in isCandidate, and on that instruction's users, in checkUsers. We cannot reduce the VL of an instruction if one of its users takes its result as a passthru operand, because that user will read elements past VL in the tail. However, it's fine to reduce the VL of an instruction that itself has a non-undef passthru: since the VL can only be reduced, never increased, the elements that were previously in the tail always remain the same.
1 parent a755bba commit d5117bc

File tree

3 files changed

+33
-36
lines changed

3 files changed

+33
-36
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,27 +1143,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
11431143
if (MI.getNumDefs() != 1)
11441144
return false;
11451145

1146-
// If we're not using VLMAX, then we need to be careful whether we are using
1147-
// TA/TU when there is a non-undef Passthru. But when we are using VLMAX, it
1148-
// does not matter whether we are using TA/TU with a non-undef Passthru, since
1149-
// there are no tail elements to be preserved.
11501146
unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
11511147
const MachineOperand &VLOp = MI.getOperand(VLOpNum);
1152-
if (VLOp.isReg() || VLOp.getImm() != RISCV::VLMaxSentinel) {
1153-
// If MI has a non-undef passthru, we will not try to optimize it since
1154-
// that requires us to preserve tail elements according to TA/TU.
1155-
// Otherwise, The MI has an undef Passthru, so it doesn't matter whether we
1156-
// are using TA/TU.
1157-
bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
1158-
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
1159-
if (HasPassthru &&
1160-
MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) {
1161-
LLVM_DEBUG(
1162-
dbgs() << " Not a candidate because it uses non-undef passthru"
1163-
" with non-VLMAX VL\n");
1164-
return false;
1165-
}
1166-
}
11671148

11681149
// If the VL is 1, then there is no need to reduce it. This is an
11691150
// optimization, not needed to preserve correctness.
@@ -1247,7 +1228,7 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
12471228
return std::nullopt;
12481229
}
12491230

1250-
// Tied operands might pass through.
1231+
// If used as a passthru, elements past VL will be read.
12511232
if (UserOp.isTied()) {
12521233
LLVM_DEBUG(dbgs() << " Abort because user used as tied operand\n");
12531234
return std::nullopt;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3919,11 +3919,12 @@ define void @trunc_v6bf16(ptr %x) {
39193919
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
39203920
; CHECK-NEXT: vfabs.v v8, v10
39213921
; CHECK-NEXT: vmflt.vf v0, v8, fa5
3922+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
39223923
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
39233924
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
39243925
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
39253926
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
3926-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3927+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
39273928
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
39283929
; CHECK-NEXT: vse16.v v8, (a0)
39293930
; CHECK-NEXT: ret
@@ -4002,11 +4003,12 @@ define void @trunc_v6f16(ptr %x) {
40024003
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
40034004
; ZVFHMIN-NEXT: vfabs.v v8, v10
40044005
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4006+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
40054007
; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
40064008
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
40074009
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
40084010
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4009-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4011+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
40104012
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
40114013
; ZVFHMIN-NEXT: vse16.v v8, (a0)
40124014
; ZVFHMIN-NEXT: ret
@@ -4098,12 +4100,13 @@ define void @ceil_v6bf16(ptr %x) {
40984100
; CHECK-NEXT: vfabs.v v8, v10
40994101
; CHECK-NEXT: vmflt.vf v0, v8, fa5
41004102
; CHECK-NEXT: fsrmi a1, 3
4103+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
41014104
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
41024105
; CHECK-NEXT: fsrm a1
41034106
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
41044107
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
41054108
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4106-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4109+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
41074110
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
41084111
; CHECK-NEXT: vse16.v v8, (a0)
41094112
; CHECK-NEXT: ret
@@ -4189,12 +4192,13 @@ define void @ceil_v6f16(ptr %x) {
41894192
; ZVFHMIN-NEXT: vfabs.v v8, v10
41904193
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
41914194
; ZVFHMIN-NEXT: fsrmi a1, 3
4195+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
41924196
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
41934197
; ZVFHMIN-NEXT: fsrm a1
41944198
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
41954199
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
41964200
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4197-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4201+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
41984202
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
41994203
; ZVFHMIN-NEXT: vse16.v v8, (a0)
42004204
; ZVFHMIN-NEXT: ret
@@ -4290,12 +4294,13 @@ define void @floor_v6bf16(ptr %x) {
42904294
; CHECK-NEXT: vfabs.v v8, v10
42914295
; CHECK-NEXT: vmflt.vf v0, v8, fa5
42924296
; CHECK-NEXT: fsrmi a1, 2
4297+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
42934298
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
42944299
; CHECK-NEXT: fsrm a1
42954300
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
42964301
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
42974302
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4298-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4303+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
42994304
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
43004305
; CHECK-NEXT: vse16.v v8, (a0)
43014306
; CHECK-NEXT: ret
@@ -4381,12 +4386,13 @@ define void @floor_v6f16(ptr %x) {
43814386
; ZVFHMIN-NEXT: vfabs.v v8, v10
43824387
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
43834388
; ZVFHMIN-NEXT: fsrmi a1, 2
4389+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
43844390
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
43854391
; ZVFHMIN-NEXT: fsrm a1
43864392
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
43874393
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
43884394
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4389-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4395+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
43904396
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
43914397
; ZVFHMIN-NEXT: vse16.v v8, (a0)
43924398
; ZVFHMIN-NEXT: ret
@@ -4482,12 +4488,13 @@ define void @round_v6bf16(ptr %x) {
44824488
; CHECK-NEXT: vfabs.v v8, v10
44834489
; CHECK-NEXT: vmflt.vf v0, v8, fa5
44844490
; CHECK-NEXT: fsrmi a1, 4
4491+
; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
44854492
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
44864493
; CHECK-NEXT: fsrm a1
44874494
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
44884495
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
44894496
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4490-
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4497+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
44914498
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
44924499
; CHECK-NEXT: vse16.v v8, (a0)
44934500
; CHECK-NEXT: ret
@@ -4573,12 +4580,13 @@ define void @round_v6f16(ptr %x) {
45734580
; ZVFHMIN-NEXT: vfabs.v v8, v10
45744581
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
45754582
; ZVFHMIN-NEXT: fsrmi a1, 4
4583+
; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
45764584
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
45774585
; ZVFHMIN-NEXT: fsrm a1
45784586
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
45794587
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
45804588
; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4581-
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4589+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
45824590
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
45834591
; ZVFHMIN-NEXT: vse16.v v8, (a0)
45844592
; ZVFHMIN-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,22 @@ define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <v
126126
; We can propagate VL to a tail-undisturbed policy, provided none of its users
127127
; are passthrus (i.e. read past VL).
128128
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
129-
; CHECK-LABEL: different_imm_vl_with_tu:
130-
; CHECK: # %bb.0:
131-
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
132-
; CHECK-NEXT: vmv2r.v v14, v10
133-
; CHECK-NEXT: vadd.vv v14, v10, v12
134-
; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
135-
; CHECK-NEXT: vadd.vv v8, v14, v10
136-
; CHECK-NEXT: ret
129+
; NOVLOPT-LABEL: different_imm_vl_with_tu:
130+
; NOVLOPT: # %bb.0:
131+
; NOVLOPT-NEXT: vsetivli zero, 5, e32, m2, tu, ma
132+
; NOVLOPT-NEXT: vmv2r.v v14, v10
133+
; NOVLOPT-NEXT: vadd.vv v14, v10, v12
134+
; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
135+
; NOVLOPT-NEXT: vadd.vv v8, v14, v10
136+
; NOVLOPT-NEXT: ret
137+
;
138+
; VLOPT-LABEL: different_imm_vl_with_tu:
139+
; VLOPT: # %bb.0:
140+
; VLOPT-NEXT: vsetivli zero, 4, e32, m2, tu, ma
141+
; VLOPT-NEXT: vmv2r.v v14, v10
142+
; VLOPT-NEXT: vadd.vv v14, v10, v12
143+
; VLOPT-NEXT: vadd.vv v8, v14, v10
144+
; VLOPT-NEXT: ret
137145
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
138146
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
139147
ret <vscale x 4 x i32> %w

0 commit comments

Comments
 (0)