Skip to content

Commit eafe31b

Browse files
authored
[RISCV] Don't lose elements from False in vmerge -> vmv peephole (#149720)
In the vmerge peephole, we currently allow different AVLs for the vmerge and its true operand. If vmerge's VL > true's VL, vmerge can "preserve" elements from false that would otherwise be clobbered with a tail agnostic policy on true:

    mask    1 1 1 1 0 0 0 0
    true    x x x x|. . . .    AVL=4
    vmerge  x x x x f f|. .    AVL=6

If we convert this to vmv.v.v we will lose those false elements:

    mask    1 1 1 1 0 0 0 0
    true    x x x x|. . . .    AVL=4
    vmv.v.v x x x x . .|. .    AVL=6

Fix this by checking that vmerge's AVL is <= true's AVL.

Should fix #149335
1 parent 95201b2 commit eafe31b

File tree

3 files changed

+84
-10
lines changed

3 files changed

+84
-10
lines changed

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,15 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
434434
if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
435435
return false;
436436

437+
// Masked off lanes past TrueVL will come from False, and converting to vmv
438+
// will lose these lanes unless MIVL <= TrueVL.
439+
// TODO: We could relax this for False == Passthru and True policy == TU
440+
const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
441+
const MachineOperand &TrueVL =
442+
True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
443+
if (!RISCV::isVLKnownLE(MIVL, TrueVL))
444+
return false;
445+
437446
// True's passthru needs to be equivalent to False
438447
Register TruePassthruReg = True->getOperand(1).getReg();
439448
Register FalseReg = MI.getOperand(2).getReg();

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,12 @@ body: |
7878
; CHECK-NEXT: %false:vrnov0 = COPY $v9
7979
; CHECK-NEXT: %mask:vmv0 = COPY $v0
8080
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
81-
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
81+
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */
8282
%pt:vrnov0 = COPY $v8
8383
%false:vrnov0 = COPY $v9
8484
%mask:vmv0 = COPY $v0
85-
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
86-
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */
85+
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */
86+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */
8787
...
8888
---
8989
# Shouldn't be converted because false operands are different
@@ -163,3 +163,47 @@ body: |
163163
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
164164
bb.1:
165165
%5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */
166+
...
167+
---
168+
# Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
169+
name: preserve_false
170+
body: |
171+
bb.0:
172+
liveins: $v8, $v9, $v0, $x8, $x9
173+
; CHECK-LABEL: name: preserve_false
174+
; CHECK: liveins: $v8, $v9, $v0, $x8, $x9
175+
; CHECK-NEXT: {{ $}}
176+
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
177+
; CHECK-NEXT: %false:vr = COPY $v9
178+
; CHECK-NEXT: %mask:vmv0 = COPY $v0
179+
; CHECK-NEXT: %avl1:gprnox0 = COPY $x8
180+
; CHECK-NEXT: %avl2:gprnox0 = COPY $x9
181+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
182+
; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
183+
%pt:vrnov0 = COPY $v8
184+
%false:vr = COPY $v9
185+
%mask:vmv0 = COPY $v0
186+
%avl1:gprnox0 = COPY $x8
187+
%avl2:gprnox0 = COPY $x9
188+
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
189+
%5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
190+
...
191+
---
192+
# But we can convert this one because vmerge's avl being <= true's means we don't lose any false elements past avl.
193+
name: preserve_false_avl_known_le
194+
body: |
195+
bb.0:
196+
liveins: $v8, $v9, $v0
197+
; CHECK-LABEL: name: preserve_false_avl_known_le
198+
; CHECK: liveins: $v8, $v9, $v0
199+
; CHECK-NEXT: {{ $}}
200+
; CHECK-NEXT: %pt:vr = COPY $v8
201+
; CHECK-NEXT: %false:vrnov0 = COPY $v9
202+
; CHECK-NEXT: %mask:vmv0 = COPY $v0
203+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */
204+
; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */
205+
%pt:vrnov0 = COPY $v8
206+
%false:vr = COPY $v9
207+
%mask:vmv0 = COPY $v0
208+
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */
209+
%5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */

llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
7171
ret <vscale x 8 x i64> %1
7272
}
7373

74-
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
75-
declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
76-
declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
77-
declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
78-
declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
79-
declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
80-
declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
74+
; Shouldn't be converted because vmerge adds back in elements from false past avl that would be lost if we converted to vmv.v.v
75+
define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
76+
; CHECK-LABEL: preserve_false:
77+
; CHECK: # %bb.0:
78+
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
79+
; CHECK-NEXT: vmv1r.v v10, v9
80+
; CHECK-NEXT: vle32.v v10, (a0), v0.t
81+
; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
82+
; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
83+
; CHECK-NEXT: ret
84+
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
85+
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
86+
ret <vscale x 2 x i32> %res
87+
}
88+
89+
; Can fold this because its avl is known to be <= than true, so no elements from false need to be introduced past avl.
90+
define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
91+
; CHECK-LABEL: preserve_false_avl_known_le:
92+
; CHECK: # %bb.0:
93+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
94+
; CHECK-NEXT: vle32.v v9, (a0), v0.t
95+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
96+
; CHECK-NEXT: vmv.v.v v8, v9
97+
; CHECK-NEXT: ret
98+
%true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
99+
%res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
100+
ret <vscale x 2 x i32> %res
101+
}

0 commit comments

Comments
 (0)