Skip to content

Commit 4fcb6e1

Browse files
authored
[RISCV] Commute Src in foldVMV_V_V (llvm#170536)
In llvm#156499 we taught the vmerge peephole to commute operands so that the passthru operands lined up. We can do the same for the vmv.v.v peephole, which allows us to fold more vmv.v.vs away. This is needed to prevent a regression in an upcoming patch that adds a combine for vmerge.vvm to vmv.v.v.
1 parent 6c73f5e commit 4fcb6e1

File tree

3 files changed

+52
-2
lines changed

3 files changed

+52
-2
lines changed

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -651,11 +651,23 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
651651
if (!hasSameEEW(MI, *Src))
652652
return false;
653653

654+
std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
655+
654656
// Src needs to have the same passthru as VMV_V_V
655657
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
656658
if (SrcPassthru.getReg().isValid() &&
657-
SrcPassthru.getReg() != Passthru.getReg())
658-
return false;
659+
SrcPassthru.getReg() != Passthru.getReg()) {
660+
// If Src's passthru != Passthru, check if it uses Passthru in another
661+
// operand and try to commute it.
662+
int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
663+
if (OtherIdx == -1)
664+
return false;
665+
unsigned OpIdx1 = OtherIdx;
666+
unsigned OpIdx2 = Src->getNumExplicitDefs();
667+
if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
668+
return false;
669+
NeedsCommute = {OpIdx1, OpIdx2};
670+
}
659671

660672
// Src VL will have already been reduced if legal (see tryToReduceVL),
661673
// so we don't need to handle a smaller source VL here. However, the
@@ -668,6 +680,13 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
668680
if (!ensureDominates(Passthru, *Src))
669681
return false;
670682

683+
if (NeedsCommute) {
684+
auto [OpIdx1, OpIdx2] = *NeedsCommute;
685+
[[maybe_unused]] bool Commuted =
686+
TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
687+
assert(Commuted && "Failed to commute Src?");
688+
}
689+
671690
if (SrcPassthru.getReg() != Passthru.getReg()) {
672691
SrcPassthru.setReg(Passthru.getReg());
673692
// If Src is masked then its passthru needs to be in VRNoV0.

llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,3 +245,14 @@ define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i6
245245
%b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %a, iXLen %avl)
246246
ret <vscale x 1 x i64> %b
247247
}
248+
249+
define <vscale x 4 x float> @commute_vfmadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
250+
; CHECK-LABEL: commute_vfmadd:
251+
; CHECK: # %bb.0:
252+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
253+
; CHECK-NEXT: vfmacc.vv v8, v12, v10
254+
; CHECK-NEXT: ret
255+
%v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %passthru, iXLen 7, iXLen %vl, iXLen 3)
256+
%w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
257+
ret <vscale x 4 x float> %w
258+
}

llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,23 @@ body: |
168168
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
169169
%z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
170170
...
171+
---
172+
name: commute_vfmadd
173+
body: |
174+
bb.0:
175+
liveins: $x8, $v0, $v8, $v9, $v10
176+
; CHECK-LABEL: name: commute_vfmadd
177+
; CHECK: liveins: $x8, $v0, $v8, $v9, $v10
178+
; CHECK-NEXT: {{ $}}
179+
; CHECK-NEXT: %avl:gprnox0 = COPY $x8
180+
; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
181+
; CHECK-NEXT: %x:vr = COPY $v9
182+
; CHECK-NEXT: %y:vr = COPY $v10
183+
; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMACC_VV_M1_E32 %passthru, %y, %x, 7, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit $frm
184+
%avl:gprnox0 = COPY $x8
185+
%passthru:vrnov0 = COPY $v8
186+
%x:vr = COPY $v9
187+
%y:vr = COPY $v10
188+
%vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, -1, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
189+
%vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
190+
...

0 commit comments

Comments
 (0)