diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 6ddca4a3e0909..a5385be0c011c 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -651,11 +651,23 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   if (!hasSameEEW(MI, *Src))
     return false;
 
+  std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
   // Src needs to have the same passthru as VMV_V_V
   MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
   if (SrcPassthru.getReg().isValid() &&
-      SrcPassthru.getReg() != Passthru.getReg())
-    return false;
+      SrcPassthru.getReg() != Passthru.getReg()) {
+    // If Src's passthru != Passthru, check if it uses Passthru in another
+    // operand and try to commute it.
+    int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
+    if (OtherIdx == -1)
+      return false;
+    unsigned OpIdx1 = OtherIdx;
+    unsigned OpIdx2 = Src->getNumExplicitDefs();
+    if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
+      return false;
+    NeedsCommute = {OpIdx1, OpIdx2};
+  }
 
   // Src VL will have already been reduced if legal (see tryToReduceVL),
   // so we don't need to handle a smaller source VL here. However, the
@@ -668,6 +680,13 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   if (!ensureDominates(Passthru, *Src))
     return false;
 
+  if (NeedsCommute) {
+    auto [OpIdx1, OpIdx2] = *NeedsCommute;
+    [[maybe_unused]] bool Commuted =
+        TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
+    assert(Commuted && "Failed to commute Src?");
+  }
+
   if (SrcPassthru.getReg() != Passthru.getReg()) {
     SrcPassthru.setReg(Passthru.getReg());
     // If Src is masked then its passthru needs to be in VRNoV0.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index c2638127e47af..698d47f3be720 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -245,3 +245,14 @@ define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i6
   %b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %a, iXLen %avl)
   ret <vscale x 1 x i64> %b
 }
+
+define <vscale x 4 x float> @commute_vfmadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; CHECK-LABEL: commute_vfmadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vfmacc.vv v8, v12, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %passthru, iXLen 7, iXLen %vl, iXLen 3)
+  %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
+  ret <vscale x 4 x float> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index 95232e734bb18..68e74ff6ba05b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -168,3 +168,23 @@
   %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
   %z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
 ...
+---
+name: commute_vfmadd
+body: |
+  bb.0:
+    liveins: $x8, $v0, $v8, $v9, $v10
+    ; CHECK-LABEL: name: commute_vfmadd
+    ; CHECK: liveins: $x8, $v0, $v8, $v9, $v10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+    ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+    ; CHECK-NEXT: %x:vr = COPY $v9
+    ; CHECK-NEXT: %y:vr = COPY $v10
+    ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMACC_VV_M1_E32 %passthru, %y, %x, 7, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit $frm
+    %avl:gprnox0 = COPY $x8
+    %passthru:vrnov0 = COPY $v8
+    %x:vr = COPY $v9
+    %y:vr = COPY $v10
+    %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, -1, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+    %vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
+...
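
Editor's note: the tests above exercise the case where the fold succeeds after commuting (vfmadd's tied first operand is swapped with the addend, turning PseudoVFMADD into PseudoVFMACC so that %passthru lands in the tied slot). For contrast, a hypothetical sketch of a case the new path still rejects, not part of the patch: if the FMA never reads the vmv.v.v passthru, findRegisterUseOperandIdx returns -1 and foldVMV_V_V bails, leaving the copy in place. The function name @no_commute and operand %c below are made up in the style of the test file:

; Sketch only: %passthru is not an operand of the vfmadd, so there is no
; operand to commute into the tied passthru slot and the vmv.v.v is kept.
define <vscale x 4 x float> @no_commute(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
  %v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 7, iXLen %vl, iXLen 3)
  %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
  ret <vscale x 4 x float> %w
}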