From d6b3d9f3b4b7b77667f0550129e02cabbe382736 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 3 Sep 2025 01:08:04 +0800
Subject: [PATCH 1/3] Precommit tests

---
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 14 +++++++++++
 .../CodeGen/RISCV/rvv/vmerge-peephole.mir | 23 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 6c5346d72aec4..7382e9be25aba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1215,3 +1215,17 @@ define @unfoldable_mismatched_sew( %passthr
 )
   ret %b
 }
+
+define <vscale x 2 x float> @commute_vfmadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: commute_vfmadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vfmadd.vv v9, v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %fmul = fmul contract <vscale x 2 x float> %x, %y
+  %fadd = fadd contract <vscale x 2 x float> %fmul, %passthru
+  %merge = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> %mask, <vscale x 2 x float> %fadd, <vscale x 2 x float> %passthru, i32 %evl)
+  ret <vscale x 2 x float> %merge
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
index 0b95e558d8236..7e0e395298ca9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
@@ -94,3 +94,26 @@ body: |
     %mask:vmv0 = COPY $v0
     %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */
 ...
+---
+name: commute_vfmadd
+body: |
+  bb.0:
+    liveins: $x8, $v0, $v8, $v9, $v10
+    ; CHECK-LABEL: name: commute_vfmadd
+    ; CHECK: liveins: $x8, $v0, $v8, $v9, $v10
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+    ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+    ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+    ; CHECK-NEXT: %x:vr = COPY $v9
+    ; CHECK-NEXT: %y:vr = COPY $v10
+    ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, %avl, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+    ; CHECK-NEXT: %vmerge:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %vfmadd, %mask, %avl, 5 /* e32 */
+    %avl:gprnox0 = COPY $x8
+    %mask:vmv0 = COPY $v0
+    %passthru:vrnov0 = COPY $v8
+    %x:vr = COPY $v9
+    %y:vr = COPY $v10
+    %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, -1, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+    %vmerge:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %vfmadd, %mask, %avl, 5
+...

From 47d48048a7fa16d8ff8caa665c5d187a90450549 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 3 Sep 2025 01:12:26 +0800
Subject: [PATCH 2/3] [RISCV] Commute True in foldVMergeToMask

In order to fold a vmerge into a pseudo, the pseudo's passthru needs to
be the same as vmerge's false operand. If they don't match, we can try
to commute the instruction if possible, e.g. here we can commute v9 and
v8 to fold the vmerge:

  vsetvli zero, a0, e32, m1, ta, ma
  vfmadd.vv v9, v10, v8
  vsetvli zero, zero, e32, m1, tu, ma
  vmerge.vvm v8, v8, v9, v0

which can then be folded into the masked form:

  vsetvli zero, a0, e32, m1, tu, mu
  vfmacc.vv v8, v9, v10, v0.t

Previously this wasn't possible because we did the peephole in
SelectionDAG, but now that it's been migrated to MachineInstr in
#144076 we can reuse the commuting infrastructure in TargetInstrInfo.

This fixes the extra vmv.v.v in the "mul" example here:
https://github.com/llvm/llvm-project/issues/123069#issuecomment-3137997141

It should also allow us to remove the isel patterns described in
#141885 later.
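For reference, a rough sketch of the new check, simplified from the
RISCVVectorPeephole.cpp diff below (this is not the verbatim patch; in
the actual change the commute is deferred until the fold is known to go
ahead):

  // If True's tied passthru is not vmerge's False, look for another
  // operand of True that reads False and ask TargetInstrInfo whether it
  // can be commuted into the passthru position.
  int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI);
  if (OtherIdx != -1) {
    unsigned OpIdx1 = OtherIdx;
    unsigned OpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (TII->findCommutedOpIndices(True, OpIdx1, OpIdx2))
      TII->commuteInstruction(True, /*NewMI=*/false, OpIdx1, OpIdx2);
  }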
---
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp |  24 +-
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     |   6 +-
 llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll       | 426 +++++++-----------
 .../CodeGen/RISCV/rvv/vmerge-peephole.mir     |   3 +-
 4 files changed, 177 insertions(+), 282 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 050de3d58a2f2..454915aae0f58 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -745,12 +745,24 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
   if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg))
     return false;

+  std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
   // If True has a passthru operand then it needs to be the same as vmerge's
   // False, since False will be used for the result's passthru operand.
   Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg();
   if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru &&
-      !isKnownSameDefs(TruePassthru, FalseReg))
-    return false;
+      !isKnownSameDefs(TruePassthru, FalseReg)) {
+    // If True's passthru != False, check if it uses False in another operand
+    // and try to commute it.
+    int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI);
+    if (OtherIdx == -1)
+      return false;
+    unsigned OpIdx1 = OtherIdx;
+    unsigned OpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
+    if (!TII->findCommutedOpIndices(True, OpIdx1, OpIdx2))
+      return false;
+    NeedsCommute = {OpIdx1, OpIdx2};
+  }

   // Make sure it doesn't raise any observable fp exceptions, since changing the
   // active elements will affect how fflags is set.
@@ -796,6 +808,14 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
   if (!ensureDominates(MaskOp, True))
     return false;

+  if (NeedsCommute) {
+    auto [OpIdx1, OpIdx2] = *NeedsCommute;
+    [[maybe_unused]] bool Commuted =
+        TII->commuteInstruction(True, /*NewMI=*/false, OpIdx1, OpIdx2);
+    assert(Commuted && "Failed to commute True?");
+    Info = RISCV::lookupMaskedIntrinsicByUnmasked(True.getOpcode());
+  }
+
   True.setDesc(TII->get(Info->MaskedPseudo));

   // Insert the mask operand.
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 7382e9be25aba..60ddb45fe07c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1219,10 +1219,8 @@ define @unfoldable_mismatched_sew( %passthr define @commute_vfmadd( %passthru, %x, %y, %mask, i32 zeroext %evl) { ; CHECK-LABEL: commute_vfmadd: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfmadd.vv v9, v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %fmul = fmul contract %x, %y %fadd = fadd contract %fmul, %passthru diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll index a29af3d5b54b0..f55c7c0b90b3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll @@ -12,10 +12,8 @@ declare @llvm.vp.select.nxv1i8(, @vmadd_vv_nxv1i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) @@ -26,10 +24,8 @@ define @vmadd_vv_nxv1i8( %a, @vmadd_vv_nxv1i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) @@ -68,9 +64,8 @@ define @vmadd_vx_nxv1i8_unmasked( %a, i8 %b, define @vmadd_vv_nxv1i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) @@ -100,10 +95,8 @@ declare @llvm.vp.select.nxv2i8(, @vmadd_vv_nxv2i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) @@ -114,10 +107,8 @@ define @vmadd_vv_nxv2i8( %a, @vmadd_vv_nxv2i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: 
vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) @@ -156,9 +147,8 @@ define @vmadd_vx_nxv2i8_unmasked( %a, i8 %b, define @vmadd_vv_nxv2i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) @@ -188,10 +178,8 @@ declare @llvm.vp.select.nxv4i8(, @vmadd_vv_nxv4i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) @@ -202,10 +190,8 @@ define @vmadd_vv_nxv4i8( %a, @vmadd_vv_nxv4i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) @@ -244,9 +230,8 @@ define @vmadd_vx_nxv4i8_unmasked( %a, i8 %b, define @vmadd_vv_nxv4i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) @@ -276,10 +261,8 @@ declare @llvm.vp.select.nxv8i8(, @vmadd_vv_nxv8i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) @@ -290,10 +273,8 @@ define @vmadd_vv_nxv8i8( %a, @vmadd_vv_nxv8i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; 
CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) @@ -332,9 +313,8 @@ define @vmadd_vx_nxv8i8_unmasked( %a, i8 %b, define @vmadd_vv_nxv8i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) @@ -364,10 +344,8 @@ declare @llvm.vp.select.nxv16i8(, @vmadd_vv_nxv16i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) @@ -378,10 +356,8 @@ define @vmadd_vv_nxv16i8( %a, @vmadd_vv_nxv16i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e8, m2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v10, v12 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) @@ -420,9 +396,8 @@ define @vmadd_vx_nxv16i8_unmasked( %a, i8 % define @vmadd_vv_nxv16i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv16i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) @@ -452,10 +427,8 @@ declare @llvm.vp.select.nxv32i8(, @vmadd_vv_nxv32i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) @@ -466,10 +439,8 @@ define @vmadd_vv_nxv32i8( %a, @vmadd_vv_nxv32i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv32i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, ma -; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vmadd.vv v8, v12, v16 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) @@ -508,9 +479,8 
@@ define @vmadd_vx_nxv32i8_unmasked( %a, i8 % define @vmadd_vv_nxv32i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv32i8_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) @@ -541,10 +511,8 @@ define @vmadd_vv_nxv64i8( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -556,10 +524,8 @@ define @vmadd_vv_nxv64i8_unmasked( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -599,9 +565,8 @@ define @vmadd_vv_nxv64i8_ta( %a, @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) @@ -631,10 +596,8 @@ declare @llvm.vp.select.nxv1i16(, @vmadd_vv_nxv1i16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) @@ -645,10 +608,8 @@ define @vmadd_vv_nxv1i16( %a, @vmadd_vv_nxv1i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) @@ -687,9 +648,8 @@ define @vmadd_vx_nxv1i16_unmasked( %a, i16 define @vmadd_vv_nxv1i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) @@ -719,10 +679,8 @@ declare @llvm.vp.select.nxv2i16(, @vmadd_vv_nxv2i16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) @@ -733,10 +691,8 @@ define @vmadd_vv_nxv2i16( %a, @vmadd_vv_nxv2i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, 
mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) @@ -775,9 +731,8 @@ define @vmadd_vx_nxv2i16_unmasked( %a, i16 define @vmadd_vv_nxv2i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) @@ -807,10 +762,8 @@ declare @llvm.vp.select.nxv4i16(, @vmadd_vv_nxv4i16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) @@ -821,10 +774,8 @@ define @vmadd_vv_nxv4i16( %a, @vmadd_vv_nxv4i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) @@ -863,9 +814,8 @@ define @vmadd_vx_nxv4i16_unmasked( %a, i16 define @vmadd_vv_nxv4i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) @@ -895,10 +845,8 @@ declare @llvm.vp.select.nxv8i16(, @vmadd_vv_nxv8i16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) @@ -909,10 +857,8 @@ define @vmadd_vv_nxv8i16( %a, @vmadd_vv_nxv8i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: 
vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v10, v12 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) @@ -951,9 +897,8 @@ define @vmadd_vx_nxv8i16_unmasked( %a, i16 define @vmadd_vv_nxv8i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) @@ -983,10 +928,8 @@ declare @llvm.vp.select.nxv16i16(, @vmadd_vv_nxv16i16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) @@ -997,10 +940,8 @@ define @vmadd_vv_nxv16i16( %a, @vmadd_vv_nxv16i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv16i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, tu, ma -; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; CHECK-NEXT: vmadd.vv v8, v12, v16 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1039,9 +980,8 @@ define @vmadd_vx_nxv16i16_unmasked( %a, i define @vmadd_vv_nxv16i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv16i16_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1072,10 +1012,8 @@ define @vmadd_vv_nxv32i16( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1087,10 +1025,8 @@ define @vmadd_vv_nxv32i16_unmasked( %a, < ; CHECK-LABEL: vmadd_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vmacc.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmadd.vv v8, v16, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1130,9 +1066,8 @@ define @vmadd_vv_nxv32i16_ta( %a, @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) @@ -1162,10 +1097,8 @@ declare @llvm.vp.select.nxv1i32(, @vmadd_vv_nxv1i32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i32: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1176,10 +1109,8 @@ define @vmadd_vv_nxv1i32( %a, @vmadd_vv_nxv1i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1218,9 +1149,8 @@ define @vmadd_vx_nxv1i32_unmasked( %a, i32 define @vmadd_vv_nxv1i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1250,10 +1180,8 @@ declare @llvm.vp.select.nxv2i32(, @vmadd_vv_nxv2i32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1264,10 +1192,8 @@ define @vmadd_vv_nxv2i32( %a, @vmadd_vv_nxv2i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1306,9 +1232,8 @@ define @vmadd_vx_nxv2i32_unmasked( %a, i32 define @vmadd_vv_nxv2i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1338,10 +1263,8 @@ declare @llvm.vp.select.nxv4i32(, @vmadd_vv_nxv4i32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, 
e32, m2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1352,10 +1275,8 @@ define @vmadd_vv_nxv4i32( %a, @vmadd_vv_nxv4i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v10, v12 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1394,9 +1315,8 @@ define @vmadd_vx_nxv4i32_unmasked( %a, i32 define @vmadd_vv_nxv4i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1426,10 +1346,8 @@ declare @llvm.vp.select.nxv8i32(, @vmadd_vv_nxv8i32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1440,10 +1358,8 @@ define @vmadd_vv_nxv8i32( %a, @vmadd_vv_nxv8i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma -; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma +; CHECK-NEXT: vmadd.vv v8, v12, v16 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1482,9 +1398,8 @@ define @vmadd_vx_nxv8i32_unmasked( %a, i32 define @vmadd_vv_nxv8i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv8i32_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1515,10 +1430,8 @@ define @vmadd_vv_nxv16i32( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1530,10 +1443,8 @@ define @vmadd_vv_nxv16i32_unmasked( %a, < ; CHECK-LABEL: vmadd_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmacc.vv 
v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmadd.vv v8, v16, v24 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1573,9 +1484,8 @@ define @vmadd_vv_nxv16i32_ta( %a, @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) @@ -1605,10 +1515,8 @@ declare @llvm.vp.select.nxv1i64(, @vmadd_vv_nxv1i64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1619,10 +1527,8 @@ define @vmadd_vv_nxv1i64( %a, @vmadd_vv_nxv1i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma +; CHECK-NEXT: vmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1640,9 +1546,8 @@ define @vmadd_vx_nxv1i64( %a, i64 %b, @vmadd_vx_nxv1i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmadd.vv v10, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: vmadd.vv v8, v10, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -1693,9 +1597,8 @@ define @vmadd_vx_nxv1i64_unmasked( %a, i64 define @vmadd_vv_nxv1i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv1i64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmadd.vv v9, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1711,10 +1614,9 @@ define @vmadd_vx_nxv1i64_ta( %a, i64 %b, @llvm.vp.select.nxv2i64(, @vmadd_vv_nxv2i64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1754,10 +1654,8 @@ define @vmadd_vv_nxv2i64( %a, @vmadd_vv_nxv2i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vsetvli 
zero, zero, e64, m2, tu, ma -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma +; CHECK-NEXT: vmadd.vv v8, v10, v12 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1775,9 +1673,8 @@ define @vmadd_vx_nxv2i64( %a, i64 %b, @vmadd_vx_nxv2i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmadd.vv v12, v8, v10 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma -; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: vmadd.vv v8, v12, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -1828,9 +1724,8 @@ define @vmadd_vx_nxv2i64_unmasked( %a, i64 define @vmadd_vv_nxv2i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv2i64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmadd.vv v10, v8, v12 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1846,10 +1741,9 @@ define @vmadd_vx_nxv2i64_ta( %a, i64 %b, @llvm.vp.select.nxv4i64(, @vmadd_vv_nxv4i64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1889,10 +1781,8 @@ define @vmadd_vv_nxv4i64( %a, @vmadd_vv_nxv4i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma +; CHECK-NEXT: vmadd.vv v8, v12, v16 ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1910,9 +1800,8 @@ define @vmadd_vx_nxv4i64( %a, i64 %b, @vmadd_vx_nxv4i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmadd.vv v16, v8, v12 ; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: vmadd.vv v8, v16, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -1963,9 +1851,8 @@ define @vmadd_vx_nxv4i64_unmasked( %a, i64 define @vmadd_vv_nxv4i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmadd_vv_nxv4i64_ta: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmadd.vv v12, v8, v16 -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) @@ -1981,10 +1868,9 @@ define @vmadd_vx_nxv4i64_ta( %a, i64 %b, 
@vmadd_vv_nxv8i64( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -2026,10 +1910,8 @@ define @vmadd_vv_nxv8i64_unmasked( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -2047,9 +1929,8 @@ define @vmadd_vx_nxv8i64( %a, i64 %b, @vmadd_vx_nxv8i64_unmasked( %a, i64 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vmadd.vv v24, v8, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma -; RV32-NEXT: vmv.v.v v8, v24 +; RV32-NEXT: vmadd.vv v8, v24, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret @@ -2101,9 +1981,8 @@ define @vmadd_vv_nxv8i64_ta( %a, @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) @@ -2119,10 +1998,9 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b, Date: Wed, 3 Sep 2025 08:32:40 +0800 Subject: [PATCH 3/3] Fix OpIdx2 to passthru --- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 454915aae0f58..62651185137c9 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -758,7 +758,7 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { if (OtherIdx == -1) return false; unsigned OpIdx1 = OtherIdx; - unsigned OpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; + unsigned OpIdx2 = True.getNumExplicitDefs(); if (!TII->findCommutedOpIndices(True, OpIdx1, OpIdx2)) return false; NeedsCommute = {OpIdx1, OpIdx2};
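A note on the final fix: pinning OpIdx2 to the passthru operand, rather
than letting findCommutedOpIndices pick an arbitrary partner via
CommuteAnyOperandIndex, guarantees that a successful commute actually
moves vmerge's False register into True's tied passthru slot, which is
the precondition the fold relies on. A minimal sketch of the index
choice, with the operand layout assumed from the vfmadd MIR test above:

  // OpIdx1: the operand of True that currently reads FalseReg.
  // OpIdx2: the first explicit use, which is tied to the def and acts as
  //         the passthru. For PseudoVFMADD this is vd; commuting vd with
  //         the addend is what turns vfmadd.vv into vfmacc.vv in the
  //         final output shown in the commit message.
  unsigned OpIdx1 = OtherIdx;
  unsigned OpIdx2 = True.getNumExplicitDefs();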