[RISCV] Combine vmerge_vl allones -> vmv_v_v, vmv_v_v splat(x) -> vmv_v_x #170539
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

Stacked on #170536

An upcoming patch aims to remove the last use of @llvm.experimental.vp.splat in RISCVCodeGenPrepare by replacing it with a vp_merge of a regular splat. A vp_merge will get lowered to vmerge_vl, and if we combine a vmerge_vl of a splat to vmv_v_x we can get the same behaviour as the vp.splat intrinsic.

This adds the two combines needed. It was easier to do the combines on _vl nodes rather than on vp_merge itself, since the types are already legal for _vl nodes.

Full diff: https://github.com/llvm/llvm-project/pull/170539.diff

6 Files Affected:
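For illustration, a minimal IR sketch of the equivalence the combines rely on (the vp.splat call is the form being replaced and its exact signature here is an assumption; the replacement mirrors the new tests in this diff):

; Today: %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(i32 %x, <vscale x 2 x i1> splat (i1 true), i32 %evl)
; After the upcoming patch, the same value is built from a plain splat merged under an all-ones mask:
%head  = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v     = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> %splat, <vscale x 2 x i32> poison, i32 %evl)
; The two combines (vmerge_vl with an all-ones mask -> vmv_v_v, then vmv_v_v of a splat -> vmv_v_x)
; collapse this to a single vsetvli + vmv.v.x, matching what vp.splat produced before.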
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab2652eac3823..899871edc9f7b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21872,6 +21872,44 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+ case RISCVISD::VMERGE_VL: {
+ // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
+ SDValue Mask = N->getOperand(0);
+ SDValue True = N->getOperand(1);
+ SDValue Passthru = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ // Fixed vectors are wrapped in scalable containers, unwrap them.
+ using namespace SDPatternMatch;
+ SDValue SubVec;
+ if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+ Mask = SubVec;
+
+ if (!isOneOrOneSplat(Mask))
+ break;
+
+ return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getSimpleValueType(0),
+ Passthru, True, VL);
+ }
+ case RISCVISD::VMV_V_V_VL: {
+ // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
+ SDValue Passthru = N->getOperand(0);
+ SDValue Src = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+
+ // Fixed vectors are wrapped in scalable containers, unwrap them.
+ using namespace SDPatternMatch;
+ SDValue SubVec;
+ if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
+ Src = SubVec;
+
+ SDValue SplatVal = DAG.getSplatValue(Src);
+ if (!SplatVal)
+ break;
+ MVT VT = N->getSimpleValueType(0);
+ return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
+ Subtarget);
+ }
case RISCVISD::VSLIDEDOWN_VL:
case RISCVISD::VSLIDEUP_VL:
if (N->getOperand(1)->isUndef())
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 6ddca4a3e0909..a5385be0c011c 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -651,11 +651,23 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!hasSameEEW(MI, *Src))
return false;
+ std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
if (SrcPassthru.getReg().isValid() &&
- SrcPassthru.getReg() != Passthru.getReg())
- return false;
+ SrcPassthru.getReg() != Passthru.getReg()) {
+ // If Src's passthru != Passthru, check if it uses Passthru in another
+ // operand and try to commute it.
+ int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
+ if (OtherIdx == -1)
+ return false;
+ unsigned OpIdx1 = OtherIdx;
+ unsigned OpIdx2 = Src->getNumExplicitDefs();
+ if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
+ return false;
+ NeedsCommute = {OpIdx1, OpIdx2};
+ }
// Src VL will have already been reduced if legal (see tryToReduceVL),
// so we don't need to handle a smaller source VL here. However, the
@@ -668,6 +680,13 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (!ensureDominates(Passthru, *Src))
return false;
+ if (NeedsCommute) {
+ auto [OpIdx1, OpIdx2] = *NeedsCommute;
+ [[maybe_unused]] bool Commuted =
+ TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
+ assert(Commuted && "Failed to commute Src?");
+ }
+
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 7968c5190eb01..0bacb5c26cb4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1353,3 +1353,48 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
%v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
ret <32 x double> %v
}
+
+define <4 x i32> @splat_v4i32(i32 %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i32> poison, i32 %x, i32 0
+ %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> %splat, <4 x i32> poison, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x float> @splat_v4f32(float %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x float> poison, float %x, i32 0
+ %splat = shufflevector <4 x float> %head, <4 x float> poison, <4 x i32> zeroinitializer
+ %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> %splat, <4 x float> poison, i32 %evl)
+ ret <4 x float> %v
+}
+
+define <4 x i32> @splat_v4i32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4i32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x i32> splat (i32 1), <4 x i32> poison, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x float> @splat_v4f32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_v4f32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 270976
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: ret
+ %v = call <4 x float> @llvm.vp.merge(<4 x i1> splat (i1 true), <4 x float> splat (float 42.0), <4 x float> poison, i32 %evl)
+ ret <4 x float> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index c2638127e47af..698d47f3be720 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -245,3 +245,14 @@ define <vscale x 1 x i64> @vmerge(<vscale x 1 x i64> %passthru, <vscale x 1 x i6
%b = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(<vscale x 1 x i64> %passthru, <vscale x 1 x i64> %a, iXLen %avl)
ret <vscale x 1 x i64> %b
}
+
+define <vscale x 4 x float> @commute_vfmadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; CHECK-LABEL: commute_vfmadd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vfmacc.vv v8, v12, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %passthru, iXLen 7, iXLen %vl, iXLen 3)
+ %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
+ ret <vscale x 4 x float> %w
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
index 95232e734bb18..68e74ff6ba05b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.mir
@@ -168,3 +168,23 @@ body: |
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %passthru, $noreg, %mask, 4, 5 /* e32 */
%z:vr = PseudoVMV_V_V_M1 %passthru, %x, 4, 5 /* e32 */, 0 /* tu, mu */
...
+---
+name: commute_vfmadd
+body: |
+ bb.0:
+ liveins: $x8, $v0, $v8, $v9, $v10
+ ; CHECK-LABEL: name: commute_vfmadd
+ ; CHECK: liveins: $x8, $v0, $v8, $v9, $v10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %x:vr = COPY $v9
+ ; CHECK-NEXT: %y:vr = COPY $v10
+ ; CHECK-NEXT: %vfmadd:vrnov0 = nofpexcept PseudoVFMACC_VV_M1_E32 %passthru, %y, %x, 7, %avl, 5 /* e32 */, 0 /* tu, mu */, implicit $frm
+ %avl:gprnox0 = COPY $x8
+ %passthru:vrnov0 = COPY $v8
+ %x:vr = COPY $v9
+ %y:vr = COPY $v10
+ %vfmadd:vrnov0 = nofpexcept PseudoVFMADD_VV_M1_E32 %x, %y, %passthru, 7, -1, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+ %vmerge:vrnov0 = PseudoVMV_V_V_M1 %passthru, %vfmadd, %avl, 5 /* e32 */, 0 /* tu, mu */
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 03697aafea45d..f92ee37051840 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1663,3 +1663,48 @@ define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double
%v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
ret <vscale x 8 x double> %v
}
+
+define <vscale x 2 x i32> @splat_nxv2i32(i32 %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> %splat, <vscale x 2 x i32> poison, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x float> @splat_nxv2f32(float %x, i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x float> poison, float %x, i32 0
+ %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> %splat, <vscale x 2 x float> poison, i32 %evl)
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 2 x i32> @splat_nxv2i32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2i32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> splat (i32 1), <vscale x 2 x i32> poison, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x float> @splat_nxv2f32_const(i32 zeroext %evl) {
+; CHECK-LABEL: splat_nxv2f32_const:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, 270976
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x float> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> poison, i32 %evl)
+ ret <vscale x 2 x float> %v
+}
…re. NFCI

Stacked on llvm#170539

RISCVCodeGenPrepare is the last user of the vp.splat intrinsic, where it uses it to expand a zero strided load into a scalar load and splat. This replaces it with a regular splat followed by a vp_merge to set the lanes past EVL as poison. We need to set the EVL here because RISCVISelDAGToDAG will try and recombine it back into a zero strided load, and we want to preserve the original VL. We need to set
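A rough sketch of that expansion (the strided-load call and all names here are illustrative assumptions, not part of this patch):

; Input: a stride-0 VP strided load, i.e. a broadcast of the scalar at %p:
;   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
; Expansion: load the scalar once, splat it, and vp.merge under an all-ones mask with the original
; %evl (the same splat + vp.merge shape as the sketch further up), so lanes past EVL stay poison
; and the original VL is preserved if RISCVISelDAGToDAG re-forms the strided load.
%s     = load i32, ptr %p
%head  = insertelement <vscale x 2 x i32> poison, i32 %s, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%v     = call <vscale x 2 x i32> @llvm.vp.merge(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i32> %splat, <vscale x 2 x i32> poison, i32 %evl)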
      break;
    }
    case RISCVISD::VMERGE_VL: {
      // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
I just realized that for VMERGE_VL, the passthru is not the first operand like it is for the other nodes. Why is that?
I'm not sure. Maybe it's trying to be like vp_merge where the mask is the first argument?
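For reference, a sketch of the vp_merge operand order being mirrored, assuming the LangRef declaration of llvm.vp.merge:

; llvm.vp.merge also takes the mask as its first operand:
declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %mask, <vscale x 2 x i32> %on_true, <vscale x 2 x i32> %on_false, i32 %evl)
; so VMERGE_VL's (mask, true, false, passthru, vl) ordering keeps the mask in front as well.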
wangpc-pp left a comment
LGTM.