From 0c43a14165be1a02df211f7e4539c17fc948a4ad Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 11 Apr 2025 07:56:31 -0700 Subject: [PATCH 1/4] [RISCV] Rewrite vrgather.vx undef, (vmv.s.x), 0, v0 as vmv.v.x This extends the DAG combine introduced in 336b2909 to handle the case where the prior value is defined by a vmv.s.x instead of a vmv.v.x. If the vrgather splats the single source element, and has no passthru we can replace it with a vmv.v.x - which will in turn usually get folded into a vmerge if a select follows. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 32 +++++++++++++++++-- .../RISCV/rvv/fixed-vectors-shuffle-fp.ll | 19 ++++------- .../RISCV/rvv/fixed-vectors-shuffle-int.ll | 17 ++++------ 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f24752b8721f5..4dd237b5415dd 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5573,7 +5573,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const int Lane = SVN->getSplatIndex(); if (Lane >= 0) { MVT SVT = VT.getVectorElementType(); - // Turn splatted vector load into a strided load with an X0 stride. SDValue V = V1; // Peek through CONCAT_VECTORS as VectorCombine can concat a vector @@ -19710,20 +19709,47 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return V; break; case RISCVISD::VRGATHER_VX_VL: { - // Drop a redundant vrgather_vx. + using namespace llvm::SDPatternMatch; // Note this assumes that out of bounds indices produce poison // and can thus be replaced without having to prove them inbounds.. + EVT VT = N->getValueType(0); SDValue Src = N->getOperand(0); + SDValue Idx = N->getOperand(1); SDValue Passthru = N->getOperand(2); SDValue VL = N->getOperand(4); + + // Warning: Unlike most cases we strip an insert_subvector, this one + // does not require the first operand to be undef. + if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && + sd_match(Src.getOperand(2), m_Zero())) + Src = Src.getOperand(1); + switch (Src.getOpcode()) { default: break; case RISCVISD::VMV_V_X_VL: case RISCVISD::VFMV_V_F_VL: - if (Passthru.isUndef() && VL == Src.getOperand(2)) + // Drop a redundant vrgather_vx. + // TODO: Remove the type restriction if we find a motivating + // test case? + if (Passthru.isUndef() && VL == Src.getOperand(2) && + Src.getValueType() == VT) return Src; break; + case RISCVISD::VMV_S_X_VL: + case RISCVISD::VFMV_S_F_VL: + // If this use only demands lane zero from the source vmv.s.x, and + // doesn't have a passthru, then this vrgather.vi/vx is equivalent to + // a vmv.v.x. Note that there can be other uses of the original + // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous) + if (sd_match(Idx, m_Zero()) && Passthru.isUndef() && + VL == Src.getOperand(2)) { + unsigned Opc = + VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; + return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1), + VL); + } + break; } break; } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll index 5aac2687122ae..f580b1b993395 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-fp.ll @@ -96,13 +96,11 @@ define <8 x float> @vmerge_vxm(<8 x float> %v, float %s) { ; CHECK-LABEL: vmerge_vxm: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 25 -; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vmv2r.v v10, v8 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 0, v0.t -; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret %ins = insertelement <8 x float> %v, float %s, i32 0 %shuf = shufflevector <8 x float> %ins, <8 x float> poison, <8 x i32> @@ -112,15 +110,10 @@ define <8 x float> @vmerge_vxm(<8 x float> %v, float %s) { define <8 x float> @vmerge_vxm2(<8 x float> %v, float %s) { ; CHECK-LABEL: vmerge_vxm2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m4, tu, ma -; CHECK-NEXT: vmv1r.v v12, v8 -; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: li a0, 25 -; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vmv1r.v v10, v12 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vrgather.vi v8, v10, 0, v0.t +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret %ins = insertelement <8 x float> %v, float %s, i32 0 %shuf = shufflevector <8 x float> %v, <8 x float> %ins, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll index 5c4ef29d7d5b7..8676803e20e3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll @@ -1448,13 +1448,11 @@ define <8 x i8> @vmerge_vxm(<8 x i8> %v, i8 %s) { ; CHECK-LABEL: vmerge_vxm: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 25 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v9, v8, 0, v0.t -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: ret %ins = insertelement <8 x i8> %v, i8 %s, i32 0 %shuf = shufflevector <8 x i8> %ins, <8 x i8> poison, <8 x i32> @@ -1465,12 +1463,9 @@ define <8 x i8> @vmerge_vxm2(<8 x i8> %v, i8 %s) { ; CHECK-LABEL: vmerge_vxm2: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 25 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v8, v9, 0, v0.t +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: ret %ins = insertelement <8 x i8> %v, i8 %s, i32 0 %shuf = shufflevector <8 x i8> %v, <8 x i8> %ins, <8 x i32> From 33c68698d96f25912cd53d4398c3b5f6923800b1 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 17 Apr 2025 09:27:45 -0700 Subject: [PATCH 2/4] Address review comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4dd237b5415dd..8862a6e985870 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -19721,7 +19721,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Warning: Unlike most cases we strip an insert_subvector, this one // does not require the first operand to be undef. if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && - sd_match(Src.getOperand(2), m_Zero())) + isNullConstant(Src.getOperand(2))) Src = Src.getOperand(1); switch (Src.getOpcode()) { @@ -19742,7 +19742,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // doesn't have a passthru, then this vrgather.vi/vx is equivalent to // a vmv.v.x. Note that there can be other uses of the original // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous) - if (sd_match(Idx, m_Zero()) && Passthru.isUndef() && + if (isNullConstant(Idx) && Passthru.isUndef() && VL == Src.getOperand(2)) { unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL; From 9f4fc5a3e0287fec0e2155de47dc03b59b99ad3b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 17 Apr 2025 10:02:16 -0700 Subject: [PATCH 3/4] Address review comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 8862a6e985870..4b1f5cb00cdb0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -19709,7 +19709,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return V; break; case RISCVISD::VRGATHER_VX_VL: { - using namespace llvm::SDPatternMatch; // Note this assumes that out of bounds indices produce poison // and can thus be replaced without having to prove them inbounds.. EVT VT = N->getValueType(0); From af76305cb999564caf143bc469e4bff4d5fe0858 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 17 Apr 2025 10:04:41 -0700 Subject: [PATCH 4/4] Remove stray whitespace change --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4b1f5cb00cdb0..c983c0f881113 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5573,6 +5573,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const int Lane = SVN->getSplatIndex(); if (Lane >= 0) { MVT SVT = VT.getVectorElementType(); + // Turn splatted vector load into a strided load with an X0 stride. SDValue V = V1; // Peek through CONCAT_VECTORS as VectorCombine can concat a vector