-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Extract spread(2,4,8) shuffle lowering from interleave(2) #118822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is a prep patch for improving spread(4,8) shuffles. I also think it improves the readability of the existing code, but the primary motivation is simply staging work.
|
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) Changes: This is a prep patch for improving spread(4,8) shuffles. I also think it improves the readability of the existing code, but the primary motivation is simply staging work. Full diff: https://github.com/llvm/llvm-project/pull/118822.diff (3 files affected):
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c2b2daad1b8987..bae1ac7903ad3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4816,12 +4816,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
+// Given a vector a, b, c, d return a vector Factor times longer
+// with Factor-1 undef's between elements. Ex:
+// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
+// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
+static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
+ const SDLoc &DL, SelectionDAG &DAG) {
+
+ MVT VT = V.getSimpleValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ ElementCount EC = VT.getVectorElementCount();
+ V = DAG.getBitcast(VT.changeTypeToInteger(), V);
+
+ MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
+
+ SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
+ // TODO: On rv32, the constant becomes a splat_vector_parts which does not
+ // allow the SHL to fold away if Index is 0.
+ if (Index != 0)
+ Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
+ DAG.getConstant(EltBits * Index, DL, WideVT));
+ // Make sure to use original element type
+ MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
+ EC.multiplyCoefficientBy(Factor));
+ return DAG.getBitcast(ResultVT, Result);
+}
+
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+
+ // FIXME: Not only does this optimize the code, it fixes some correctness
+ // issues because MIR does not have freeze.
+ if (EvenV.isUndef())
+ return getWideningSpread(OddV, 2, 1, DL, DAG);
+ else if (OddV.isUndef())
+ return getWideningSpread(EvenV, 2, 0, DL, DAG);
+
MVT VecVT = EvenV.getSimpleValueType();
MVT VecContainerVT = VecVT; // <vscale x n x ty>
// Convert fixed vectors to scalable if needed
@@ -4853,29 +4887,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
SDValue Interleaved;
- if (OddV.isUndef()) {
- // If OddV is undef, this is a zero extend.
- // FIXME: Not only does this optimize the code, it fixes some correctness
- // issues because MIR does not have freeze.
- Interleaved =
- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
- } else if (Subtarget.hasStdExtZvbb()) {
+ if (Subtarget.hasStdExtZvbb()) {
// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
SDValue OffsetVec =
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
OffsetVec, Passthru, Mask, VL);
- if (!EvenV.isUndef())
- Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
- Interleaved, EvenV, Passthru, Mask, VL);
- } else if (EvenV.isUndef()) {
- Interleaved =
- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
-
- SDValue OffsetVec =
- DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
- Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
- Interleaved, OffsetVec, Passthru, Mask, VL);
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
+ Interleaved, EvenV, Passthru, Mask, VL);
} else {
// FIXME: We should freeze the odd vector here. We already handled the case
// of provably undef/poison above.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index 97e458e70565ce..9b1e7012e217a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -247,12 +247,12 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; V128-NEXT: vslidedown.vi v24, v16, 16
; V128-NEXT: li a0, 32
-; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: lui a1, 699051
+; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; V128-NEXT: vzext.vf2 v8, v24
-; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: addi a1, a1, -1366
+; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: vmv.s.x v0, a1
; V128-NEXT: vsll.vx v8, v8, a0
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index a8eb1f97fd1a2c..bdc4e0f6256f2e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -410,12 +410,12 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; V128-NEXT: vslidedown.vi v24, v16, 16
; V128-NEXT: li a0, 32
-; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: lui a1, 699051
+; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; V128-NEXT: vzext.vf2 v8, v24
-; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: addi a1, a1, -1366
+; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: vmv.s.x v0, a1
; V128-NEXT: vsll.vx v8, v8, a0
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma
|
  // issues because MIR does not have freeze.
  if (EvenV.isUndef())
    return getWideningSpread(OddV, 2, 1, DL, DAG);
  else if (OddV.isUndef())
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
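Drop the `else`: the preceding `if` branch returns, so a plain `if (OddV.isUndef())` is equivalent and matches the LLVM coding standard of not using `else` after a `return`.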
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This is a prep patch for improving spread(4,8) shuffles. I also think it improves the readability of the existing code, but the primary motivation is simply staging work.