Skip to content

Commit 019635b

Browse files
committed
[RISCV] Extract spread(2,4,8) shuffle lowering from interleave(2)
This is a prep patch for improving spread(4,8) shuffles. I also think it improves the readability of the existing code, but the primary motivation is simply staging work.
1 parent 1ef9410 commit 019635b

File tree

3 files changed

+41
-22
lines changed

3 files changed

+41
-22
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4816,12 +4816,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
48164816
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
48174817
}
48184818

4819+
// Given a vector a, b, c, d return a vector Factor times longer
4820+
// with Factor-1 undefs between elements. Ex:
4821+
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4822+
// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
// Index picks which of the Factor slots in each group holds the source
// element (see the examples above). The filler slots are produced by the
// zero extension and shift below, so this implementation materializes them
// as zero bits even though they are described as undef.
4823+
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4824+
const SDLoc &DL, SelectionDAG &DAG) {
4825+
4826+
MVT VT = V.getSimpleValueType();
4827+
unsigned EltBits = VT.getScalarSizeInBits();
4828+
ElementCount EC = VT.getVectorElementCount();
4829+
// View the input as an integer vector so it can be zero-extended.
V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4830+
4831+
// Same element count as the input, but each element is Factor times as wide.
MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4832+
4833+
// After the extension each source element sits in the low EltBits of its
// wide element, which is already the Index == 0 layout.
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4834+
// TODO: On rv32, the constant becomes a splat_vector_parts which does not
4835+
// allow the SHL to fold away if Index is 0.
4836+
// For a non-zero Index, move the element up by Index narrow-element widths
// inside its wide element.
if (Index != 0)
4837+
Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4838+
DAG.getConstant(EltBits * Index, DL, WideVT));
4839+
// Make sure to use original element type
4840+
// The result has Factor times as many elements as the input, each of the
// input's element type.
MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4841+
EC.multiplyCoefficientBy(Factor));
4842+
return DAG.getBitcast(ResultVT, Result);
4843+
}
4844+
48194845
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
48204846
// to create an interleaved vector of <[vscale x] n*2 x ty>.
48214847
// This requires that the size of ty is less than the subtarget's maximum ELEN.
48224848
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48234849
const SDLoc &DL, SelectionDAG &DAG,
48244850
const RISCVSubtarget &Subtarget) {
4851+
4852+
// FIXME: Not only does this optimize the code, it fixes some correctness
4853+
// issues because MIR does not have freeze.
4854+
if (EvenV.isUndef())
4855+
return getWideningSpread(OddV, 2, 1, DL, DAG);
4856+
else if (OddV.isUndef())
4857+
return getWideningSpread(EvenV, 2, 0, DL, DAG);
4858+
48254859
MVT VecVT = EvenV.getSimpleValueType();
48264860
MVT VecContainerVT = VecVT; // <vscale x n x ty>
48274861
// Convert fixed vectors to scalable if needed
@@ -4853,29 +4887,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48534887
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
48544888

48554889
SDValue Interleaved;
4856-
if (OddV.isUndef()) {
4857-
// If OddV is undef, this is a zero extend.
4858-
// FIXME: Not only does this optimize the code, it fixes some correctness
4859-
// issues because MIR does not have freeze.
4860-
Interleaved =
4861-
DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4862-
} else if (Subtarget.hasStdExtZvbb()) {
4890+
if (Subtarget.hasStdExtZvbb()) {
48634891
// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
48644892
SDValue OffsetVec =
48654893
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
48664894
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
48674895
OffsetVec, Passthru, Mask, VL);
4868-
if (!EvenV.isUndef())
4869-
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4870-
Interleaved, EvenV, Passthru, Mask, VL);
4871-
} else if (EvenV.isUndef()) {
4872-
Interleaved =
4873-
DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4874-
4875-
SDValue OffsetVec =
4876-
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4877-
Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4878-
Interleaved, OffsetVec, Passthru, Mask, VL);
4896+
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4897+
Interleaved, EvenV, Passthru, Mask, VL);
48794898
} else {
48804899
// FIXME: We should freeze the odd vector here. We already handled the case
48814900
// of provably undef/poison above.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,12 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
247247
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
248248
; V128-NEXT: vslidedown.vi v24, v16, 16
249249
; V128-NEXT: li a0, 32
250-
; V128-NEXT: vslidedown.vi v0, v8, 16
251250
; V128-NEXT: lui a1, 699051
251+
; V128-NEXT: vslidedown.vi v0, v8, 16
252252
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
253253
; V128-NEXT: vzext.vf2 v8, v24
254-
; V128-NEXT: vzext.vf2 v24, v0
255254
; V128-NEXT: addi a1, a1, -1366
255+
; V128-NEXT: vzext.vf2 v24, v0
256256
; V128-NEXT: vmv.s.x v0, a1
257257
; V128-NEXT: vsll.vx v8, v8, a0
258258
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,12 +410,12 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
410410
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
411411
; V128-NEXT: vslidedown.vi v24, v16, 16
412412
; V128-NEXT: li a0, 32
413-
; V128-NEXT: vslidedown.vi v0, v8, 16
414413
; V128-NEXT: lui a1, 699051
414+
; V128-NEXT: vslidedown.vi v0, v8, 16
415415
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
416416
; V128-NEXT: vzext.vf2 v8, v24
417-
; V128-NEXT: vzext.vf2 v24, v0
418417
; V128-NEXT: addi a1, a1, -1366
418+
; V128-NEXT: vzext.vf2 v24, v0
419419
; V128-NEXT: vmv.s.x v0, a1
420420
; V128-NEXT: vsll.vx v8, v8, a0
421421
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma

0 commit comments

Comments
 (0)