diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c2b2daad1b898..558839ba2dd83 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4816,12 +4816,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
 }
 
+// Given a vector a, b, c, d, return a vector Factor times as long in which
+// source element i sits in lane i*Factor+Index; other lanes are undef. Ex:
+//   a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
+//   undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
+static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
+                                 const SDLoc &DL, SelectionDAG &DAG) {
+
+  MVT VT = V.getSimpleValueType();
+  unsigned EltBits = VT.getScalarSizeInBits();
+  ElementCount EC = VT.getVectorElementCount();
+  V = DAG.getBitcast(VT.changeTypeToInteger(), V);
+
+  MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
+
+  SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
+  // TODO: On rv32, the constant becomes a splat_vector_parts which does not
+  // allow the SHL to fold away if Index is 0.
+  if (Index != 0)
+    Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
+                         DAG.getConstant(EltBits * Index, DL, WideVT));
+  // Bitcast back so the result uses the original (possibly FP) element type.
+  MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
+                                  EC.multiplyCoefficientBy(Factor));
+  return DAG.getBitcast(ResultVT, Result);
+}
+
 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
 // to create an interleaved vector of <[vscale x] n*2 x ty>.
 // This requires that the size of ty is less than the subtarget's maximum ELEN.
 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
                                      const SDLoc &DL, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
+
+  // FIXME: Handling an undef operand here is not only an optimization, it also
+  // fixes some correctness issues because MIR does not have freeze.
+  if (EvenV.isUndef())
+    return getWideningSpread(OddV, 2, 1, DL, DAG);
+  if (OddV.isUndef())
+    return getWideningSpread(EvenV, 2, 0, DL, DAG);
+
   MVT VecVT = EvenV.getSimpleValueType();
   MVT VecContainerVT = VecVT; // <vscale x n x ty>
   // Convert fixed vectors to scalable if needed
@@ -4853,29 +4887,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
   SDValue Passthru = DAG.getUNDEF(WideContainerVT);
 
   SDValue Interleaved;
-  if (OddV.isUndef()) {
-    // If OddV is undef, this is a zero extend.
-    // FIXME: Not only does this optimize the code, it fixes some correctness
-    // issues because MIR does not have freeze.
-    Interleaved =
-        DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
-  } else if (Subtarget.hasStdExtZvbb()) {
+  if (Subtarget.hasStdExtZvbb()) {
     // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
     SDValue OffsetVec =
         DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
     Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
                               OffsetVec, Passthru, Mask, VL);
-    if (!EvenV.isUndef())
-      Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
-                                Interleaved, EvenV, Passthru, Mask, VL);
-  } else if (EvenV.isUndef()) {
-    Interleaved =
-        DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
-
-    SDValue OffsetVec =
-        DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
-    Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
-                              Interleaved, OffsetVec, Passthru, Mask, VL);
+    Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
+                              Interleaved, EvenV, Passthru, Mask, VL);
   } else {
     // FIXME: We should freeze the odd vector here. We already handled the case
     // of provably undef/poison above.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index 97e458e70565c..9b1e7012e217a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -247,12 +247,12 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vslidedown.vi v24, v16, 16 ; V128-NEXT: li a0, 32 -; V128-NEXT: vslidedown.vi v0, v8, 16 ; V128-NEXT: lui a1, 699051 +; V128-NEXT: vslidedown.vi v0, v8, 16 ; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; V128-NEXT: vzext.vf2 v8, v24 -; V128-NEXT: vzext.vf2 v24, v0 ; V128-NEXT: addi a1, a1, -1366 +; V128-NEXT: vzext.vf2 v24, v0 ; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsll.vx v8, v8, a0 ; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index a8eb1f97fd1a2..bdc4e0f6256f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -410,12 +410,12 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vslidedown.vi v24, v16, 16 ; V128-NEXT: li a0, 32 -; V128-NEXT: vslidedown.vi v0, v8, 16 ; V128-NEXT: lui a1, 699051 +; V128-NEXT: vslidedown.vi v0, v8, 16 ; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; V128-NEXT: vzext.vf2 v8, v24 -; V128-NEXT: vzext.vf2 v24, v0 ; V128-NEXT: addi a1, a1, -1366 +; V128-NEXT: vzext.vf2 v24, v0 ; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsll.vx v8, v8, a0 ; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma