diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index de7d9e28bb93a..47e76e87726ac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -626,6 +626,7 @@ namespace { SDValue CombineZExtLogicopShiftLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); SDValue combineFMulOrFDivWithIntPow2(SDNode *N); + SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf); SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex); @@ -26124,8 +26125,7 @@ static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef Mask) { /// If a shuffle inserts exactly one element from a source vector operand into /// another vector operand and we can access the specified element as a scalar, /// then we can eliminate the shuffle. -static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, - SelectionDAG &DAG) { +SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) { // First, check if we are taking one element of a vector and shuffling that // element into another vector. ArrayRef Mask = Shuf->getMask(); @@ -26148,7 +26148,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, // Now see if we can access that element as a scalar via a real insert element // instruction. // TODO: We can try harder to locate the element as a scalar. Examples: it - // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. + // could be an operand of BUILD_VECTOR, or a constant. assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && "Shuffle mask value must be from operand 0"); @@ -26171,6 +26171,16 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, Op1, Elt, NewInsIndex); } + if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType())) + return SDValue(); + + if (sd_match(Op0, m_UnaryOp(ISD::SCALAR_TO_VECTOR, m_Value(Elt))) && + Mask[ShufOp0Index] == 0) { + SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), + Op1, Elt, NewInsIndex); + } + return SDValue(); } @@ -26242,7 +26252,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } - if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) + if (SDValue InsElt = replaceShuffleOfInsert(SVN)) return InsElt; // A shuffle of a single vector that is a splatted value can always be folded. diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll index 402a4f34e62b2..d98b78dfdd3b0 100644 --- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll +++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll @@ -239,13 +239,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; ; CHECK-LE-P9-LABEL: test_none_v4i32: ; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: li r3, 0 -; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2 -; CHECK-LE-P9-NEXT: mtfprwz f0, r3 ; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1 +; CHECK-LE-P9-NEXT: lxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: xxperm v2, v2, vs0 ; CHECK-LE-P9-NEXT: stxv v2, 0(r5) ; CHECK-LE-P9-NEXT: blr ; @@ -263,14 +260,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; ; CHECK-BE-P9-LABEL: test_none_v4i32: ; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: li r3, 0 -; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2 -; CHECK-BE-P9-NEXT: mtfprwz f0, r3 ; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) -; CHECK-BE-P9-NEXT: xxperm vs0, v2, vs1 -; CHECK-BE-P9-NEXT: stxv vs0, 0(r5) +; CHECK-BE-P9-NEXT: lxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: xxperm v2, v2, vs0 +; CHECK-BE-P9-NEXT: stxv v2, 0(r5) ; CHECK-BE-P9-NEXT: blr ; ; CHECK-AIX-64-P8-LABEL: test_none_v4i32: @@ -286,13 +280,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; ; CHECK-AIX-64-P9-LABEL: test_none_v4i32: ; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: li r4, 0 -; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 -; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 ; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4) -; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1 -; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxperm v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) ; CHECK-AIX-64-P9-NEXT: blr ; ; CHECK-AIX-32-P8-LABEL: test_none_v4i32: @@ -308,13 +299,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; ; CHECK-AIX-32-P9-LABEL: test_none_v4i32: ; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 -; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) -; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4 ; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0 -; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4) -; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1 -; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxperm v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) ; CHECK-AIX-32-P9-NEXT: blr entry: %0 = extractelement <2 x i32> %vec, i64 0