@@ -8783,6 +8783,33 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
8783
8783
return LowerShift(Res, Subtarget, DAG);
8784
8784
}
8785
8785
8786
+ /// Attempt to lower a BUILD_VECTOR of scalar values to a shuffle of splats
8787
+ /// representing a blend.
8788
+ static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
8789
+ X86Subtarget const &Subtarget,
8790
+ SelectionDAG &DAG) {
8791
+ if (!Subtarget.hasAVX())
8792
+ return {};
8793
+
8794
+ auto VT = BVOp->getSimpleValueType(0u);
8795
+
8796
+ if (VT == MVT::v4f64 && BVOp->getNumOperands() == 4u) {
8797
+ SDValue Op0 = BVOp->getOperand(0u);
8798
+ SDValue Op1 = BVOp->getOperand(1u);
8799
+ SDValue Op2 = BVOp->getOperand(2u);
8800
+ SDValue Op3 = BVOp->getOperand(3u);
8801
+
8802
+ // Match X,Y,Y,X inputs.
8803
+ if (Op0 == Op3 && Op1 == Op2 && Op0 != Op1) {
8804
+ auto NewOp0 = DAG.getSplatBuildVector(VT, DL, Op0);
8805
+ auto NewOp1 = DAG.getSplatBuildVector(VT, DL, Op1);
8806
+ return DAG.getVectorShuffle(VT, DL, NewOp0, NewOp1, {0, 5, 6, 3});
8807
+ }
8808
+ }
8809
+
8810
+ return {};
8811
+ }
8812
+
8786
8813
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
8787
8814
/// functionality to do this, so it's all zeros, all ones, or some derivation
8788
8815
/// that is cheap to calculate.
@@ -9133,39 +9160,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
9133
9160
MVT OpEltVT = Op.getOperand(0).getSimpleValueType();
9134
9161
unsigned NumElems = Op.getNumOperands();
9135
9162
9136
- // Match BUILD_VECTOR of scalars that we can lower to X86ISD::BLENDI via
9137
- // shuffles.
9138
- //
9139
- // v4f64 = BUILD_VECTOR X,Y,Y,X
9140
- // >>>
9141
- // t1: v4f64 = BUILD_VECTOR X,u,u,u
9142
- // t3: v4f64 = vector_shuffle<0,u,u,0> t1, u
9143
- // t2: v4f64 = BUILD_VECTOR Y,u,u,u
9144
- // t4: v4f64 = vector_shuffle<u,0,0,u> t2, u
9145
- // v4f64 = vector_shuffle<0,5,6,3> t3, t4
9146
- //
9147
- if (Subtarget.hasAVX() && VT == MVT::v4f64 && Op->getNumOperands() == 4u) {
9148
- auto Op0 = Op->getOperand(0u);
9149
- auto Op1 = Op->getOperand(1u);
9150
- auto Op2 = Op->getOperand(2u);
9151
- auto Op3 = Op->getOperand(3u);
9152
-
9153
- // Match X,Y,Y,X inputs.
9154
- if (Op0 == Op3 && Op1 == Op2 && Op0 != Op1) {
9155
- auto PsnVal = DAG.getUNDEF(MVT::f64);
9156
-
9157
- auto NewOp0 = DAG.getBuildVector(VT, dl, {Op0, PsnVal, PsnVal, PsnVal});
9158
- NewOp0 = DAG.getVectorShuffle(VT, dl, NewOp0, DAG.getUNDEF(VT),
9159
- {0, -1, -1, 0});
9160
-
9161
- auto NewOp1 = DAG.getBuildVector(VT, dl, {Op1, PsnVal, PsnVal, PsnVal});
9162
- NewOp1 = DAG.getVectorShuffle(VT, dl, NewOp1, DAG.getUNDEF(VT),
9163
- {-1, 0, 0, -1});
9164
-
9165
- return DAG.getVectorShuffle(VT, dl, NewOp0, NewOp1, {0, 5, 6, 3});
9166
- }
9167
- }
9168
-
9169
9163
// Generate vectors for predicate vectors.
9170
9164
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
9171
9165
return LowerBUILD_VECTORvXi1(Op, dl, DAG, Subtarget);
@@ -9278,6 +9272,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
9278
9272
return Broadcast;
9279
9273
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
9280
9274
return BitOp;
9275
+ if (SDValue Blend = lowerBuildVectorAsBlend(BV, dl, Subtarget, DAG))
9276
+ return Blend;
9281
9277
9282
9278
unsigned NumZero = ZeroMask.popcount();
9283
9279
unsigned NumNonZero = NonZeroMask.popcount();
0 commit comments