@@ -8743,6 +8743,33 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
8743
8743
return LowerShift(Res, Subtarget, DAG);
8744
8744
}
8745
8745
8746
+ /// Attempt to lower a BUILD_VECTOR of scalar values to a shuffle of splats
8747
+ /// representing a blend.
8748
+ static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
8749
+ X86Subtarget const &Subtarget,
8750
+ SelectionDAG &DAG) {
8751
+ if (!Subtarget.hasAVX())
8752
+ return {};
8753
+
8754
+ auto VT = BVOp->getSimpleValueType(0u);
8755
+
8756
+ if (VT == MVT::v4f64 && BVOp->getNumOperands() == 4u) {
8757
+ SDValue Op0 = BVOp->getOperand(0u);
8758
+ SDValue Op1 = BVOp->getOperand(1u);
8759
+ SDValue Op2 = BVOp->getOperand(2u);
8760
+ SDValue Op3 = BVOp->getOperand(3u);
8761
+
8762
+ // Match X,Y,Y,X inputs.
8763
+ if (Op0 == Op3 && Op1 == Op2 && Op0 != Op1) {
8764
+ auto NewOp0 = DAG.getSplatBuildVector(VT, DL, Op0);
8765
+ auto NewOp1 = DAG.getSplatBuildVector(VT, DL, Op1);
8766
+ return DAG.getVectorShuffle(VT, DL, NewOp0, NewOp1, {0, 5, 6, 3});
8767
+ }
8768
+ }
8769
+
8770
+ return {};
8771
+ }
8772
+
8746
8773
/// Create a vector constant without a load. SSE/AVX provide the bare minimum
8747
8774
/// functionality to do this, so it's all zeros, all ones, or some derivation
8748
8775
/// that is cheap to calculate.
@@ -9093,39 +9120,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
9093
9120
MVT OpEltVT = Op.getOperand(0).getSimpleValueType();
9094
9121
unsigned NumElems = Op.getNumOperands();
9095
9122
9096
- // Match BUILD_VECTOR of scalars that we can lower to X86ISD::BLENDI via
9097
- // shuffles.
9098
- //
9099
- // v4f64 = BUILD_VECTOR X,Y,Y,X
9100
- // >>>
9101
- // t1: v4f64 = BUILD_VECTOR X,u,u,u
9102
- // t3: v4f64 = vector_shuffle<0,u,u,0> t1, u
9103
- // t2: v4f64 = BUILD_VECTOR Y,u,u,u
9104
- // t4: v4f64 = vector_shuffle<u,0,0,u> t2, u
9105
- // v4f64 = vector_shuffle<0,5,6,3> t3, t4
9106
- //
9107
- if (Subtarget.hasAVX() && VT == MVT::v4f64 && Op->getNumOperands() == 4u) {
9108
- auto Op0 = Op->getOperand(0u);
9109
- auto Op1 = Op->getOperand(1u);
9110
- auto Op2 = Op->getOperand(2u);
9111
- auto Op3 = Op->getOperand(3u);
9112
-
9113
- // Match X,Y,Y,X inputs.
9114
- if (Op0 == Op3 && Op1 == Op2 && Op0 != Op1) {
9115
- auto PsnVal = DAG.getUNDEF(MVT::f64);
9116
-
9117
- auto NewOp0 = DAG.getBuildVector(VT, dl, {Op0, PsnVal, PsnVal, PsnVal});
9118
- NewOp0 = DAG.getVectorShuffle(VT, dl, NewOp0, DAG.getUNDEF(VT),
9119
- {0, -1, -1, 0});
9120
-
9121
- auto NewOp1 = DAG.getBuildVector(VT, dl, {Op1, PsnVal, PsnVal, PsnVal});
9122
- NewOp1 = DAG.getVectorShuffle(VT, dl, NewOp1, DAG.getUNDEF(VT),
9123
- {-1, 0, 0, -1});
9124
-
9125
- return DAG.getVectorShuffle(VT, dl, NewOp0, NewOp1, {0, 5, 6, 3});
9126
- }
9127
- }
9128
-
9129
9123
// Generate vectors for predicate vectors.
9130
9124
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
9131
9125
return LowerBUILD_VECTORvXi1(Op, dl, DAG, Subtarget);
@@ -9238,6 +9232,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
9238
9232
return Broadcast;
9239
9233
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG))
9240
9234
return BitOp;
9235
+ if (SDValue Blend = lowerBuildVectorAsBlend(BV, dl, Subtarget, DAG))
9236
+ return Blend;
9241
9237
9242
9238
unsigned NumZero = ZeroMask.popcount();
9243
9239
unsigned NumNonZero = NonZeroMask.popcount();
0 commit comments