@@ -15990,14 +15990,13 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
15990
15990
}
15991
15991
15992
15992
/// Helper to recursively truncate vector elements in half with PACKSS.
15993
- /// It makes use of the fact that vector comparison results will be all-zeros
15994
- /// or all-ones to prevent the PACKSS from saturating the results.
15993
+ /// It makes use of the fact that vectors with enough leading sign bits
15994
+ /// prevent the PACKSS from saturating the results.
15995
15995
/// AVX2 (Int256) sub-targets require extra shuffling as the PACKSS operates
15996
15996
/// within each 128-bit lane.
15997
- static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In,
15998
- const SDLoc &DL,
15999
- SelectionDAG &DAG,
16000
- const X86Subtarget &Subtarget) {
15997
+ static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
15998
+ SelectionDAG &DAG,
15999
+ const X86Subtarget &Subtarget) {
16001
16000
// Requires SSE2 but AVX512 has fast truncate.
16002
16001
if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
16003
16002
return SDValue();
@@ -16065,18 +16064,18 @@ static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In,
16065
16064
// If 512bit -> 128bit truncate another stage.
16066
16065
EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
16067
16066
Res = DAG.getBitcast(PackedVT, Res);
16068
- return truncateVectorCompareWithPACKSS (DstVT, Res, DL, DAG, Subtarget);
16067
+ return truncateVectorWithPACKSS (DstVT, Res, DL, DAG, Subtarget);
16069
16068
}
16070
16069
16071
16070
// Recursively pack lower/upper subvectors, concat result and pack again.
16072
16071
assert(SrcSizeInBits >= 512 && "Expected 512-bit vector or greater");
16073
16072
EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumSubElts);
16074
- Lo = truncateVectorCompareWithPACKSS (PackedVT, Lo, DL, DAG, Subtarget);
16075
- Hi = truncateVectorCompareWithPACKSS (PackedVT, Hi, DL, DAG, Subtarget);
16073
+ Lo = truncateVectorWithPACKSS (PackedVT, Lo, DL, DAG, Subtarget);
16074
+ Hi = truncateVectorWithPACKSS (PackedVT, Hi, DL, DAG, Subtarget);
16076
16075
16077
16076
PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
16078
16077
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
16079
- return truncateVectorCompareWithPACKSS (DstVT, Res, DL, DAG, Subtarget);
16078
+ return truncateVectorWithPACKSS (DstVT, Res, DL, DAG, Subtarget);
16080
16079
}
16081
16080
16082
16081
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
@@ -16140,7 +16139,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
16140
16139
16141
16140
// Truncate with PACKSS if every input element is known to be all-zeros or all-ones (all sign bits).
16142
16141
if (InVT.getScalarSizeInBits() == DAG.ComputeNumSignBits(In))
16143
- if (SDValue V = truncateVectorCompareWithPACKSS (VT, In, DL, DAG, Subtarget))
16142
+ if (SDValue V = truncateVectorWithPACKSS (VT, In, DL, DAG, Subtarget))
16144
16143
return V;
16145
16144
16146
16145
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
@@ -34328,7 +34327,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL,
34328
34327
if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
34329
34328
return SDValue();
34330
34329
34331
- return truncateVectorCompareWithPACKSS (VT, In, DL, DAG, Subtarget);
34330
+ return truncateVectorWithPACKSS (VT, In, DL, DAG, Subtarget);
34332
34331
}
34333
34332
34334
34333
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
0 commit comments