Skip to content

Commit fa1a682

Browse files
committed
[AArch64] Improve fptosi.sat vector lowering
Similar to D111236, this improves the lowering of vector fptosi.sat and fptoui.sat by using legal converts and then saturating the converted result further with min/max. f64 vectors are excluded for the moment because this lowering produces worse code in places than unrolling does. Differential Revision: https://reviews.llvm.org/D111787
1 parent c450e47 commit fa1a682

File tree

3 files changed

+745
-2247
lines changed

3 files changed

+745
-2247
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3422,30 +3422,54 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
34223422
if (DstVT.isScalableVector())
34233423
return SDValue();
34243424

3425-
// TODO: Saturate to SatWidth explicitly.
3426-
if (SatWidth != DstElementWidth)
3427-
return SDValue();
3428-
34293425
EVT SrcElementVT = SrcVT.getVectorElementType();
34303426

3431-
// In the absence of FP16 support, promote f16 to f32, like
3432-
// LowerVectorFP_TO_INT().
3433-
if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3427+
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
3428+
if (SrcElementVT == MVT::f16 &&
3429+
(!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
34343430
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3435-
return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3436-
DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
3437-
Op.getOperand(1));
3438-
}
3431+
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
3432+
SrcVT = F32VT;
3433+
SrcElementVT = MVT::f32;
3434+
SrcElementWidth = 32;
3435+
} else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
3436+
SrcElementVT != MVT::f16)
3437+
return SDValue();
34393438

3439+
SDLoc DL(Op);
34403440
// Cases that we can emit directly.
3441-
if ((SrcElementWidth == DstElementWidth) &&
3442-
(SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
3443-
(SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
3444-
return Op;
3441+
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
3442+
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
3443+
DAG.getValueType(DstVT.getScalarType()));
3444+
3445+
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
3446+
// result. This is only valid if the legal cvt is larger than the saturate
3447+
// width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
3448+
// (at least until sqxtn is selected).
3449+
if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
3450+
return SDValue();
3451+
3452+
EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
3453+
SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
3454+
DAG.getValueType(IntVT.getScalarType()));
3455+
SDValue Sat;
3456+
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
3457+
SDValue MinC = DAG.getConstant(
3458+
APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
3459+
IntVT);
3460+
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
3461+
SDValue MaxC = DAG.getConstant(
3462+
APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
3463+
IntVT);
3464+
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
3465+
} else {
3466+
SDValue MinC = DAG.getConstant(
3467+
APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
3468+
IntVT);
3469+
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
34453470
}
34463471

3447-
// For all other cases, fall back on the expanded form.
3448-
return SDValue();
3472+
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
34493473
}
34503474

34513475
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,

0 commit comments

Comments
 (0)