@@ -3422,30 +3422,54 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3422
3422
if (DstVT.isScalableVector())
3423
3423
return SDValue();
3424
3424
3425
- // TODO: Saturate to SatWidth explicitly.
3426
- if (SatWidth != DstElementWidth)
3427
- return SDValue();
3428
-
3429
3425
EVT SrcElementVT = SrcVT.getVectorElementType();
3430
3426
3431
- // In the absence of FP16 support, promote f16 to f32, like
3432
- // LowerVectorFP_TO_INT().
3433
- if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3427
+ // In the absence of FP16 support, promote f16 to f32 and saturate the result.
3428
+ if (SrcElementVT == MVT::f16 &&
3429
+ ( !Subtarget->hasFullFP16() || DstElementWidth > 16 )) {
3434
3430
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3435
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3436
- DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
3437
- Op.getOperand(1));
3438
- }
3431
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
3432
+ SrcVT = F32VT;
3433
+ SrcElementVT = MVT::f32;
3434
+ SrcElementWidth = 32;
3435
+ } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
3436
+ SrcElementVT != MVT::f16)
3437
+ return SDValue();
3439
3438
3439
+ SDLoc DL(Op);
3440
3440
// Cases that we can emit directly.
3441
- if ((SrcElementWidth == DstElementWidth) &&
3442
- (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
3443
- (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
3444
- return Op;
3441
+ if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
3442
+ return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
3443
+ DAG.getValueType(DstVT.getScalarType()));
3444
+
3445
+ // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
3446
+ // result. This is only valid if the legal cvt is larger than the saturate
3447
+ // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
3448
+ // (at least until sqxtn is selected).
3449
+ if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
3450
+ return SDValue();
3451
+
3452
+ EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
3453
+ SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
3454
+ DAG.getValueType(IntVT.getScalarType()));
3455
+ SDValue Sat;
3456
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
3457
+ SDValue MinC = DAG.getConstant(
3458
+ APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
3459
+ IntVT);
3460
+ SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
3461
+ SDValue MaxC = DAG.getConstant(
3462
+ APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
3463
+ IntVT);
3464
+ Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
3465
+ } else {
3466
+ SDValue MinC = DAG.getConstant(
3467
+ APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
3468
+ IntVT);
3469
+ Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
3445
3470
}
3446
3471
3447
- // For all other cases, fall back on the expanded form.
3448
- return SDValue();
3472
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
3449
3473
}
3450
3474
3451
3475
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
0 commit comments