@@ -735,6 +735,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
735
735
setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
736
736
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
737
737
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
738
+ setOperationAction(ISD::LRINT, MVT::f16, Expand);
739
+ setOperationAction(ISD::LLRINT, MVT::f16, Expand);
738
740
739
741
// Lower this to MOVMSK plus an AND.
740
742
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
@@ -2309,6 +2311,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
2309
2311
setOperationAction(ISD::FMINIMUMNUM, MVT::f16, Custom);
2310
2312
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
2311
2313
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
2314
+ setOperationAction(ISD::LRINT, MVT::f16, Legal);
2315
+ setOperationAction(ISD::LLRINT, MVT::f16, Legal);
2312
2316
2313
2317
setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
2314
2318
setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
@@ -2356,6 +2360,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
2356
2360
setOperationAction(ISD::FMAXIMUM, MVT::v32f16, Custom);
2357
2361
setOperationAction(ISD::FMINIMUMNUM, MVT::v32f16, Custom);
2358
2362
setOperationAction(ISD::FMAXIMUMNUM, MVT::v32f16, Custom);
2363
+ setOperationAction(ISD::LRINT, MVT::v32f16, Legal);
2364
+ setOperationAction(ISD::LLRINT, MVT::v8f16, Legal);
2359
2365
}
2360
2366
2361
2367
if (Subtarget.hasVLX()) {
@@ -2410,6 +2416,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
2410
2416
setOperationAction(ISD::FMAXIMUM, MVT::v16f16, Custom);
2411
2417
setOperationAction(ISD::FMINIMUMNUM, MVT::v16f16, Custom);
2412
2418
setOperationAction(ISD::FMAXIMUMNUM, MVT::v16f16, Custom);
2419
+ setOperationAction(ISD::LRINT, MVT::v8f16, Legal);
2420
+ setOperationAction(ISD::LRINT, MVT::v16f16, Legal);
2413
2421
}
2414
2422
}
2415
2423
@@ -34080,8 +34088,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
34080
34088
case ISD::LRINT:
34081
34089
if (N->getValueType(0) == MVT::v2i32) {
34082
34090
SDValue Src = N->getOperand(0);
34083
- if (Src.getValueType() == MVT::v2f64)
34084
- Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
34091
+ if (Subtarget.hasFP16() && Src.getValueType() == MVT::v2f16) {
34092
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src,
34093
+ DAG.getUNDEF(MVT::v2f16));
34094
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Src,
34095
+ DAG.getUNDEF(MVT::v4f16));
34096
+ } else if (Src.getValueType() != MVT::v2f64) {
34097
+ return;
34098
+ }
34099
+ Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
34085
34100
return;
34086
34101
}
34087
34102
[[fallthrough]];
@@ -53687,13 +53702,35 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
53687
53702
EVT SrcVT = Src.getValueType();
53688
53703
SDLoc DL(N);
53689
53704
53690
- if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 ||
53691
- SrcVT != MVT::v2f32)
53705
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
53706
+
53707
+ // Let the legalizer expand this if the result type isn't legal yet.
53708
+ if (!TLI.isTypeLegal(VT))
53709
+ return SDValue();
53710
+
53711
+ if ((SrcVT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) ||
53712
+ (SrcVT.getScalarType() == MVT::f32 && !Subtarget.hasDQI()))
53692
53713
return SDValue();
53693
53714
53694
- return DAG.getNode(X86ISD::CVTP2SI, DL, VT,
53695
- DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, Src,
53696
- DAG.getUNDEF(SrcVT)));
53715
+ if (SrcVT == MVT::v2f16) {
53716
+ SrcVT = MVT::v4f16;
53717
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, Src,
53718
+ DAG.getUNDEF(MVT::v2f16));
53719
+ }
53720
+
53721
+ if (SrcVT == MVT::v4f16) {
53722
+ SrcVT = MVT::v8f16;
53723
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, Src,
53724
+ DAG.getUNDEF(MVT::v4f16));
53725
+ } else if (SrcVT == MVT::v2f32) {
53726
+ SrcVT = MVT::v4f32;
53727
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, Src,
53728
+ DAG.getUNDEF(MVT::v2f32));
53729
+ } else {
53730
+ return SDValue();
53731
+ }
53732
+
53733
+ return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
53697
53734
}
53698
53735
53699
53736
// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
0 commit comments