-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DAGCombiner] Extend FP-to-Int cast without requiring nsz #161093
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
51bf419
cccc073
37413f3
b127081
36447e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18862,27 +18862,57 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { | |
|
|
||
| static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, | ||
| const TargetLowering &TLI) { | ||
| // We only do this if the target has legal ftrunc. Otherwise, we'd likely be | ||
| // replacing casts with a libcall. We also must be allowed to ignore -0.0 | ||
| // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer | ||
| // conversions would return +0.0. | ||
| // We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc. | ||
| // If NoSignedZerosFPMath is enabled, this is a direct replacement. | ||
| // Otherwise, for strict math, we must handle edge cases: | ||
| // 1. For signed conversions, clamp out-of-range values to the valid | ||
| // integer range before the trunc. | ||
| // 2. For unsigned conversions, use FABS. A negative float becomes integer 0, | ||
| // which must convert back to +0.0. FTRUNC on its own could produce -0.0. | ||
|
|
||
| // FIXME: We should be able to use node-level FMF here. | ||
| // TODO: If strict math, should we use FABS (+ range check for signed cast)? | ||
| EVT VT = N->getValueType(0); | ||
| if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || | ||
| !DAG.getTarget().Options.NoSignedZerosFPMath) | ||
| if (!TLI.isOperationLegal(ISD::FTRUNC, VT)) | ||
| return SDValue(); | ||
|
|
||
| // fptosi/fptoui round towards zero, so converting from FP to integer and | ||
| // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X | ||
| SDValue N0 = N->getOperand(0); | ||
| if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && | ||
| N0.getOperand(0).getValueType() == VT) | ||
| return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); | ||
| N0.getOperand(0).getValueType() == VT) { | ||
| if (DAG.getTarget().Options.NoSignedZerosFPMath) | ||
| return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); | ||
|
|
||
| // Strict math: clamp to the signed integer range before truncating. | ||
| unsigned IntWidth = N0.getValueSizeInBits(); | ||
| APInt APMax = APInt::getSignedMaxValue(IntWidth); | ||
| APInt APMin = APInt::getSignedMinValue(IntWidth); | ||
|
|
||
| APFloat MaxAPF(VT.getFltSemantics()); | ||
| MaxAPF.convertFromAPInt(APMax, true, APFloat::rmTowardZero); | ||
| APFloat MinAPF(VT.getFltSemantics()); | ||
| MinAPF.convertFromAPInt(APMin, true, APFloat::rmTowardZero); | ||
|
|
||
| SDValue MaxFP = DAG.getConstantFP(MaxAPF, DL, VT); | ||
| SDValue MinFP = DAG.getConstantFP(MinAPF, DL, VT); | ||
|
|
||
| SDValue Clamped = DAG.getNode( | ||
| ISD::FMINNUM, DL, VT, | ||
| DAG.getNode(ISD::FMAXNUM, DL, VT, N0->getOperand(0), MinFP), MaxFP); | ||
| return DAG.getNode(ISD::FTRUNC, DL, VT, Clamped); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be easier to handle the signed and unsigned cases in separate PRs, the signed case is less obviously profitable
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, will focus on handling unsigned cases in this PR. |
||
|
|
||
| if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && | ||
| N0.getOperand(0).getValueType() == VT) | ||
| return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); | ||
| N0.getOperand(0).getValueType() == VT) { | ||
| if (DAG.getTarget().Options.NoSignedZerosFPMath) | ||
| return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); | ||
|
|
||
| // Strict math: use FABS to handle negative inputs correctly. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, nneg flag does not help! The problematic cases are inputs like -0.5. In that case the input to uitofp would be 0 which is not negative.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, my fault, seems both nneg and nsz are requied but in current implementation it is impossible 🥲
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! would address this! |
||
| if (TLI.isFAbsFree(VT)) { | ||
| SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0)); | ||
| return DAG.getNode(ISD::FTRUNC, DL, VT, Abs); | ||
| } | ||
| } | ||
|
|
||
| return SDValue(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,8 +4,13 @@ | |
| define double @t1(double %x) { | ||
| ; CHECK-LABEL: t1: | ||
| ; CHECK: // %bb.0: // %entry | ||
| ; CHECK-NEXT: fcvtzs d0, d0 | ||
| ; CHECK-NEXT: scvtf d0, d0 | ||
| ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 | ||
| ; CHECK-NEXT: fmov d1, x8 | ||
| ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff | ||
| ; CHECK-NEXT: fmaxnm d0, d0, d1 | ||
| ; CHECK-NEXT: fmov d1, x8 | ||
| ; CHECK-NEXT: fminnm d0, d0, d1 | ||
| ; CHECK-NEXT: frintz d0, d0 | ||
| ; CHECK-NEXT: ret | ||
| entry: | ||
| %conv = fptosi double %x to i64 | ||
|
|
@@ -16,8 +21,12 @@ entry: | |
| define float @t2(float %x) { | ||
| ; CHECK-LABEL: t2: | ||
| ; CHECK: // %bb.0: // %entry | ||
| ; CHECK-NEXT: fcvtzs s0, s0 | ||
| ; CHECK-NEXT: scvtf s0, s0 | ||
| ; CHECK-NEXT: movi v1.2s, #207, lsl #24 | ||
|
||
| ; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff | ||
| ; CHECK-NEXT: fmaxnm s0, s0, s1 | ||
| ; CHECK-NEXT: fmov s1, w8 | ||
| ; CHECK-NEXT: fminnm s0, s0, s1 | ||
| ; CHECK-NEXT: frintz s0, s0 | ||
| ; CHECK-NEXT: ret | ||
| entry: | ||
| %conv = fptosi float %x to i32 | ||
|
|
@@ -28,8 +37,13 @@ entry: | |
| define half @t3(half %x) { | ||
| ; CHECK-LABEL: t3: | ||
| ; CHECK: // %bb.0: // %entry | ||
| ; CHECK-NEXT: fcvtzs h0, h0 | ||
| ; CHECK-NEXT: scvtf h0, h0 | ||
| ; CHECK-NEXT: mov w8, #64511 // =0xfbff | ||
| ; CHECK-NEXT: fmov h1, w8 | ||
| ; CHECK-NEXT: mov w8, #31743 // =0x7bff | ||
| ; CHECK-NEXT: fmaxnm h0, h0, h1 | ||
| ; CHECK-NEXT: fmov h1, w8 | ||
| ; CHECK-NEXT: fminnm h0, h0, h1 | ||
| ; CHECK-NEXT: frintz h0, h0 | ||
| ; CHECK-NEXT: ret | ||
| entry: | ||
| %conv = fptosi half %x to i32 | ||
|
|
@@ -170,8 +184,14 @@ entry: | |
| define i64 @tests_f64_multiuse(double %x) { | ||
| ; CHECK-LABEL: tests_f64_multiuse: | ||
| ; CHECK: // %bb.0: // %entry | ||
| ; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 | ||
| ; CHECK-NEXT: fmov d1, x8 | ||
| ; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff | ||
| ; CHECK-NEXT: fmov d2, x8 | ||
| ; CHECK-NEXT: fcvtzs x8, d0 | ||
| ; CHECK-NEXT: scvtf d1, x8 | ||
| ; CHECK-NEXT: fmaxnm d1, d0, d1 | ||
| ; CHECK-NEXT: fminnm d1, d1, d2 | ||
| ; CHECK-NEXT: frintz d1, d1 | ||
| ; CHECK-NEXT: fcmp d0, d1 | ||
| ; CHECK-NEXT: csel x0, x8, xzr, eq | ||
| ; CHECK-NEXT: ret | ||
|
|
@@ -186,8 +206,13 @@ entry: | |
| define i32 @tests_f32_multiuse(float %x) { | ||
| ; CHECK-LABEL: tests_f32_multiuse: | ||
| ; CHECK: // %bb.0: // %entry | ||
| ; CHECK-NEXT: movi v1.2s, #207, lsl #24 | ||
| ; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff | ||
| ; CHECK-NEXT: fmov s2, w8 | ||
| ; CHECK-NEXT: fcvtzs w8, s0 | ||
| ; CHECK-NEXT: scvtf s1, w8 | ||
| ; CHECK-NEXT: fmaxnm s1, s0, s1 | ||
| ; CHECK-NEXT: fminnm s1, s1, s2 | ||
| ; CHECK-NEXT: frintz s1, s1 | ||
| ; CHECK-NEXT: fcmp s0, s1 | ||
| ; CHECK-NEXT: csel w0, w8, wzr, eq | ||
| ; CHECK-NEXT: ret | ||
|
|
@@ -202,8 +227,14 @@ entry: | |
| define i32 @tests_f16_multiuse(half %x) { | ||
| ; CHECK-LABEL: tests_f16_multiuse: | ||
| ; CHECK: // %bb.0: // %entry | ||
| ; CHECK-NEXT: mov w8, #64511 // =0xfbff | ||
| ; CHECK-NEXT: fmov h1, w8 | ||
| ; CHECK-NEXT: mov w8, #31743 // =0x7bff | ||
| ; CHECK-NEXT: fmov h2, w8 | ||
| ; CHECK-NEXT: fcvtzs w8, h0 | ||
| ; CHECK-NEXT: scvtf h1, w8 | ||
| ; CHECK-NEXT: fmaxnm h1, h0, h1 | ||
| ; CHECK-NEXT: fminnm h1, h1, h2 | ||
| ; CHECK-NEXT: frintz h1, h1 | ||
| ; CHECK-NEXT: fcmp h0, h1 | ||
| ; CHECK-NEXT: csel w0, w8, wzr, eq | ||
| ; CHECK-NEXT: ret | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NoSignedZerosFPMathmight be not enough to cover the range check part. BTW, the constrained floating point intrinsics are converted into the SDNode with prefix STRICT, which might cause confusion.