-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Lower mathlib call ldexp into scalef when avx512 is enabled #166839
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
9f7bddb
484cea4
8649bce
1806c40
c30244b
e1bcedc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -1829,6 +1829,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |||||
| setOperationAction(ISD::FCOPYSIGN, VT, Custom); | ||||||
| setOperationAction(ISD::FCANONICALIZE, VT, Custom); | ||||||
| } | ||||||
|
|
||||||
| setOperationAction(ISD::LRINT, MVT::v16f32, | ||||||
| Subtarget.hasDQI() ? Legal : Custom); | ||||||
| setOperationAction(ISD::LRINT, MVT::v8f64, | ||||||
|
|
@@ -2101,6 +2102,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |||||
| // These operations are handled on non-VLX by artificially widening in | ||||||
| // isel patterns. | ||||||
|
|
||||||
| for (MVT VT : {MVT::f16, MVT::f32, MVT::f64, MVT::v8f16, MVT::v4f32, | ||||||
| MVT::v2f64, MVT::v16f16, MVT::v8f32, MVT::v4f64, MVT::v32f16, | ||||||
| MVT::v16f32, MVT::v8f64}) | ||||||
| setOperationAction(ISD::FLDEXP, VT, Custom); | ||||||
|
|
||||||
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom); | ||||||
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom); | ||||||
| setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); | ||||||
|
|
@@ -19149,6 +19155,81 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, | |||||
| return SDValue(); | ||||||
| } | ||||||
|
|
||||||
| static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget, | ||||||
| SelectionDAG &DAG) { | ||||||
| SDLoc DL(Op); | ||||||
| SDValue X = Op.getOperand(0); | ||||||
| MVT XTy = X.getSimpleValueType(); | ||||||
| SDValue Exp = Op.getOperand(1); | ||||||
| MVT ExtVT; | ||||||
|
|
||||||
| switch (XTy.SimpleTy) { | ||||||
| default: | ||||||
| return SDValue(); | ||||||
| case MVT::f16: | ||||||
| if (!Subtarget.hasFP16()) | ||||||
| X = DAG.getFPExtendOrRound(X, DL, MVT::f32); | ||||||
| [[fallthrough]]; | ||||||
| case MVT::f32: | ||||||
| case MVT::f64: { | ||||||
| MVT VT = MVT::getVectorVT(X.getSimpleValueType(), | ||||||
| 128 / X.getSimpleValueType().getSizeInBits()); | ||||||
| Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp); | ||||||
| SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, X); | ||||||
| SDValue VExp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Exp); | ||||||
| SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, VT, VX, VExp, VX); | ||||||
|
||||||
| SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0); | ||||||
| if (X.getValueType() != XTy) | ||||||
| Final = DAG.getFPExtendOrRound(Final, DL, XTy); | ||||||
| return Final; | ||||||
|
||||||
| return Final; | |
| return DAG.getFPExtendOrRound(Final, DL, XTy); |
getFPExtendOrRound will just return a no-op for matching types anyway
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why are converting Exp here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean why convert Exp before the widenSubVector? I think for the non-fp16 vector cases, it seems the simplest to convert Exp to fp as oppose to converting WideExp to fp. Otherwise, Exp and X could have different element count.
v4f64 -> v8f64
v4i32 -> v16i32
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My main concern is that after the switch, Exp might be a fp OR a int vector type
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After the switch, the only case where Exp is an integer is for vector fp16, but I do convert it to an fp after widening. I tried converting i16 vector exponents before widening, but that seemed very difficult.
Would it be preferable to convert Exp after the switch statement like so?
case MVT::v8f64:
// Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
if (XTy.getSizeInBits() == 512 || Subtarget.hasVLX()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
}
break;
...
if (X.getValueType().getScalarType() != MVT::f16)
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
SDValue WideX = widenSubVector(X, true, Subtarget, DAG, DL, 512);
SDValue WideExp = widenSubVector(Exp, true, Subtarget, DAG, DL, 512);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alternatively for the vector fp16 cases, I realized it's possible to convert Exp to fp after extending it to i32. So Exp would be fp for all cases after the switch statement.
ExtVT = XTy.changeVectorElementType(MVT::f32);
X = DAG.getFPExtendOrRound(X, DL, ExtVT);
Exp = DAG.getSExtOrTrunc(Exp, DL, ExtVT.changeTypeToInteger());
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, ExtVT, Exp);
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does SCALEF/S take 3 operands?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use DAG.getInsertVectorElt
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should be able to just use widenSubVector and tell it to widen to 512-bits instead of recomputing XVT/ExptVT
RKSimon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this the same as XTy != X.getValueType()?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will fix.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) - don't use if-else chains if every block returns:
if (XTy.isVector()) {
...
return DAG.getExtractSubvector(DL, XTy, Scalef, 0);
}
MVT VT = MVT::getVectorVT(X.getSimpleValueType(),
...
Alternatively you could move the scalar handling into the f32/f64 switch case above (and have f16 fallthrough)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will fix. I'll opt for the latter.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these insertions just getNode(ISD::SCALAR_TO_VECTOR ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I can switch to ISD::SCALAR_TO_VECTOR.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You might be able to use splitVectorOp here?
Uh oh!
There was an error while loading. Please reload this page.