-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Extend combinei64TruncSrlAdd to handle patterns with or and xor
#128435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fe8151f
5574dce
55189cc
d2f27bf
1ba8e32
696f3a5
7caceb1
4388e73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53733,36 +53733,35 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, | |
| return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src); | ||
| } | ||
|
|
||
| // Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to | ||
| // (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able | ||
| // to avoid generating code with MOVABS and large constants in certain cases. | ||
| static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG, | ||
| const SDLoc &DL) { | ||
| using namespace llvm::SDPatternMatch; | ||
| // Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to | ||
| // (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we | ||
| // are able to avoid generating code with MOVABS and large constants in certain | ||
| // cases. | ||
| static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG, | ||
| const SDLoc &DL) { | ||
|
|
||
| SDValue AddLhs; | ||
| APInt AddConst, SrlConst; | ||
| if (VT != MVT::i32 || | ||
| !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64), | ||
| m_Srl(m_OneUse(m_Add(m_Value(AddLhs), | ||
| m_ConstInt(AddConst))), | ||
| m_ConstInt(SrlConst))))) | ||
| return SDValue(); | ||
| SDValue Op = N.getOperand(0); | ||
| APInt OpConst = Op.getConstantOperandAPInt(1); | ||
| APInt SrlConst = N.getConstantOperandAPInt(1); | ||
| uint64_t SrlConstVal = SrlConst.getZExtValue(); | ||
| unsigned Opcode = Op.getOpcode(); | ||
|
|
||
| if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue()) | ||
| if (SrlConst.ule(32) || | ||
| (Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal)) | ||
| return SDValue(); | ||
|
|
||
| SDValue AddLHSSrl = | ||
| DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1)); | ||
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl); | ||
| SDValue OpLhsSrl = | ||
| DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1)); | ||
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl); | ||
|
|
||
| APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits()); | ||
| SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT); | ||
| SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst); | ||
| APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits()); | ||
|
||
| SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT); | ||
| SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst); | ||
| EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal); | ||
|
|
||
| EVT CleanUpVT = | ||
| EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); | ||
| return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT); | ||
| if (Opcode == ISD::ADD) | ||
| return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT); | ||
| return NewOpNode; | ||
| } | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. APInt::extractBits ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I'm not mistaken, by using |
||
| /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify | ||
|
|
@@ -53810,11 +53809,21 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, | |
| if (!Src.hasOneUse()) | ||
| return SDValue(); | ||
|
|
||
| if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL)) | ||
| return R; | ||
| if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL && | ||
| isa<ConstantSDNode>(Src.getOperand(1))) { | ||
|
|
||
| unsigned SrcOpOpcode = Src.getOperand(0).getOpcode(); | ||
| if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR && | ||
| SrcOpOpcode != ISD::XOR) || | ||
| !isa<ConstantSDNode>(Src.getOperand(0).getOperand(1))) | ||
| return SDValue(); | ||
|
||
|
|
||
| if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL)) | ||
| return R; | ||
|
|
||
| return SDValue(); | ||
| } | ||
|
|
||
| // Only support vector truncation for now. | ||
| // TODO: i64 scalar math would benefit as well. | ||
| if (!VT.isVector()) | ||
| return SDValue(); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need to use switch since we have limit to ADD/OR/XOR in
combineTruncatedArithmetic.