-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Extend combinei64TruncSrlAdd to handle patterns with or and xor
#128435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
fe8151f
5574dce
55189cc
d2f27bf
1ba8e32
696f3a5
7caceb1
4388e73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53733,36 +53733,42 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG, | |
| return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src); | ||
| } | ||
|
|
||
| // Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to | ||
| // (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able | ||
| // to avoid generating code with MOVABS and large constants in certain cases. | ||
| static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG, | ||
| const SDLoc &DL) { | ||
| // Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns to | ||
| // (binop (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are | ||
| // able to avoid generating code with MOVABS and large constants in certain | ||
| // cases. | ||
| static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG, | ||
| const SDLoc &DL) { | ||
| using namespace llvm::SDPatternMatch; | ||
|
|
||
| SDValue AddLhs; | ||
| APInt AddConst, SrlConst; | ||
| SDValue BinopLhs; | ||
| APInt BinopConst, SrlConst; | ||
| if (VT != MVT::i32 || | ||
| !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64), | ||
| m_Srl(m_OneUse(m_Add(m_Value(AddLhs), | ||
| m_ConstInt(AddConst))), | ||
| m_ConstInt(SrlConst))))) | ||
| !sd_match( | ||
| N, | ||
|
||
| m_AllOf(m_SpecificVT(MVT::i64), | ||
| m_Srl(m_OneUse(m_AnyOf( | ||
| m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)), | ||
| m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)), | ||
| m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))), | ||
|
||
| m_ConstInt(SrlConst))))) | ||
| return SDValue(); | ||
|
|
||
| if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue()) | ||
| if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue()) | ||
|
||
| return SDValue(); | ||
|
|
||
| SDValue AddLHSSrl = | ||
| DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1)); | ||
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl); | ||
| SDValue BinopLHSSrl = | ||
| DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1)); | ||
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl); | ||
|
|
||
| APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits()); | ||
| SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT); | ||
| SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst); | ||
| APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits()); | ||
| SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT); | ||
| SDValue NewBinopNode = | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `APInt::extractBits`?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If I'm not mistaken, by using |
||
| DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst); | ||
|
|
||
| EVT CleanUpVT = | ||
| EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue()); | ||
| return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT); | ||
| return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT); | ||
dtcxzyw marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify | ||
|
|
@@ -53810,11 +53816,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, | |
| if (!Src.hasOneUse()) | ||
| return SDValue(); | ||
|
|
||
| if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL)) | ||
| if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL)) | ||
|
||
| return R; | ||
|
|
||
| // Only support vector truncation for now. | ||
| // TODO: i64 scalar math would benefit as well. | ||
| if (!VT.isVector()) | ||
| return SDValue(); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -128,6 +128,61 @@ define i32 @test_trunc_add(i64 %x) { | |
| ret i32 %conv | ||
| } | ||
|
|
||
| define i32 @test_trunc_sub(i64 %x) { | ||
| ; X64-LABEL: test_trunc_sub: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: shrq $48, %rdi | ||
| ; X64-NEXT: addl $65522, %edi # imm = 0xFFF2 | ||
| ; X64-NEXT: movzwl %di, %eax | ||
| ; X64-NEXT: retq | ||
| %sub = sub i64 %x, 3940649673949184 | ||
| %shr = lshr i64 %sub, 48 | ||
|
||
| %conv = trunc i64 %shr to i32 | ||
| ret i32 %conv | ||
| } | ||
|
|
||
| define i32 @test_trunc_and(i64 %x) { | ||
| ; X64-LABEL: test_trunc_and: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: movq %rdi, %rax | ||
| ; X64-NEXT: shrq $48, %rax | ||
| ; X64-NEXT: andl $14, %eax | ||
| ; X64-NEXT: # kill: def $eax killed $eax killed $rax | ||
| ; X64-NEXT: retq | ||
| %and = and i64 %x, 3940649673949184 | ||
| %shr = lshr i64 %and, 48 | ||
| %conv = trunc i64 %shr to i32 | ||
| ret i32 %conv | ||
| } | ||
|
|
||
| define i32 @test_trunc_or(i64 %x) { | ||
| ; X64-LABEL: test_trunc_or: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: movq %rdi, %rax | ||
| ; X64-NEXT: shrq $48, %rax | ||
| ; X64-NEXT: orl $14, %eax | ||
| ; X64-NEXT: # kill: def $eax killed $eax killed $rax | ||
| ; X64-NEXT: retq | ||
| %or = or i64 %x, 3940649673949184 | ||
| %shr = lshr i64 %or, 48 | ||
| %conv = trunc i64 %shr to i32 | ||
| ret i32 %conv | ||
| } | ||
|
|
||
| define i32 @test_trunc_xor(i64 %x) { | ||
| ; X64-LABEL: test_trunc_xor: | ||
| ; X64: # %bb.0: | ||
| ; X64-NEXT: movq %rdi, %rax | ||
| ; X64-NEXT: shrq $48, %rax | ||
| ; X64-NEXT: xorl $14, %eax | ||
| ; X64-NEXT: # kill: def $eax killed $eax killed $rax | ||
| ; X64-NEXT: retq | ||
| %xor = xor i64 %x, 3940649673949184 | ||
| %shr = lshr i64 %xor, 48 | ||
| %conv = trunc i64 %shr to i32 | ||
| ret i32 %conv | ||
| } | ||
|
|
||
| ; Make sure we don't crash on this test case. | ||
|
|
||
| define i32 @pr128158(i64 %x) { | ||
|
|
@@ -137,10 +192,10 @@ define i32 @pr128158(i64 %x) { | |
| ; X64-NEXT: addq %rdi, %rax | ||
| ; X64-NEXT: shrq $32, %rax | ||
| ; X64-NEXT: .p2align 4 | ||
| ; X64-NEXT: .LBB9_1: # %for.body | ||
| ; X64-NEXT: .LBB13_1: # %for.body | ||
| ; X64-NEXT: # =>This Inner Loop Header: Depth=1 | ||
| ; X64-NEXT: cmpl $9, %eax | ||
| ; X64-NEXT: jb .LBB9_1 | ||
| ; X64-NEXT: jb .LBB13_1 | ||
| ; X64-NEXT: # %bb.2: # %exit | ||
| ; X64-NEXT: xorl %eax, %eax | ||
| ; X64-NEXT: retq | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can use `add/or/xor` to replace `binop`.