[X86] Extend combinei64TruncSrlAdd to handle patterns with or and xor
#128435
@@ -53733,36 +53733,47 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
   }
 
-// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
-// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
-// to avoid generating code with MOVABS and large constants in certain cases.
-static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
-                                     const SDLoc &DL) {
-  using namespace llvm::SDPatternMatch;
-
-  SDValue AddLhs;
-  APInt AddConst, SrlConst;
-  if (VT != MVT::i32 ||
-      !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
-                           m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
-                                                m_ConstInt(AddConst))),
-                                 m_ConstInt(SrlConst)))))
-    return SDValue();
-
-  if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
-    return SDValue();
+// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to
+// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we
+// are able to avoid generating code with MOVABS and large constants in certain
+// cases.
+static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
+                                          const SDLoc &DL) {
+  SDValue Op = N.getOperand(0);
+  APInt OpConst = Op.getConstantOperandAPInt(1);
+  APInt SrlConst = N.getConstantOperandAPInt(1);
+  unsigned Opcode = Op.getOpcode();
+
+  switch (Opcode) {
+  default:
+    return SDValue();
+  case ISD::ADD:
+    if (OpConst.countr_zero() < SrlConst.getZExtValue())
+      return SDValue();
+    [[fallthrough]];
+  case ISD::OR:
+  case ISD::XOR:
+    if (SrlConst.ule(32))
+      return SDValue();
+    break;
+  }
 
-  SDValue AddLHSSrl =
-      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
-  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+  SDValue OpLhsSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl);
 
-  APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
-  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
-  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+  APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
Inline review comment on the NewOpConstVal line above:

Collaborator: APInt::extractBits ?

Author (Contributor): If I'm not mistaken, by using
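To make the suggestion concrete, here is a standalone sketch (mine, not part of the patch) that compares the patch's lshr+trunc narrowing with APInt::extractBits on the constant used by the new tests. If I read the APInt API correctly, extractBits requires the extracted window to stay inside the value's bit width, and since this combine only fires for shift amounts greater than 32, only 64 - SrlConst bits are available, so an extra zero-extension back to 32 bits would be needed.

// Hedged sketch, not LLVM source; requires linking against LLVMSupport.
#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdio>

int main() {
  llvm::APInt OpConst(64, 0xE000000000000ULL); // 3940649673949184, from the tests
  unsigned SrlAmt = 48;                        // the combine requires > 32

  // What the patch computes: (C1 >> 48), truncated to the i32 result type.
  llvm::APInt ViaLshrTrunc = OpConst.lshr(SrlAmt).trunc(32);

  // extractBits must stay inside the 64-bit value, so only 64 - 48 = 16 bits
  // are available starting at bit 48; a zext back to 32 bits is then needed.
  llvm::APInt ViaExtract = OpConst.extractBits(64 - SrlAmt, SrlAmt).zext(32);

  assert(ViaLshrTrunc == ViaExtract);
  printf("narrowed constant = %llu\n",
         (unsigned long long)ViaLshrTrunc.getZExtValue()); // prints 14
}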
+  SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
+  SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
 
   EVT CleanUpVT =
       EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
-  return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
+
+  if (Opcode == ISD::ADD)
+    return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
+
+  SDValue CleanUp = DAG.getAnyExtOrTrunc(NewOpNode, DL, CleanUpVT);
+  return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
 }
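As a sanity check on the rewrite itself, the following standalone sketch (my own, not from the patch) emulates the ADD path on plain 64-bit integers. It assumes C2 > 32 and countr_zero(C1) >= C2, matching the guards above, so no carry from the discarded low bits can reach the kept window; the final mask plays the role of getZeroExtendInReg with the (64 - C2)-bit type.

// Hedged sketch, not LLVM source: checks that trunc(srl(add X, C1), C2) equals
// the rewritten zext-in-reg(add(trunc(srl X, C2)), C1 >> C2) on sample values.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t originalForm(uint64_t X, uint64_t C1, unsigned C2) {
  return (uint32_t)((X + C1) >> C2);            // trunc i32 (srl (add X, C1), C2)
}

static uint32_t rewrittenForm(uint64_t X, uint64_t C1, unsigned C2) {
  uint32_t Trunc = (uint32_t)(X >> C2);          // trunc i32 (srl X, C2)
  uint32_t Sum = Trunc + (uint32_t)(C1 >> C2);   // add with the narrowed constant
  unsigned KeptBits = 64 - C2;                   // C2 > 32, so KeptBits < 32
  return Sum & ((1u << KeptBits) - 1);           // getZeroExtendInReg(i(64 - C2))
}

int main() {
  const uint64_t C1 = 0xE000000000000ULL; // 14 << 48, as in the new tests
  const unsigned C2 = 48;
  for (uint64_t X : {0ULL, 1ULL, 0x123456789ABCDEFULL, 0xFFFF000000000000ULL,
                     0xFFFFFFFFFFFFFFFFULL})
    assert(originalForm(X, C1, C2) == rewrittenForm(X, C1, C2));
}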
 
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
 
@@ -53810,11 +53821,15 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   if (!Src.hasOneUse())
     return SDValue();
 
-  if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
-    return R;
+  if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
+      Src.getOperand(0).getNumOperands() == 2 &&
+      isa<ConstantSDNode>(Src.getOperand(1)) &&
+      isa<ConstantSDNode>(Src.getOperand(0).getOperand(1))) {
+    if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
+      return R;
+    return SDValue();
+  }
 
   // Only support vector truncation for now.
   // TODO: i64 scalar math would benefit as well.
   if (!VT.isVector())
     return SDValue();
@@ -128,6 +128,103 @@ define i32 @test_trunc_add(i64 %x) {
   ret i32 %conv
 }
+
+define i32 @test_trunc_sub(i64 %x) {
+; X64-LABEL: test_trunc_sub:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $65522, %edi # imm = 0xFFF2
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: retq
+  %sub = sub i64 %x, 3940649673949184
+  %shr = lshr i64 %sub, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
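A note on the test_trunc_sub checks above (my reading, not stated in the patch): the sub-by-constant is expected to reach the combine as an add of the negated constant, which still has 48 trailing zero bits. The sketch below reproduces the resulting immediates: 0xFFF2 = 65522 for the addl, and a 16-bit zero-extension corresponding to the movzwl.

// Hedged sketch, not LLVM source: reproduces the constants in the
// test_trunc_sub CHECK lines, assuming sub X, C is treated as add X, -C
// before combinei64TruncSrlConstant runs.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C = 3940649673949184ULL;  // 0xE000000000000 = 14 << 48
  const uint64_t NegC = 0ULL - C;          // 0xFFF2000000000000, low 48 bits zero
  assert((NegC & ((1ULL << 48) - 1)) == 0);

  const uint32_t NarrowedConst = (uint32_t)(NegC >> 48);
  assert(NarrowedConst == 65522u);         // the "addl $65522" immediate (0xFFF2)

  // The kept window is 64 - 48 = 16 bits, which is why the result is
  // zero-extended with movzwl.
  uint64_t X = 0x0123456789ABCDEFULL;
  uint32_t Rewritten = ((uint32_t)(X >> 48) + NarrowedConst) & 0xFFFFu;
  assert(Rewritten == (uint32_t)((X - C) >> 48));
}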
+
+define i32 @test_trunc_and_1(i64 %x) {
+; X64-LABEL: test_trunc_and_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: andl $14, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+  %and = and i64 %x, 3940649673949184
+  %shr = lshr i64 %and, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_or_1(i64 %x) {
+; X64-LABEL: test_trunc_or_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: orl $14, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+  %or = or i64 %x, 3940649673949184
+  %shr = lshr i64 %or, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_xor_1(i64 %x) {
+; X64-LABEL: test_trunc_xor_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: xorl $14, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+  %xor = xor i64 %x, 3940649673949184
+  %shr = lshr i64 %xor, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_and_2(i64 %x) {
+; X64-LABEL: test_trunc_and_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: andl $13, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+  %and = and i64 %x, 3940649673949183
+  %shr = lshr i64 %and, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_or_2(i64 %x) {
+; X64-LABEL: test_trunc_or_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: orl $13, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+  %or = or i64 %x, 3940649673949183
+  %shr = lshr i64 %or, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_xor_2(i64 %x) {
+; X64-LABEL: test_trunc_xor_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: xorl $13, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+  %xor = xor i64 %x, 3940649673949183
+  %shr = lshr i64 %xor, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
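One more standalone sketch (mine, not from the patch) covering the or/xor tests above: after shifting right by 48, both the value and the narrowed constant fit in the low 16 bits, so the 32-bit or/xor with the narrowed constant already matches the original 64-bit computation without any extra masking, unlike the add case; the same bitwise identity holds for the and tests. It also shows where the 14 and 13 immediates come from (0xE000000000000 >> 48 and 0xDFFFFFFFFFFFF >> 48).

// Hedged sketch, not LLVM source: mirrors test_trunc_or_1/xor_1 and the *_2
// variants on plain integers.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t origOr(uint64_t X, uint64_t C) { return (uint32_t)((X | C) >> 48); }
static uint32_t origXor(uint64_t X, uint64_t C) { return (uint32_t)((X ^ C) >> 48); }

int main() {
  const uint64_t C1 = 3940649673949184ULL; // 0xE000000000000, >> 48 == 14
  const uint64_t C2 = 3940649673949183ULL; // 0xDFFFFFFFFFFFF, >> 48 == 13
  assert((C1 >> 48) == 14 && (C2 >> 48) == 13);

  for (uint64_t X : {0ULL, 0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL}) {
    uint32_t Shifted = (uint32_t)(X >> 48); // fits in 16 bits, upper bits zero
    // or/xor cannot set bits above bit 15 here, so no zero-extend-in-reg is
    // needed for these opcodes.
    assert((Shifted | (uint32_t)(C1 >> 48)) == origOr(X, C1));
    assert((Shifted ^ (uint32_t)(C1 >> 48)) == origXor(X, C1));
    assert((Shifted | (uint32_t)(C2 >> 48)) == origOr(X, C2));
    assert((Shifted ^ (uint32_t)(C2 >> 48)) == origXor(X, C2));
  }
}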
+
 ; Make sure we don't crash on this test case.
 
 define i32 @pr128158(i64 %x) {
 
@@ -137,10 +234,10 @@ define i32 @pr128158(i64 %x) {
 ; X64-NEXT: addq %rdi, %rax
 ; X64-NEXT: shrq $32, %rax
 ; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB9_1: # %for.body
+; X64-NEXT: .LBB16_1: # %for.body
 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
 ; X64-NEXT: cmpl $9, %eax
-; X64-NEXT: jb .LBB9_1
+; X64-NEXT: jb .LBB16_1
 ; X64-NEXT: # %bb.2: # %exit
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: retq
Review comment: No need to use a switch since we already limit to ADD/OR/XOR in combineTruncatedArithmetic.