-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') #126448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') #126448
Changes from 3 commits
3d73525
f530899
a9d77fd
bd8a2ef
b773400
b526b0f
e6fc7f0
55d53f5
8d3556e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48472,6 +48472,55 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, | |
| return SDValue(); | ||
| } | ||
|
|
||
| // Try to fold (setcc (sub (truncate (srl (add X, C1), C2)), C3), CC), with CC one of E/NE/AE/B, into | |
| // (setcc (cmp (add (truncate (srl X, C2)), C1'), C3), CC), where C1' = trunc(~(~C1 >> C2)). C1' is smaller | |
| // than C1, so MOVABS and large constants can be avoided in certain cases. Returns an empty SDValue unless the | |
| // SRL is i64, the ADD has one use, C3 u< trunc(C1 >> C2) and the low C2 bits of C1 are zero. NOTE(review): VT is EFLAGS' own type; confirm the truncate result always has that type too. | |
| static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC, | |
| SelectionDAG &DAG) { | |
| using namespace llvm::SDPatternMatch; | |
| if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE || | |
| CC == X86::COND_B)) | |
| return SDValue(); | |


||
| SDValue AddLhs; | |
| APInt AddConst, SrlConst, CmpConst; | |
| if (!sd_match(EFLAGS, | |
| m_AllOf(m_SpecificVT(MVT::i32), | |
| m_BinOp(X86ISD::SUB, | |
| m_Trunc(m_Srl(m_Add(m_Value(AddLhs), | |
| m_ConstInt(AddConst)), | |
| m_ConstInt(SrlConst))), | |
| m_ConstInt(CmpConst))))) | |
| return SDValue(); | |


||
| SDValue Srl; | |
| if (!sd_match(EFLAGS.getOperand(0).getOperand(0), | |
| m_AllOf(m_SpecificVT(MVT::i64), m_Value(Srl)))) | |

||
| return SDValue(); | |


||
| // Only proceed when the ADD feeding the SRL has a single use, i.e. it is not used elsewhere. | |
| if (!Srl.getOperand(0).hasOneUse()) | |

||
| return SDValue(); | |


||
| EVT VT = EFLAGS.getValueType(); | |
| APInt ShiftedAddConst = AddConst.lshr(SrlConst); | |
| if (!CmpConst.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) || | |

||
| (ShiftedAddConst.shl(SrlConst)) != AddConst) | |
| return SDValue(); | |


||
| SDLoc DL(EFLAGS); | |
| SDValue AddLHSSrl = | |
| DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, Srl.getOperand(1)); | |
| SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl); | |


||
| APInt NewAddConstVal = | |
| (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits()); | |
| SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT); | |
| SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst); | |
| return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1)); | |
| } | |
|
|
||
| /// Optimize an EFLAGS definition used according to the condition code \p CC | ||
| /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing | ||
| /// uses of chain values. | ||
|
|
@@ -48494,6 +48543,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, | |
| if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget)) | ||
| return R; | ||
|
|
||
| if (SDValue R = combineSetCCTruncAdd(EFLAGS, CC, DAG)) | ||
| return R; | ||
|
|
||
| return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64 | ||
|
|
||
| ; Test for https://github.com/llvm/llvm-project/issues/123239 | ||
|
|
||
| ; trunc((x + 0xE000000000000) >> 48) u< 3: the expected output is shrq/addl/cmpl + setb, with no movabsq. | |
| define i1 @test_ult_trunc_add(i64 %x) { | |
| ; X64-LABEL: test_ult_trunc_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: shrq $48, %rdi | |
| ; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E | |
| ; X64-NEXT: cmpl $3, %edi | |
| ; X64-NEXT: setb %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %add = add i64 %x, 3940649673949184 | |
| %shr = lshr i64 %add, 48 | |
| %conv = trunc i64 %shr to i32 | |
| %res = icmp ult i32 %conv, 3 | |
| ret i1 %res | |

Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if we can make it more general, e.g.: I feel the cmp is not necessary here.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like something like this works: https://alive2.llvm.org/ce/z/A8XKwW |
||
| } | |
|
|
||
| ; Full-width i64 form: (x + 0xE000000000000) u< 0x3000000000000. The expected output is the same shrq/addl/cmpl + setb sequence as in test_ult_trunc_add. | |
| define i1 @test_ult_add(i64 %x) { | |
| ; X64-LABEL: test_ult_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: shrq $48, %rdi | |
| ; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E | |
| ; X64-NEXT: cmpl $3, %edi | |
| ; X64-NEXT: setb %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %0 = add i64 3940649673949184, %x | |
| %1 = icmp ult i64 %0, 844424930131968 | |
| ret i1 %1 | |
| } | |
|
|
||
| ; trunc((x + 0xE000000000000) >> 48) u> 3: the expected output is shrq/addl, then cmpl $4 + setae, with no movabsq. | |
| define i1 @test_ugt_trunc_add(i64 %x) { | |
| ; X64-LABEL: test_ugt_trunc_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: shrq $48, %rdi | |
| ; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E | |
| ; X64-NEXT: cmpl $4, %edi | |
| ; X64-NEXT: setae %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %add = add i64 %x, 3940649673949184 | |
| %shr = lshr i64 %add, 48 | |
| %conv = trunc i64 %shr to i32 | |
| %res = icmp ugt i32 %conv, 3 | |
| ret i1 %res | |
| } | |
|
|
||
| ; Full-width i64 form: (x + 0xE000000000000) u> 0x3000000000000. The expected output still materializes both constants with movabsq before addq/cmpq + seta. | |
| define i1 @test_ugt_add(i64 %x) { | |
| ; X64-LABEL: test_ugt_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000 | |
| ; X64-NEXT: addq %rdi, %rax | |
| ; X64-NEXT: movabsq $844424930131968, %rcx # imm = 0x3000000000000 | |
| ; X64-NEXT: cmpq %rcx, %rax | |
| ; X64-NEXT: seta %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %0 = add i64 3940649673949184, %x | |
| %1 = icmp ugt i64 %0, 844424930131968 | |
| ret i1 %1 | |
| } | |
|
|
||
| ; trunc((x + 0xE000000000000) >> 48) == 3: the expected output is shrq/addl/cmpl + sete, with no movabsq. | |
| define i1 @test_eq_trunc_add(i64 %x) { | |
| ; X64-LABEL: test_eq_trunc_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: shrq $48, %rdi | |
| ; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E | |
| ; X64-NEXT: cmpl $3, %edi | |
| ; X64-NEXT: sete %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %add = add i64 %x, 3940649673949184 | |
| %shr = lshr i64 %add, 48 | |
| %conv = trunc i64 %shr to i32 | |
| %res = icmp eq i32 %conv, 3 | |
| ret i1 %res | |
| } | |
|
|
||
| ; Full-width i64 form: (x + 0xE000000000000) == 0x3000000000000. The expected output compares x directly against one movabsq constant (0xFFF5000000000000) + sete. | |
| define i1 @test_eq_add(i64 %x) { | |
| ; X64-LABEL: test_eq_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000 | |
| ; X64-NEXT: cmpq %rax, %rdi | |
| ; X64-NEXT: sete %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %0 = add i64 3940649673949184, %x | |
| %1 = icmp eq i64 %0, 844424930131968 | |
| ret i1 %1 | |
| } | |
|
|
||
| ; trunc((x + 0xE000000000000) >> 48) != 3: the expected output is shrq/addl/cmpl + setne, with no movabsq. | |
| define i1 @test_ne_trunc_add(i64 %x) { | |
| ; X64-LABEL: test_ne_trunc_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: shrq $48, %rdi | |
| ; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E | |
| ; X64-NEXT: cmpl $3, %edi | |
| ; X64-NEXT: setne %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %add = add i64 %x, 3940649673949184 | |
| %shr = lshr i64 %add, 48 | |
| %conv = trunc i64 %shr to i32 | |
| %res = icmp ne i32 %conv, 3 | |
| ret i1 %res | |
| } | |
|
|
||
| ; Full-width i64 form: (x + 0xE000000000000) != 0x3000000000000. The expected output compares x directly against one movabsq constant (0xFFF5000000000000) + setne. | |
| define i1 @test_ne_add(i64 %x) { | |
| ; X64-LABEL: test_ne_add: | |
| ; X64: # %bb.0: # %entry | |
| ; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000 | |
| ; X64-NEXT: cmpq %rax, %rdi | |
| ; X64-NEXT: setne %al | |
| ; X64-NEXT: retq | |
| entry: | |
| %0 = add i64 3940649673949184, %x | |
| %1 = icmp ne i64 %0, 844424930131968 | |
| ret i1 %1 | |
| } | |
Uh oh!
There was an error while loading. Please reload this page.