-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[DAG] isKnownNeverZero - add more cases for UDIV, SDIV, SRA, and SRL operations #89522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-support @llvm/pr-subscribers-backend-x86 Author: AtariDreams (AtariDreams) ChangesFull diff: https://github.com/llvm/llvm-project/pull/89522.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7dbf83b7adeef0..29a2b27db5f8df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5443,19 +5443,53 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (ValKnown.isNegative())
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
- APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ const KnownBits Shift = computeKnownBits(Op.getOperand(1), Depth + 1);
+ APInt MaxCnt = Shift.getMaxValue();
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
!ValKnown.One.lshr(MaxCnt).isZero())
return true;
+ // Similar to udiv but we try to see if we can turn it into a division
+ const KnownBits One =
+ KnownBits::makeConstant(APInt(ValKnown.getBitWidth(), 1));
+ if (KnownBits::uge(ValKnown,
+ KnownBits::lshr(One, Shift, Shift.isNonZero())))
+ return true;
break;
}
- case ISD::UDIV:
- case ISD::SDIV:
+ case ISD::UDIV: {
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ // True if Op0 u>= Op1
+ if (KnownBits::uge(Op0, Op1))
+ return true;
+ if (!Op0.getMinValue().udiv(Op1.getMaxValue()).isZero())
+ return true;
+ break;
+ }
+ case ISD::SDIV: {
// div exact can only produce a zero if the dividend is zero.
- // TODO: For udiv this is also true if Op1 u<= Op0
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Op0.isNegative() && Op1.isStrictlyPositive())
+ return true;
+
+ if (Op0.isStrictlyPositive() && Op1.isNegative())
+ return true;
+
+ if (Op0.isNegative() && Op1.isNegative() && KnownBits::sge(Op0, Op1))
+ return true;
+
+ if (Op0.isStrictlyPositive() && Op1.isStrictlyPositive() &&
+ (KnownBits::uge(Op0, Op1) ||
+ !Op0.getMinValue().udiv(Op1.getMaxValue()).isZero()))
+ return true;
break;
+ }
case ISD::ADD:
if (Op->getFlags().hasNoUnsignedWrap())
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index e183bbc15617d5..31b4731613def9 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -118,12 +118,13 @@ define i1 @pow2_srl_fail0(i32 %x, i32 %y) {
; CHECK-LABEL: pow2_srl_fail0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $30, %cl
-; CHECK-NEXT: notl %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shll %cl, %edi
-; CHECK-NEXT: testl $1048576, %edi # imm = 0x100000
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: shll %cl, %eax
+; CHECK-NEXT: shrl $20, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%yy = and i32 %y, 30
%d = lshr i32 1048576, %yy
@@ -349,9 +350,8 @@ define i1 @pow2_umax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmoval %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -482,9 +482,8 @@ define i1 @pow2_smax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmovgl %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -555,9 +554,8 @@ define i1 @pow2_select_fail0(i1 %c, i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %r8d
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: cmovnel %edx, %r8d
-; CHECK-NEXT: notl %esi
-; CHECK-NEXT: testl %esi, %r8d
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %r8d, %esi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -696,10 +694,9 @@ define <4 x i1> @pow2_vselect_fail0_ne(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y,
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: pandn %xmm7, %xmm0
; CHECK-NEXT: por %xmm2, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%zz = lshr <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, %z
|
021fbee to
f3a9d1a
Compare
3e74e9e to
2579bbb
Compare
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
a933f60 to
a5d26f2
Compare
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you compare with the ValueTracking.cpp implementations? You'd be better off adding new handling in there first (and then updating SelectionDAG to match) as we have much better fuzz testing in the middle end for this kind of thing than in the backend.
|
Title needs to say cases for what |
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe split this into separate PRs for each opcode change?
These cases were ported from ValueTracking
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be much easier to review this if only one operation was covered per PR
| if (KnownBits::computeForAddSub( | ||
| /*Add=*/true, Op->getFlags().hasNoSignedWrap(), | ||
| Op->getFlags().hasNoUnsignedWrap(), Op0, Op1) | ||
| .isNonZero()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe there's now a ::add helper
| // The sign bit of Y is set. If some other bit is set then Y is not equal | ||
| // to INT_MIN. | ||
| if (Op1.One.intersects(Mask)) | ||
| return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you just check isSignBitSet then instead of intersect with Mask?
Co-authored-by: Matt Arsenault <[email protected]>
These cases were ported from ValueTracking