Skip to content

Commit f3a9d1a

Browse files
committed
[SelectionDAG]: Add more cases for UDIV, SDIV, SRA, and SRL
1 parent 2ed8092 commit f3a9d1a

File tree

2 files changed

+48
-20
lines changed

2 files changed

+48
-20
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5443,19 +5443,50 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
54435443
if (ValKnown.isNegative())
54445444
return true;
54455445
// If a known-one bit survives the maximum possible shift count, the result is non-zero.
5446-
APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
5446+
const KnownBits Shift = computeKnownBits(Op.getOperand(1), Depth + 1);
5447+
APInt MaxCnt = Shift.getMaxValue();
54475448
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
54485449
!ValKnown.One.lshr(MaxCnt).isZero())
54495450
return true;
5451+
// Similar to the UDIV case: treat the shift as an unsigned division and compare the value (u>=) against the known bits of (1 u>> shift amount).
5452+
const KnownBits One =
5453+
KnownBits::makeConstant(APInt(ValKnown.getBitWidth(), 1));
5454+
if (KnownBits::uge(ValKnown,
5455+
KnownBits::lshr(One, Shift, Shift.isNonZero())))
5456+
return true;
54505457
break;
54515458
}
5452-
case ISD::UDIV:
5453-
case ISD::SDIV:
5459+
case ISD::UDIV: {
5460+
if (Op->getFlags().hasExact())
5461+
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
5462+
KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
5463+
KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
5464+
// True if Op0 u>= Op1
5465+
if (KnownBits::uge(Op0, Op1))
5466+
return true;
5467+
break;
5468+
}
5469+
case ISD::SDIV: {
54545470
// div exact can only produce a zero if the dividend is zero.
5455-
// TODO: For udiv this is also true if Op1 u<= Op0
54565471
if (Op->getFlags().hasExact())
54575472
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
5473+
KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
5474+
KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
5475+
if (Op0.isNegative() && Op1.isStrictlyPositive())
5476+
return true;
5477+
5478+
if (Op0.isStrictlyPositive() && Op1.isNegative())
5479+
return true;
5480+
5481+
// When both operands are known negative the comparison is reversed: check Op0 s<= Op1.
5482+
if (Op0.isNegative() && Op1.isNegative() && KnownBits::sle(Op0, Op1))
5483+
return true;
5484+
5485+
if (Op0.isStrictlyPositive() && Op1.isStrictlyPositive() &&
5486+
KnownBits::uge(Op0, Op1))
5487+
return true;
54585488
break;
5489+
}
54595490

54605491
case ISD::ADD:
54615492
if (Op->getFlags().hasNoUnsignedWrap())

llvm/test/CodeGen/X86/known-pow2.ll

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,13 @@ define i1 @pow2_srl_fail0(i32 %x, i32 %y) {
118118
; CHECK-LABEL: pow2_srl_fail0:
119119
; CHECK: # %bb.0:
120120
; CHECK-NEXT: movl %esi, %ecx
121+
; CHECK-NEXT: movl %edi, %eax
121122
; CHECK-NEXT: andb $30, %cl
122-
; CHECK-NEXT: notl %edi
123123
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
124-
; CHECK-NEXT: shll %cl, %edi
125-
; CHECK-NEXT: testl $1048576, %edi # imm = 0x100000
126-
; CHECK-NEXT: sete %al
124+
; CHECK-NEXT: shll %cl, %eax
125+
; CHECK-NEXT: shrl $20, %eax
126+
; CHECK-NEXT: andl $1, %eax
127+
; CHECK-NEXT: # kill: def $al killed $al killed $eax
127128
; CHECK-NEXT: retq
128129
%yy = and i32 %y, 30
129130
%d = lshr i32 1048576, %yy
@@ -349,9 +350,8 @@ define i1 @pow2_umax_fail0(i32 %x, i32 %y, i32 %z) {
349350
; CHECK-NEXT: shrl %cl, %esi
350351
; CHECK-NEXT: cmpl %esi, %eax
351352
; CHECK-NEXT: cmoval %eax, %esi
352-
; CHECK-NEXT: notl %edi
353-
; CHECK-NEXT: testl %edi, %esi
354-
; CHECK-NEXT: sete %al
353+
; CHECK-NEXT: testl %esi, %edi
354+
; CHECK-NEXT: setne %al
355355
; CHECK-NEXT: retq
356356
%yy = shl i32 1, %y
357357
%zz = lshr i32 1073741824, %z
@@ -482,9 +482,8 @@ define i1 @pow2_smax_fail0(i32 %x, i32 %y, i32 %z) {
482482
; CHECK-NEXT: shrl %cl, %esi
483483
; CHECK-NEXT: cmpl %esi, %eax
484484
; CHECK-NEXT: cmovgl %eax, %esi
485-
; CHECK-NEXT: notl %edi
486-
; CHECK-NEXT: testl %edi, %esi
487-
; CHECK-NEXT: sete %al
485+
; CHECK-NEXT: testl %esi, %edi
486+
; CHECK-NEXT: setne %al
488487
; CHECK-NEXT: retq
489488
%yy = shl i32 1, %y
490489
%zz = lshr i32 1073741824, %z
@@ -555,9 +554,8 @@ define i1 @pow2_select_fail0(i1 %c, i32 %x, i32 %y, i32 %z) {
555554
; CHECK-NEXT: shrl %cl, %r8d
556555
; CHECK-NEXT: testb $1, %dil
557556
; CHECK-NEXT: cmovnel %edx, %r8d
558-
; CHECK-NEXT: notl %esi
559-
; CHECK-NEXT: testl %esi, %r8d
560-
; CHECK-NEXT: sete %al
557+
; CHECK-NEXT: testl %r8d, %esi
558+
; CHECK-NEXT: setne %al
561559
; CHECK-NEXT: retq
562560
%yy = shl i32 1, %y
563561
%zz = lshr i32 1073741824, %z
@@ -696,10 +694,9 @@ define <4 x i1> @pow2_vselect_fail0_ne(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y,
696694
; CHECK-NEXT: pand %xmm0, %xmm2
697695
; CHECK-NEXT: pandn %xmm7, %xmm0
698696
; CHECK-NEXT: por %xmm2, %xmm0
699-
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
700-
; CHECK-NEXT: pand %xmm0, %xmm1
697+
; CHECK-NEXT: pand %xmm1, %xmm0
698+
; CHECK-NEXT: pxor %xmm1, %xmm1
701699
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
702-
; CHECK-NEXT: pxor %xmm2, %xmm0
703700
; CHECK-NEXT: retq
704701
%yy = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
705702
%zz = lshr <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, %z

0 commit comments

Comments
 (0)