Revert "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."

topperc · topperc · commit 65aaecca8842 · 2022-10-24T07:12:54.000-07:00
This reverts commit f6a7b47. I received a report that this fails on 32-bit X86.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7168,17 +7168,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
 //   Remainder = Sum % Constant
 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
 //
-// For division, we can compute the remainder using the algorithm described
-// above, subtract it from the dividend to get an exact multiple of Constant.
-// Then multiply that extact multiply by the multiplicative inverse modulo
-// (1 << (BitWidth / 2)) to get the quotient.
-
-// If Constant is even, we can shift right the dividend and the divisor by the
-// number of trailing zeros in Constant before applying the remainder algorithm.
-// If we're after the quotient, we can subtract this value from the shifted
-// dividend and multiply by the multiplicative inverse of the shifted divisor.
-// If we want the remainder, we shift the value left by the number of trailing
-// zeros and add the bits that were shifted out of the dividend.
+// For division, we can compute the remainder, subtract it from the dividend,
+// and then multiply by the multiplicative inverse modulo (1 << (BitWidth / 2)).
 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                             SmallVectorImpl<SDValue> &Result,
                                             EVT HiLoVT, SelectionDAG &DAG,
@@ -7197,7 +7188,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
   if (!CN)
     return false;
 
-  APInt Divisor = CN->getAPIntValue();
+  const APInt &Divisor = CN->getAPIntValue();
   unsigned BitWidth = Divisor.getBitWidth();
   unsigned HBitWidth = BitWidth / 2;
   assert(VT.getScalarSizeInBits() == BitWidth &&
@@ -7218,20 +7209,12 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
   if (DAG.shouldOptForSize())
     return false;
 
-  // Early out for 0 or 1 divisors.
-  if (Divisor.ule(1))
+  // Early out for 0, 1 or even divisors.
+  if (Divisor.ule(1) || Divisor[0] == 0)
     return false;
 
-  // If the divisor is even, shift it until it becomes odd.
-  unsigned TrailingZeros = 0;
-  if (!Divisor[0]) {
-    TrailingZeros = Divisor.countTrailingZeros();
-    Divisor.lshrInPlace(TrailingZeros);
-  }
-
   SDLoc dl(N);
   SDValue Sum;
-  SDValue PartialRem;
 
   // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
   // then add in the carry.
@@ -7246,27 +7229,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                        DAG.getIntPtrConstant(1, dl));
     }
 
-    // Shift the input by the number of TrailingZeros in the divisor. The
-    // shifted out bits will be added to the remainder later.
-    if (TrailingZeros) {
-      LL = DAG.getNode(
-          ISD::OR, dl, HiLoVT,
-          DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
-                      DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
-          DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
-                      DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
-                                                 HiLoVT, dl)));
-      LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
-                       DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
-
-      // Save the shifted off bits if we need the remainder.
-      if (Opcode != ISD::UDIV) {
-        APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
-        PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
-                                 DAG.getConstant(Mask, dl, HiLoVT));
-      }
-    }
-
     // Use addcarry if we can, otherwise use a compare to detect overflow.
     EVT SetCCType =
         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
@@ -7298,45 +7260,45 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
   SDValue RemL =
       DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
                   DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
+  // High half of the remainder is 0.
   SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
 
-  if (Opcode != ISD::UREM) {
-    // Subtract the remainder from the shifted dividend.
-    SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
-    SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
-
-    Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
-
-    // Multiply by the multiplicative inverse of the divisor modulo
-    // (1 << BitWidth).
-    APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
-    APInt MulFactor = Divisor.zext(BitWidth + 1);
-    MulFactor = MulFactor.multiplicativeInverse(Mod);
-    MulFactor = MulFactor.trunc(BitWidth);
-
-    SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
-                                   DAG.getConstant(MulFactor, dl, VT));
-
-    // Split the quotient into low and high parts.
-    SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
-                                DAG.getIntPtrConstant(0, dl));
-    SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
-                                DAG.getIntPtrConstant(1, dl));
-    Result.push_back(QuotL);
-    Result.push_back(QuotH);
-  }
-
-  if (Opcode != ISD::UDIV) {
-    // If we shifted the input, shift the remainder left and add the bits we
-    // shifted off the input.
-    if (TrailingZeros) {
-      APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
-      RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
-                         DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
-      RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
-    }
+  // If we only want remainder, we're done.
+  if (Opcode == ISD::UREM) {
+    Result.push_back(RemL);
+    Result.push_back(RemH);
+    return true;
+  }
+
+  // Otherwise, we need to compute the quotient.
+
+  // Join the remainder halves.
+  SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
+
+  // Subtract the remainder from the input.
+  SDValue In = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Rem);
+
+  // Multiply by the multiplicative inverse of the divisor modulo
+  // (1 << BitWidth).
+  APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
+  APInt MulFactor = Divisor.zext(BitWidth + 1);
+  MulFactor = MulFactor.multiplicativeInverse(Mod);
+  MulFactor = MulFactor.trunc(BitWidth);
+
+  SDValue Quotient =
+      DAG.getNode(ISD::MUL, dl, VT, In, DAG.getConstant(MulFactor, dl, VT));
+
+  // Split the quotient into low and high parts.
+  SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+                              DAG.getIntPtrConstant(0, dl));
+  SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
+                              DAG.getIntPtrConstant(1, dl));
+  Result.push_back(QuotL);
+  Result.push_back(QuotH);
+  // For DIVREM, also return the remainder parts.
+  if (Opcode == ISD::UDIVREM) {
     Result.push_back(RemL);
-    Result.push_back(DAG.getConstant(0, dl, HiLoVT));
+    Result.push_back(RemH);
   }
 
   return true;
diff --git a/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll b/llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll
@@ -502,59 +502,24 @@ define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind {
 define iXLen2 @test_udiv_12(iXLen2 %x) nounwind {
 ; RV32-LABEL: test_udiv_12:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 30
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    or a0, a0, a2
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    add a2, a0, a1
-; RV32-NEXT:    sltu a3, a2, a0
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    lui a3, 699051
-; RV32-NEXT:    addi a4, a3, -1365
-; RV32-NEXT:    mulhu a5, a2, a4
-; RV32-NEXT:    srli a6, a5, 1
-; RV32-NEXT:    andi a5, a5, -2
-; RV32-NEXT:    add a5, a5, a6
-; RV32-NEXT:    sub a2, a2, a5
-; RV32-NEXT:    sub a5, a0, a2
-; RV32-NEXT:    addi a3, a3, -1366
-; RV32-NEXT:    mul a3, a5, a3
-; RV32-NEXT:    mulhu a6, a5, a4
-; RV32-NEXT:    add a3, a6, a3
-; RV32-NEXT:    sltu a0, a0, a2
-; RV32-NEXT:    sub a0, a1, a0
-; RV32-NEXT:    mul a0, a0, a4
-; RV32-NEXT:    add a1, a3, a0
-; RV32-NEXT:    mul a0, a5, a4
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a2, 12
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    call __udivdi3@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_udiv_12:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 62
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    or a0, a0, a2
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV64-NEXT:    ld a2, %lo(.LCPI10_0)(a2)
-; RV64-NEXT:    add a3, a0, a1
-; RV64-NEXT:    sltu a4, a3, a0
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    mulhu a4, a3, a2
-; RV64-NEXT:    srli a5, a4, 1
-; RV64-NEXT:    andi a4, a4, -2
-; RV64-NEXT:    lui a6, %hi(.LCPI10_1)
-; RV64-NEXT:    ld a6, %lo(.LCPI10_1)(a6)
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    sub a3, a3, a4
-; RV64-NEXT:    sub a4, a0, a3
-; RV64-NEXT:    mul a5, a4, a6
-; RV64-NEXT:    mulhu a6, a4, a2
-; RV64-NEXT:    add a5, a6, a5
-; RV64-NEXT:    sltu a0, a0, a3
-; RV64-NEXT:    sub a0, a1, a0
-; RV64-NEXT:    mul a0, a0, a2
-; RV64-NEXT:    add a1, a5, a0
-; RV64-NEXT:    mul a0, a4, a2
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    li a2, 12
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    call __udivti3@plt
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %a = udiv iXLen2 %x, 12
   ret iXLen2 %a
diff --git a/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll b/llvm/test/CodeGen/RISCV/split-urem-by-constant.ll
@@ -335,46 +335,24 @@ define iXLen2 @test_urem_65537(iXLen2 %x) nounwind {
 define iXLen2 @test_urem_12(iXLen2 %x) nounwind {
 ; RV32-LABEL: test_urem_12:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 30
-; RV32-NEXT:    srli a0, a0, 2
-; RV32-NEXT:    or a0, a0, a2
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    add a1, a0, a1
-; RV32-NEXT:    sltu a2, a1, a0
-; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    lui a2, 699051
-; RV32-NEXT:    addi a2, a2, -1365
-; RV32-NEXT:    mulhu a2, a1, a2
-; RV32-NEXT:    srli a3, a2, 1
-; RV32-NEXT:    andi a2, a2, -2
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    sub a1, a1, a2
-; RV32-NEXT:    slli a1, a1, 2
-; RV32-NEXT:    andi a0, a0, 3
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    li a2, 12
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    call __umoddi3@plt
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_urem_12:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 62
-; RV64-NEXT:    srli a0, a0, 2
-; RV64-NEXT:    or a0, a0, a2
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    lui a2, %hi(.LCPI10_0)
-; RV64-NEXT:    ld a2, %lo(.LCPI10_0)(a2)
-; RV64-NEXT:    add a1, a0, a1
-; RV64-NEXT:    sltu a3, a1, a0
-; RV64-NEXT:    add a1, a1, a3
-; RV64-NEXT:    mulhu a2, a1, a2
-; RV64-NEXT:    srli a3, a2, 1
-; RV64-NEXT:    andi a2, a2, -2
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    sub a1, a1, a2
-; RV64-NEXT:    slli a1, a1, 2
-; RV64-NEXT:    andi a0, a0, 3
-; RV64-NEXT:    or a0, a1, a0
-; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    li a2, 12
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    call __umodti3@plt
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %a = urem iXLen2 %x, 12
   ret iXLen2 %a
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -735,23 +735,13 @@ entry:
 define i64 @urem_i64_12(i64 %x) nounwind {
 ; X32-LABEL: urem_i64_12:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    shrdl $2, %ecx, %esi
-; X32-NEXT:    shrl $2, %ecx
-; X32-NEXT:    addl %esi, %ecx
-; X32-NEXT:    adcl $0, %ecx
-; X32-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    mull %edx
-; X32-NEXT:    shrl %edx
-; X32-NEXT:    leal (%edx,%edx,2), %eax
-; X32-NEXT:    subl %eax, %ecx
-; X32-NEXT:    andl $3, %esi
-; X32-NEXT:    leal (%esi,%ecx,4), %eax
-; X32-NEXT:    xorl %edx, %edx
-; X32-NEXT:    popl %esi
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $12
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll __umoddi3
+; X32-NEXT:    addl $28, %esp
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: urem_i64_12:
@@ -1126,33 +1116,13 @@ entry:
 define i64 @udiv_i64_12(i64 %x) nounwind {
 ; X32-LABEL: udiv_i64_12:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    pushl %edi
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    shrdl $2, %edi, %ecx
-; X32-NEXT:    shrl $2, %edi
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    addl %edi, %esi
-; X32-NEXT:    adcl $0, %esi
-; X32-NEXT:    movl $-1431655765, %ebx # imm = 0xAAAAAAAB
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    mull %ebx
-; X32-NEXT:    shrl %edx
-; X32-NEXT:    leal (%edx,%edx,2), %eax
-; X32-NEXT:    subl %eax, %esi
-; X32-NEXT:    subl %esi, %ecx
-; X32-NEXT:    sbbl $0, %edi
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    mull %ebx
-; X32-NEXT:    imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
-; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
-; X32-NEXT:    addl %ecx, %edx
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %edi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $12
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll __udivdi3
+; X32-NEXT:    addl $28, %esp
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: udiv_i64_12:
diff --git a/llvm/test/CodeGen/X86/divmod128.ll b/llvm/test/CodeGen/X86/divmod128.ll