Skip to content

Commit 65aaecc

Browse files
committed
Revert "[TargetLowering][RISCV][X86] Support even divisors in expandDIVREMByConstant."
This reverts commit f6a7b47. I received a report that this fails on 32-bit X86.
1 parent 72711d4 commit 65aaecc

File tree

5 files changed

+117
-276
lines changed

5 files changed

+117
-276
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 41 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -7168,17 +7168,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
71687168
// Remainder = Sum % Constant
71697169
// This is based on "Remainder by Summing Digits" from Hacker's Delight.
71707170
//
7171-
// For division, we can compute the remainder using the algorithm described
7172-
// above, subtract it from the dividend to get an exact multiple of Constant.
7173-
// Then multiply that exact multiple by the multiplicative inverse modulo
7174-
// (1 << (BitWidth / 2)) to get the quotient.
7175-
7176-
// If Constant is even, we can shift right the dividend and the divisor by the
7177-
// number of trailing zeros in Constant before applying the remainder algorithm.
7178-
// If we're after the quotient, we can subtract this value from the shifted
7179-
// dividend and multiply by the multiplicative inverse of the shifted divisor.
7180-
// If we want the remainder, we shift the value left by the number of trailing
7181-
// zeros and add the bits that were shifted out of the dividend.
7171+
// For division, we can compute the remainder, subtract it from the dividend,
7172+
// and then multiply by the multiplicative inverse modulo (1 << BitWidth).
71827173
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
71837174
SmallVectorImpl<SDValue> &Result,
71847175
EVT HiLoVT, SelectionDAG &DAG,
@@ -7197,7 +7188,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
71977188
if (!CN)
71987189
return false;
71997190

7200-
APInt Divisor = CN->getAPIntValue();
7191+
const APInt &Divisor = CN->getAPIntValue();
72017192
unsigned BitWidth = Divisor.getBitWidth();
72027193
unsigned HBitWidth = BitWidth / 2;
72037194
assert(VT.getScalarSizeInBits() == BitWidth &&
@@ -7218,20 +7209,12 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
72187209
if (DAG.shouldOptForSize())
72197210
return false;
72207211

7221-
// Early out for 0 or 1 divisors.
7222-
if (Divisor.ule(1))
7212+
// Early out for 0, 1 or even divisors.
7213+
if (Divisor.ule(1) || Divisor[0] == 0)
72237214
return false;
72247215

7225-
// If the divisor is even, shift it until it becomes odd.
7226-
unsigned TrailingZeros = 0;
7227-
if (!Divisor[0]) {
7228-
TrailingZeros = Divisor.countTrailingZeros();
7229-
Divisor.lshrInPlace(TrailingZeros);
7230-
}
7231-
72327216
SDLoc dl(N);
72337217
SDValue Sum;
7234-
SDValue PartialRem;
72357218

72367219
// If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
72377220
// then add in the carry.
@@ -7246,27 +7229,6 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
72467229
DAG.getIntPtrConstant(1, dl));
72477230
}
72487231

7249-
// Shift the input by the number of TrailingZeros in the divisor. The
7250-
// shifted out bits will be added to the remainder later.
7251-
if (TrailingZeros) {
7252-
LL = DAG.getNode(
7253-
ISD::OR, dl, HiLoVT,
7254-
DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7255-
DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7256-
DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7257-
DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7258-
HiLoVT, dl)));
7259-
LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7260-
DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7261-
7262-
// Save the shifted off bits if we need the remainder.
7263-
if (Opcode != ISD::UDIV) {
7264-
APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7265-
PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7266-
DAG.getConstant(Mask, dl, HiLoVT));
7267-
}
7268-
}
7269-
72707232
// Use addcarry if we can, otherwise use a compare to detect overflow.
72717233
EVT SetCCType =
72727234
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
@@ -7298,45 +7260,45 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
72987260
SDValue RemL =
72997261
DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
73007262
DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7263+
// High half of the remainder is 0.
73017264
SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
73027265

7303-
if (Opcode != ISD::UREM) {
7304-
// Subtract the remainder from the shifted dividend.
7305-
SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7306-
SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7307-
7308-
Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7309-
7310-
// Multiply by the multiplicative inverse of the divisor modulo
7311-
// (1 << BitWidth).
7312-
APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
7313-
APInt MulFactor = Divisor.zext(BitWidth + 1);
7314-
MulFactor = MulFactor.multiplicativeInverse(Mod);
7315-
MulFactor = MulFactor.trunc(BitWidth);
7316-
7317-
SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7318-
DAG.getConstant(MulFactor, dl, VT));
7319-
7320-
// Split the quotient into low and high parts.
7321-
SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
7322-
DAG.getIntPtrConstant(0, dl));
7323-
SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
7324-
DAG.getIntPtrConstant(1, dl));
7325-
Result.push_back(QuotL);
7326-
Result.push_back(QuotH);
7327-
}
7328-
7329-
if (Opcode != ISD::UDIV) {
7330-
// If we shifted the input, shift the remainder left and add the bits we
7331-
// shifted off the input.
7332-
if (TrailingZeros) {
7333-
APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7334-
RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7335-
DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7336-
RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7337-
}
7266+
// If we only want remainder, we're done.
7267+
if (Opcode == ISD::UREM) {
7268+
Result.push_back(RemL);
7269+
Result.push_back(RemH);
7270+
return true;
7271+
}
7272+
7273+
// Otherwise, we need to compute the quotient.
7274+
7275+
// Join the remainder halves.
7276+
SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7277+
7278+
// Subtract the remainder from the input.
7279+
SDValue In = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Rem);
7280+
7281+
// Multiply by the multiplicative inverse of the divisor modulo
7282+
// (1 << BitWidth).
7283+
APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
7284+
APInt MulFactor = Divisor.zext(BitWidth + 1);
7285+
MulFactor = MulFactor.multiplicativeInverse(Mod);
7286+
MulFactor = MulFactor.trunc(BitWidth);
7287+
7288+
SDValue Quotient =
7289+
DAG.getNode(ISD::MUL, dl, VT, In, DAG.getConstant(MulFactor, dl, VT));
7290+
7291+
// Split the quotient into low and high parts.
7292+
SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
7293+
DAG.getIntPtrConstant(0, dl));
7294+
SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
7295+
DAG.getIntPtrConstant(1, dl));
7296+
Result.push_back(QuotL);
7297+
Result.push_back(QuotH);
7298+
// For DIVREM, also return the remainder parts.
7299+
if (Opcode == ISD::UDIVREM) {
73387300
Result.push_back(RemL);
7339-
Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7301+
Result.push_back(RemH);
73407302
}
73417303

73427304
return true;

llvm/test/CodeGen/RISCV/split-udiv-by-constant.ll

Lines changed: 14 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -502,59 +502,24 @@ define iXLen2 @test_udiv_65537(iXLen2 %x) nounwind {
502502
define iXLen2 @test_udiv_12(iXLen2 %x) nounwind {
503503
; RV32-LABEL: test_udiv_12:
504504
; RV32: # %bb.0:
505-
; RV32-NEXT: slli a2, a1, 30
506-
; RV32-NEXT: srli a0, a0, 2
507-
; RV32-NEXT: or a0, a0, a2
508-
; RV32-NEXT: srli a1, a1, 2
509-
; RV32-NEXT: add a2, a0, a1
510-
; RV32-NEXT: sltu a3, a2, a0
511-
; RV32-NEXT: add a2, a2, a3
512-
; RV32-NEXT: lui a3, 699051
513-
; RV32-NEXT: addi a4, a3, -1365
514-
; RV32-NEXT: mulhu a5, a2, a4
515-
; RV32-NEXT: srli a6, a5, 1
516-
; RV32-NEXT: andi a5, a5, -2
517-
; RV32-NEXT: add a5, a5, a6
518-
; RV32-NEXT: sub a2, a2, a5
519-
; RV32-NEXT: sub a5, a0, a2
520-
; RV32-NEXT: addi a3, a3, -1366
521-
; RV32-NEXT: mul a3, a5, a3
522-
; RV32-NEXT: mulhu a6, a5, a4
523-
; RV32-NEXT: add a3, a6, a3
524-
; RV32-NEXT: sltu a0, a0, a2
525-
; RV32-NEXT: sub a0, a1, a0
526-
; RV32-NEXT: mul a0, a0, a4
527-
; RV32-NEXT: add a1, a3, a0
528-
; RV32-NEXT: mul a0, a5, a4
505+
; RV32-NEXT: addi sp, sp, -16
506+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
507+
; RV32-NEXT: li a2, 12
508+
; RV32-NEXT: li a3, 0
509+
; RV32-NEXT: call __udivdi3@plt
510+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
511+
; RV32-NEXT: addi sp, sp, 16
529512
; RV32-NEXT: ret
530513
;
531514
; RV64-LABEL: test_udiv_12:
532515
; RV64: # %bb.0:
533-
; RV64-NEXT: slli a2, a1, 62
534-
; RV64-NEXT: srli a0, a0, 2
535-
; RV64-NEXT: or a0, a0, a2
536-
; RV64-NEXT: srli a1, a1, 2
537-
; RV64-NEXT: lui a2, %hi(.LCPI10_0)
538-
; RV64-NEXT: ld a2, %lo(.LCPI10_0)(a2)
539-
; RV64-NEXT: add a3, a0, a1
540-
; RV64-NEXT: sltu a4, a3, a0
541-
; RV64-NEXT: add a3, a3, a4
542-
; RV64-NEXT: mulhu a4, a3, a2
543-
; RV64-NEXT: srli a5, a4, 1
544-
; RV64-NEXT: andi a4, a4, -2
545-
; RV64-NEXT: lui a6, %hi(.LCPI10_1)
546-
; RV64-NEXT: ld a6, %lo(.LCPI10_1)(a6)
547-
; RV64-NEXT: add a4, a4, a5
548-
; RV64-NEXT: sub a3, a3, a4
549-
; RV64-NEXT: sub a4, a0, a3
550-
; RV64-NEXT: mul a5, a4, a6
551-
; RV64-NEXT: mulhu a6, a4, a2
552-
; RV64-NEXT: add a5, a6, a5
553-
; RV64-NEXT: sltu a0, a0, a3
554-
; RV64-NEXT: sub a0, a1, a0
555-
; RV64-NEXT: mul a0, a0, a2
556-
; RV64-NEXT: add a1, a5, a0
557-
; RV64-NEXT: mul a0, a4, a2
516+
; RV64-NEXT: addi sp, sp, -16
517+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
518+
; RV64-NEXT: li a2, 12
519+
; RV64-NEXT: li a3, 0
520+
; RV64-NEXT: call __udivti3@plt
521+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
522+
; RV64-NEXT: addi sp, sp, 16
558523
; RV64-NEXT: ret
559524
%a = udiv iXLen2 %x, 12
560525
ret iXLen2 %a

llvm/test/CodeGen/RISCV/split-urem-by-constant.ll

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -335,46 +335,24 @@ define iXLen2 @test_urem_65537(iXLen2 %x) nounwind {
335335
define iXLen2 @test_urem_12(iXLen2 %x) nounwind {
336336
; RV32-LABEL: test_urem_12:
337337
; RV32: # %bb.0:
338-
; RV32-NEXT: slli a2, a1, 30
339-
; RV32-NEXT: srli a0, a0, 2
340-
; RV32-NEXT: or a0, a0, a2
341-
; RV32-NEXT: srli a1, a1, 2
342-
; RV32-NEXT: add a1, a0, a1
343-
; RV32-NEXT: sltu a2, a1, a0
344-
; RV32-NEXT: add a1, a1, a2
345-
; RV32-NEXT: lui a2, 699051
346-
; RV32-NEXT: addi a2, a2, -1365
347-
; RV32-NEXT: mulhu a2, a1, a2
348-
; RV32-NEXT: srli a3, a2, 1
349-
; RV32-NEXT: andi a2, a2, -2
350-
; RV32-NEXT: add a2, a2, a3
351-
; RV32-NEXT: sub a1, a1, a2
352-
; RV32-NEXT: slli a1, a1, 2
353-
; RV32-NEXT: andi a0, a0, 3
354-
; RV32-NEXT: or a0, a1, a0
355-
; RV32-NEXT: li a1, 0
338+
; RV32-NEXT: addi sp, sp, -16
339+
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
340+
; RV32-NEXT: li a2, 12
341+
; RV32-NEXT: li a3, 0
342+
; RV32-NEXT: call __umoddi3@plt
343+
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
344+
; RV32-NEXT: addi sp, sp, 16
356345
; RV32-NEXT: ret
357346
;
358347
; RV64-LABEL: test_urem_12:
359348
; RV64: # %bb.0:
360-
; RV64-NEXT: slli a2, a1, 62
361-
; RV64-NEXT: srli a0, a0, 2
362-
; RV64-NEXT: or a0, a0, a2
363-
; RV64-NEXT: srli a1, a1, 2
364-
; RV64-NEXT: lui a2, %hi(.LCPI10_0)
365-
; RV64-NEXT: ld a2, %lo(.LCPI10_0)(a2)
366-
; RV64-NEXT: add a1, a0, a1
367-
; RV64-NEXT: sltu a3, a1, a0
368-
; RV64-NEXT: add a1, a1, a3
369-
; RV64-NEXT: mulhu a2, a1, a2
370-
; RV64-NEXT: srli a3, a2, 1
371-
; RV64-NEXT: andi a2, a2, -2
372-
; RV64-NEXT: add a2, a2, a3
373-
; RV64-NEXT: sub a1, a1, a2
374-
; RV64-NEXT: slli a1, a1, 2
375-
; RV64-NEXT: andi a0, a0, 3
376-
; RV64-NEXT: or a0, a1, a0
377-
; RV64-NEXT: li a1, 0
349+
; RV64-NEXT: addi sp, sp, -16
350+
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
351+
; RV64-NEXT: li a2, 12
352+
; RV64-NEXT: li a3, 0
353+
; RV64-NEXT: call __umodti3@plt
354+
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
355+
; RV64-NEXT: addi sp, sp, 16
378356
; RV64-NEXT: ret
379357
%a = urem iXLen2 %x, 12
380358
ret iXLen2 %a

llvm/test/CodeGen/X86/divide-by-constant.ll

Lines changed: 14 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -735,23 +735,13 @@ entry:
735735
define i64 @urem_i64_12(i64 %x) nounwind {
736736
; X32-LABEL: urem_i64_12:
737737
; X32: # %bb.0: # %entry
738-
; X32-NEXT: pushl %esi
739-
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
740-
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
741-
; X32-NEXT: shrdl $2, %ecx, %esi
742-
; X32-NEXT: shrl $2, %ecx
743-
; X32-NEXT: addl %esi, %ecx
744-
; X32-NEXT: adcl $0, %ecx
745-
; X32-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
746-
; X32-NEXT: movl %ecx, %eax
747-
; X32-NEXT: mull %edx
748-
; X32-NEXT: shrl %edx
749-
; X32-NEXT: leal (%edx,%edx,2), %eax
750-
; X32-NEXT: subl %eax, %ecx
751-
; X32-NEXT: andl $3, %esi
752-
; X32-NEXT: leal (%esi,%ecx,4), %eax
753-
; X32-NEXT: xorl %edx, %edx
754-
; X32-NEXT: popl %esi
738+
; X32-NEXT: subl $12, %esp
739+
; X32-NEXT: pushl $0
740+
; X32-NEXT: pushl $12
741+
; X32-NEXT: pushl {{[0-9]+}}(%esp)
742+
; X32-NEXT: pushl {{[0-9]+}}(%esp)
743+
; X32-NEXT: calll __umoddi3
744+
; X32-NEXT: addl $28, %esp
755745
; X32-NEXT: retl
756746
;
757747
; X64-LABEL: urem_i64_12:
@@ -1126,33 +1116,13 @@ entry:
11261116
define i64 @udiv_i64_12(i64 %x) nounwind {
11271117
; X32-LABEL: udiv_i64_12:
11281118
; X32: # %bb.0: # %entry
1129-
; X32-NEXT: pushl %ebx
1130-
; X32-NEXT: pushl %edi
1131-
; X32-NEXT: pushl %esi
1132-
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
1133-
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1134-
; X32-NEXT: shrdl $2, %edi, %ecx
1135-
; X32-NEXT: shrl $2, %edi
1136-
; X32-NEXT: movl %ecx, %esi
1137-
; X32-NEXT: addl %edi, %esi
1138-
; X32-NEXT: adcl $0, %esi
1139-
; X32-NEXT: movl $-1431655765, %ebx # imm = 0xAAAAAAAB
1140-
; X32-NEXT: movl %esi, %eax
1141-
; X32-NEXT: mull %ebx
1142-
; X32-NEXT: shrl %edx
1143-
; X32-NEXT: leal (%edx,%edx,2), %eax
1144-
; X32-NEXT: subl %eax, %esi
1145-
; X32-NEXT: subl %esi, %ecx
1146-
; X32-NEXT: sbbl $0, %edi
1147-
; X32-NEXT: movl %ecx, %eax
1148-
; X32-NEXT: mull %ebx
1149-
; X32-NEXT: imull $-1431655766, %ecx, %ecx # imm = 0xAAAAAAAA
1150-
; X32-NEXT: addl %ecx, %edx
1151-
; X32-NEXT: imull $-1431655765, %edi, %ecx # imm = 0xAAAAAAAB
1152-
; X32-NEXT: addl %ecx, %edx
1153-
; X32-NEXT: popl %esi
1154-
; X32-NEXT: popl %edi
1155-
; X32-NEXT: popl %ebx
1119+
; X32-NEXT: subl $12, %esp
1120+
; X32-NEXT: pushl $0
1121+
; X32-NEXT: pushl $12
1122+
; X32-NEXT: pushl {{[0-9]+}}(%esp)
1123+
; X32-NEXT: pushl {{[0-9]+}}(%esp)
1124+
; X32-NEXT: calll __udivdi3
1125+
; X32-NEXT: addl $28, %esp
11561126
; X32-NEXT: retl
11571127
;
11581128
; X64-LABEL: udiv_i64_12:

0 commit comments

Comments
 (0)