Skip to content

Commit 5893a47

Browse files
[DAGCombiner] Allow promoted constants when lowering vector UDIVs
1 parent 11ce0d3 commit 5893a47

File tree

4 files changed: 46 additions and 130 deletions.

4 files changed: 46 additions and 130 deletions.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
10651065

10661066
// Determines if it is a constant integer or a splat/build vector of constant
10671067
// integers (and undefs).
1068-
// Do not permit build vector implicit truncation.
1069-
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1068+
// Do not permit build vector implicit truncation unless AllowTruncation is set.
1069+
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1070+
bool AllowTruncation = false) {
10701071
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
10711072
return !(Const->isOpaque() && NoOpaques);
10721073
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,9 +1077,17 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
10761077
if (Op.isUndef())
10771078
continue;
10781079
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1079-
if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1080-
(Const->isOpaque() && NoOpaques))
1080+
if (!Const || (Const->isOpaque() && NoOpaques))
10811081
return false;
1082+
// When AllowTruncation is true, allow constants that have been promoted
1083+
// during type legalization as long as the value fits in the target type.
1084+
if (AllowTruncation) {
1085+
if (Const->getAPIntValue().getActiveBits() > BitWidth)
1086+
return false;
1087+
} else {
1088+
if (Const->getAPIntValue().getBitWidth() != BitWidth)
1089+
return false;
1090+
}
10821091
}
10831092
return true;
10841093
}
@@ -5322,7 +5331,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53225331
EVT VT = N->getValueType(0);
53235332

53245333
// fold (udiv x, (1 << c)) -> x >>u c
5325-
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5334+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5335+
/*AllowTruncation=*/true)) {
53265336
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
53275337
AddToWorklist(LogBase2.getNode());
53285338

@@ -5336,7 +5346,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53365346
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
53375347
if (N1.getOpcode() == ISD::SHL) {
53385348
SDValue N10 = N1.getOperand(0);
5339-
if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5349+
if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5350+
/*AllowTruncation=*/true)) {
53405351
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
53415352
AddToWorklist(LogBase2.getNode());
53425353

@@ -5352,7 +5363,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53525363

53535364
// fold (udiv x, c) -> alternate
53545365
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5355-
if (isConstantOrConstantVector(N1) &&
5366+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5367+
/*AllowTruncation=*/true) &&
53565368
!TLI.isIntDivCheap(N->getValueType(0), Attr))
53575369
if (SDValue Op = BuildUDIV(N))
53585370
return Op;

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67386738
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
67396739
if (C->isZero())
67406740
return false;
6741-
const APInt& Divisor = C->getAPIntValue();
6741+
// Truncate the divisor to the target scalar type in case it was promoted
6742+
// during type legalization.
6743+
APInt Divisor = C->getAPIntValue().trunc(EltBits);
67426744

67436745
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
67446746

@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67796781
};
67806782

67816783
// Collect the shifts/magic values from each element.
6782-
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6784+
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6785+
/*AllowTruncation=*/true))
67836786
return SDValue();
67846787

67856788
SDValue PreShift, PostShift, MagicFactor, NPQFactor;

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 12 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -1433,35 +1433,13 @@ entry:
14331433
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
14341434
; CHECK-SD-LABEL: uv4i8_7:
14351435
; CHECK-SD: // %bb.0: // %entry
1436-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1437-
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
1436+
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
14381437
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1439-
; CHECK-SD-NEXT: movk w8, #9362, lsl #16
1440-
; CHECK-SD-NEXT: umov w9, v0.h[0]
1441-
; CHECK-SD-NEXT: umov w10, v0.h[1]
1442-
; CHECK-SD-NEXT: umov w13, v0.h[2]
1443-
; CHECK-SD-NEXT: umov w15, v0.h[3]
1444-
; CHECK-SD-NEXT: umull x11, w9, w8
1445-
; CHECK-SD-NEXT: umull x12, w10, w8
1446-
; CHECK-SD-NEXT: umull x14, w13, w8
1447-
; CHECK-SD-NEXT: lsr x11, x11, #32
1448-
; CHECK-SD-NEXT: umull x8, w15, w8
1449-
; CHECK-SD-NEXT: lsr x12, x12, #32
1450-
; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
1451-
; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
1452-
; CHECK-SD-NEXT: lsr x8, x8, #32
1453-
; CHECK-SD-NEXT: add w9, w9, w11
1454-
; CHECK-SD-NEXT: fmov s0, w9
1455-
; CHECK-SD-NEXT: add w10, w10, w12
1456-
; CHECK-SD-NEXT: lsr x9, x14, #32
1457-
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
1458-
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
1459-
; CHECK-SD-NEXT: mov v0.h[1], w10
1460-
; CHECK-SD-NEXT: add w8, w15, w8
1461-
; CHECK-SD-NEXT: add w9, w13, w9
1462-
; CHECK-SD-NEXT: mov v0.h[2], w9
1463-
; CHECK-SD-NEXT: mov v0.h[3], w8
1464-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1438+
; CHECK-SD-NEXT: movi v2.4h, #7
1439+
; CHECK-SD-NEXT: dup v1.4h, w8
1440+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
1441+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
1442+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
14651443
; CHECK-SD-NEXT: ret
14661444
;
14671445
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
15081486
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
15091487
; CHECK-SD-LABEL: uv4i8_100:
15101488
; CHECK-SD: // %bb.0: // %entry
1511-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1512-
; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
1513-
; CHECK-SD-NEXT: mov w14, #100 // =0x64
1489+
; CHECK-SD-NEXT: mov w8, #656 // =0x290
15141490
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1515-
; CHECK-SD-NEXT: movk w8, #655, lsl #16
1516-
; CHECK-SD-NEXT: umov w9, v0.h[0]
1517-
; CHECK-SD-NEXT: umov w10, v0.h[1]
1518-
; CHECK-SD-NEXT: umov w12, v0.h[2]
1519-
; CHECK-SD-NEXT: umov w15, v0.h[3]
1520-
; CHECK-SD-NEXT: umull x11, w9, w8
1521-
; CHECK-SD-NEXT: umull x13, w10, w8
1522-
; CHECK-SD-NEXT: lsr x11, x11, #32
1523-
; CHECK-SD-NEXT: lsr x13, x13, #32
1524-
; CHECK-SD-NEXT: msub w9, w11, w14, w9
1525-
; CHECK-SD-NEXT: umull x11, w12, w8
1526-
; CHECK-SD-NEXT: msub w10, w13, w14, w10
1527-
; CHECK-SD-NEXT: fmov s0, w9
1528-
; CHECK-SD-NEXT: umull x8, w15, w8
1529-
; CHECK-SD-NEXT: lsr x9, x11, #32
1530-
; CHECK-SD-NEXT: mov v0.h[1], w10
1531-
; CHECK-SD-NEXT: msub w9, w9, w14, w12
1532-
; CHECK-SD-NEXT: lsr x8, x8, #32
1533-
; CHECK-SD-NEXT: msub w8, w8, w14, w15
1534-
; CHECK-SD-NEXT: mov v0.h[2], w9
1535-
; CHECK-SD-NEXT: mov v0.h[3], w8
1536-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1491+
; CHECK-SD-NEXT: movi v2.4h, #100
1492+
; CHECK-SD-NEXT: dup v1.4h, w8
1493+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
1494+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
1495+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
15371496
; CHECK-SD-NEXT: ret
15381497
;
15391498
; CHECK-GI-LABEL: uv4i8_100:

llvm/test/CodeGen/AArch64/udiv-by-const-promoted-ops.ll

Lines changed: 10 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -25,74 +25,16 @@ define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
2525
define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
2626
; CHECK-LABEL: udiv_v16i16_by_255:
2727
; CHECK: // %bb.0:
28-
; CHECK-NEXT: umov w9, v0.h[0]
29-
; CHECK-NEXT: umov w11, v1.h[0]
30-
; CHECK-NEXT: mov w8, #258 // =0x102
31-
; CHECK-NEXT: movk w8, #257, lsl #16
32-
; CHECK-NEXT: umov w10, v0.h[1]
33-
; CHECK-NEXT: umov w12, v1.h[1]
34-
; CHECK-NEXT: umov w13, v0.h[2]
35-
; CHECK-NEXT: umov w14, v1.h[2]
36-
; CHECK-NEXT: umull x9, w9, w8
37-
; CHECK-NEXT: umull x11, w11, w8
38-
; CHECK-NEXT: umull x10, w10, w8
39-
; CHECK-NEXT: umull x12, w12, w8
40-
; CHECK-NEXT: lsr x9, x9, #32
41-
; CHECK-NEXT: lsr x11, x11, #32
42-
; CHECK-NEXT: umull x13, w13, w8
43-
; CHECK-NEXT: fmov s2, w9
44-
; CHECK-NEXT: lsr x10, x10, #32
45-
; CHECK-NEXT: umov w9, v0.h[3]
46-
; CHECK-NEXT: fmov s3, w11
47-
; CHECK-NEXT: lsr x12, x12, #32
48-
; CHECK-NEXT: umull x11, w14, w8
49-
; CHECK-NEXT: umov w14, v1.h[3]
50-
; CHECK-NEXT: mov v2.h[1], w10
51-
; CHECK-NEXT: lsr x10, x13, #32
52-
; CHECK-NEXT: mov v3.h[1], w12
53-
; CHECK-NEXT: umov w12, v0.h[4]
54-
; CHECK-NEXT: lsr x11, x11, #32
55-
; CHECK-NEXT: umull x9, w9, w8
56-
; CHECK-NEXT: umull x13, w14, w8
57-
; CHECK-NEXT: umov w14, v1.h[4]
58-
; CHECK-NEXT: mov v2.h[2], w10
59-
; CHECK-NEXT: mov v3.h[2], w11
60-
; CHECK-NEXT: lsr x9, x9, #32
61-
; CHECK-NEXT: umull x10, w12, w8
62-
; CHECK-NEXT: lsr x12, x13, #32
63-
; CHECK-NEXT: umov w11, v0.h[5]
64-
; CHECK-NEXT: umull x13, w14, w8
65-
; CHECK-NEXT: umov w14, v1.h[5]
66-
; CHECK-NEXT: mov v2.h[3], w9
67-
; CHECK-NEXT: lsr x9, x10, #32
68-
; CHECK-NEXT: mov v3.h[3], w12
69-
; CHECK-NEXT: lsr x12, x13, #32
70-
; CHECK-NEXT: umull x10, w11, w8
71-
; CHECK-NEXT: umov w11, v0.h[6]
72-
; CHECK-NEXT: umull x13, w14, w8
73-
; CHECK-NEXT: umov w14, v1.h[6]
74-
; CHECK-NEXT: mov v2.h[4], w9
75-
; CHECK-NEXT: umov w9, v0.h[7]
76-
; CHECK-NEXT: mov v3.h[4], w12
77-
; CHECK-NEXT: lsr x10, x10, #32
78-
; CHECK-NEXT: lsr x12, x13, #32
79-
; CHECK-NEXT: umull x11, w11, w8
80-
; CHECK-NEXT: umull x13, w14, w8
81-
; CHECK-NEXT: umov w14, v1.h[7]
82-
; CHECK-NEXT: mov v2.h[5], w10
83-
; CHECK-NEXT: umull x9, w9, w8
84-
; CHECK-NEXT: mov v3.h[5], w12
85-
; CHECK-NEXT: lsr x10, x11, #32
86-
; CHECK-NEXT: lsr x11, x13, #32
87-
; CHECK-NEXT: umull x8, w14, w8
88-
; CHECK-NEXT: lsr x9, x9, #32
89-
; CHECK-NEXT: mov v2.h[6], w10
90-
; CHECK-NEXT: mov v3.h[6], w11
91-
; CHECK-NEXT: lsr x8, x8, #32
92-
; CHECK-NEXT: mov v2.h[7], w9
93-
; CHECK-NEXT: mov v3.h[7], w8
94-
; CHECK-NEXT: mov v0.16b, v2.16b
95-
; CHECK-NEXT: mov v1.16b, v3.16b
28+
; CHECK-NEXT: mov w8, #32897 // =0x8081
29+
; CHECK-NEXT: dup v2.8h, w8
30+
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
31+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
32+
; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
33+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
34+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
35+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
36+
; CHECK-NEXT: ushr v0.8h, v0.8h, #7
37+
; CHECK-NEXT: ushr v1.8h, v1.8h, #7
9638
; CHECK-NEXT: ret
9739
%div = udiv <16 x i16> %x, splat (i16 255)
9840
ret <16 x i16> %div

0 commit comments

Comments
 (0)