Skip to content

Commit ceaf56a

Browse files
Automerge: [DAGCombiner] Handle type-promoted constants in UDIV lowering (#169491)
2 parents 9f8ad29 + c5fa1f8 commit ceaf56a

File tree

4 files changed

+111
-62
lines changed

4 files changed

+111
-62
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,8 +1065,9 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
10651065

10661066
// Determines if it is a constant integer or a splat/build vector of constant
10671067
// integers (and undefs).
1068-
// Do not permit build vector implicit truncation.
1069-
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1068+
// Do not permit build vector implicit truncation unless AllowTruncation is set.
1069+
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false,
1070+
bool AllowTruncation = false) {
10701071
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
10711072
return !(Const->isOpaque() && NoOpaques);
10721073
if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
@@ -1076,8 +1077,13 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
10761077
if (Op.isUndef())
10771078
continue;
10781079
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1079-
if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1080-
(Const->isOpaque() && NoOpaques))
1080+
if (!Const || (Const->isOpaque() && NoOpaques))
1081+
return false;
1082+
// When AllowTruncation is true, allow constants that have been promoted
1083+
// during type legalization as long as the value fits in the target type.
1084+
if ((AllowTruncation &&
1085+
Const->getAPIntValue().getActiveBits() > BitWidth) ||
1086+
(!AllowTruncation && Const->getAPIntValue().getBitWidth() != BitWidth))
10811087
return false;
10821088
}
10831089
return true;
@@ -5322,7 +5328,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53225328
EVT VT = N->getValueType(0);
53235329

53245330
// fold (udiv x, (1 << c)) -> x >>u c
5325-
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
5331+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/true,
5332+
/*AllowTruncation=*/true)) {
53265333
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
53275334
AddToWorklist(LogBase2.getNode());
53285335

@@ -5336,7 +5343,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53365343
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
53375344
if (N1.getOpcode() == ISD::SHL) {
53385345
SDValue N10 = N1.getOperand(0);
5339-
if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
5346+
if (isConstantOrConstantVector(N10, /*NoOpaques=*/true,
5347+
/*AllowTruncation=*/true)) {
53405348
if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
53415349
AddToWorklist(LogBase2.getNode());
53425350

@@ -5352,7 +5360,8 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
53525360

53535361
// fold (udiv x, c) -> alternate
53545362
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5355-
if (isConstantOrConstantVector(N1) &&
5363+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5364+
/*AllowTruncation=*/true) &&
53565365
!TLI.isIntDivCheap(N->getValueType(0), Attr))
53575366
if (SDValue Op = BuildUDIV(N))
53585367
return Op;

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6738,7 +6738,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67386738
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
67396739
if (C->isZero())
67406740
return false;
6741-
const APInt& Divisor = C->getAPIntValue();
6741+
// Truncate the divisor to the target scalar type in case it was promoted
6742+
// during type legalization.
6743+
APInt Divisor = C->getAPIntValue().trunc(EltBits);
67426744

67436745
SDValue PreShift, MagicFactor, NPQFactor, PostShift;
67446746

@@ -6779,7 +6781,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
67796781
};
67806782

67816783
// Collect the shifts/magic values from each element.
6782-
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6784+
if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6785+
/*AllowTruncation=*/true))
67836786
return SDValue();
67846787

67856788
SDValue PreShift, PostShift, MagicFactor, NPQFactor;

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,35 +1433,13 @@ entry:
14331433
define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
14341434
; CHECK-SD-LABEL: uv4i8_7:
14351435
; CHECK-SD: // %bb.0: // %entry
1436-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1437-
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
1436+
; CHECK-SD-NEXT: mov w8, #9363 // =0x2493
14381437
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1439-
; CHECK-SD-NEXT: movk w8, #9362, lsl #16
1440-
; CHECK-SD-NEXT: umov w9, v0.h[0]
1441-
; CHECK-SD-NEXT: umov w10, v0.h[1]
1442-
; CHECK-SD-NEXT: umov w13, v0.h[2]
1443-
; CHECK-SD-NEXT: umov w15, v0.h[3]
1444-
; CHECK-SD-NEXT: umull x11, w9, w8
1445-
; CHECK-SD-NEXT: umull x12, w10, w8
1446-
; CHECK-SD-NEXT: umull x14, w13, w8
1447-
; CHECK-SD-NEXT: lsr x11, x11, #32
1448-
; CHECK-SD-NEXT: umull x8, w15, w8
1449-
; CHECK-SD-NEXT: lsr x12, x12, #32
1450-
; CHECK-SD-NEXT: sub w11, w11, w11, lsl #3
1451-
; CHECK-SD-NEXT: sub w12, w12, w12, lsl #3
1452-
; CHECK-SD-NEXT: lsr x8, x8, #32
1453-
; CHECK-SD-NEXT: add w9, w9, w11
1454-
; CHECK-SD-NEXT: fmov s0, w9
1455-
; CHECK-SD-NEXT: add w10, w10, w12
1456-
; CHECK-SD-NEXT: lsr x9, x14, #32
1457-
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
1458-
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
1459-
; CHECK-SD-NEXT: mov v0.h[1], w10
1460-
; CHECK-SD-NEXT: add w8, w15, w8
1461-
; CHECK-SD-NEXT: add w9, w13, w9
1462-
; CHECK-SD-NEXT: mov v0.h[2], w9
1463-
; CHECK-SD-NEXT: mov v0.h[3], w8
1464-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1438+
; CHECK-SD-NEXT: movi v2.4h, #7
1439+
; CHECK-SD-NEXT: dup v1.4h, w8
1440+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
1441+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
1442+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
14651443
; CHECK-SD-NEXT: ret
14661444
;
14671445
; CHECK-GI-LABEL: uv4i8_7:
@@ -1508,32 +1486,13 @@ entry:
15081486
define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
15091487
; CHECK-SD-LABEL: uv4i8_100:
15101488
; CHECK-SD: // %bb.0: // %entry
1511-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1512-
; CHECK-SD-NEXT: mov w8, #23593 // =0x5c29
1513-
; CHECK-SD-NEXT: mov w14, #100 // =0x64
1489+
; CHECK-SD-NEXT: mov w8, #656 // =0x290
15141490
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1515-
; CHECK-SD-NEXT: movk w8, #655, lsl #16
1516-
; CHECK-SD-NEXT: umov w9, v0.h[0]
1517-
; CHECK-SD-NEXT: umov w10, v0.h[1]
1518-
; CHECK-SD-NEXT: umov w12, v0.h[2]
1519-
; CHECK-SD-NEXT: umov w15, v0.h[3]
1520-
; CHECK-SD-NEXT: umull x11, w9, w8
1521-
; CHECK-SD-NEXT: umull x13, w10, w8
1522-
; CHECK-SD-NEXT: lsr x11, x11, #32
1523-
; CHECK-SD-NEXT: lsr x13, x13, #32
1524-
; CHECK-SD-NEXT: msub w9, w11, w14, w9
1525-
; CHECK-SD-NEXT: umull x11, w12, w8
1526-
; CHECK-SD-NEXT: msub w10, w13, w14, w10
1527-
; CHECK-SD-NEXT: fmov s0, w9
1528-
; CHECK-SD-NEXT: umull x8, w15, w8
1529-
; CHECK-SD-NEXT: lsr x9, x11, #32
1530-
; CHECK-SD-NEXT: mov v0.h[1], w10
1531-
; CHECK-SD-NEXT: msub w9, w9, w14, w12
1532-
; CHECK-SD-NEXT: lsr x8, x8, #32
1533-
; CHECK-SD-NEXT: msub w8, w8, w14, w15
1534-
; CHECK-SD-NEXT: mov v0.h[2], w9
1535-
; CHECK-SD-NEXT: mov v0.h[3], w8
1536-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1491+
; CHECK-SD-NEXT: movi v2.4h, #100
1492+
; CHECK-SD-NEXT: dup v1.4h, w8
1493+
; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h
1494+
; CHECK-SD-NEXT: shrn v1.4h, v1.4s, #16
1495+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
15371496
; CHECK-SD-NEXT: ret
15381497
;
15391498
; CHECK-GI-LABEL: uv4i8_100:
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
3+
4+
; This test verifies that udiv and urem by constant work correctly even when type
5+
; legalization promotes constant operands (e.g., i16 -> i32 in BUILD_VECTOR).
6+
; This is a regression test for a bug where v16i16 would be split into two
7+
; v8i16 operations during legalization, the i16 constants would be promoted
8+
; to i32, and then the second DAGCombine round would fail to recognize the
9+
; promoted constants when trying to convert udiv into mul+shift.
10+
11+
define <8 x i16> @udiv_v8i16_by_255(<8 x i16> %x) {
12+
; CHECK-LABEL: udiv_v8i16_by_255:
13+
; CHECK: // %bb.0:
14+
; CHECK-NEXT: mov w8, #32897 // =0x8081
15+
; CHECK-NEXT: dup v1.8h, w8
16+
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
17+
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
18+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
19+
; CHECK-NEXT: ushr v0.8h, v0.8h, #7
20+
; CHECK-NEXT: ret
21+
%div = udiv <8 x i16> %x, splat (i16 255)
22+
ret <8 x i16> %div
23+
}
24+
25+
define <16 x i16> @udiv_v16i16_by_255(<16 x i16> %x) {
26+
; CHECK-LABEL: udiv_v16i16_by_255:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: mov w8, #32897 // =0x8081
29+
; CHECK-NEXT: dup v2.8h, w8
30+
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
31+
; CHECK-NEXT: umull v0.4s, v0.4h, v2.4h
32+
; CHECK-NEXT: umull2 v4.4s, v1.8h, v2.8h
33+
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
34+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
35+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
36+
; CHECK-NEXT: ushr v0.8h, v0.8h, #7
37+
; CHECK-NEXT: ushr v1.8h, v1.8h, #7
38+
; CHECK-NEXT: ret
39+
%div = udiv <16 x i16> %x, splat (i16 255)
40+
ret <16 x i16> %div
41+
}
42+
43+
define <8 x i16> @urem_v8i16_by_255(<8 x i16> %x) {
44+
; CHECK-LABEL: urem_v8i16_by_255:
45+
; CHECK: // %bb.0:
46+
; CHECK-NEXT: mov w8, #32897 // =0x8081
47+
; CHECK-NEXT: dup v1.8h, w8
48+
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
49+
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
50+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
51+
; CHECK-NEXT: movi v2.2d, #0xff00ff00ff00ff
52+
; CHECK-NEXT: ushr v1.8h, v1.8h, #7
53+
; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
54+
; CHECK-NEXT: ret
55+
%rem = urem <8 x i16> %x, splat (i16 255)
56+
ret <8 x i16> %rem
57+
}
58+
59+
define <16 x i16> @urem_v16i16_by_255(<16 x i16> %x) {
60+
; CHECK-LABEL: urem_v16i16_by_255:
61+
; CHECK: // %bb.0:
62+
; CHECK-NEXT: mov w8, #32897 // =0x8081
63+
; CHECK-NEXT: dup v2.8h, w8
64+
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
65+
; CHECK-NEXT: umull v4.4s, v0.4h, v2.4h
66+
; CHECK-NEXT: umull2 v5.4s, v1.8h, v2.8h
67+
; CHECK-NEXT: umull v2.4s, v1.4h, v2.4h
68+
; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h
69+
; CHECK-NEXT: movi v4.2d, #0xff00ff00ff00ff
70+
; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h
71+
; CHECK-NEXT: ushr v3.8h, v3.8h, #7
72+
; CHECK-NEXT: ushr v2.8h, v2.8h, #7
73+
; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h
74+
; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h
75+
; CHECK-NEXT: ret
76+
%rem = urem <16 x i16> %x, splat (i16 255)
77+
ret <16 x i16> %rem
78+
}

0 commit comments

Comments
 (0)