Skip to content

Commit a4466a7

Browse files
[DAGCombiner] Allow promoted constants when lowering vector SDIVs
1 parent 39bd917 commit a4466a7

File tree

4 files changed

+38
-193
lines changed

4 files changed

+38
-193
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5186,7 +5186,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) {
51865186
return false;
51875187
};
51885188

5189-
return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
5189+
return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
5190+
/*AllowTruncation=*/true);
51905191
}
51915192

51925193
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
@@ -5250,7 +5251,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
52505251
// alternate sequence. Targets may check function attributes for size/speed
52515252
// trade-offs.
52525253
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5253-
if (isConstantOrConstantVector(N1) &&
5254+
if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
5255+
/*AllowTruncation=*/true) &&
52545256
!TLI.isIntDivCheap(N->getValueType(0), Attr))
52555257
if (SDValue Op = BuildSDIV(N))
52565258
return Op;

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6562,8 +6562,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
65626562
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
65636563
if (C->isZero())
65646564
return false;
6565-
6566-
const APInt &Divisor = C->getAPIntValue();
6565+
// Truncate the divisor to the target scalar type in case it was promoted
6566+
// during type legalization.
6567+
APInt Divisor = C->getAPIntValue().trunc(EltBits);
65676568
SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
65686569
int NumeratorFactor = 0;
65696570
int ShiftMask = -1;
@@ -6593,7 +6594,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
65936594
SDValue N1 = N->getOperand(1);
65946595

65956596
// Collect the shifts / magic values from each element.
6596-
if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6597+
if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6598+
/*AllowTruncation=*/true))
65976599
return SDValue();
65986600

65996601
SDValue MagicFactor, Factor, Shift, ShiftMask;

llvm/test/CodeGen/AArch64/rem-by-const.ll

Lines changed: 17 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -893,46 +893,15 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
893893
; CHECK-SD-LABEL: sv4i8_7:
894894
; CHECK-SD: // %bb.0: // %entry
895895
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
896-
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
897-
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
896+
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
897+
; CHECK-SD-NEXT: movi v2.4h, #7
898+
; CHECK-SD-NEXT: dup v1.4h, w8
898899
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
899-
; CHECK-SD-NEXT: smov x10, v0.h[0]
900-
; CHECK-SD-NEXT: smov x9, v0.h[1]
901-
; CHECK-SD-NEXT: smov w12, v0.h[0]
902-
; CHECK-SD-NEXT: smov w11, v0.h[1]
903-
; CHECK-SD-NEXT: smov x13, v0.h[2]
904-
; CHECK-SD-NEXT: smov w14, v0.h[2]
905-
; CHECK-SD-NEXT: smov x17, v0.h[3]
906-
; CHECK-SD-NEXT: smull x10, w10, w8
907-
; CHECK-SD-NEXT: smull x9, w9, w8
908-
; CHECK-SD-NEXT: smull x13, w13, w8
909-
; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
910-
; CHECK-SD-NEXT: smull x8, w17, w8
911-
; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
912-
; CHECK-SD-NEXT: asr w16, w10, #2
913-
; CHECK-SD-NEXT: add x13, x14, x13, lsr #32
914-
; CHECK-SD-NEXT: asr w15, w9, #2
915-
; CHECK-SD-NEXT: add w10, w16, w10, lsr #31
916-
; CHECK-SD-NEXT: asr w16, w13, #2
917-
; CHECK-SD-NEXT: add w9, w15, w9, lsr #31
918-
; CHECK-SD-NEXT: smov w15, v0.h[3]
919-
; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
920-
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
921-
; CHECK-SD-NEXT: add w10, w12, w10
922-
; CHECK-SD-NEXT: fmov s0, w10
923-
; CHECK-SD-NEXT: add w9, w11, w9
924-
; CHECK-SD-NEXT: add w10, w16, w13, lsr #31
925-
; CHECK-SD-NEXT: add x8, x15, x8, lsr #32
926-
; CHECK-SD-NEXT: mov v0.h[1], w9
927-
; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3
928-
; CHECK-SD-NEXT: asr w10, w8, #2
929-
; CHECK-SD-NEXT: add w9, w14, w9
930-
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
931-
; CHECK-SD-NEXT: mov v0.h[2], w9
932-
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
933-
; CHECK-SD-NEXT: add w8, w15, w8
934-
; CHECK-SD-NEXT: mov v0.h[3], w8
935-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
900+
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
901+
; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #17
902+
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
903+
; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
904+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
936905
; CHECK-SD-NEXT: ret
937906
;
938907
; CHECK-GI-LABEL: sv4i8_7:
@@ -978,39 +947,15 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
978947
; CHECK-SD-LABEL: sv4i8_100:
979948
; CHECK-SD: // %bb.0: // %entry
980949
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
981-
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
982-
; CHECK-SD-NEXT: mov w14, #100 // =0x64
983-
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
984-
; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8
985-
; CHECK-SD-NEXT: smov x9, v1.h[0]
986-
; CHECK-SD-NEXT: smov x10, v1.h[1]
987-
; CHECK-SD-NEXT: smov x11, v1.h[2]
988-
; CHECK-SD-NEXT: smov w12, v1.h[0]
989-
; CHECK-SD-NEXT: smov x13, v1.h[3]
990-
; CHECK-SD-NEXT: smov w15, v1.h[1]
991-
; CHECK-SD-NEXT: smull x9, w9, w8
992-
; CHECK-SD-NEXT: smull x10, w10, w8
993-
; CHECK-SD-NEXT: smull x11, w11, w8
994-
; CHECK-SD-NEXT: asr x9, x9, #37
995-
; CHECK-SD-NEXT: smull x8, w13, w8
996-
; CHECK-SD-NEXT: asr x10, x10, #37
997-
; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
998-
; CHECK-SD-NEXT: asr x11, x11, #37
999-
; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
1000-
; CHECK-SD-NEXT: asr x8, x8, #37
1001-
; CHECK-SD-NEXT: msub w9, w9, w14, w12
1002-
; CHECK-SD-NEXT: msub w10, w10, w14, w15
1003-
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
1004-
; CHECK-SD-NEXT: fmov s0, w9
1005-
; CHECK-SD-NEXT: add w9, w11, w11, lsr #31
1006-
; CHECK-SD-NEXT: smov w11, v1.h[2]
1007-
; CHECK-SD-NEXT: msub w9, w9, w14, w11
1008-
; CHECK-SD-NEXT: mov v0.h[1], w10
1009-
; CHECK-SD-NEXT: smov w10, v1.h[3]
1010-
; CHECK-SD-NEXT: msub w8, w8, w14, w10
1011-
; CHECK-SD-NEXT: mov v0.h[2], w9
1012-
; CHECK-SD-NEXT: mov v0.h[3], w8
1013-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
950+
; CHECK-SD-NEXT: mov w8, #5243 // =0x147b
951+
; CHECK-SD-NEXT: movi v2.4h, #100
952+
; CHECK-SD-NEXT: dup v1.4h, w8
953+
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
954+
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
955+
; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #19
956+
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
957+
; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
958+
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
1014959
; CHECK-SD-NEXT: ret
1015960
;
1016961
; CHECK-GI-LABEL: sv4i8_100:

llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll

Lines changed: 12 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -19,122 +19,18 @@ define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
1919
define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
2020
; CHECK-LABEL: sdiv_v16i16_by_7:
2121
; CHECK: // %bb.0:
22-
; CHECK-NEXT: smov x11, v0.h[1]
23-
; CHECK-NEXT: smov x10, v0.h[0]
24-
; CHECK-NEXT: mov x8, #-56173 // =0xffffffffffff2493
25-
; CHECK-NEXT: smov x13, v0.h[3]
26-
; CHECK-NEXT: smov x14, v1.h[1]
27-
; CHECK-NEXT: movk x8, #37449, lsl #16
28-
; CHECK-NEXT: smov x16, v1.h[0]
29-
; CHECK-NEXT: smov w12, v0.h[1]
30-
; CHECK-NEXT: smov w15, v0.h[0]
31-
; CHECK-NEXT: smov x18, v1.h[2]
32-
; CHECK-NEXT: smov w0, v0.h[3]
33-
; CHECK-NEXT: smov w1, v1.h[1]
34-
; CHECK-NEXT: smull x11, w11, w8
35-
; CHECK-NEXT: smov w2, v1.h[0]
36-
; CHECK-NEXT: smov x9, v0.h[2]
37-
; CHECK-NEXT: smull x10, w10, w8
38-
; CHECK-NEXT: smov w17, v0.h[2]
39-
; CHECK-NEXT: smov w3, v1.h[2]
40-
; CHECK-NEXT: smull x13, w13, w8
41-
; CHECK-NEXT: smull x14, w14, w8
42-
; CHECK-NEXT: add x12, x12, x11, lsr #32
43-
; CHECK-NEXT: smull x16, w16, w8
44-
; CHECK-NEXT: add x10, x15, x10, lsr #32
45-
; CHECK-NEXT: smull x15, w18, w8
46-
; CHECK-NEXT: add x11, x0, x13, lsr #32
47-
; CHECK-NEXT: smov x0, v0.h[4]
48-
; CHECK-NEXT: add x13, x1, x14, lsr #32
49-
; CHECK-NEXT: asr w18, w10, #2
50-
; CHECK-NEXT: smull x9, w9, w8
51-
; CHECK-NEXT: add x14, x2, x16, lsr #32
52-
; CHECK-NEXT: asr w16, w12, #2
53-
; CHECK-NEXT: smov x2, v1.h[3]
54-
; CHECK-NEXT: add w18, w18, w10, lsr #31
55-
; CHECK-NEXT: add x15, x3, x15, lsr #32
56-
; CHECK-NEXT: smov w10, v0.h[5]
57-
; CHECK-NEXT: add w12, w16, w12, lsr #31
58-
; CHECK-NEXT: asr w16, w14, #2
59-
; CHECK-NEXT: add x9, x17, x9, lsr #32
60-
; CHECK-NEXT: fmov s2, w18
61-
; CHECK-NEXT: smov w17, v0.h[4]
62-
; CHECK-NEXT: smull x0, w0, w8
63-
; CHECK-NEXT: add w14, w16, w14, lsr #31
64-
; CHECK-NEXT: asr w16, w13, #2
65-
; CHECK-NEXT: asr w1, w9, #2
66-
; CHECK-NEXT: smov x18, v0.h[5]
67-
; CHECK-NEXT: fmov s3, w14
68-
; CHECK-NEXT: mov v2.h[1], w12
69-
; CHECK-NEXT: add w12, w16, w13, lsr #31
70-
; CHECK-NEXT: smov w13, v1.h[3]
71-
; CHECK-NEXT: smov x14, v1.h[4]
72-
; CHECK-NEXT: smull x16, w2, w8
73-
; CHECK-NEXT: add w1, w1, w9, lsr #31
74-
; CHECK-NEXT: add x17, x17, x0, lsr #32
75-
; CHECK-NEXT: asr w0, w15, #2
76-
; CHECK-NEXT: mov v3.h[1], w12
77-
; CHECK-NEXT: smov w12, v1.h[4]
78-
; CHECK-NEXT: smull x18, w18, w8
79-
; CHECK-NEXT: mov v2.h[2], w1
80-
; CHECK-NEXT: asr w1, w11, #2
81-
; CHECK-NEXT: add w15, w0, w15, lsr #31
82-
; CHECK-NEXT: add x13, x13, x16, lsr #32
83-
; CHECK-NEXT: smov x16, v1.h[5]
84-
; CHECK-NEXT: smull x14, w14, w8
85-
; CHECK-NEXT: add w11, w1, w11, lsr #31
86-
; CHECK-NEXT: smov x0, v0.h[6]
87-
; CHECK-NEXT: add x10, x10, x18, lsr #32
88-
; CHECK-NEXT: asr w1, w13, #2
89-
; CHECK-NEXT: mov v3.h[2], w15
90-
; CHECK-NEXT: smov w15, v1.h[5]
91-
; CHECK-NEXT: add x12, x12, x14, lsr #32
92-
; CHECK-NEXT: mov v2.h[3], w11
93-
; CHECK-NEXT: asr w11, w17, #2
94-
; CHECK-NEXT: add w13, w1, w13, lsr #31
95-
; CHECK-NEXT: smull x16, w16, w8
96-
; CHECK-NEXT: smov x14, v1.h[6]
97-
; CHECK-NEXT: asr w18, w12, #2
98-
; CHECK-NEXT: add w11, w11, w17, lsr #31
99-
; CHECK-NEXT: smov w9, v0.h[6]
100-
; CHECK-NEXT: mov v3.h[3], w13
101-
; CHECK-NEXT: smull x17, w0, w8
102-
; CHECK-NEXT: smov x0, v1.h[7]
103-
; CHECK-NEXT: add x13, x15, x16, lsr #32
104-
; CHECK-NEXT: add w12, w18, w12, lsr #31
105-
; CHECK-NEXT: smov w16, v1.h[6]
106-
; CHECK-NEXT: mov v2.h[4], w11
107-
; CHECK-NEXT: smov x11, v0.h[7]
108-
; CHECK-NEXT: smull x14, w14, w8
109-
; CHECK-NEXT: asr w15, w10, #2
110-
; CHECK-NEXT: asr w18, w13, #2
111-
; CHECK-NEXT: smov w1, v0.h[7]
112-
; CHECK-NEXT: mov v3.h[4], w12
113-
; CHECK-NEXT: add x9, x9, x17, lsr #32
114-
; CHECK-NEXT: add w10, w15, w10, lsr #31
115-
; CHECK-NEXT: add w12, w18, w13, lsr #31
116-
; CHECK-NEXT: add x13, x16, x14, lsr #32
117-
; CHECK-NEXT: smov w14, v1.h[7]
118-
; CHECK-NEXT: smull x11, w11, w8
119-
; CHECK-NEXT: smull x8, w0, w8
120-
; CHECK-NEXT: mov v2.h[5], w10
121-
; CHECK-NEXT: asr w10, w9, #2
122-
; CHECK-NEXT: mov v3.h[5], w12
123-
; CHECK-NEXT: asr w12, w13, #2
124-
; CHECK-NEXT: add w9, w10, w9, lsr #31
125-
; CHECK-NEXT: add x10, x1, x11, lsr #32
126-
; CHECK-NEXT: add w11, w12, w13, lsr #31
127-
; CHECK-NEXT: add x8, x14, x8, lsr #32
128-
; CHECK-NEXT: mov v2.h[6], w9
129-
; CHECK-NEXT: asr w9, w10, #2
130-
; CHECK-NEXT: mov v3.h[6], w11
131-
; CHECK-NEXT: asr w11, w8, #2
132-
; CHECK-NEXT: add w9, w9, w10, lsr #31
133-
; CHECK-NEXT: add w8, w11, w8, lsr #31
134-
; CHECK-NEXT: mov v2.h[7], w9
135-
; CHECK-NEXT: mov v3.h[7], w8
136-
; CHECK-NEXT: mov v0.16b, v2.16b
137-
; CHECK-NEXT: mov v1.16b, v3.16b
22+
; CHECK-NEXT: mov w8, #18725 // =0x4925
23+
; CHECK-NEXT: dup v2.8h, w8
24+
; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h
25+
; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
26+
; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
27+
; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
28+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
29+
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
30+
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
31+
; CHECK-NEXT: sshr v1.8h, v1.8h, #1
32+
; CHECK-NEXT: usra v0.8h, v0.8h, #15
33+
; CHECK-NEXT: usra v1.8h, v1.8h, #15
13834
; CHECK-NEXT: ret
13935
%div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
14036
ret <16 x i16> %div

0 commit comments

Comments
 (0)