@@ -15,56 +15,13 @@ define <16 x i8> @div16xi8(<16 x i8> %x) {
1515;
1616; CHECK-GI-LABEL: div16xi8:
1717; CHECK-GI: // %bb.0:
18- ; CHECK-GI-NEXT: smov w9, v0.b[0]
19- ; CHECK-GI-NEXT: mov w8, #25 // =0x19
20- ; CHECK-GI-NEXT: smov w10, v0.b[1]
21- ; CHECK-GI-NEXT: smov w11, v0.b[2]
22- ; CHECK-GI-NEXT: smov w12, v0.b[3]
23- ; CHECK-GI-NEXT: smov w13, v0.b[4]
24- ; CHECK-GI-NEXT: smov w14, v0.b[5]
25- ; CHECK-GI-NEXT: smov w15, v0.b[6]
26- ; CHECK-GI-NEXT: smov w16, v0.b[7]
27- ; CHECK-GI-NEXT: smov w17, v0.b[8]
28- ; CHECK-GI-NEXT: smov w18, v0.b[9]
29- ; CHECK-GI-NEXT: sdiv w9, w9, w8
30- ; CHECK-GI-NEXT: sdiv w10, w10, w8
31- ; CHECK-GI-NEXT: fmov s1, w9
32- ; CHECK-GI-NEXT: sdiv w11, w11, w8
33- ; CHECK-GI-NEXT: mov v1.b[1], w10
34- ; CHECK-GI-NEXT: smov w10, v0.b[10]
35- ; CHECK-GI-NEXT: sdiv w12, w12, w8
36- ; CHECK-GI-NEXT: mov v1.b[2], w11
37- ; CHECK-GI-NEXT: smov w11, v0.b[11]
38- ; CHECK-GI-NEXT: sdiv w13, w13, w8
39- ; CHECK-GI-NEXT: mov v1.b[3], w12
40- ; CHECK-GI-NEXT: smov w12, v0.b[12]
41- ; CHECK-GI-NEXT: sdiv w14, w14, w8
42- ; CHECK-GI-NEXT: mov v1.b[4], w13
43- ; CHECK-GI-NEXT: smov w13, v0.b[13]
44- ; CHECK-GI-NEXT: sdiv w15, w15, w8
45- ; CHECK-GI-NEXT: mov v1.b[5], w14
46- ; CHECK-GI-NEXT: sdiv w16, w16, w8
47- ; CHECK-GI-NEXT: mov v1.b[6], w15
48- ; CHECK-GI-NEXT: sdiv w17, w17, w8
49- ; CHECK-GI-NEXT: mov v1.b[7], w16
50- ; CHECK-GI-NEXT: sdiv w9, w18, w8
51- ; CHECK-GI-NEXT: mov v1.b[8], w17
52- ; CHECK-GI-NEXT: sdiv w10, w10, w8
53- ; CHECK-GI-NEXT: mov v1.b[9], w9
54- ; CHECK-GI-NEXT: smov w9, v0.b[14]
55- ; CHECK-GI-NEXT: sdiv w11, w11, w8
56- ; CHECK-GI-NEXT: mov v1.b[10], w10
57- ; CHECK-GI-NEXT: smov w10, v0.b[15]
58- ; CHECK-GI-NEXT: sdiv w12, w12, w8
59- ; CHECK-GI-NEXT: mov v1.b[11], w11
60- ; CHECK-GI-NEXT: sdiv w13, w13, w8
61- ; CHECK-GI-NEXT: mov v1.b[12], w12
62- ; CHECK-GI-NEXT: sdiv w9, w9, w8
63- ; CHECK-GI-NEXT: mov v1.b[13], w13
64- ; CHECK-GI-NEXT: sdiv w8, w10, w8
65- ; CHECK-GI-NEXT: mov v1.b[14], w9
66- ; CHECK-GI-NEXT: mov v1.b[15], w8
67- ; CHECK-GI-NEXT: mov v0.16b, v1.16b
18+ ; CHECK-GI-NEXT: movi v1.16b, #41
19+ ; CHECK-GI-NEXT: smull2 v2.8h, v0.16b, v1.16b
20+ ; CHECK-GI-NEXT: smull v0.8h, v0.8b, v1.8b
21+ ; CHECK-GI-NEXT: uzp2 v1.16b, v0.16b, v2.16b
22+ ; CHECK-GI-NEXT: sshr v0.16b, v1.16b, #2
23+ ; CHECK-GI-NEXT: ushr v0.16b, v0.16b, #7
24+ ; CHECK-GI-NEXT: ssra v0.16b, v1.16b, #2
6825; CHECK-GI-NEXT: ret
6926 %div = sdiv <16 x i8 > %x , <i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 , i8 25 >
7027 ret <16 x i8 > %div
@@ -85,32 +42,15 @@ define <8 x i16> @div8xi16(<8 x i16> %x) {
8542;
8643; CHECK-GI-LABEL: div8xi16:
8744; CHECK-GI: // %bb.0:
88- ; CHECK-GI-NEXT: smov w9, v0.h[0]
89- ; CHECK-GI-NEXT: mov w8, #6577 // =0x19b1
90- ; CHECK-GI-NEXT: smov w10, v0.h[1]
91- ; CHECK-GI-NEXT: smov w11, v0.h[2]
92- ; CHECK-GI-NEXT: smov w12, v0.h[3]
93- ; CHECK-GI-NEXT: smov w13, v0.h[4]
94- ; CHECK-GI-NEXT: smov w14, v0.h[5]
95- ; CHECK-GI-NEXT: sdiv w9, w9, w8
96- ; CHECK-GI-NEXT: sdiv w10, w10, w8
97- ; CHECK-GI-NEXT: fmov s1, w9
98- ; CHECK-GI-NEXT: sdiv w11, w11, w8
99- ; CHECK-GI-NEXT: mov v1.h[1], w10
100- ; CHECK-GI-NEXT: smov w10, v0.h[6]
101- ; CHECK-GI-NEXT: sdiv w12, w12, w8
102- ; CHECK-GI-NEXT: mov v1.h[2], w11
103- ; CHECK-GI-NEXT: smov w11, v0.h[7]
104- ; CHECK-GI-NEXT: sdiv w13, w13, w8
105- ; CHECK-GI-NEXT: mov v1.h[3], w12
106- ; CHECK-GI-NEXT: sdiv w9, w14, w8
107- ; CHECK-GI-NEXT: mov v1.h[4], w13
108- ; CHECK-GI-NEXT: sdiv w10, w10, w8
109- ; CHECK-GI-NEXT: mov v1.h[5], w9
110- ; CHECK-GI-NEXT: sdiv w8, w11, w8
111- ; CHECK-GI-NEXT: mov v1.h[6], w10
112- ; CHECK-GI-NEXT: mov v1.h[7], w8
113- ; CHECK-GI-NEXT: mov v0.16b, v1.16b
45+ ; CHECK-GI-NEXT: adrp x8, .LCPI1_0
46+ ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
47+ ; CHECK-GI-NEXT: smull2 v2.4s, v0.8h, v1.8h
48+ ; CHECK-GI-NEXT: smull v1.4s, v0.4h, v1.4h
49+ ; CHECK-GI-NEXT: uzp2 v1.8h, v1.8h, v2.8h
50+ ; CHECK-GI-NEXT: add v1.8h, v1.8h, v0.8h
51+ ; CHECK-GI-NEXT: sshr v0.8h, v1.8h, #12
52+ ; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #15
53+ ; CHECK-GI-NEXT: ssra v0.8h, v1.8h, #12
11454; CHECK-GI-NEXT: ret
11555 %div = sdiv <8 x i16 > %x , <i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 , i16 6577 >
11656 ret <8 x i16 > %div
@@ -131,20 +71,14 @@ define <4 x i32> @div32xi4(<4 x i32> %x) {
13171;
13272; CHECK-GI-LABEL: div32xi4:
13373; CHECK-GI: // %bb.0:
134- ; CHECK-GI-NEXT: fmov w9, s0
135- ; CHECK-GI-NEXT: mov w8, #39957 // =0x9c15
136- ; CHECK-GI-NEXT: mov w10, v0.s[1]
137- ; CHECK-GI-NEXT: movk w8, #145, lsl #16
138- ; CHECK-GI-NEXT: mov w11, v0.s[2]
139- ; CHECK-GI-NEXT: mov w12, v0.s[3]
140- ; CHECK-GI-NEXT: sdiv w9, w9, w8
141- ; CHECK-GI-NEXT: sdiv w10, w10, w8
142- ; CHECK-GI-NEXT: fmov s0, w9
143- ; CHECK-GI-NEXT: sdiv w11, w11, w8
144- ; CHECK-GI-NEXT: mov v0.s[1], w10
145- ; CHECK-GI-NEXT: sdiv w8, w12, w8
146- ; CHECK-GI-NEXT: mov v0.s[2], w11
147- ; CHECK-GI-NEXT: mov v0.s[3], w8
74+ ; CHECK-GI-NEXT: adrp x8, .LCPI2_0
75+ ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
76+ ; CHECK-GI-NEXT: smull2 v2.2d, v0.4s, v1.4s
77+ ; CHECK-GI-NEXT: smull v0.2d, v0.2s, v1.2s
78+ ; CHECK-GI-NEXT: uzp2 v1.4s, v0.4s, v2.4s
79+ ; CHECK-GI-NEXT: sshr v0.4s, v1.4s, #22
80+ ; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #31
81+ ; CHECK-GI-NEXT: ssra v0.4s, v1.4s, #22
14882; CHECK-GI-NEXT: ret
14983 %div = sdiv <4 x i32 > %x , <i32 9542677 , i32 9542677 , i32 9542677 , i32 9542677 >
15084 ret <4 x i32 > %div
0 commit comments