@@ -19,122 +19,18 @@ define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
1919define <16 x i16 > @sdiv_v16i16_by_7 (<16 x i16 > %x ) {
2020; CHECK-LABEL: sdiv_v16i16_by_7:
2121; CHECK: // %bb.0:
22- ; CHECK-NEXT: smov x11, v0.h[1]
23- ; CHECK-NEXT: smov x10, v0.h[0]
24- ; CHECK-NEXT: mov x8, #-56173 // =0xffffffffffff2493
25- ; CHECK-NEXT: smov x13, v0.h[3]
26- ; CHECK-NEXT: smov x14, v1.h[1]
27- ; CHECK-NEXT: movk x8, #37449, lsl #16
28- ; CHECK-NEXT: smov x16, v1.h[0]
29- ; CHECK-NEXT: smov w12, v0.h[1]
30- ; CHECK-NEXT: smov w15, v0.h[0]
31- ; CHECK-NEXT: smov x18, v1.h[2]
32- ; CHECK-NEXT: smov w0, v0.h[3]
33- ; CHECK-NEXT: smov w1, v1.h[1]
34- ; CHECK-NEXT: smull x11, w11, w8
35- ; CHECK-NEXT: smov w2, v1.h[0]
36- ; CHECK-NEXT: smov x9, v0.h[2]
37- ; CHECK-NEXT: smull x10, w10, w8
38- ; CHECK-NEXT: smov w17, v0.h[2]
39- ; CHECK-NEXT: smov w3, v1.h[2]
40- ; CHECK-NEXT: smull x13, w13, w8
41- ; CHECK-NEXT: smull x14, w14, w8
42- ; CHECK-NEXT: add x12, x12, x11, lsr #32
43- ; CHECK-NEXT: smull x16, w16, w8
44- ; CHECK-NEXT: add x10, x15, x10, lsr #32
45- ; CHECK-NEXT: smull x15, w18, w8
46- ; CHECK-NEXT: add x11, x0, x13, lsr #32
47- ; CHECK-NEXT: smov x0, v0.h[4]
48- ; CHECK-NEXT: add x13, x1, x14, lsr #32
49- ; CHECK-NEXT: asr w18, w10, #2
50- ; CHECK-NEXT: smull x9, w9, w8
51- ; CHECK-NEXT: add x14, x2, x16, lsr #32
52- ; CHECK-NEXT: asr w16, w12, #2
53- ; CHECK-NEXT: smov x2, v1.h[3]
54- ; CHECK-NEXT: add w18, w18, w10, lsr #31
55- ; CHECK-NEXT: add x15, x3, x15, lsr #32
56- ; CHECK-NEXT: smov w10, v0.h[5]
57- ; CHECK-NEXT: add w12, w16, w12, lsr #31
58- ; CHECK-NEXT: asr w16, w14, #2
59- ; CHECK-NEXT: add x9, x17, x9, lsr #32
60- ; CHECK-NEXT: fmov s2, w18
61- ; CHECK-NEXT: smov w17, v0.h[4]
62- ; CHECK-NEXT: smull x0, w0, w8
63- ; CHECK-NEXT: add w14, w16, w14, lsr #31
64- ; CHECK-NEXT: asr w16, w13, #2
65- ; CHECK-NEXT: asr w1, w9, #2
66- ; CHECK-NEXT: smov x18, v0.h[5]
67- ; CHECK-NEXT: fmov s3, w14
68- ; CHECK-NEXT: mov v2.h[1], w12
69- ; CHECK-NEXT: add w12, w16, w13, lsr #31
70- ; CHECK-NEXT: smov w13, v1.h[3]
71- ; CHECK-NEXT: smov x14, v1.h[4]
72- ; CHECK-NEXT: smull x16, w2, w8
73- ; CHECK-NEXT: add w1, w1, w9, lsr #31
74- ; CHECK-NEXT: add x17, x17, x0, lsr #32
75- ; CHECK-NEXT: asr w0, w15, #2
76- ; CHECK-NEXT: mov v3.h[1], w12
77- ; CHECK-NEXT: smov w12, v1.h[4]
78- ; CHECK-NEXT: smull x18, w18, w8
79- ; CHECK-NEXT: mov v2.h[2], w1
80- ; CHECK-NEXT: asr w1, w11, #2
81- ; CHECK-NEXT: add w15, w0, w15, lsr #31
82- ; CHECK-NEXT: add x13, x13, x16, lsr #32
83- ; CHECK-NEXT: smov x16, v1.h[5]
84- ; CHECK-NEXT: smull x14, w14, w8
85- ; CHECK-NEXT: add w11, w1, w11, lsr #31
86- ; CHECK-NEXT: smov x0, v0.h[6]
87- ; CHECK-NEXT: add x10, x10, x18, lsr #32
88- ; CHECK-NEXT: asr w1, w13, #2
89- ; CHECK-NEXT: mov v3.h[2], w15
90- ; CHECK-NEXT: smov w15, v1.h[5]
91- ; CHECK-NEXT: add x12, x12, x14, lsr #32
92- ; CHECK-NEXT: mov v2.h[3], w11
93- ; CHECK-NEXT: asr w11, w17, #2
94- ; CHECK-NEXT: add w13, w1, w13, lsr #31
95- ; CHECK-NEXT: smull x16, w16, w8
96- ; CHECK-NEXT: smov x14, v1.h[6]
97- ; CHECK-NEXT: asr w18, w12, #2
98- ; CHECK-NEXT: add w11, w11, w17, lsr #31
99- ; CHECK-NEXT: smov w9, v0.h[6]
100- ; CHECK-NEXT: mov v3.h[3], w13
101- ; CHECK-NEXT: smull x17, w0, w8
102- ; CHECK-NEXT: smov x0, v1.h[7]
103- ; CHECK-NEXT: add x13, x15, x16, lsr #32
104- ; CHECK-NEXT: add w12, w18, w12, lsr #31
105- ; CHECK-NEXT: smov w16, v1.h[6]
106- ; CHECK-NEXT: mov v2.h[4], w11
107- ; CHECK-NEXT: smov x11, v0.h[7]
108- ; CHECK-NEXT: smull x14, w14, w8
109- ; CHECK-NEXT: asr w15, w10, #2
110- ; CHECK-NEXT: asr w18, w13, #2
111- ; CHECK-NEXT: smov w1, v0.h[7]
112- ; CHECK-NEXT: mov v3.h[4], w12
113- ; CHECK-NEXT: add x9, x9, x17, lsr #32
114- ; CHECK-NEXT: add w10, w15, w10, lsr #31
115- ; CHECK-NEXT: add w12, w18, w13, lsr #31
116- ; CHECK-NEXT: add x13, x16, x14, lsr #32
117- ; CHECK-NEXT: smov w14, v1.h[7]
118- ; CHECK-NEXT: smull x11, w11, w8
119- ; CHECK-NEXT: smull x8, w0, w8
120- ; CHECK-NEXT: mov v2.h[5], w10
121- ; CHECK-NEXT: asr w10, w9, #2
122- ; CHECK-NEXT: mov v3.h[5], w12
123- ; CHECK-NEXT: asr w12, w13, #2
124- ; CHECK-NEXT: add w9, w10, w9, lsr #31
125- ; CHECK-NEXT: add x10, x1, x11, lsr #32
126- ; CHECK-NEXT: add w11, w12, w13, lsr #31
127- ; CHECK-NEXT: add x8, x14, x8, lsr #32
128- ; CHECK-NEXT: mov v2.h[6], w9
129- ; CHECK-NEXT: asr w9, w10, #2
130- ; CHECK-NEXT: mov v3.h[6], w11
131- ; CHECK-NEXT: asr w11, w8, #2
132- ; CHECK-NEXT: add w9, w9, w10, lsr #31
133- ; CHECK-NEXT: add w8, w11, w8, lsr #31
134- ; CHECK-NEXT: mov v2.h[7], w9
135- ; CHECK-NEXT: mov v3.h[7], w8
136- ; CHECK-NEXT: mov v0.16b, v2.16b
137- ; CHECK-NEXT: mov v1.16b, v3.16b
22+ ; CHECK-NEXT: mov w8, #18725 // =0x4925
23+ ; CHECK-NEXT: dup v2.8h, w8
24+ ; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h
25+ ; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
26+ ; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
27+ ; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
28+ ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
29+ ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
30+ ; CHECK-NEXT: sshr v0.8h, v0.8h, #1
31+ ; CHECK-NEXT: sshr v1.8h, v1.8h, #1
32+ ; CHECK-NEXT: usra v0.8h, v0.8h, #15
33+ ; CHECK-NEXT: usra v1.8h, v1.8h, #15
13834; CHECK-NEXT: ret
13935 %div = sdiv <16 x i16 > %x , <i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 >
14036 ret <16 x i16 > %div
0 commit comments