@@ -57,30 +57,32 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
5757; GISEL-LABEL: combine_vec_udiv_nonuniform:
5858; GISEL: // %bb.0:
5959; GISEL-NEXT: adrp x8, .LCPI1_4
60- ; GISEL-NEXT: adrp x9, .LCPI1_5
60+ ; GISEL-NEXT: adrp x9, .LCPI1_0
6161; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
6262; GISEL-NEXT: adrp x8, .LCPI1_3
63+ ; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0]
6364; GISEL-NEXT: neg v1.8h, v1.8h
6465; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
6566; GISEL-NEXT: adrp x8, .LCPI1_2
6667; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
6768; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
6869; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
6970; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
70- ; GISEL-NEXT: adrp x8, .LCPI1_1
71+ ; GISEL-NEXT: adrp x8, .LCPI1_5
7172; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
7273; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
7374; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
7475; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
75- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1 ]
76- ; GISEL-NEXT: adrp x8, .LCPI1_0
77- ; GISEL-NEXT: neg v3.8h, v3.8h
76+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5 ]
77+ ; GISEL-NEXT: adrp x8, .LCPI1_1
78+ ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5 .8h
7879; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
79- ; GISEL-NEXT: ldr q4, [x9 , :lo12:.LCPI1_5 ]
80- ; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
80+ ; GISEL-NEXT: ldr q4, [x8 , :lo12:.LCPI1_1 ]
81+ ; GISEL-NEXT: shl v3.8h, v3.8h, #15
8182; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
82- ; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
83- ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
83+ ; GISEL-NEXT: neg v2.8h, v4.8h
84+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
85+ ; GISEL-NEXT: sshr v2.8h, v3.8h, #15
8486; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
8587; GISEL-NEXT: ret
8688 %1 = udiv <8 x i16 > %x , <i16 23 , i16 34 , i16 -23 , i16 56 , i16 128 , i16 -1 , i16 -256 , i16 -32768 >
@@ -106,23 +108,25 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
106108; GISEL-LABEL: combine_vec_udiv_nonuniform2:
107109; GISEL: // %bb.0:
108110; GISEL-NEXT: adrp x8, .LCPI2_3
109- ; GISEL-NEXT: adrp x9, .LCPI2_1
111+ ; GISEL-NEXT: adrp x9, .LCPI2_4
112+ ; GISEL-NEXT: adrp x10, .LCPI2_0
110113; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
111114; GISEL-NEXT: adrp x8, .LCPI2_2
112- ; GISEL-NEXT: ldr q4, [x9 , :lo12:.LCPI2_1 ]
115+ ; GISEL-NEXT: ldr q4, [x10 , :lo12:.LCPI2_0 ]
113116; GISEL-NEXT: neg v1.8h, v1.8h
114117; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
115- ; GISEL-NEXT: adrp x8, .LCPI2_4
118+ ; GISEL-NEXT: adrp x8, .LCPI2_1
116119; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
117- ; GISEL-NEXT: neg v4.8h, v4.8h
118120; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
121+ ; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1]
119122; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
120- ; GISEL-NEXT: ldr q2, [x8 , :lo12:.LCPI2_4]
121- ; GISEL-NEXT: adrp x8, .LCPI2_0
123+ ; GISEL-NEXT: ldr q2, [x9 , :lo12:.LCPI2_4]
124+ ; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h
122125; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
123- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
124- ; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
125- ; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h
126+ ; GISEL-NEXT: neg v3.8h, v5.8h
127+ ; GISEL-NEXT: shl v2.8h, v2.8h, #15
128+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
129+ ; GISEL-NEXT: sshr v2.8h, v2.8h, #15
126130; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
127131; GISEL-NEXT: ret
128132 %1 = udiv <8 x i16 > %x , <i16 -34 , i16 35 , i16 36 , i16 -37 , i16 38 , i16 -39 , i16 40 , i16 -41 >
@@ -147,21 +151,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
147151; GISEL-LABEL: combine_vec_udiv_nonuniform3:
148152; GISEL: // %bb.0:
149153; GISEL-NEXT: adrp x8, .LCPI3_2
150- ; GISEL-NEXT: adrp x9, .LCPI3_3
154+ ; GISEL-NEXT: adrp x9, .LCPI3_0
151155; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
152- ; GISEL-NEXT: adrp x8, .LCPI3_1
153- ; GISEL-NEXT: ldr q4 , [x9, :lo12:.LCPI3_3 ]
156+ ; GISEL-NEXT: adrp x8, .LCPI3_3
157+ ; GISEL-NEXT: ldr q3 , [x9, :lo12:.LCPI3_0 ]
154158; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
155159; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
156160; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
157- ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
158- ; GISEL-NEXT: adrp x8, .LCPI3_0
159- ; GISEL-NEXT: neg v2.8h, v2.8h
160- ; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
161- ; GISEL-NEXT: usra v1.8h, v3.8h, #1
162- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0]
163- ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
164- ; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
161+ ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
162+ ; GISEL-NEXT: adrp x8, .LCPI3_1
163+ ; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
164+ ; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
165+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
166+ ; GISEL-NEXT: shl v2.8h, v2.8h, #15
167+ ; GISEL-NEXT: usra v1.8h, v4.8h, #1
168+ ; GISEL-NEXT: neg v3.8h, v3.8h
169+ ; GISEL-NEXT: sshr v2.8h, v2.8h, #15
170+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
165171; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
166172; GISEL-NEXT: ret
167173 %1 = udiv <8 x i16 > %x , <i16 7 , i16 23 , i16 25 , i16 27 , i16 31 , i16 47 , i16 63 , i16 127 >
@@ -191,19 +197,21 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
191197; GISEL-LABEL: combine_vec_udiv_nonuniform4:
192198; GISEL: // %bb.0:
193199; GISEL-NEXT: adrp x8, .LCPI4_2
194- ; GISEL-NEXT: adrp x9, .LCPI4_1
200+ ; GISEL-NEXT: adrp x9, .LCPI4_0
195201; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
196202; GISEL-NEXT: adrp x8, .LCPI4_3
197- ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1 ]
203+ ; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0 ]
198204; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
199205; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
200206; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
201- ; GISEL-NEXT: adrp x8, .LCPI4_0
202- ; GISEL-NEXT: neg v4 .16b, v4.16b
207+ ; GISEL-NEXT: adrp x8, .LCPI4_1
208+ ; GISEL-NEXT: cmeq v3.16b, v3 .16b, v4.16b
203209; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
204- ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
205- ; GISEL-NEXT: cmeq v2.16b, v3.16b, v2.16b
206- ; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b
210+ ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
211+ ; GISEL-NEXT: shl v3.16b, v3.16b, #7
212+ ; GISEL-NEXT: neg v2.16b, v2.16b
213+ ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
214+ ; GISEL-NEXT: sshr v2.16b, v3.16b, #7
207215; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
208216; GISEL-NEXT: ret
209217 %div = udiv <16 x i8 > %x , <i8 -64 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 , i8 1 >
@@ -240,26 +248,28 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
240248; GISEL-LABEL: pr38477:
241249; GISEL: // %bb.0:
242250; GISEL-NEXT: adrp x8, .LCPI5_3
243- ; GISEL-NEXT: adrp x9, .LCPI5_4
251+ ; GISEL-NEXT: adrp x9, .LCPI5_0
244252; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
245253; GISEL-NEXT: adrp x8, .LCPI5_2
254+ ; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0]
246255; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
247256; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
248257; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
249258; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
250- ; GISEL-NEXT: adrp x8, .LCPI5_1
259+ ; GISEL-NEXT: adrp x8, .LCPI5_4
251260; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
252261; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
253262; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
254- ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1 ]
255- ; GISEL-NEXT: adrp x8, .LCPI5_0
256- ; GISEL-NEXT: neg v3.8h, v3.8h
263+ ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4 ]
264+ ; GISEL-NEXT: adrp x8, .LCPI5_1
265+ ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5 .8h
257266; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
258- ; GISEL-NEXT: ldr q4, [x9 , :lo12:.LCPI5_4 ]
259- ; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
267+ ; GISEL-NEXT: ldr q4, [x8 , :lo12:.LCPI5_1 ]
268+ ; GISEL-NEXT: shl v3.8h, v3.8h, #15
260269; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
261- ; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
262- ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
270+ ; GISEL-NEXT: neg v2.8h, v4.8h
271+ ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
272+ ; GISEL-NEXT: sshr v2.8h, v3.8h, #15
263273; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
264274; GISEL-NEXT: ret
265275 %1 = udiv <8 x i16 > %a0 , <i16 1 , i16 119 , i16 73 , i16 -111 , i16 -3 , i16 118 , i16 32 , i16 31 >
0 commit comments