@@ -127,9 +127,11 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
127127define <16 x i8 > @smulh_v16i8 (<16 x i8 > %a , <16 x i8 > %b ) {
128128; CHECK-LABEL: smulh_v16i8:
129129; CHECK: // %bb.0:
130- ; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
131- ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
132- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
130+ ; CHECK-NEXT: ptrue p0.b, vl16
131+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
132+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
133+ ; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
134+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
133135; CHECK-NEXT: ret
134136 %1 = sext <16 x i8 > %a to <16 x i16 >
135137 %2 = sext <16 x i8 > %b to <16 x i16 >
@@ -142,9 +144,11 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
142144define <8 x i16 > @smulh_v8i16 (<8 x i16 > %a , <8 x i16 > %b ) {
143145; CHECK-LABEL: smulh_v8i16:
144146; CHECK: // %bb.0:
145- ; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
146- ; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
147- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
147+ ; CHECK-NEXT: ptrue p0.h, vl8
148+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
149+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
150+ ; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
151+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
148152; CHECK-NEXT: ret
149153 %1 = sext <8 x i16 > %a to <8 x i32 >
150154 %2 = sext <8 x i16 > %b to <8 x i32 >
@@ -157,9 +161,11 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
157161define <4 x i32 > @smulh_v4i32 (<4 x i32 > %a , <4 x i32 > %b ) {
158162; CHECK-LABEL: smulh_v4i32:
159163; CHECK: // %bb.0:
160- ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
161- ; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
162- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
164+ ; CHECK-NEXT: ptrue p0.s, vl4
165+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
166+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
167+ ; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
168+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
163169; CHECK-NEXT: ret
164170 %1 = sext <4 x i32 > %a to <4 x i64 >
165171 %2 = sext <4 x i32 > %b to <4 x i64 >
@@ -172,15 +178,11 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
172178define <2 x i64 > @smulh_v2i64 (<2 x i64 > %a , <2 x i64 > %b ) {
173179; CHECK-LABEL: smulh_v2i64:
174180; CHECK: // %bb.0:
175- ; CHECK-NEXT: mov x8, v0.d[1]
176- ; CHECK-NEXT: mov x9, v1.d[1]
177- ; CHECK-NEXT: fmov x10, d0
178- ; CHECK-NEXT: fmov x11, d1
179- ; CHECK-NEXT: smulh x10, x10, x11
180- ; CHECK-NEXT: smulh x8, x8, x9
181- ; CHECK-NEXT: fmov d0, x10
182- ; CHECK-NEXT: fmov d1, x8
183- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
181+ ; CHECK-NEXT: ptrue p0.d, vl2
182+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
183+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
184+ ; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
185+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
184186; CHECK-NEXT: ret
185187 %1 = sext <2 x i64 > %a to <2 x i128 >
186188 %2 = sext <2 x i64 > %b to <2 x i128 >
@@ -193,9 +195,11 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
193195define <16 x i8 > @umulh_v16i8 (<16 x i8 > %a , <16 x i8 > %b ) {
194196; CHECK-LABEL: umulh_v16i8:
195197; CHECK: // %bb.0:
196- ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b
197- ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
198- ; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
198+ ; CHECK-NEXT: ptrue p0.b, vl16
199+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
200+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
201+ ; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
202+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
199203; CHECK-NEXT: ret
200204 %1 = zext <16 x i8 > %a to <16 x i16 >
201205 %2 = zext <16 x i8 > %b to <16 x i16 >
@@ -208,9 +212,11 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
208212define <8 x i16 > @umulh_v8i16 (<8 x i16 > %a , <8 x i16 > %b ) {
209213; CHECK-LABEL: umulh_v8i16:
210214; CHECK: // %bb.0:
211- ; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
212- ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
213- ; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
215+ ; CHECK-NEXT: ptrue p0.h, vl8
216+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
217+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
218+ ; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
219+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
214220; CHECK-NEXT: ret
215221 %1 = zext <8 x i16 > %a to <8 x i32 >
216222 %2 = zext <8 x i16 > %b to <8 x i32 >
@@ -223,9 +229,11 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
223229define <4 x i32 > @umulh_v4i32 (<4 x i32 > %a , <4 x i32 > %b ) {
224230; CHECK-LABEL: umulh_v4i32:
225231; CHECK: // %bb.0:
226- ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
227- ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
228- ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
232+ ; CHECK-NEXT: ptrue p0.s, vl4
233+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
234+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
235+ ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
236+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
229237; CHECK-NEXT: ret
230238 %1 = zext <4 x i32 > %a to <4 x i64 >
231239 %2 = zext <4 x i32 > %b to <4 x i64 >
@@ -238,15 +246,11 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
238246define <2 x i64 > @umulh_v2i64 (<2 x i64 > %a , <2 x i64 > %b ) {
239247; CHECK-LABEL: umulh_v2i64:
240248; CHECK: // %bb.0:
241- ; CHECK-NEXT: mov x8, v0.d[1]
242- ; CHECK-NEXT: mov x9, v1.d[1]
243- ; CHECK-NEXT: fmov x10, d0
244- ; CHECK-NEXT: fmov x11, d1
245- ; CHECK-NEXT: umulh x10, x10, x11
246- ; CHECK-NEXT: umulh x8, x8, x9
247- ; CHECK-NEXT: fmov d0, x10
248- ; CHECK-NEXT: fmov d1, x8
249- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
249+ ; CHECK-NEXT: ptrue p0.d, vl2
250+ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
251+ ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
252+ ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
253+ ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
250254; CHECK-NEXT: ret
251255 %1 = zext <2 x i64 > %a to <2 x i128 >
252256 %2 = zext <2 x i64 > %b to <2 x i128 >
@@ -263,8 +267,11 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
263267define <8 x i8 > @smulh_v8i8 (<8 x i8 > %a , <8 x i8 > %b ) {
264268; CHECK-LABEL: smulh_v8i8:
265269; CHECK: // %bb.0:
266- ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
267- ; CHECK-NEXT: shrn v0.8b, v0.8h, #8
270+ ; CHECK-NEXT: ptrue p0.b, vl8
271+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
272+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
273+ ; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
274+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
268275; CHECK-NEXT: ret
269276 %1 = sext <8 x i8 > %a to <8 x i16 >
270277 %2 = sext <8 x i8 > %b to <8 x i16 >
@@ -277,8 +284,11 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
277284define <4 x i16 > @smulh_v4i16 (<4 x i16 > %a , <4 x i16 > %b ) {
278285; CHECK-LABEL: smulh_v4i16:
279286; CHECK: // %bb.0:
280- ; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
281- ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
287+ ; CHECK-NEXT: ptrue p0.h, vl4
288+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
289+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
290+ ; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
291+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
282292; CHECK-NEXT: ret
283293 %1 = sext <4 x i16 > %a to <4 x i32 >
284294 %2 = sext <4 x i16 > %b to <4 x i32 >
@@ -291,8 +301,11 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
291301define <2 x i32 > @smulh_v2i32 (<2 x i32 > %a , <2 x i32 > %b ) {
292302; CHECK-LABEL: smulh_v2i32:
293303; CHECK: // %bb.0:
294- ; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
295- ; CHECK-NEXT: shrn v0.2s, v0.2d, #32
304+ ; CHECK-NEXT: ptrue p0.s, vl2
305+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
306+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
307+ ; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
308+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
296309; CHECK-NEXT: ret
297310 %1 = sext <2 x i32 > %a to <2 x i64 >
298311 %2 = sext <2 x i32 > %b to <2 x i64 >
@@ -305,12 +318,11 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
305318define <1 x i64 > @smulh_v1i64 (<1 x i64 > %a , <1 x i64 > %b ) {
306319; CHECK-LABEL: smulh_v1i64:
307320; CHECK: // %bb.0:
308- ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
309- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
310- ; CHECK-NEXT: fmov x8, d0
311- ; CHECK-NEXT: fmov x9, d1
312- ; CHECK-NEXT: smulh x8, x8, x9
313- ; CHECK-NEXT: fmov d0, x8
321+ ; CHECK-NEXT: ptrue p0.d, vl1
322+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
323+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
324+ ; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
325+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
314326; CHECK-NEXT: ret
315327 %1 = sext <1 x i64 > %a to <1 x i128 >
316328 %2 = sext <1 x i64 > %b to <1 x i128 >
@@ -323,8 +335,11 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
323335define <8 x i8 > @umulh_v8i8 (<8 x i8 > %a , <8 x i8 > %b ) {
324336; CHECK-LABEL: umulh_v8i8:
325337; CHECK: // %bb.0:
326- ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
327- ; CHECK-NEXT: shrn v0.8b, v0.8h, #8
338+ ; CHECK-NEXT: ptrue p0.b, vl8
339+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
340+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
341+ ; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
342+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
328343; CHECK-NEXT: ret
329344 %1 = zext <8 x i8 > %a to <8 x i16 >
330345 %2 = zext <8 x i8 > %b to <8 x i16 >
@@ -337,8 +352,11 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
337352define <4 x i16 > @umulh_v4i16 (<4 x i16 > %a , <4 x i16 > %b ) {
338353; CHECK-LABEL: umulh_v4i16:
339354; CHECK: // %bb.0:
340- ; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
341- ; CHECK-NEXT: shrn v0.4h, v0.4s, #16
355+ ; CHECK-NEXT: ptrue p0.h, vl4
356+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
357+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
358+ ; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
359+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
342360; CHECK-NEXT: ret
343361 %1 = zext <4 x i16 > %a to <4 x i32 >
344362 %2 = zext <4 x i16 > %b to <4 x i32 >
@@ -351,8 +369,11 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
351369define <2 x i32 > @umulh_v2i32 (<2 x i32 > %a , <2 x i32 > %b ) {
352370; CHECK-LABEL: umulh_v2i32:
353371; CHECK: // %bb.0:
354- ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
355- ; CHECK-NEXT: shrn v0.2s, v0.2d, #32
372+ ; CHECK-NEXT: ptrue p0.s, vl2
373+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
374+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
375+ ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
376+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
356377; CHECK-NEXT: ret
357378 %1 = zext <2 x i32 > %a to <2 x i64 >
358379 %2 = zext <2 x i32 > %b to <2 x i64 >
@@ -365,12 +386,11 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
365386define <1 x i64 > @umulh_v1i64 (<1 x i64 > %a , <1 x i64 > %b ) {
366387; CHECK-LABEL: umulh_v1i64:
367388; CHECK: // %bb.0:
368- ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
369- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
370- ; CHECK-NEXT: fmov x8, d0
371- ; CHECK-NEXT: fmov x9, d1
372- ; CHECK-NEXT: umulh x8, x8, x9
373- ; CHECK-NEXT: fmov d0, x8
389+ ; CHECK-NEXT: ptrue p0.d, vl1
390+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
391+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
392+ ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
393+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
374394; CHECK-NEXT: ret
375395 %1 = zext <1 x i64 > %a to <1 x i128 >
376396 %2 = zext <1 x i64 > %b to <1 x i128 >
0 commit comments