@@ -106,25 +106,7 @@ define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
106106;
107107; CHECK-NEWLOWERING-LABEL: usdot:
108108; CHECK-NEWLOWERING: // %bb.0: // %entry
109- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z1.b
110- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
111- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
112- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
113- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
114- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z3.h
115- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
116- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
117- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
118- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z1.h
119- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
120- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
121- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
122- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
123- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
124- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
125- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
126- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
127- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
109+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b
128110; CHECK-NEWLOWERING-NEXT: ret
129111entry:
130112 %a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -165,25 +147,7 @@ define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
165147;
166148; CHECK-NEWLOWERING-LABEL: sudot:
167149; CHECK-NEWLOWERING: // %bb.0: // %entry
168- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z1.b
169- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
170- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
171- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
172- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
173- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z3.h
174- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
175- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
176- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
177- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z1.h
178- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
179- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
180- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
181- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
182- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
183- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
184- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
185- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
186- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
150+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b
187151; CHECK-NEWLOWERING-NEXT: ret
188152entry:
189153 %a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -415,59 +379,12 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
415379;
416380; CHECK-NEWLOWERING-LABEL: usdot_8to64:
417381; CHECK-NEWLOWERING: // %bb.0: // %entry
418- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
419- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
420- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
421- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
422- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
423- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
424- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
425- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
426- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
427- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
428- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
429- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
430- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
431- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
432- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
433- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
434- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
435- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
436- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
437- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
438- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
439- ; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
440- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
441- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
442- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
443- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
444- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
445- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
446- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
447- ; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
448- ; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
449- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
450- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
451- ; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
452- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
453- ; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
454- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
455- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
456- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
457- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
458- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
459- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
460- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
461- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
462- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
463- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
464- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
465- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
466- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
467- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
468- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
469- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
470- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
382+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
383+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z2.b, z3.b
384+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
385+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
386+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
387+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
471388; CHECK-NEWLOWERING-NEXT: ret
472389entry:
473390 %a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
@@ -548,59 +465,12 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
548465;
549466; CHECK-NEWLOWERING-LABEL: sudot_8to64:
550467; CHECK-NEWLOWERING: // %bb.0: // %entry
551- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
552- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
553- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
554- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
555- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
556- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
557- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
558- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
559- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
560- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
561- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
562- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
563- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
564- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
565- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
566- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
567- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
568- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
569- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
570- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
571- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
572- ; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
573- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
574- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
575- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
576- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
577- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
578- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
579- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
580- ; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
581- ; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
582- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
583- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
584- ; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
585- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
586- ; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
587- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
588- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
589- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
590- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
591- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
592- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
593- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
594- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
595- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
596- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
597- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
598- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
599- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
600- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
601- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
602- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
603- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
468+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
469+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z3.b, z2.b
470+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
471+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
472+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
473+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
604474; CHECK-NEWLOWERING-NEXT: ret
605475entry:
606476 %a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
0 commit comments