@@ -106,23 +106,7 @@ define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
106106;
107107; CHECK-NEWLOWERING-LABEL: usdot:
108108; CHECK-NEWLOWERING: // %bb.0: // %entry
109- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z1.b
110- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
111- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
112- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
113- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
114- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z3.h
115- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
116- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
117- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
118- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
119- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z1.h
120- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z2.h
121- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
122- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
123- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z3.s, z4.s
124- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
125- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
109+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b
126110; CHECK-NEWLOWERING-NEXT: ret
127111entry:
128112 %a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -161,23 +145,7 @@ define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
161145;
162146; CHECK-NEWLOWERING-LABEL: sudot:
163147; CHECK-NEWLOWERING: // %bb.0: // %entry
164- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z1.b
165- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
166- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
167- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
168- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
169- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z3.h
170- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
171- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
172- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
173- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
174- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z1.h
175- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z2.h
176- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
177- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
178- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z3.s, z4.s
179- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
180- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
148+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b
181149; CHECK-NEWLOWERING-NEXT: ret
182150entry:
183151 %a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -331,43 +299,12 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
331299;
332300; CHECK-NEWLOWERING-LABEL: usdot_8to64:
333301; CHECK-NEWLOWERING: // %bb.0: // %entry
334- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.h, z2.b
335- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
336- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.h, z3.b
337- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z3.b
338- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
339- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
340- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z2.h
341- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z5.h
342- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
343- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
344- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
345- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
346- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
347- ; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z6.s
348- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z7.s
349- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z24.s
350- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z25.s
351- ; CHECK-NEWLOWERING-NEXT: uunpkhi z6.d, z6.s
352- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
353- ; CHECK-NEWLOWERING-NEXT: sunpkhi z24.d, z24.s
354- ; CHECK-NEWLOWERING-NEXT: sunpkhi z25.d, z25.s
355- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
356- ; CHECK-NEWLOWERING-NEXT: uunpklo z26.d, z4.s
357- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z5.s
358- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
359- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z2.s
360- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z3.s
361- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
362- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
363- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
364- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
365- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d
366- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d
367- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
368- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
369- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d
370- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d
302+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
303+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z2.b, z3.b
304+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
305+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
306+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
307+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
371308; CHECK-NEWLOWERING-NEXT: ret
372309entry:
373310 %a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
@@ -432,43 +369,12 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
432369;
433370; CHECK-NEWLOWERING-LABEL: sudot_8to64:
434371; CHECK-NEWLOWERING: // %bb.0: // %entry
435- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.h, z2.b
436- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
437- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.h, z3.b
438- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z3.b
439- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
440- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
441- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z2.h
442- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z5.h
443- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
444- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
445- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
446- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
447- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
448- ; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z6.s
449- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z7.s
450- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z24.s
451- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z25.s
452- ; CHECK-NEWLOWERING-NEXT: sunpkhi z6.d, z6.s
453- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
454- ; CHECK-NEWLOWERING-NEXT: uunpkhi z24.d, z24.s
455- ; CHECK-NEWLOWERING-NEXT: uunpkhi z25.d, z25.s
456- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
457- ; CHECK-NEWLOWERING-NEXT: sunpklo z26.d, z4.s
458- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z5.s
459- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
460- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z2.s
461- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z3.s
462- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
463- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
464- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
465- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
466- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z6.d, z24.d
467- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z7.d, z25.d
468- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z28.d
469- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z27.d, z29.d
470- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z4.d, z5.d
471- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z3.d
372+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
373+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z3.b, z2.b
374+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
375+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
376+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
377+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
472378; CHECK-NEWLOWERING-NEXT: ret
473379entry:
474380 %a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
0 commit comments