@@ -561,31 +561,34 @@ define <vscale x 4 x i64> @udot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
561561; CHECK-NEXT: add z1.d, z1.d, z3.d
562562; CHECK-NEXT: ret
563563;
564- ; CHECK-NEWLOWERING-LABEL: udot_no_bin_op_8to64:
565- ; CHECK-NEWLOWERING: // %bb.0:
566- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z2.b
567- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.h, z2.b
568- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z3.h
569- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z3.h
570- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z2.h
571- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
572- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z4.s
573- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z3.s
574- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z5.s
575- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z3.s
576- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z2.s
577- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
578- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
579- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
580- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z24.d
581- ; CHECK-NEWLOWERING-NEXT: add z5.d, z5.d, z25.d
582- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
583- ; CHECK-NEWLOWERING-NEXT: add z3.d, z7.d, z6.d
584- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z5.d
585- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
586- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
587- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z4.d
588- ; CHECK-NEWLOWERING-NEXT: ret
564+ ; CHECK-NEWLOWERING-SVE-LABEL: udot_no_bin_op_8to64:
565+ ; CHECK-NEWLOWERING-SVE: // %bb.0:
566+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v3.2d, #0000000000000000
567+ ; CHECK-NEWLOWERING-SVE-NEXT: mov z4.b, #1 // =0x1
568+ ; CHECK-NEWLOWERING-SVE-NEXT: udot z3.s, z2.b, z4.b
569+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z2.d, z3.s
570+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z3.d, z3.s
571+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
572+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
573+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
574+ ;
575+ ; CHECK-NEWLOWERING-SVE2-LABEL: udot_no_bin_op_8to64:
576+ ; CHECK-NEWLOWERING-SVE2: // %bb.0:
577+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v3.2d, #0000000000000000
578+ ; CHECK-NEWLOWERING-SVE2-NEXT: mov z4.b, #1 // =0x1
579+ ; CHECK-NEWLOWERING-SVE2-NEXT: udot z3.s, z2.b, z4.b
580+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z3.s
581+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z3.s
582+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
583+ ;
584+ ; CHECK-NEWLOWERING-SME-LABEL: udot_no_bin_op_8to64:
585+ ; CHECK-NEWLOWERING-SME: // %bb.0:
586+ ; CHECK-NEWLOWERING-SME-NEXT: mov z3.b, #1 // =0x1
587+ ; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
588+ ; CHECK-NEWLOWERING-SME-NEXT: udot z4.s, z2.b, z3.b
589+ ; CHECK-NEWLOWERING-SME-NEXT: uaddwb z0.d, z0.d, z4.s
590+ ; CHECK-NEWLOWERING-SME-NEXT: uaddwt z0.d, z0.d, z4.s
591+ ; CHECK-NEWLOWERING-SME-NEXT: ret
589592 %a.ext = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
590593 %partial.reduce = tail call <vscale x 4 x i64 > @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64 (<vscale x 4 x i64 > %acc , <vscale x 16 x i64 > %a.ext )
591594 ret <vscale x 4 x i64 > %partial.reduce
@@ -603,31 +606,34 @@ define <vscale x 4 x i64> @sdot_no_bin_op_8to64(<vscale x 4 x i64> %acc, <vscale
603606; CHECK-NEXT: add z1.d, z1.d, z3.d
604607; CHECK-NEXT: ret
605608;
606- ; CHECK-NEWLOWERING-LABEL: sdot_no_bin_op_8to64:
607- ; CHECK-NEWLOWERING: // %bb.0:
608- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z2.b
609- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.h, z2.b
610- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z3.h
611- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.s, z3.h
612- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z2.h
613- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
614- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z4.s
615- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z3.s
616- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z5.s
617- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.d, z3.s
618- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z2.s
619- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
620- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
621- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.d, z2.s
622- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z24.d
623- ; CHECK-NEWLOWERING-NEXT: add z5.d, z5.d, z25.d
624- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
625- ; CHECK-NEWLOWERING-NEXT: add z3.d, z7.d, z6.d
626- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z5.d
627- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
628- ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
629- ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z4.d
630- ; CHECK-NEWLOWERING-NEXT: ret
609+ ; CHECK-NEWLOWERING-SVE-LABEL: sdot_no_bin_op_8to64:
610+ ; CHECK-NEWLOWERING-SVE: // %bb.0:
611+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v3.2d, #0000000000000000
612+ ; CHECK-NEWLOWERING-SVE-NEXT: mov z4.b, #1 // =0x1
613+ ; CHECK-NEWLOWERING-SVE-NEXT: sdot z3.s, z2.b, z4.b
614+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z2.d, z3.s
615+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z3.d, z3.s
616+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
617+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z3.d
618+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
619+ ;
620+ ; CHECK-NEWLOWERING-SVE2-LABEL: sdot_no_bin_op_8to64:
621+ ; CHECK-NEWLOWERING-SVE2: // %bb.0:
622+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v3.2d, #0000000000000000
623+ ; CHECK-NEWLOWERING-SVE2-NEXT: mov z4.b, #1 // =0x1
624+ ; CHECK-NEWLOWERING-SVE2-NEXT: sdot z3.s, z2.b, z4.b
625+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z3.s
626+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z3.s
627+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
628+ ;
629+ ; CHECK-NEWLOWERING-SME-LABEL: sdot_no_bin_op_8to64:
630+ ; CHECK-NEWLOWERING-SME: // %bb.0:
631+ ; CHECK-NEWLOWERING-SME-NEXT: mov z3.b, #1 // =0x1
632+ ; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
633+ ; CHECK-NEWLOWERING-SME-NEXT: sdot z4.s, z2.b, z3.b
634+ ; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
635+ ; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
636+ ; CHECK-NEWLOWERING-SME-NEXT: ret
631637 %a.ext = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
632638 %partial.reduce = tail call <vscale x 4 x i64 > @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64 (<vscale x 4 x i64 > %acc , <vscale x 16 x i64 > %a.ext )
633639 ret <vscale x 4 x i64 > %partial.reduce
@@ -647,18 +653,34 @@ define <vscale x 4 x i32> @not_udot(<vscale x 4 x i32> %acc, <vscale x 8 x i8> %
647653; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
648654; CHECK-NEXT: ret
649655;
650- ; CHECK-NEWLOWERING-LABEL: not_udot:
651- ; CHECK-NEWLOWERING: // %bb.0: // %entry
652- ; CHECK-NEWLOWERING-NEXT: and z1.h, z1.h, #0xff
653- ; CHECK-NEWLOWERING-NEXT: and z2.h, z2.h, #0xff
654- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
655- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.s, z1.h
656- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.s, z2.h
657- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
658- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
659- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z3.s, z4.s
660- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
661- ; CHECK-NEWLOWERING-NEXT: ret
656+ ; CHECK-NEWLOWERING-SVE-LABEL: not_udot:
657+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
658+ ; CHECK-NEWLOWERING-SVE-NEXT: and z1.h, z1.h, #0xff
659+ ; CHECK-NEWLOWERING-SVE-NEXT: and z2.h, z2.h, #0xff
660+ ; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.s
661+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.s, z1.h
662+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.s, z2.h
663+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.s, z1.h
664+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.s, z2.h
665+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.s, p0/m, z3.s, z4.s
666+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.s, p0/m, z1.s, z2.s
667+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
668+ ;
669+ ; CHECK-NEWLOWERING-SVE2-LABEL: not_udot:
670+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
671+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z2.h, z2.h, #0xff
672+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z1.h, z1.h, #0xff
673+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalb z0.s, z1.h, z2.h
674+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalt z0.s, z1.h, z2.h
675+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
676+ ;
677+ ; CHECK-NEWLOWERING-SME-LABEL: not_udot:
678+ ; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
679+ ; CHECK-NEWLOWERING-SME-NEXT: and z2.h, z2.h, #0xff
680+ ; CHECK-NEWLOWERING-SME-NEXT: and z1.h, z1.h, #0xff
681+ ; CHECK-NEWLOWERING-SME-NEXT: umlalb z0.s, z1.h, z2.h
682+ ; CHECK-NEWLOWERING-SME-NEXT: umlalt z0.s, z1.h, z2.h
683+ ; CHECK-NEWLOWERING-SME-NEXT: ret
662684entry:
663685 %a.wide = zext <vscale x 8 x i8 > %a to <vscale x 8 x i32 >
664686 %b.wide = zext <vscale x 8 x i8 > %b to <vscale x 8 x i32 >
@@ -681,18 +703,34 @@ define <vscale x 2 x i64> @not_udot_wide(<vscale x 2 x i64> %acc, <vscale x 4 x
681703; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
682704; CHECK-NEXT: ret
683705;
684- ; CHECK-NEWLOWERING-LABEL: not_udot_wide:
685- ; CHECK-NEWLOWERING: // %bb.0: // %entry
686- ; CHECK-NEWLOWERING-NEXT: and z1.s, z1.s, #0xffff
687- ; CHECK-NEWLOWERING-NEXT: and z2.s, z2.s, #0xffff
688- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
689- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.d, z1.s
690- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.d, z2.s
691- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.d, z1.s
692- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.d, z2.s
693- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z3.d, z4.d
694- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z1.d, z2.d
695- ; CHECK-NEWLOWERING-NEXT: ret
706+ ; CHECK-NEWLOWERING-SVE-LABEL: not_udot_wide:
707+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
708+ ; CHECK-NEWLOWERING-SVE-NEXT: and z1.s, z1.s, #0xffff
709+ ; CHECK-NEWLOWERING-SVE-NEXT: and z2.s, z2.s, #0xffff
710+ ; CHECK-NEWLOWERING-SVE-NEXT: ptrue p0.d
711+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.d, z1.s
712+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z4.d, z2.s
713+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z1.d, z1.s
714+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.d, z2.s
715+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.d, p0/m, z3.d, z4.d
716+ ; CHECK-NEWLOWERING-SVE-NEXT: mla z0.d, p0/m, z1.d, z2.d
717+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
718+ ;
719+ ; CHECK-NEWLOWERING-SVE2-LABEL: not_udot_wide:
720+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
721+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z2.s, z2.s, #0xffff
722+ ; CHECK-NEWLOWERING-SVE2-NEXT: and z1.s, z1.s, #0xffff
723+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalb z0.d, z1.s, z2.s
724+ ; CHECK-NEWLOWERING-SVE2-NEXT: umlalt z0.d, z1.s, z2.s
725+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
726+ ;
727+ ; CHECK-NEWLOWERING-SME-LABEL: not_udot_wide:
728+ ; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
729+ ; CHECK-NEWLOWERING-SME-NEXT: and z2.s, z2.s, #0xffff
730+ ; CHECK-NEWLOWERING-SME-NEXT: and z1.s, z1.s, #0xffff
731+ ; CHECK-NEWLOWERING-SME-NEXT: umlalb z0.d, z1.s, z2.s
732+ ; CHECK-NEWLOWERING-SME-NEXT: umlalt z0.d, z1.s, z2.s
733+ ; CHECK-NEWLOWERING-SME-NEXT: ret
696734entry:
697735 %a.wide = zext <vscale x 4 x i16 > %a to <vscale x 4 x i64 >
698736 %b.wide = zext <vscale x 4 x i16 > %b to <vscale x 4 x i64 >
0 commit comments