@@ -590,12 +590,8 @@ define void @udot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
590590; CHECK-NEXT: mov w8, wzr
591591; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
592592; CHECK-NEXT: ld1b { z17.b, z25.b }, pn8/z, [x0, x1]
593- ; CHECK-NEXT: mov z0.d, z16.d
594- ; CHECK-NEXT: mov z1.d, z17.d
595- ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
596- ; CHECK-NEXT: mov z0.d, z24.d
597- ; CHECK-NEXT: mov z1.d, z25.d
598- ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
593+ ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z16.b, z17.b }, z0.b[0]
594+ ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z24.b, z25.b }, z0.b[0]
599595; CHECK-NEXT: ret
600596entry:
601597 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -622,26 +618,10 @@ define void @udot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
622618; CHECK-NEXT: add x10, x9, x1
623619; CHECK-NEXT: ld1b { z18.b, z22.b, z26.b, z30.b }, pn8/z, [x0, x9]
624620; CHECK-NEXT: ld1b { z19.b, z23.b, z27.b, z31.b }, pn8/z, [x0, x10]
625- ; CHECK-NEXT: mov z0.d, z16.d
626- ; CHECK-NEXT: mov z1.d, z17.d
627- ; CHECK-NEXT: mov z2.d, z18.d
628- ; CHECK-NEXT: mov z3.d, z19.d
629- ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
630- ; CHECK-NEXT: mov z0.d, z20.d
631- ; CHECK-NEXT: mov z1.d, z21.d
632- ; CHECK-NEXT: mov z2.d, z22.d
633- ; CHECK-NEXT: mov z3.d, z23.d
634- ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
635- ; CHECK-NEXT: mov z0.d, z24.d
636- ; CHECK-NEXT: mov z1.d, z25.d
637- ; CHECK-NEXT: mov z2.d, z26.d
638- ; CHECK-NEXT: mov z3.d, z27.d
639- ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
640- ; CHECK-NEXT: mov z0.d, z28.d
641- ; CHECK-NEXT: mov z1.d, z29.d
642- ; CHECK-NEXT: mov z2.d, z30.d
643- ; CHECK-NEXT: mov z3.d, z31.d
644- ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
621+ ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
622+ ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z20.b - z23.b }, z0.b[0]
623+ ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z24.b - z27.b }, z0.b[0]
624+ ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, z0.b[0]
645625; CHECK-NEXT: ret
646626entry:
647627 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -752,12 +732,8 @@ define void @usdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
752732; CHECK-NEXT: mov w8, wzr
753733; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
754734; CHECK-NEXT: ld1b { z17.b, z25.b }, pn8/z, [x0, x1]
755- ; CHECK-NEXT: mov z0.d, z16.d
756- ; CHECK-NEXT: mov z1.d, z17.d
757- ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
758- ; CHECK-NEXT: mov z0.d, z24.d
759- ; CHECK-NEXT: mov z1.d, z25.d
760- ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
735+ ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z16.b, z17.b }, z0.b[0]
736+ ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z24.b, z25.b }, z0.b[0]
761737; CHECK-NEXT: ret
762738entry:
763739 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -784,26 +760,10 @@ define void @usdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
784760; CHECK-NEXT: add x10, x9, x1
785761; CHECK-NEXT: ld1b { z18.b, z22.b, z26.b, z30.b }, pn8/z, [x0, x9]
786762; CHECK-NEXT: ld1b { z19.b, z23.b, z27.b, z31.b }, pn8/z, [x0, x10]
787- ; CHECK-NEXT: mov z0.d, z16.d
788- ; CHECK-NEXT: mov z1.d, z17.d
789- ; CHECK-NEXT: mov z2.d, z18.d
790- ; CHECK-NEXT: mov z3.d, z19.d
791- ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
792- ; CHECK-NEXT: mov z0.d, z20.d
793- ; CHECK-NEXT: mov z1.d, z21.d
794- ; CHECK-NEXT: mov z2.d, z22.d
795- ; CHECK-NEXT: mov z3.d, z23.d
796- ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
797- ; CHECK-NEXT: mov z0.d, z24.d
798- ; CHECK-NEXT: mov z1.d, z25.d
799- ; CHECK-NEXT: mov z2.d, z26.d
800- ; CHECK-NEXT: mov z3.d, z27.d
801- ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
802- ; CHECK-NEXT: mov z0.d, z28.d
803- ; CHECK-NEXT: mov z1.d, z29.d
804- ; CHECK-NEXT: mov z2.d, z30.d
805- ; CHECK-NEXT: mov z3.d, z31.d
806- ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
763+ ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
764+ ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z20.b - z23.b }, z0.b[0]
765+ ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z24.b - z27.b }, z0.b[0]
766+ ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, z0.b[0]
807767; CHECK-NEXT: ret
808768entry:
809769 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -916,12 +876,8 @@ define void @sdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
916876; CHECK-NEXT: mov w8, wzr
917877; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
918878; CHECK-NEXT: ld1b { z17.b, z25.b }, pn8/z, [x0, x1]
919- ; CHECK-NEXT: mov z0.d, z16.d
920- ; CHECK-NEXT: mov z1.d, z17.d
921- ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
922- ; CHECK-NEXT: mov z0.d, z24.d
923- ; CHECK-NEXT: mov z1.d, z25.d
924- ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
879+ ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z16.b, z17.b }, z0.b[0]
880+ ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z24.b, z25.b }, z0.b[0]
925881; CHECK-NEXT: ret
926882entry:
927883 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -948,26 +904,10 @@ define void @sdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
948904; CHECK-NEXT: add x10, x9, x1
949905; CHECK-NEXT: ld1b { z18.b, z22.b, z26.b, z30.b }, pn8/z, [x0, x9]
950906; CHECK-NEXT: ld1b { z19.b, z23.b, z27.b, z31.b }, pn8/z, [x0, x10]
951- ; CHECK-NEXT: mov z0.d, z16.d
952- ; CHECK-NEXT: mov z1.d, z17.d
953- ; CHECK-NEXT: mov z2.d, z18.d
954- ; CHECK-NEXT: mov z3.d, z19.d
955- ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
956- ; CHECK-NEXT: mov z0.d, z20.d
957- ; CHECK-NEXT: mov z1.d, z21.d
958- ; CHECK-NEXT: mov z2.d, z22.d
959- ; CHECK-NEXT: mov z3.d, z23.d
960- ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
961- ; CHECK-NEXT: mov z0.d, z24.d
962- ; CHECK-NEXT: mov z1.d, z25.d
963- ; CHECK-NEXT: mov z2.d, z26.d
964- ; CHECK-NEXT: mov z3.d, z27.d
965- ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
966- ; CHECK-NEXT: mov z0.d, z28.d
967- ; CHECK-NEXT: mov z1.d, z29.d
968- ; CHECK-NEXT: mov z2.d, z30.d
969- ; CHECK-NEXT: mov z3.d, z31.d
970- ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
907+ ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
908+ ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z20.b - z23.b }, z0.b[0]
909+ ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z24.b - z27.b }, z0.b[0]
910+ ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, z0.b[0]
971911; CHECK-NEXT: ret
972912entry:
973913 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -1080,12 +1020,8 @@ define void @sudot_form_2x_tuple(ptr %ptr, i64 %stride) #0 {
10801020; CHECK-NEXT: mov w8, wzr
10811021; CHECK-NEXT: ld1b { z16.b, z24.b }, pn8/z, [x0]
10821022; CHECK-NEXT: ld1b { z17.b, z25.b }, pn8/z, [x0, x1]
1083- ; CHECK-NEXT: mov z0.d, z16.d
1084- ; CHECK-NEXT: mov z1.d, z17.d
1085- ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
1086- ; CHECK-NEXT: mov z0.d, z24.d
1087- ; CHECK-NEXT: mov z1.d, z25.d
1088- ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0]
1023+ ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z16.b, z17.b }, z0.b[0]
1024+ ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z24.b, z25.b }, z0.b[0]
10891025; CHECK-NEXT: ret
10901026entry:
10911027 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
@@ -1112,26 +1048,10 @@ define void @sudot_form_4x_tuple(ptr %ptr, i64 %stride) #0 {
11121048; CHECK-NEXT: add x10, x9, x1
11131049; CHECK-NEXT: ld1b { z18.b, z22.b, z26.b, z30.b }, pn8/z, [x0, x9]
11141050; CHECK-NEXT: ld1b { z19.b, z23.b, z27.b, z31.b }, pn8/z, [x0, x10]
1115- ; CHECK-NEXT: mov z0.d, z16.d
1116- ; CHECK-NEXT: mov z1.d, z17.d
1117- ; CHECK-NEXT: mov z2.d, z18.d
1118- ; CHECK-NEXT: mov z3.d, z19.d
1119- ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
1120- ; CHECK-NEXT: mov z0.d, z20.d
1121- ; CHECK-NEXT: mov z1.d, z21.d
1122- ; CHECK-NEXT: mov z2.d, z22.d
1123- ; CHECK-NEXT: mov z3.d, z23.d
1124- ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
1125- ; CHECK-NEXT: mov z0.d, z24.d
1126- ; CHECK-NEXT: mov z1.d, z25.d
1127- ; CHECK-NEXT: mov z2.d, z26.d
1128- ; CHECK-NEXT: mov z3.d, z27.d
1129- ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
1130- ; CHECK-NEXT: mov z0.d, z28.d
1131- ; CHECK-NEXT: mov z1.d, z29.d
1132- ; CHECK-NEXT: mov z2.d, z30.d
1133- ; CHECK-NEXT: mov z3.d, z31.d
1134- ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0]
1051+ ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0]
1052+ ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z20.b - z23.b }, z0.b[0]
1053+ ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z24.b - z27.b }, z0.b[0]
1054+ ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z28.b - z31.b }, z0.b[0]
11351055; CHECK-NEXT: ret
11361056entry:
11371057 %0 = tail call target ("aarch64.svcount" ) @llvm.aarch64.sve.ptrue.c8 ()
0 commit comments