@@ -402,18 +402,18 @@ define <4 x i64> @udot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
402402; CHECK-NODOT: // %bb.0: // %entry
403403; CHECK-NODOT-NEXT: umull v4.8h, v2.8b, v3.8b
404404; CHECK-NODOT-NEXT: umull2 v2.8h, v2.16b, v3.16b
405- ; CHECK-NODOT-NEXT: ushll v5.4s, v4.4h, #0
405+ ; CHECK-NODOT-NEXT: ushll v3.4s, v4.4h, #0
406+ ; CHECK-NODOT-NEXT: ushll v5.4s, v2.4h, #0
406407; CHECK-NODOT-NEXT: ushll2 v4.4s, v4.8h, #0
407- ; CHECK-NODOT-NEXT: ushll v3.4s, v2.4h, #0
408408; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
409- ; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v5.2s
410- ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v5.4s
411- ; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
412- ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
409+ ; CHECK-NODOT-NEXT: uaddw v1.2d, v1.2d, v5.2s
413410; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v3.2s
411+ ; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v5.4s
414412; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v3.4s
415- ; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v2.2s
416- ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v2.4s
413+ ; CHECK-NODOT-NEXT: uaddw v1.2d, v1.2d, v2.2s
414+ ; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
415+ ; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v2.4s
416+ ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
417417; CHECK-NODOT-NEXT: ret
418418entry:
419419 %a.wide = zext <16 x i8 > %a to <16 x i64 >
@@ -437,18 +437,18 @@ define <4 x i64> @sdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
437437; CHECK-NODOT: // %bb.0: // %entry
438438; CHECK-NODOT-NEXT: smull v4.8h, v2.8b, v3.8b
439439; CHECK-NODOT-NEXT: smull2 v2.8h, v2.16b, v3.16b
440- ; CHECK-NODOT-NEXT: sshll v5.4s, v4.4h, #0
440+ ; CHECK-NODOT-NEXT: sshll v3.4s, v4.4h, #0
441+ ; CHECK-NODOT-NEXT: sshll v5.4s, v2.4h, #0
441442; CHECK-NODOT-NEXT: sshll2 v4.4s, v4.8h, #0
442- ; CHECK-NODOT-NEXT: sshll v3.4s, v2.4h, #0
443443; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
444- ; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v5.2s
445- ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v5.4s
446- ; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v4.2s
447- ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
444+ ; CHECK-NODOT-NEXT: saddw v1.2d, v1.2d, v5.2s
448445; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v3.2s
446+ ; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v5.4s
449447; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v3.4s
450- ; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v2.2s
451- ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v2.4s
448+ ; CHECK-NODOT-NEXT: saddw v1.2d, v1.2d, v2.2s
449+ ; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v4.2s
450+ ; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v2.4s
451+ ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
452452; CHECK-NODOT-NEXT: ret
453453entry:
454454 %a.wide = sext <16 x i8 > %a to <16 x i64 >
@@ -463,25 +463,25 @@ define <4 x i64> @usdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
463463; CHECK-NOI8MM-LABEL: usdot_8to64:
464464; CHECK-NOI8MM: // %bb.0: // %entry
465465; CHECK-NOI8MM-NEXT: ushll v4.8h, v2.8b, #0
466- ; CHECK-NOI8MM-NEXT: sshll v5.8h, v3.8b, #0
467466; CHECK-NOI8MM-NEXT: ushll2 v2.8h, v2.16b, #0
467+ ; CHECK-NOI8MM-NEXT: sshll v5.8h, v3.8b, #0
468468; CHECK-NOI8MM-NEXT: sshll2 v3.8h, v3.16b, #0
469469; CHECK-NOI8MM-NEXT: ushll v6.4s, v4.4h, #0
470- ; CHECK-NOI8MM-NEXT: sshll v7.4s, v5.4h, #0
470+ ; CHECK-NOI8MM-NEXT: ushll v7.4s, v2.4h, #0
471+ ; CHECK-NOI8MM-NEXT: sshll v16.4s, v5.4h, #0
472+ ; CHECK-NOI8MM-NEXT: sshll v17.4s, v3.4h, #0
471473; CHECK-NOI8MM-NEXT: ushll2 v4.4s, v4.8h, #0
472- ; CHECK-NOI8MM-NEXT: sshll2 v5.4s, v5.8h, #0
473- ; CHECK-NOI8MM-NEXT: smlal v0.2d, v6.2s, v7.2s
474- ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v6.4s, v7.4s
475- ; CHECK-NOI8MM-NEXT: smlal v0.2d, v4.2s, v5.2s
476- ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v4.4s, v5.4s
477- ; CHECK-NOI8MM-NEXT: ushll v4.4s, v2.4h, #0
478- ; CHECK-NOI8MM-NEXT: sshll v5.4s, v3.4h, #0
479474; CHECK-NOI8MM-NEXT: ushll2 v2.4s, v2.8h, #0
475+ ; CHECK-NOI8MM-NEXT: sshll2 v5.4s, v5.8h, #0
480476; CHECK-NOI8MM-NEXT: sshll2 v3.4s, v3.8h, #0
477+ ; CHECK-NOI8MM-NEXT: smlal v0.2d, v6.2s, v16.2s
478+ ; CHECK-NOI8MM-NEXT: smlal v1.2d, v7.2s, v17.2s
479+ ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v6.4s, v16.4s
480+ ; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v7.4s, v17.4s
481481; CHECK-NOI8MM-NEXT: smlal v0.2d, v4.2s, v5.2s
482+ ; CHECK-NOI8MM-NEXT: smlal v1.2d, v2.2s, v3.2s
482483; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v4.4s, v5.4s
483- ; CHECK-NOI8MM-NEXT: smlal v0.2d, v2.2s, v3.2s
484- ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v2.4s, v3.4s
484+ ; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s
485485; CHECK-NOI8MM-NEXT: ret
486486;
487487; CHECK-I8MM-LABEL: usdot_8to64:
@@ -504,25 +504,25 @@ define <4 x i64> @sudot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
504504; CHECK-NOI8MM-LABEL: sudot_8to64:
505505; CHECK-NOI8MM: // %bb.0: // %entry
506506; CHECK-NOI8MM-NEXT: sshll v4.8h, v2.8b, #0
507- ; CHECK-NOI8MM-NEXT: ushll v5.8h, v3.8b, #0
508507; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
508+ ; CHECK-NOI8MM-NEXT: ushll v5.8h, v3.8b, #0
509509; CHECK-NOI8MM-NEXT: ushll2 v3.8h, v3.16b, #0
510510; CHECK-NOI8MM-NEXT: sshll v6.4s, v4.4h, #0
511- ; CHECK-NOI8MM-NEXT: ushll v7.4s, v5.4h, #0
511+ ; CHECK-NOI8MM-NEXT: sshll v7.4s, v2.4h, #0
512+ ; CHECK-NOI8MM-NEXT: ushll v16.4s, v5.4h, #0
513+ ; CHECK-NOI8MM-NEXT: ushll v17.4s, v3.4h, #0
512514; CHECK-NOI8MM-NEXT: sshll2 v4.4s, v4.8h, #0
513- ; CHECK-NOI8MM-NEXT: ushll2 v5.4s, v5.8h, #0
514- ; CHECK-NOI8MM-NEXT: smlal v0.2d, v6.2s, v7.2s
515- ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v6.4s, v7.4s
516- ; CHECK-NOI8MM-NEXT: smlal v0.2d, v4.2s, v5.2s
517- ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v4.4s, v5.4s
518- ; CHECK-NOI8MM-NEXT: sshll v4.4s, v2.4h, #0
519- ; CHECK-NOI8MM-NEXT: ushll v5.4s, v3.4h, #0
520515; CHECK-NOI8MM-NEXT: sshll2 v2.4s, v2.8h, #0
516+ ; CHECK-NOI8MM-NEXT: ushll2 v5.4s, v5.8h, #0
521517; CHECK-NOI8MM-NEXT: ushll2 v3.4s, v3.8h, #0
518+ ; CHECK-NOI8MM-NEXT: smlal v0.2d, v6.2s, v16.2s
519+ ; CHECK-NOI8MM-NEXT: smlal v1.2d, v7.2s, v17.2s
520+ ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v6.4s, v16.4s
521+ ; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v7.4s, v17.4s
522522; CHECK-NOI8MM-NEXT: smlal v0.2d, v4.2s, v5.2s
523+ ; CHECK-NOI8MM-NEXT: smlal v1.2d, v2.2s, v3.2s
523524; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v4.4s, v5.4s
524- ; CHECK-NOI8MM-NEXT: smlal v0.2d, v2.2s, v3.2s
525- ; CHECK-NOI8MM-NEXT: smlal2 v0.2d, v2.4s, v3.4s
525+ ; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s
526526; CHECK-NOI8MM-NEXT: ret
527527;
528528; CHECK-I8MM-LABEL: sudot_8to64:
@@ -705,17 +705,17 @@ define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
705705; CHECK-NODOT-NEXT: ushll v3.8h, v2.8b, #0
706706; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
707707; CHECK-NODOT-NEXT: ushll v4.4s, v3.4h, #0
708+ ; CHECK-NODOT-NEXT: ushll v5.4s, v2.4h, #0
708709; CHECK-NODOT-NEXT: ushll2 v3.4s, v3.8h, #0
710+ ; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
711+ ; CHECK-NODOT-NEXT: uaddw v1.2d, v1.2d, v5.2s
709712; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
713+ ; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v5.4s
710714; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
711- ; CHECK-NODOT-NEXT: ushll v4.4s, v2.4h, #0
712- ; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
715+ ; CHECK-NODOT-NEXT: uaddw v1.2d, v1.2d, v2.2s
713716; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v3.2s
717+ ; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v2.4s
714718; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v3.4s
715- ; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
716- ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
717- ; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v2.2s
718- ; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v2.4s
719719; CHECK-NODOT-NEXT: ret
720720 %a.wide = zext <16 x i8 > %a to <16 x i64 >
721721 %partial.reduce = tail call <4 x i64 > @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64 (<4 x i64 > %acc , <16 x i64 > %a.wide )
@@ -737,17 +737,17 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
737737; CHECK-NODOT-NEXT: sshll v3.8h, v2.8b, #0
738738; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
739739; CHECK-NODOT-NEXT: sshll v4.4s, v3.4h, #0
740+ ; CHECK-NODOT-NEXT: sshll v5.4s, v2.4h, #0
740741; CHECK-NODOT-NEXT: sshll2 v3.4s, v3.8h, #0
742+ ; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
743+ ; CHECK-NODOT-NEXT: saddw v1.2d, v1.2d, v5.2s
741744; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v4.2s
745+ ; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v5.4s
742746; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
743- ; CHECK-NODOT-NEXT: sshll v4.4s, v2.4h, #0
744- ; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
747+ ; CHECK-NODOT-NEXT: saddw v1.2d, v1.2d, v2.2s
745748; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v3.2s
749+ ; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v2.4s
746750; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v3.4s
747- ; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v4.2s
748- ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
749- ; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v2.2s
750- ; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v2.4s
751751; CHECK-NODOT-NEXT: ret
752752 %a.wide = sext <16 x i8 > %a to <16 x i64 >
753753 %partial.reduce = tail call <4 x i64 > @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64 (<4 x i64 > %acc , <16 x i64 > %a.wide )
0 commit comments