@@ -772,35 +772,57 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
772772}
773773
774774define <4 x i32 > @udot_no_bin_op_in_loop (ptr %p ){
775- ; CHECK-COMMON-LABEL: udot_no_bin_op_in_loop:
776- ; CHECK-COMMON: // %bb.0: // %entry
777- ; CHECK-COMMON-NEXT: adrp x8, .LCPI16_0
778- ; CHECK-COMMON-NEXT: movi v2.2d, #0000000000000000
779- ; CHECK-COMMON-NEXT: adrp x9, .LCPI16_2
780- ; CHECK-COMMON-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
781- ; CHECK-COMMON-NEXT: adrp x8, .LCPI16_1
782- ; CHECK-COMMON-NEXT: adrp x10, .LCPI16_3
783- ; CHECK-COMMON-NEXT: ldr q3, [x8, :lo12:.LCPI16_1]
784- ; CHECK-COMMON-NEXT: ldr q4, [x9, :lo12:.LCPI16_2]
785- ; CHECK-COMMON-NEXT: ldr q5, [x10, :lo12:.LCPI16_3]
786- ; CHECK-COMMON-NEXT: mov x8, xzr
787- ; CHECK-COMMON-NEXT: .LBB16_1: // %vector.body
788- ; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1
789- ; CHECK-COMMON-NEXT: ldr q6, [x0, x8]
790- ; CHECK-COMMON-NEXT: mov v0.16b, v2.16b
791- ; CHECK-COMMON-NEXT: add x8, x8, #16
792- ; CHECK-COMMON-NEXT: cmp x8, #16
793- ; CHECK-COMMON-NEXT: tbl v7.16b, { v6.16b }, v3.16b
794- ; CHECK-COMMON-NEXT: tbl v16.16b, { v6.16b }, v4.16b
795- ; CHECK-COMMON-NEXT: tbl v17.16b, { v6.16b }, v5.16b
796- ; CHECK-COMMON-NEXT: tbl v6.16b, { v6.16b }, v1.16b
797- ; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v17.4s
798- ; CHECK-COMMON-NEXT: add v7.4s, v16.4s, v7.4s
799- ; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v7.4s
800- ; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v6.4s
801- ; CHECK-COMMON-NEXT: b.ne .LBB16_1
802- ; CHECK-COMMON-NEXT: // %bb.2: // %end
803- ; CHECK-COMMON-NEXT: ret
775+ ; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
776+ ; CHECK-NODOT: // %bb.0: // %entry
777+ ; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
778+ ; CHECK-NODOT-NEXT: mov x8, xzr
779+ ; CHECK-NODOT-NEXT: .LBB16_1: // %vector.body
780+ ; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
781+ ; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
782+ ; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
783+ ; CHECK-NODOT-NEXT: add x8, x8, #16
784+ ; CHECK-NODOT-NEXT: cmp x8, #16
785+ ; CHECK-NODOT-NEXT: ushll v3.8h, v2.8b, #0
786+ ; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
787+ ; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v3.4h
788+ ; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
789+ ; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v2.4h
790+ ; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v2.8h
791+ ; CHECK-NODOT-NEXT: b.ne .LBB16_1
792+ ; CHECK-NODOT-NEXT: // %bb.2: // %end
793+ ; CHECK-NODOT-NEXT: ret
794+ ;
795+ ; CHECK-DOT-LABEL: udot_no_bin_op_in_loop:
796+ ; CHECK-DOT: // %bb.0: // %entry
797+ ; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
798+ ; CHECK-DOT-NEXT: movi v2.16b, #1
799+ ; CHECK-DOT-NEXT: mov x8, xzr
800+ ; CHECK-DOT-NEXT: .LBB16_1: // %vector.body
801+ ; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
802+ ; CHECK-DOT-NEXT: ldr q3, [x0, x8]
803+ ; CHECK-DOT-NEXT: mov v0.16b, v1.16b
804+ ; CHECK-DOT-NEXT: add x8, x8, #16
805+ ; CHECK-DOT-NEXT: cmp x8, #16
806+ ; CHECK-DOT-NEXT: udot v1.4s, v3.16b, v2.16b
807+ ; CHECK-DOT-NEXT: b.ne .LBB16_1
808+ ; CHECK-DOT-NEXT: // %bb.2: // %end
809+ ; CHECK-DOT-NEXT: ret
810+ ;
811+ ; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_in_loop:
812+ ; CHECK-DOT-I8MM: // %bb.0: // %entry
813+ ; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
814+ ; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
815+ ; CHECK-DOT-I8MM-NEXT: mov x8, xzr
816+ ; CHECK-DOT-I8MM-NEXT: .LBB16_1: // %vector.body
817+ ; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
818+ ; CHECK-DOT-I8MM-NEXT: ldr q3, [x0, x8]
819+ ; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
820+ ; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
821+ ; CHECK-DOT-I8MM-NEXT: cmp x8, #16
822+ ; CHECK-DOT-I8MM-NEXT: udot v1.4s, v3.16b, v2.16b
823+ ; CHECK-DOT-I8MM-NEXT: b.ne .LBB16_1
824+ ; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
825+ ; CHECK-DOT-I8MM-NEXT: ret
804826entry:
805827 br label %vector.body
806828
0 commit comments