@@ -574,35 +574,42 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
574574}
575575
576576define <4 x i32 > @udot_no_bin_op_in_loop (ptr %p ){
577- ; CHECK-LABEL: udot_no_bin_op_in_loop:
578- ; CHECK: // %bb.0: // %entry
579- ; CHECK-NEXT: adrp x8, .LCPI20_0
580- ; CHECK-NEXT: movi v4.2d, #0000000000000000
581- ; CHECK-NEXT: adrp x9, .LCPI20_2
582- ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
583- ; CHECK-NEXT: adrp x8, .LCPI20_1
584- ; CHECK-NEXT: adrp x10, .LCPI20_3
585- ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_1]
586- ; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI20_2]
587- ; CHECK-NEXT: ldr q5, [x10, :lo12:.LCPI20_3]
588- ; CHECK-NEXT: mov x8, xzr
589- ; CHECK-NEXT: .LBB20_1: // %vector.body
590- ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
591- ; CHECK-NEXT: ldr q6, [x0, x8]
592- ; CHECK-NEXT: mov v0.16b, v4.16b
593- ; CHECK-NEXT: add x8, x8, #16
594- ; CHECK-NEXT: cmp x8, #16
595- ; CHECK-NEXT: tbl v7.16b, { v6.16b }, v2.16b
596- ; CHECK-NEXT: tbl v4.16b, { v6.16b }, v1.16b
597- ; CHECK-NEXT: tbl v16.16b, { v6.16b }, v3.16b
598- ; CHECK-NEXT: tbl v6.16b, { v6.16b }, v5.16b
599- ; CHECK-NEXT: add v7.4s, v0.4s, v7.4s
600- ; CHECK-NEXT: add v6.4s, v6.4s, v16.4s
601- ; CHECK-NEXT: add v4.4s, v4.4s, v7.4s
602- ; CHECK-NEXT: add v4.4s, v6.4s, v4.4s
603- ; CHECK-NEXT: b.ne .LBB20_1
604- ; CHECK-NEXT: // %bb.2: // %end
605- ; CHECK-NEXT: ret
577+ ; CHECK-DOT-LABEL: udot_no_bin_op_in_loop:
578+ ; CHECK-DOT: // %bb.0: // %entry
579+ ; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
580+ ; CHECK-DOT-NEXT: movi v2.16b, #1
581+ ; CHECK-DOT-NEXT: mov x8, xzr
582+ ; CHECK-DOT-NEXT: .LBB20_1: // %vector.body
583+ ; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
584+ ; CHECK-DOT-NEXT: ldr q3, [x0, x8]
585+ ; CHECK-DOT-NEXT: mov v0.16b, v1.16b
586+ ; CHECK-DOT-NEXT: add x8, x8, #16
587+ ; CHECK-DOT-NEXT: cmp x8, #16
588+ ; CHECK-DOT-NEXT: udot v1.4s, v3.16b, v2.16b
589+ ; CHECK-DOT-NEXT: b.ne .LBB20_1
590+ ; CHECK-DOT-NEXT: // %bb.2: // %end
591+ ; CHECK-DOT-NEXT: ret
592+ ;
593+ ; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
594+ ; CHECK-NODOT: // %bb.0: // %entry
595+ ; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
596+ ; CHECK-NODOT-NEXT: mov x8, xzr
597+ ; CHECK-NODOT-NEXT: .LBB20_1: // %vector.body
598+ ; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
599+ ; CHECK-NODOT-NEXT: ldr q0, [x0, x8]
600+ ; CHECK-NODOT-NEXT: add x8, x8, #16
601+ ; CHECK-NODOT-NEXT: cmp x8, #16
602+ ; CHECK-NODOT-NEXT: ushll v2.8h, v0.8b, #0
603+ ; CHECK-NODOT-NEXT: ushll2 v3.8h, v0.16b, #0
604+ ; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
605+ ; CHECK-NODOT-NEXT: ushll v1.4s, v3.4h, #0
606+ ; CHECK-NODOT-NEXT: uaddw v4.4s, v0.4s, v2.4h
607+ ; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v2.8h
608+ ; CHECK-NODOT-NEXT: uaddw2 v2.4s, v4.4s, v3.8h
609+ ; CHECK-NODOT-NEXT: add v1.4s, v1.4s, v2.4s
610+ ; CHECK-NODOT-NEXT: b.ne .LBB20_1
611+ ; CHECK-NODOT-NEXT: // %bb.2: // %end
612+ ; CHECK-NODOT-NEXT: ret
606613entry:
607614 br label %vector.body
608615
0 commit comments