@@ -1451,3 +1451,34 @@ define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32>
14511451 %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift )
14521452 ret <4 x i32 > %red
14531453}
1454+
; Test: partial-reduce add of a zero-extended <16 x i8> input into a
; <2 x i32> accumulator. %input is widened to <16 x i32> and handed, together
; with %acc, to @llvm.vector.partial.reduce.add; the <2 x i32> result is
; returned.
; The expected code below widens with ushll/ushll2 and accumulates with
; uaddw/ext/add into v0; no udot instruction appears in it.
; NOTE(review): the check lines look autogenerated (update_llc_test_checks.py
; style) -- presumably they should be regenerated rather than hand-edited;
; confirm against the header of this file.
1455+ define <2 x i32 > @udot_v16i8tov2i32 (<2 x i32 > %acc , <16 x i8 > %input ) {
1456+ ; CHECK-COMMON-LABEL: udot_v16i8tov2i32:
1457+ ; CHECK-COMMON: // %bb.0: // %entry
1458+ ; CHECK-COMMON-NEXT: ushll v2.8h, v1.8b, #0
1459+ ; CHECK-COMMON-NEXT: // kill: def $d0 killed $d0 def $q0
1460+ ; CHECK-COMMON-NEXT: ushll2 v1.8h, v1.16b, #0
1461+ ; CHECK-COMMON-NEXT: ushll v3.4s, v2.4h, #0
1462+ ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v2.4h
1463+ ; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #0
1464+ ; CHECK-COMMON-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1465+ ; CHECK-COMMON-NEXT: ext v3.16b, v3.16b, v3.16b, #8
1466+ ; CHECK-COMMON-NEXT: add v0.2s, v3.2s, v0.2s
1467+ ; CHECK-COMMON-NEXT: ext v3.16b, v4.16b, v4.16b, #8
1468+ ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v2.4h
1469+ ; CHECK-COMMON-NEXT: ushll v2.4s, v1.4h, #0
1470+ ; CHECK-COMMON-NEXT: add v0.2s, v3.2s, v0.2s
1471+ ; CHECK-COMMON-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1472+ ; CHECK-COMMON-NEXT: ushll2 v3.4s, v1.8h, #0
1473+ ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
1474+ ; CHECK-COMMON-NEXT: ext v1.16b, v1.16b, v1.16b, #8
1475+ ; CHECK-COMMON-NEXT: add v0.2s, v2.2s, v0.2s
1476+ ; CHECK-COMMON-NEXT: ext v2.16b, v3.16b, v3.16b, #8
1477+ ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
1478+ ; CHECK-COMMON-NEXT: add v0.2s, v2.2s, v0.2s
1479+ ; CHECK-COMMON-NEXT: ret
1480+ entry:
; Zero-extend every i8 lane of %input to i32, giving a <16 x i32> value.
1481+ %input.wide = zext <16 x i8 > %input to <16 x i32 >
; Fold the 16 widened lanes into the 2-lane accumulator %acc. The intrinsic is
; called by its unsuffixed name here (no type-mangling suffix is written).
1482+ %partial.reduce = tail call <2 x i32 > @llvm.vector.partial.reduce.add (<2 x i32 > %acc , <16 x i32 > %input.wide )
1483+ ret <2 x i32 > %partial.reduce
1484+ }
0 commit comments