@@ -1453,30 +1453,48 @@ define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32>
14531453}
14541454
14551455define <2 x i32 > @udot_v16i8tov2i32 (<2 x i32 > %acc , <16 x i8 > %input ) {
1456- ; CHECK-COMMON-LABEL: udot_v16i8tov2i32:
1457- ; CHECK-COMMON: // %bb.0: // %entry
1458- ; CHECK-COMMON-NEXT: ushll v2.8h, v1.8b, #0
1459- ; CHECK-COMMON-NEXT: // kill: def $d0 killed $d0 def $q0
1460- ; CHECK-COMMON-NEXT: ushll2 v1.8h, v1.16b, #0
1461- ; CHECK-COMMON-NEXT: ushll v3.4s, v2.4h, #0
1462- ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v2.4h
1463- ; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #0
1464- ; CHECK-COMMON-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1465- ; CHECK-COMMON-NEXT: ext v3.16b, v3.16b, v3.16b, #8
1466- ; CHECK-COMMON-NEXT: add v0.2s, v3.2s, v0.2s
1467- ; CHECK-COMMON-NEXT: ext v3.16b, v4.16b, v4.16b, #8
1468- ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v2.4h
1469- ; CHECK-COMMON-NEXT: ushll v2.4s, v1.4h, #0
1470- ; CHECK-COMMON-NEXT: add v0.2s, v3.2s, v0.2s
1471- ; CHECK-COMMON-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1472- ; CHECK-COMMON-NEXT: ushll2 v3.4s, v1.8h, #0
1473- ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
1474- ; CHECK-COMMON-NEXT: ext v1.16b, v1.16b, v1.16b, #8
1475- ; CHECK-COMMON-NEXT: add v0.2s, v2.2s, v0.2s
1476- ; CHECK-COMMON-NEXT: ext v2.16b, v3.16b, v3.16b, #8
1477- ; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
1478- ; CHECK-COMMON-NEXT: add v0.2s, v2.2s, v0.2s
1479- ; CHECK-COMMON-NEXT: ret
1456+ ; CHECK-NODOT-LABEL: udot_v16i8tov2i32:
1457+ ; CHECK-NODOT: // %bb.0: // %entry
1458+ ; CHECK-NODOT-NEXT: ushll v2.8h, v1.8b, #0
1459+ ; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
1460+ ; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
1461+ ; CHECK-NODOT-NEXT: ushll v3.4s, v2.4h, #0
1462+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h
1463+ ; CHECK-NODOT-NEXT: ushll2 v4.4s, v2.8h, #0
1464+ ; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1465+ ; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
1466+ ; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
1467+ ; CHECK-NODOT-NEXT: ext v3.16b, v4.16b, v4.16b, #8
1468+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h
1469+ ; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
1470+ ; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
1471+ ; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1472+ ; CHECK-NODOT-NEXT: ushll2 v3.4s, v1.8h, #0
1473+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
1474+ ; CHECK-NODOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8
1475+ ; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
1476+ ; CHECK-NODOT-NEXT: ext v2.16b, v3.16b, v3.16b, #8
1477+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
1478+ ; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
1479+ ; CHECK-NODOT-NEXT: ret
1480+ ;
1481+ ; CHECK-DOT-LABEL: udot_v16i8tov2i32:
1482+ ; CHECK-DOT: // %bb.0: // %entry
1483+ ; CHECK-DOT-NEXT: movi v2.16b, #1
1484+ ; CHECK-DOT-NEXT: fmov d0, d0
1485+ ; CHECK-DOT-NEXT: udot v0.4s, v1.16b, v2.16b
1486+ ; CHECK-DOT-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1487+ ; CHECK-DOT-NEXT: add v0.2s, v0.2s, v1.2s
1488+ ; CHECK-DOT-NEXT: ret
1489+ ;
1490+ ; CHECK-DOT-I8MM-LABEL: udot_v16i8tov2i32:
1491+ ; CHECK-DOT-I8MM: // %bb.0: // %entry
1492+ ; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
1493+ ; CHECK-DOT-I8MM-NEXT: fmov d0, d0
1494+ ; CHECK-DOT-I8MM-NEXT: udot v0.4s, v1.16b, v2.16b
1495+ ; CHECK-DOT-I8MM-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1496+ ; CHECK-DOT-I8MM-NEXT: add v0.2s, v0.2s, v1.2s
1497+ ; CHECK-DOT-I8MM-NEXT: ret
14801498entry:
14811499 %input.wide = zext <16 x i8 > %input to <16 x i32 >
14821500 %partial.reduce = tail call <2 x i32 > @llvm.vector.partial.reduce.add (<2 x i32 > %acc , <16 x i32 > %input.wide )
0 commit comments