Skip to content

Commit fc95348

Browse files
committed
Precommit test
1 parent 224a717 commit fc95348

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,3 +1451,34 @@ define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32>
14511451
%red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
14521452
ret <4 x i32> %red
14531453
}
1454+
1455+
; Partial add-reduction of a zero-extended <16 x i8> into a <2 x i32> accumulator.
; The body widens %input to <16 x i32> and passes it, together with %acc, to
; @llvm.vector.partial.reduce.add, returning the <2 x i32> result.
;
; The CHECK-COMMON lines below pin the expected AArch64 assembly. As written
; they expect a widening chain (ushll/ushll2, uaddw, ext, add) and contain no
; udot instruction.
; NOTE(review): given the function name and the "Precommit test" commit title,
; this presumably records baseline codegen ahead of a follow-up that selects
; udot here - confirm against the follow-up change.
; NOTE(review): the assertions look generator-produced (e.g. by
; update_llc_test_checks.py); regenerate rather than hand-edit - confirm
; against the header of the test file.
define <2 x i32> @udot_v16i8tov2i32(<2 x i32> %acc, <16 x i8> %input) {
1456+
; CHECK-COMMON-LABEL: udot_v16i8tov2i32:
1457+
; CHECK-COMMON: // %bb.0: // %entry
1458+
; CHECK-COMMON-NEXT: ushll v2.8h, v1.8b, #0
1459+
; CHECK-COMMON-NEXT: // kill: def $d0 killed $d0 def $q0
1460+
; CHECK-COMMON-NEXT: ushll2 v1.8h, v1.16b, #0
1461+
; CHECK-COMMON-NEXT: ushll v3.4s, v2.4h, #0
1462+
; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v2.4h
1463+
; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #0
1464+
; CHECK-COMMON-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1465+
; CHECK-COMMON-NEXT: ext v3.16b, v3.16b, v3.16b, #8
1466+
; CHECK-COMMON-NEXT: add v0.2s, v3.2s, v0.2s
1467+
; CHECK-COMMON-NEXT: ext v3.16b, v4.16b, v4.16b, #8
1468+
; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v2.4h
1469+
; CHECK-COMMON-NEXT: ushll v2.4s, v1.4h, #0
1470+
; CHECK-COMMON-NEXT: add v0.2s, v3.2s, v0.2s
1471+
; CHECK-COMMON-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1472+
; CHECK-COMMON-NEXT: ushll2 v3.4s, v1.8h, #0
1473+
; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
1474+
; CHECK-COMMON-NEXT: ext v1.16b, v1.16b, v1.16b, #8
1475+
; CHECK-COMMON-NEXT: add v0.2s, v2.2s, v0.2s
1476+
; CHECK-COMMON-NEXT: ext v2.16b, v3.16b, v3.16b, #8
1477+
; CHECK-COMMON-NEXT: uaddw v0.4s, v0.4s, v1.4h
1478+
; CHECK-COMMON-NEXT: add v0.2s, v2.2s, v0.2s
1479+
; CHECK-COMMON-NEXT: ret
1480+
entry:
1481+
; Zero-extend each of the 16 input bytes to a 32-bit lane.
%input.wide = zext <16 x i8> %input to <16 x i32>
1482+
; Reduce the 16 widened lanes into the 2-lane accumulator type.
%partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add(<2 x i32> %acc, <16 x i32> %input.wide)
1483+
ret <2 x i32> %partial.reduce
1484+
}

0 commit comments

Comments
 (0)