; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm < %s | FileCheck %s
; RUN: llc -mtriple aarch64-none-linux-gnu -mattr=+neon,+i8mm -global-isel < %s | FileCheck %s
24
; The smmla intrinsic on <4 x i32> / <16 x i8> operands is expected to lower
; to a single SMMLA instruction on the incoming argument registers.
define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smmla.v4i32.v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smmla v0.4s, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
  ret <4 x i32> %vmmla1.i
}
1014
; The ummla intrinsic on <4 x i32> / <16 x i8> operands is expected to lower
; to a single UMMLA instruction on the incoming argument registers.
define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: ummla.v4i32.v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ummla v0.4s, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
  ret <4 x i32> %vmmla1.i
}
1824
; The usmmla intrinsic on <4 x i32> / <16 x i8> operands is expected to lower
; to a single USMMLA instruction on the incoming argument registers.
define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: usmmla.v4i32.v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usmmla v0.4s, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vusmmla1.i = tail call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
  ret <4 x i32> %vusmmla1.i
}
2634
; The usdot intrinsic on <2 x i32> / <8 x i8> operands is expected to lower
; to a single vector-form USDOT (v0.2s, v1.8b, v2.8b).
define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: usdot.v2i32.v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usdot v0.2s, v1.8b, v2.8b
; CHECK-NEXT:    ret
entry:
  %vusdot1.i = tail call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b)
  ret <2 x i32> %vusdot1.i
}
3444
3545define <2 x i32 > @usdot_lane.v2i32.v8i8 (<2 x i32 > %r , <8 x i8 > %a , <8 x i8 > %b ) {
46+ ; CHECK-LABEL: usdot_lane.v2i32.v8i8:
47+ ; CHECK: // %bb.0: // %entry
48+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
49+ ; CHECK-NEXT: usdot v0.2s, v1.8b, v2.4b[0]
50+ ; CHECK-NEXT: ret
3651entry:
37- ; CHECK-LABEL: usdot_lane.v2i32.v8i8
38- ; CHECK: usdot v0.2s, v1.8b, v2.4b[0]
3952 %0 = bitcast <8 x i8 > %b to <2 x i32 >
4053 %shuffle = shufflevector <2 x i32 > %0 , <2 x i32 > undef , <2 x i32 > zeroinitializer
4154 %1 = bitcast <2 x i32 > %shuffle to <8 x i8 >
@@ -44,9 +57,12 @@ entry:
4457}
4558
4659define <2 x i32 > @sudot_lane.v2i32.v8i8 (<2 x i32 > %r , <8 x i8 > %a , <8 x i8 > %b ) {
60+ ; CHECK-LABEL: sudot_lane.v2i32.v8i8:
61+ ; CHECK: // %bb.0: // %entry
62+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
63+ ; CHECK-NEXT: sudot v0.2s, v1.8b, v2.4b[0]
64+ ; CHECK-NEXT: ret
4765entry:
48- ; CHECK-LABEL: sudot_lane.v2i32.v8i8
49- ; CHECK: sudot v0.2s, v1.8b, v2.4b[0]
5066 %0 = bitcast <8 x i8 > %b to <2 x i32 >
5167 %shuffle = shufflevector <2 x i32 > %0 , <2 x i32 > undef , <2 x i32 > zeroinitializer
5268 %1 = bitcast <2 x i32 > %shuffle to <8 x i8 >
@@ -55,9 +71,11 @@ entry:
5571}
5672
5773define <2 x i32 > @usdot_lane.v2i32.v16i8 (<2 x i32 > %r , <8 x i8 > %a , <16 x i8 > %b ) {
74+ ; CHECK-LABEL: usdot_lane.v2i32.v16i8:
75+ ; CHECK: // %bb.0: // %entry
76+ ; CHECK-NEXT: usdot v0.2s, v1.8b, v2.4b[0]
77+ ; CHECK-NEXT: ret
5878entry:
59- ; CHECK-LABEL: usdot_lane.v2i32.v16i8
60- ; CHECK: usdot v0.2s, v1.8b, v2.4b[0]
6179 %0 = bitcast <16 x i8 > %b to <4 x i32 >
6280 %shuffle = shufflevector <4 x i32 > %0 , <4 x i32 > undef , <2 x i32 > zeroinitializer
6381 %1 = bitcast <2 x i32 > %shuffle to <8 x i8 >
@@ -66,9 +84,11 @@ entry:
6684}
6785
6886define <2 x i32 > @sudot_lane.v2i32.v16i8 (<2 x i32 > %r , <8 x i8 > %a , <16 x i8 > %b ) {
87+ ; CHECK-LABEL: sudot_lane.v2i32.v16i8:
88+ ; CHECK: // %bb.0: // %entry
89+ ; CHECK-NEXT: sudot v0.2s, v1.8b, v2.4b[0]
90+ ; CHECK-NEXT: ret
6991entry:
70- ; CHECK-LABEL: sudot_lane.v2i32.v16i8
71- ; CHECK: sudot v0.2s, v1.8b, v2.4b[0]
7292 %0 = bitcast <16 x i8 > %b to <4 x i32 >
7393 %shuffle = shufflevector <4 x i32 > %0 , <4 x i32 > undef , <2 x i32 > zeroinitializer
7494 %1 = bitcast <2 x i32 > %shuffle to <8 x i8 >
@@ -77,17 +97,22 @@ entry:
7797}
7898
; The usdot intrinsic on <4 x i32> / <16 x i8> operands is expected to lower
; to a single vector-form USDOT (v0.4s, v1.16b, v2.16b).
define <4 x i32> @usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: usdot.v4i32.v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usdot v0.4s, v1.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %vusdot1.i = tail call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
  ret <4 x i32> %vusdot1.i
}
86108
87109define <4 x i32 > @usdot_lane.v4i32.v16i8 (<4 x i32 > %r , <16 x i8 > %a , <8 x i8 > %b ) {
110+ ; CHECK-LABEL: usdot_lane.v4i32.v16i8:
111+ ; CHECK: // %bb.0: // %entry
112+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
113+ ; CHECK-NEXT: usdot v0.4s, v1.16b, v2.4b[0]
114+ ; CHECK-NEXT: ret
88115entry:
89- ; CHECK-LABEL: usdot_lane.v4i32.v16i8
90- ; CHECK: usdot v0.4s, v1.16b, v2.4b[0]
91116 %0 = bitcast <8 x i8 > %b to <2 x i32 >
92117 %shuffle = shufflevector <2 x i32 > %0 , <2 x i32 > undef , <4 x i32 > zeroinitializer
93118 %1 = bitcast <4 x i32 > %shuffle to <16 x i8 >
@@ -96,9 +121,12 @@ entry:
96121}
97122
98123define <4 x i32 > @sudot_lane.v4i32.v16i8 (<4 x i32 > %r , <16 x i8 > %a , <8 x i8 > %b ) {
124+ ; CHECK-LABEL: sudot_lane.v4i32.v16i8:
125+ ; CHECK: // %bb.0: // %entry
126+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
127+ ; CHECK-NEXT: sudot v0.4s, v1.16b, v2.4b[0]
128+ ; CHECK-NEXT: ret
99129entry:
100- ; CHECK-LABEL: sudot_lane.v4i32.v16i8
101- ; CHECK: sudot v0.4s, v1.16b, v2.4b[0]
102130 %0 = bitcast <8 x i8 > %b to <2 x i32 >
103131 %shuffle = shufflevector <2 x i32 > %0 , <2 x i32 > undef , <4 x i32 > zeroinitializer
104132 %1 = bitcast <4 x i32 > %shuffle to <16 x i8 >
@@ -107,9 +135,11 @@ entry:
107135}
108136
109137define <4 x i32 > @usdot_laneq.v4i32.v16i8 (<4 x i32 > %r , <16 x i8 > %a , <16 x i8 > %b ) {
138+ ; CHECK-LABEL: usdot_laneq.v4i32.v16i8:
139+ ; CHECK: // %bb.0: // %entry
140+ ; CHECK-NEXT: usdot v0.4s, v1.16b, v2.4b[0]
141+ ; CHECK-NEXT: ret
110142entry:
111- ; CHECK-LABEL: usdot_laneq.v4i32.v16i8
112- ; CHECK: usdot v0.4s, v1.16b, v2.4b[0]
113143 %0 = bitcast <16 x i8 > %b to <4 x i32 >
114144 %shuffle = shufflevector <4 x i32 > %0 , <4 x i32 > undef , <4 x i32 > zeroinitializer
115145 %1 = bitcast <4 x i32 > %shuffle to <16 x i8 >
@@ -118,9 +148,11 @@ entry:
118148}
119149
120150define <4 x i32 > @sudot_laneq.v4i32.v16i8 (<4 x i32 > %r , <16 x i8 > %a , <16 x i8 > %b ) {
151+ ; CHECK-LABEL: sudot_laneq.v4i32.v16i8:
152+ ; CHECK: // %bb.0: // %entry
153+ ; CHECK-NEXT: sudot v0.4s, v1.16b, v2.4b[0]
154+ ; CHECK-NEXT: ret
121155entry:
122- ; CHECK-LABEL: sudot_laneq.v4i32.v16i8
123- ; CHECK: sudot v0.4s, v1.16b, v2.4b[0]
124156 %0 = bitcast <16 x i8 > %b to <4 x i32 >
125157 %shuffle = shufflevector <4 x i32 > %0 , <4 x i32 > undef , <4 x i32 > zeroinitializer
126158 %1 = bitcast <4 x i32 > %shuffle to <16 x i8 >
@@ -133,4 +165,3 @@ declare <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16
; Declarations of the i8mm intrinsics called by the test functions in this file.
declare <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
declare <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
declare <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
136-
0 commit comments