|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=AVXVNNI |
3 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=AVX512,AVX512VNNI |
4 | | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVNNI |
| 2 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX512 |
| 4 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VNNI |
| 5 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VLVNNI |
5 | 6 |
|
6 | 7 | define i32 @no_dpbusd(ptr%a, ptr%b, i32 %c, i32 %n) { |
7 | | -; AVXVNNI-LABEL: no_dpbusd: |
8 | | -; AVXVNNI: # %bb.0: # %entry |
9 | | -; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
10 | | -; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
11 | | -; AVXVNNI-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0 |
12 | | -; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1 |
13 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
14 | | -; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
15 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
16 | | -; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
17 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
18 | | -; AVXVNNI-NEXT: vmovd %xmm0, %eax |
19 | | -; AVXVNNI-NEXT: addl %edx, %eax |
20 | | -; AVXVNNI-NEXT: vzeroupper |
21 | | -; AVXVNNI-NEXT: retq |
22 | | -; |
23 | | -; AVX512-LABEL: no_dpbusd: |
24 | | -; AVX512: # %bb.0: # %entry |
25 | | -; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
26 | | -; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
27 | | -; AVX512-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0 |
28 | | -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 |
29 | | -; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
30 | | -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
31 | | -; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
32 | | -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
33 | | -; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
34 | | -; AVX512-NEXT: vmovd %xmm0, %eax |
35 | | -; AVX512-NEXT: addl %edx, %eax |
36 | | -; AVX512-NEXT: vzeroupper |
37 | | -; AVX512-NEXT: retq |
| 8 | +; CHECK-LABEL: no_dpbusd: |
| 9 | +; CHECK: # %bb.0: # %entry |
| 10 | +; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
| 11 | +; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
| 12 | +; CHECK-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0 |
| 13 | +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 14 | +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 15 | +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
| 16 | +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 17 | +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
| 18 | +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 19 | +; CHECK-NEXT: vmovd %xmm0, %eax |
| 20 | +; CHECK-NEXT: addl %edx, %eax |
| 21 | +; CHECK-NEXT: vzeroupper |
| 22 | +; CHECK-NEXT: retq |
38 | 23 | entry: |
39 | 24 | %0 = load <16 x i8>, ptr %a, align 16 |
40 | 25 | %1 = zext <16 x i8> %0 to <16 x i32> |
@@ -99,25 +84,44 @@ entry: |
99 | 84 | } |
100 | 85 |
|
101 | 86 | define i32 @mul_zext(ptr%a, ptr%b, i32 %c, i32 %n) { |
102 | | -; AVXVNNI-LABEL: mul_zext: |
103 | | -; AVXVNNI: # %bb.0: # %entry |
104 | | -; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
105 | | -; AVXVNNI-NEXT: vpmovsxbw (%rsi), %ymm1 |
106 | | -; AVXVNNI-NEXT: vpmullw %ymm0, %ymm1, %ymm0 |
107 | | -; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1 |
108 | | -; AVXVNNI-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero |
109 | | -; AVXVNNI-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
110 | | -; AVXVNNI-NEXT: vpaddd %ymm1, %ymm0, %ymm0 |
111 | | -; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1 |
112 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
113 | | -; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
114 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
115 | | -; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
116 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
117 | | -; AVXVNNI-NEXT: vmovd %xmm0, %eax |
118 | | -; AVXVNNI-NEXT: addl %edx, %eax |
119 | | -; AVXVNNI-NEXT: vzeroupper |
120 | | -; AVXVNNI-NEXT: retq |
| 87 | +; AVXVNNI-AVX-LABEL: mul_zext: |
| 88 | +; AVXVNNI-AVX: # %bb.0: # %entry |
| 89 | +; AVXVNNI-AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
| 90 | +; AVXVNNI-AVX-NEXT: vpmovsxbw (%rsi), %ymm1 |
| 91 | +; AVXVNNI-AVX-NEXT: vpmullw %ymm0, %ymm1, %ymm0 |
| 92 | +; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 93 | +; AVXVNNI-AVX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero |
| 94 | +; AVXVNNI-AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero |
| 95 | +; AVXVNNI-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 |
| 96 | +; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 97 | +; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 98 | +; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
| 99 | +; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 100 | +; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
| 101 | +; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 102 | +; AVXVNNI-AVX-NEXT: vmovd %xmm0, %eax |
| 103 | +; AVXVNNI-AVX-NEXT: addl %edx, %eax |
| 104 | +; AVXVNNI-AVX-NEXT: vzeroupper |
| 105 | +; AVXVNNI-AVX-NEXT: retq |
| 106 | +; |
| 107 | +; AVXVNNI-AVX512-LABEL: mul_zext: |
| 108 | +; AVXVNNI-AVX512: # %bb.0: # %entry |
| 109 | +; AVXVNNI-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
| 110 | +; AVXVNNI-AVX512-NEXT: vpmovsxbw (%rsi), %ymm1 |
| 111 | +; AVXVNNI-AVX512-NEXT: vpmullw %ymm0, %ymm1, %ymm0 |
| 112 | +; AVXVNNI-AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero |
| 113 | +; AVXVNNI-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 |
| 114 | +; AVXVNNI-AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 |
| 115 | +; AVXVNNI-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 116 | +; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 117 | +; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
| 118 | +; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 119 | +; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
| 120 | +; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 121 | +; AVXVNNI-AVX512-NEXT: vmovd %xmm0, %eax |
| 122 | +; AVXVNNI-AVX512-NEXT: addl %edx, %eax |
| 123 | +; AVXVNNI-AVX512-NEXT: vzeroupper |
| 124 | +; AVXVNNI-AVX512-NEXT: retq |
121 | 125 | ; |
122 | 126 | ; AVX512-LABEL: mul_zext: |
123 | 127 | ; AVX512: # %bb.0: # %entry |
@@ -153,25 +157,44 @@ entry: |
153 | 157 | } |
154 | 158 |
|
155 | 159 | define i32 @mul_sext(ptr%a, ptr%b, i32 %c, i32 %n) { |
156 | | -; AVXVNNI-LABEL: mul_sext: |
157 | | -; AVXVNNI: # %bb.0: # %entry |
158 | | -; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
159 | | -; AVXVNNI-NEXT: vpmovsxbw (%rsi), %ymm1 |
160 | | -; AVXVNNI-NEXT: vpmullw %ymm0, %ymm1, %ymm0 |
161 | | -; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1 |
162 | | -; AVXVNNI-NEXT: vpmovsxwd %xmm1, %ymm1 |
163 | | -; AVXVNNI-NEXT: vpmovsxwd %xmm0, %ymm0 |
164 | | -; AVXVNNI-NEXT: vpaddd %ymm1, %ymm0, %ymm0 |
165 | | -; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1 |
166 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
167 | | -; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
168 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
169 | | -; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
170 | | -; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
171 | | -; AVXVNNI-NEXT: vmovd %xmm0, %eax |
172 | | -; AVXVNNI-NEXT: addl %edx, %eax |
173 | | -; AVXVNNI-NEXT: vzeroupper |
174 | | -; AVXVNNI-NEXT: retq |
| 160 | +; AVXVNNI-AVX-LABEL: mul_sext: |
| 161 | +; AVXVNNI-AVX: # %bb.0: # %entry |
| 162 | +; AVXVNNI-AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
| 163 | +; AVXVNNI-AVX-NEXT: vpmovsxbw (%rsi), %ymm1 |
| 164 | +; AVXVNNI-AVX-NEXT: vpmullw %ymm0, %ymm1, %ymm0 |
| 165 | +; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 166 | +; AVXVNNI-AVX-NEXT: vpmovsxwd %xmm1, %ymm1 |
| 167 | +; AVXVNNI-AVX-NEXT: vpmovsxwd %xmm0, %ymm0 |
| 168 | +; AVXVNNI-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 |
| 169 | +; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 170 | +; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 171 | +; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
| 172 | +; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 173 | +; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
| 174 | +; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 175 | +; AVXVNNI-AVX-NEXT: vmovd %xmm0, %eax |
| 176 | +; AVXVNNI-AVX-NEXT: addl %edx, %eax |
| 177 | +; AVXVNNI-AVX-NEXT: vzeroupper |
| 178 | +; AVXVNNI-AVX-NEXT: retq |
| 179 | +; |
| 180 | +; AVXVNNI-AVX512-LABEL: mul_sext: |
| 181 | +; AVXVNNI-AVX512: # %bb.0: # %entry |
| 182 | +; AVXVNNI-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero |
| 183 | +; AVXVNNI-AVX512-NEXT: vpmovsxbw (%rsi), %ymm1 |
| 184 | +; AVXVNNI-AVX512-NEXT: vpmullw %ymm0, %ymm1, %ymm0 |
| 185 | +; AVXVNNI-AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 |
| 186 | +; AVXVNNI-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 |
| 187 | +; AVXVNNI-AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 |
| 188 | +; AVXVNNI-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| 189 | +; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 190 | +; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] |
| 191 | +; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 192 | +; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] |
| 193 | +; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 |
| 194 | +; AVXVNNI-AVX512-NEXT: vmovd %xmm0, %eax |
| 195 | +; AVXVNNI-AVX512-NEXT: addl %edx, %eax |
| 196 | +; AVXVNNI-AVX512-NEXT: vzeroupper |
| 197 | +; AVXVNNI-AVX512-NEXT: retq |
175 | 198 | ; |
176 | 199 | ; AVX512-LABEL: mul_sext: |
177 | 200 | ; AVX512: # %bb.0: # %entry |
@@ -312,17 +335,30 @@ entry: |
312 | 335 | declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) |
313 | 336 |
|
314 | 337 | define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) { |
315 | | -; AVXVNNI-LABEL: vpdpbusd_128: |
316 | | -; AVXVNNI: # %bb.0: # %entry |
317 | | -; AVXVNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero |
318 | | -; AVXVNNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero |
319 | | -; AVXVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
320 | | -; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] |
321 | | -; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] |
322 | | -; AVXVNNI-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2 |
323 | | -; AVXVNNI-NEXT: vmovd %xmm2, %eax |
324 | | -; AVXVNNI-NEXT: addl %edx, %eax |
325 | | -; AVXVNNI-NEXT: retq |
| 338 | +; AVXVNNI-AVX-LABEL: vpdpbusd_128: |
| 339 | +; AVXVNNI-AVX: # %bb.0: # %entry |
| 340 | +; AVXVNNI-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero |
| 341 | +; AVXVNNI-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero |
| 342 | +; AVXVNNI-AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
| 343 | +; AVXVNNI-AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] |
| 344 | +; AVXVNNI-AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] |
| 345 | +; AVXVNNI-AVX-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2 |
| 346 | +; AVXVNNI-AVX-NEXT: vmovd %xmm2, %eax |
| 347 | +; AVXVNNI-AVX-NEXT: addl %edx, %eax |
| 348 | +; AVXVNNI-AVX-NEXT: retq |
| 349 | +; |
| 350 | +; AVXVNNI-AVX512-LABEL: vpdpbusd_128: |
| 351 | +; AVXVNNI-AVX512: # %bb.0: # %entry |
| 352 | +; AVXVNNI-AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero |
| 353 | +; AVXVNNI-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero |
| 354 | +; AVXVNNI-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
| 355 | +; AVXVNNI-AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3] |
| 356 | +; AVXVNNI-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] |
| 357 | +; AVXVNNI-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
| 358 | +; AVXVNNI-AVX512-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2 |
| 359 | +; AVXVNNI-AVX512-NEXT: vmovd %xmm2, %eax |
| 360 | +; AVXVNNI-AVX512-NEXT: addl %edx, %eax |
| 361 | +; AVXVNNI-AVX512-NEXT: retq |
326 | 362 | ; |
327 | 363 | ; AVX512VNNI-LABEL: vpdpbusd_128: |
328 | 364 | ; AVX512VNNI: # %bb.0: # %entry |
|
0 commit comments