| 
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py  | 
2 |  | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=AVXVNNI  | 
3 |  | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=AVX512,AVX512VNNI  | 
4 |  | -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVNNI  | 
 | 2 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX  | 
 | 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX512  | 
 | 4 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VNNI  | 
 | 5 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VLVNNI  | 
5 | 6 | 
 
  | 
6 | 7 | define i32 @no_dpbusd(ptr%a, ptr%b, i32 %c, i32 %n) {  | 
7 |  | -; AVXVNNI-LABEL: no_dpbusd:  | 
8 |  | -; AVXVNNI:       # %bb.0: # %entry  | 
9 |  | -; AVXVNNI-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
10 |  | -; AVXVNNI-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
11 |  | -; AVXVNNI-NEXT:    vpmaddwd %ymm0, %ymm1, %ymm0  | 
12 |  | -; AVXVNNI-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
13 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
14 |  | -; AVXVNNI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
15 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
16 |  | -; AVXVNNI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
17 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
18 |  | -; AVXVNNI-NEXT:    vmovd %xmm0, %eax  | 
19 |  | -; AVXVNNI-NEXT:    addl %edx, %eax  | 
20 |  | -; AVXVNNI-NEXT:    vzeroupper  | 
21 |  | -; AVXVNNI-NEXT:    retq  | 
22 |  | -;  | 
23 |  | -; AVX512-LABEL: no_dpbusd:  | 
24 |  | -; AVX512:       # %bb.0: # %entry  | 
25 |  | -; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
26 |  | -; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
27 |  | -; AVX512-NEXT:    vpmaddwd %ymm0, %ymm1, %ymm0  | 
28 |  | -; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
29 |  | -; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
30 |  | -; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
31 |  | -; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
32 |  | -; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
33 |  | -; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
34 |  | -; AVX512-NEXT:    vmovd %xmm0, %eax  | 
35 |  | -; AVX512-NEXT:    addl %edx, %eax  | 
36 |  | -; AVX512-NEXT:    vzeroupper  | 
37 |  | -; AVX512-NEXT:    retq  | 
 | 8 | +; CHECK-LABEL: no_dpbusd:  | 
 | 9 | +; CHECK:       # %bb.0: # %entry  | 
 | 10 | +; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
 | 11 | +; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
 | 12 | +; CHECK-NEXT:    vpmaddwd %ymm0, %ymm1, %ymm0  | 
 | 13 | +; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 14 | +; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 15 | +; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
 | 16 | +; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 17 | +; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
 | 18 | +; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 19 | +; CHECK-NEXT:    vmovd %xmm0, %eax  | 
 | 20 | +; CHECK-NEXT:    addl %edx, %eax  | 
 | 21 | +; CHECK-NEXT:    vzeroupper  | 
 | 22 | +; CHECK-NEXT:    retq  | 
38 | 23 | entry:  | 
39 | 24 |   %0 = load <16 x i8>, ptr %a, align 16  | 
40 | 25 |   %1 = zext <16 x i8> %0 to <16 x i32>  | 
@@ -99,25 +84,44 @@ entry:  | 
99 | 84 | }  | 
100 | 85 | 
 
  | 
101 | 86 | define i32 @mul_zext(ptr%a, ptr%b, i32 %c, i32 %n) {  | 
102 |  | -; AVXVNNI-LABEL: mul_zext:  | 
103 |  | -; AVXVNNI:       # %bb.0: # %entry  | 
104 |  | -; AVXVNNI-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
105 |  | -; AVXVNNI-NEXT:    vpmovsxbw (%rsi), %ymm1  | 
106 |  | -; AVXVNNI-NEXT:    vpmullw %ymm0, %ymm1, %ymm0  | 
107 |  | -; AVXVNNI-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
108 |  | -; AVXVNNI-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero  | 
109 |  | -; AVXVNNI-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero  | 
110 |  | -; AVXVNNI-NEXT:    vpaddd %ymm1, %ymm0, %ymm0  | 
111 |  | -; AVXVNNI-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
112 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
113 |  | -; AVXVNNI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
114 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
115 |  | -; AVXVNNI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
116 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
117 |  | -; AVXVNNI-NEXT:    vmovd %xmm0, %eax  | 
118 |  | -; AVXVNNI-NEXT:    addl %edx, %eax  | 
119 |  | -; AVXVNNI-NEXT:    vzeroupper  | 
120 |  | -; AVXVNNI-NEXT:    retq  | 
 | 87 | +; AVXVNNI-AVX-LABEL: mul_zext:  | 
 | 88 | +; AVXVNNI-AVX:       # %bb.0: # %entry  | 
 | 89 | +; AVXVNNI-AVX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
 | 90 | +; AVXVNNI-AVX-NEXT:    vpmovsxbw (%rsi), %ymm1  | 
 | 91 | +; AVXVNNI-AVX-NEXT:    vpmullw %ymm0, %ymm1, %ymm0  | 
 | 92 | +; AVXVNNI-AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 93 | +; AVXVNNI-AVX-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero  | 
 | 94 | +; AVXVNNI-AVX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero  | 
 | 95 | +; AVXVNNI-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0  | 
 | 96 | +; AVXVNNI-AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 97 | +; AVXVNNI-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 98 | +; AVXVNNI-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
 | 99 | +; AVXVNNI-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 100 | +; AVXVNNI-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
 | 101 | +; AVXVNNI-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 102 | +; AVXVNNI-AVX-NEXT:    vmovd %xmm0, %eax  | 
 | 103 | +; AVXVNNI-AVX-NEXT:    addl %edx, %eax  | 
 | 104 | +; AVXVNNI-AVX-NEXT:    vzeroupper  | 
 | 105 | +; AVXVNNI-AVX-NEXT:    retq  | 
 | 106 | +;  | 
 | 107 | +; AVXVNNI-AVX512-LABEL: mul_zext:  | 
 | 108 | +; AVXVNNI-AVX512:       # %bb.0: # %entry  | 
 | 109 | +; AVXVNNI-AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
 | 110 | +; AVXVNNI-AVX512-NEXT:    vpmovsxbw (%rsi), %ymm1  | 
 | 111 | +; AVXVNNI-AVX512-NEXT:    vpmullw %ymm0, %ymm1, %ymm0  | 
 | 112 | +; AVXVNNI-AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero  | 
 | 113 | +; AVXVNNI-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1  | 
 | 114 | +; AVXVNNI-AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0  | 
 | 115 | +; AVXVNNI-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 116 | +; AVXVNNI-AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 117 | +; AVXVNNI-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
 | 118 | +; AVXVNNI-AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 119 | +; AVXVNNI-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
 | 120 | +; AVXVNNI-AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 121 | +; AVXVNNI-AVX512-NEXT:    vmovd %xmm0, %eax  | 
 | 122 | +; AVXVNNI-AVX512-NEXT:    addl %edx, %eax  | 
 | 123 | +; AVXVNNI-AVX512-NEXT:    vzeroupper  | 
 | 124 | +; AVXVNNI-AVX512-NEXT:    retq  | 
121 | 125 | ;  | 
122 | 126 | ; AVX512-LABEL: mul_zext:  | 
123 | 127 | ; AVX512:       # %bb.0: # %entry  | 
@@ -153,25 +157,44 @@ entry:  | 
153 | 157 | }  | 
154 | 158 | 
 
  | 
155 | 159 | define i32 @mul_sext(ptr%a, ptr%b, i32 %c, i32 %n) {  | 
156 |  | -; AVXVNNI-LABEL: mul_sext:  | 
157 |  | -; AVXVNNI:       # %bb.0: # %entry  | 
158 |  | -; AVXVNNI-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
159 |  | -; AVXVNNI-NEXT:    vpmovsxbw (%rsi), %ymm1  | 
160 |  | -; AVXVNNI-NEXT:    vpmullw %ymm0, %ymm1, %ymm0  | 
161 |  | -; AVXVNNI-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
162 |  | -; AVXVNNI-NEXT:    vpmovsxwd %xmm1, %ymm1  | 
163 |  | -; AVXVNNI-NEXT:    vpmovsxwd %xmm0, %ymm0  | 
164 |  | -; AVXVNNI-NEXT:    vpaddd %ymm1, %ymm0, %ymm0  | 
165 |  | -; AVXVNNI-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
166 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
167 |  | -; AVXVNNI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
168 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
169 |  | -; AVXVNNI-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
170 |  | -; AVXVNNI-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
171 |  | -; AVXVNNI-NEXT:    vmovd %xmm0, %eax  | 
172 |  | -; AVXVNNI-NEXT:    addl %edx, %eax  | 
173 |  | -; AVXVNNI-NEXT:    vzeroupper  | 
174 |  | -; AVXVNNI-NEXT:    retq  | 
 | 160 | +; AVXVNNI-AVX-LABEL: mul_sext:  | 
 | 161 | +; AVXVNNI-AVX:       # %bb.0: # %entry  | 
 | 162 | +; AVXVNNI-AVX-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
 | 163 | +; AVXVNNI-AVX-NEXT:    vpmovsxbw (%rsi), %ymm1  | 
 | 164 | +; AVXVNNI-AVX-NEXT:    vpmullw %ymm0, %ymm1, %ymm0  | 
 | 165 | +; AVXVNNI-AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 166 | +; AVXVNNI-AVX-NEXT:    vpmovsxwd %xmm1, %ymm1  | 
 | 167 | +; AVXVNNI-AVX-NEXT:    vpmovsxwd %xmm0, %ymm0  | 
 | 168 | +; AVXVNNI-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0  | 
 | 169 | +; AVXVNNI-AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 170 | +; AVXVNNI-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 171 | +; AVXVNNI-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
 | 172 | +; AVXVNNI-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 173 | +; AVXVNNI-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
 | 174 | +; AVXVNNI-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 175 | +; AVXVNNI-AVX-NEXT:    vmovd %xmm0, %eax  | 
 | 176 | +; AVXVNNI-AVX-NEXT:    addl %edx, %eax  | 
 | 177 | +; AVXVNNI-AVX-NEXT:    vzeroupper  | 
 | 178 | +; AVXVNNI-AVX-NEXT:    retq  | 
 | 179 | +;  | 
 | 180 | +; AVXVNNI-AVX512-LABEL: mul_sext:  | 
 | 181 | +; AVXVNNI-AVX512:       # %bb.0: # %entry  | 
 | 182 | +; AVXVNNI-AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero  | 
 | 183 | +; AVXVNNI-AVX512-NEXT:    vpmovsxbw (%rsi), %ymm1  | 
 | 184 | +; AVXVNNI-AVX512-NEXT:    vpmullw %ymm0, %ymm1, %ymm0  | 
 | 185 | +; AVXVNNI-AVX512-NEXT:    vpmovsxwd %ymm0, %zmm0  | 
 | 186 | +; AVXVNNI-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1  | 
 | 187 | +; AVXVNNI-AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0  | 
 | 188 | +; AVXVNNI-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1  | 
 | 189 | +; AVXVNNI-AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 190 | +; AVXVNNI-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]  | 
 | 191 | +; AVXVNNI-AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 192 | +; AVXVNNI-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]  | 
 | 193 | +; AVXVNNI-AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0  | 
 | 194 | +; AVXVNNI-AVX512-NEXT:    vmovd %xmm0, %eax  | 
 | 195 | +; AVXVNNI-AVX512-NEXT:    addl %edx, %eax  | 
 | 196 | +; AVXVNNI-AVX512-NEXT:    vzeroupper  | 
 | 197 | +; AVXVNNI-AVX512-NEXT:    retq  | 
175 | 198 | ;  | 
176 | 199 | ; AVX512-LABEL: mul_sext:  | 
177 | 200 | ; AVX512:       # %bb.0: # %entry  | 
@@ -312,17 +335,30 @@ entry:  | 
312 | 335 | declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)  | 
313 | 336 | 
 
  | 
314 | 337 | define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) {  | 
315 |  | -; AVXVNNI-LABEL: vpdpbusd_128:  | 
316 |  | -; AVXVNNI:       # %bb.0: # %entry  | 
317 |  | -; AVXVNNI-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero  | 
318 |  | -; AVXVNNI-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero  | 
319 |  | -; AVXVNNI-NEXT:    vpxor %xmm2, %xmm2, %xmm2  | 
320 |  | -; AVXVNNI-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]  | 
321 |  | -; AVXVNNI-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]  | 
322 |  | -; AVXVNNI-NEXT:    {vex} vpdpbusd %xmm1, %xmm0, %xmm2  | 
323 |  | -; AVXVNNI-NEXT:    vmovd %xmm2, %eax  | 
324 |  | -; AVXVNNI-NEXT:    addl %edx, %eax  | 
325 |  | -; AVXVNNI-NEXT:    retq  | 
 | 338 | +; AVXVNNI-AVX-LABEL: vpdpbusd_128:  | 
 | 339 | +; AVXVNNI-AVX:       # %bb.0: # %entry  | 
 | 340 | +; AVXVNNI-AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero  | 
 | 341 | +; AVXVNNI-AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero  | 
 | 342 | +; AVXVNNI-AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2  | 
 | 343 | +; AVXVNNI-AVX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]  | 
 | 344 | +; AVXVNNI-AVX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]  | 
 | 345 | +; AVXVNNI-AVX-NEXT:    {vex} vpdpbusd %xmm1, %xmm0, %xmm2  | 
 | 346 | +; AVXVNNI-AVX-NEXT:    vmovd %xmm2, %eax  | 
 | 347 | +; AVXVNNI-AVX-NEXT:    addl %edx, %eax  | 
 | 348 | +; AVXVNNI-AVX-NEXT:    retq  | 
 | 349 | +;  | 
 | 350 | +; AVXVNNI-AVX512-LABEL: vpdpbusd_128:  | 
 | 351 | +; AVXVNNI-AVX512:       # %bb.0: # %entry  | 
 | 352 | +; AVXVNNI-AVX512-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero  | 
 | 353 | +; AVXVNNI-AVX512-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero  | 
 | 354 | +; AVXVNNI-AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2  | 
 | 355 | +; AVXVNNI-AVX512-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]  | 
 | 356 | +; AVXVNNI-AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]  | 
 | 357 | +; AVXVNNI-AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2  | 
 | 358 | +; AVXVNNI-AVX512-NEXT:    {vex} vpdpbusd %xmm1, %xmm0, %xmm2  | 
 | 359 | +; AVXVNNI-AVX512-NEXT:    vmovd %xmm2, %eax  | 
 | 360 | +; AVXVNNI-AVX512-NEXT:    addl %edx, %eax  | 
 | 361 | +; AVXVNNI-AVX512-NEXT:    retq  | 
326 | 362 | ;  | 
327 | 363 | ; AVX512VNNI-LABEL: vpdpbusd_128:  | 
328 | 364 | ; AVX512VNNI:       # %bb.0: # %entry  | 
 | 
0 commit comments