Skip to content

Commit 2ab2ffe

Browse files
authored
[X86] createVPDPBUSD - only use 512-bit X86ISD::VPDPBUSD on AVX512VNNI targets (llvm#161152)
Inspired by llvm#160928 - if we have an AVX512 target capable of AVXVNNI but not AVX512VNNI, then we must split 512-bit (or larger) types to 256 bits
1 parent 585fd4c commit 2ab2ffe

File tree

3 files changed

+244
-156
lines changed

3 files changed

+244
-156
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4456,8 +4456,8 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
44564456
bool AllowAVX512 = true) {
44574457
assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
44584458
unsigned NumSubs = 1;
4459-
if ((CheckBWI && Subtarget.useBWIRegs()) ||
4460-
(!CheckBWI && AllowAVX512 && Subtarget.useAVX512Regs())) {
4459+
if (AllowAVX512 && ((CheckBWI && Subtarget.useBWIRegs()) ||
4460+
(!CheckBWI && Subtarget.useAVX512Regs()))) {
44614461
if (VT.getSizeInBits() > 512) {
44624462
NumSubs = VT.getSizeInBits() / 512;
44634463
assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
@@ -46197,7 +46197,7 @@ static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS,
4619746197
SDValue Zero = DAG.getConstant(0, DL, DpVT);
4619846198

4619946199
return SplitOpsAndApply(DAG, Subtarget, DL, DpVT, {Zero, DpOp0, DpOp1},
46200-
DpBuilder, false);
46200+
DpBuilder, /*CheckBWI=*/false, Subtarget.hasVNNI());
4620146201
}
4620246202

4620346203
// Create a PSADBW given two sources representable as zexts of vXi8.

llvm/test/CodeGen/X86/dpbusd.ll

Lines changed: 119 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,25 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=AVXVNNI
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=AVX512,AVX512VNNI
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VLVNNI
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxvnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVXVNNI,AVXVNNI-AVX512
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VNNI
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512VLVNNI
56

67
define i32 @no_dpbusd(ptr%a, ptr%b, i32 %c, i32 %n) {
7-
; AVXVNNI-LABEL: no_dpbusd:
8-
; AVXVNNI: # %bb.0: # %entry
9-
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
10-
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
11-
; AVXVNNI-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
12-
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
13-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
14-
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
15-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
16-
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
17-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
18-
; AVXVNNI-NEXT: vmovd %xmm0, %eax
19-
; AVXVNNI-NEXT: addl %edx, %eax
20-
; AVXVNNI-NEXT: vzeroupper
21-
; AVXVNNI-NEXT: retq
22-
;
23-
; AVX512-LABEL: no_dpbusd:
24-
; AVX512: # %bb.0: # %entry
25-
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
26-
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
27-
; AVX512-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
28-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
29-
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
30-
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
31-
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
32-
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
33-
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
34-
; AVX512-NEXT: vmovd %xmm0, %eax
35-
; AVX512-NEXT: addl %edx, %eax
36-
; AVX512-NEXT: vzeroupper
37-
; AVX512-NEXT: retq
8+
; CHECK-LABEL: no_dpbusd:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
11+
; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
12+
; CHECK-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
13+
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
14+
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
15+
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
16+
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
17+
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
18+
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
19+
; CHECK-NEXT: vmovd %xmm0, %eax
20+
; CHECK-NEXT: addl %edx, %eax
21+
; CHECK-NEXT: vzeroupper
22+
; CHECK-NEXT: retq
3823
entry:
3924
%0 = load <16 x i8>, ptr %a, align 16
4025
%1 = zext <16 x i8> %0 to <16 x i32>
@@ -99,25 +84,44 @@ entry:
9984
}
10085

10186
define i32 @mul_zext(ptr%a, ptr%b, i32 %c, i32 %n) {
102-
; AVXVNNI-LABEL: mul_zext:
103-
; AVXVNNI: # %bb.0: # %entry
104-
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
105-
; AVXVNNI-NEXT: vpmovsxbw (%rsi), %ymm1
106-
; AVXVNNI-NEXT: vpmullw %ymm0, %ymm1, %ymm0
107-
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
108-
; AVXVNNI-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
109-
; AVXVNNI-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
110-
; AVXVNNI-NEXT: vpaddd %ymm1, %ymm0, %ymm0
111-
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
112-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
113-
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
114-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
115-
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
116-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
117-
; AVXVNNI-NEXT: vmovd %xmm0, %eax
118-
; AVXVNNI-NEXT: addl %edx, %eax
119-
; AVXVNNI-NEXT: vzeroupper
120-
; AVXVNNI-NEXT: retq
87+
; AVXVNNI-AVX-LABEL: mul_zext:
88+
; AVXVNNI-AVX: # %bb.0: # %entry
89+
; AVXVNNI-AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
90+
; AVXVNNI-AVX-NEXT: vpmovsxbw (%rsi), %ymm1
91+
; AVXVNNI-AVX-NEXT: vpmullw %ymm0, %ymm1, %ymm0
92+
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
93+
; AVXVNNI-AVX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
94+
; AVXVNNI-AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
95+
; AVXVNNI-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
96+
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
97+
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
98+
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
99+
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
100+
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
101+
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
102+
; AVXVNNI-AVX-NEXT: vmovd %xmm0, %eax
103+
; AVXVNNI-AVX-NEXT: addl %edx, %eax
104+
; AVXVNNI-AVX-NEXT: vzeroupper
105+
; AVXVNNI-AVX-NEXT: retq
106+
;
107+
; AVXVNNI-AVX512-LABEL: mul_zext:
108+
; AVXVNNI-AVX512: # %bb.0: # %entry
109+
; AVXVNNI-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
110+
; AVXVNNI-AVX512-NEXT: vpmovsxbw (%rsi), %ymm1
111+
; AVXVNNI-AVX512-NEXT: vpmullw %ymm0, %ymm1, %ymm0
112+
; AVXVNNI-AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
113+
; AVXVNNI-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
114+
; AVXVNNI-AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
115+
; AVXVNNI-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
116+
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
117+
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
118+
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
119+
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
120+
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
121+
; AVXVNNI-AVX512-NEXT: vmovd %xmm0, %eax
122+
; AVXVNNI-AVX512-NEXT: addl %edx, %eax
123+
; AVXVNNI-AVX512-NEXT: vzeroupper
124+
; AVXVNNI-AVX512-NEXT: retq
121125
;
122126
; AVX512-LABEL: mul_zext:
123127
; AVX512: # %bb.0: # %entry
@@ -153,25 +157,44 @@ entry:
153157
}
154158

155159
define i32 @mul_sext(ptr%a, ptr%b, i32 %c, i32 %n) {
156-
; AVXVNNI-LABEL: mul_sext:
157-
; AVXVNNI: # %bb.0: # %entry
158-
; AVXVNNI-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
159-
; AVXVNNI-NEXT: vpmovsxbw (%rsi), %ymm1
160-
; AVXVNNI-NEXT: vpmullw %ymm0, %ymm1, %ymm0
161-
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
162-
; AVXVNNI-NEXT: vpmovsxwd %xmm1, %ymm1
163-
; AVXVNNI-NEXT: vpmovsxwd %xmm0, %ymm0
164-
; AVXVNNI-NEXT: vpaddd %ymm1, %ymm0, %ymm0
165-
; AVXVNNI-NEXT: vextracti128 $1, %ymm0, %xmm1
166-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
167-
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
168-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
169-
; AVXVNNI-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
170-
; AVXVNNI-NEXT: vpaddd %xmm1, %xmm0, %xmm0
171-
; AVXVNNI-NEXT: vmovd %xmm0, %eax
172-
; AVXVNNI-NEXT: addl %edx, %eax
173-
; AVXVNNI-NEXT: vzeroupper
174-
; AVXVNNI-NEXT: retq
160+
; AVXVNNI-AVX-LABEL: mul_sext:
161+
; AVXVNNI-AVX: # %bb.0: # %entry
162+
; AVXVNNI-AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
163+
; AVXVNNI-AVX-NEXT: vpmovsxbw (%rsi), %ymm1
164+
; AVXVNNI-AVX-NEXT: vpmullw %ymm0, %ymm1, %ymm0
165+
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
166+
; AVXVNNI-AVX-NEXT: vpmovsxwd %xmm1, %ymm1
167+
; AVXVNNI-AVX-NEXT: vpmovsxwd %xmm0, %ymm0
168+
; AVXVNNI-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
169+
; AVXVNNI-AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
170+
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
171+
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
172+
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
173+
; AVXVNNI-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
174+
; AVXVNNI-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
175+
; AVXVNNI-AVX-NEXT: vmovd %xmm0, %eax
176+
; AVXVNNI-AVX-NEXT: addl %edx, %eax
177+
; AVXVNNI-AVX-NEXT: vzeroupper
178+
; AVXVNNI-AVX-NEXT: retq
179+
;
180+
; AVXVNNI-AVX512-LABEL: mul_sext:
181+
; AVXVNNI-AVX512: # %bb.0: # %entry
182+
; AVXVNNI-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
183+
; AVXVNNI-AVX512-NEXT: vpmovsxbw (%rsi), %ymm1
184+
; AVXVNNI-AVX512-NEXT: vpmullw %ymm0, %ymm1, %ymm0
185+
; AVXVNNI-AVX512-NEXT: vpmovsxwd %ymm0, %zmm0
186+
; AVXVNNI-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
187+
; AVXVNNI-AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
188+
; AVXVNNI-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
189+
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
190+
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
191+
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
192+
; AVXVNNI-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
193+
; AVXVNNI-AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
194+
; AVXVNNI-AVX512-NEXT: vmovd %xmm0, %eax
195+
; AVXVNNI-AVX512-NEXT: addl %edx, %eax
196+
; AVXVNNI-AVX512-NEXT: vzeroupper
197+
; AVXVNNI-AVX512-NEXT: retq
175198
;
176199
; AVX512-LABEL: mul_sext:
177200
; AVX512: # %bb.0: # %entry
@@ -312,17 +335,30 @@ entry:
312335
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
313336

314337
define i32 @vpdpbusd_128(ptr%a, ptr%b, i32 %c, i32 %n) {
315-
; AVXVNNI-LABEL: vpdpbusd_128:
316-
; AVXVNNI: # %bb.0: # %entry
317-
; AVXVNNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
318-
; AVXVNNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
319-
; AVXVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
320-
; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
321-
; AVXVNNI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
322-
; AVXVNNI-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
323-
; AVXVNNI-NEXT: vmovd %xmm2, %eax
324-
; AVXVNNI-NEXT: addl %edx, %eax
325-
; AVXVNNI-NEXT: retq
338+
; AVXVNNI-AVX-LABEL: vpdpbusd_128:
339+
; AVXVNNI-AVX: # %bb.0: # %entry
340+
; AVXVNNI-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
341+
; AVXVNNI-AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
342+
; AVXVNNI-AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
343+
; AVXVNNI-AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
344+
; AVXVNNI-AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
345+
; AVXVNNI-AVX-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
346+
; AVXVNNI-AVX-NEXT: vmovd %xmm2, %eax
347+
; AVXVNNI-AVX-NEXT: addl %edx, %eax
348+
; AVXVNNI-AVX-NEXT: retq
349+
;
350+
; AVXVNNI-AVX512-LABEL: vpdpbusd_128:
351+
; AVXVNNI-AVX512: # %bb.0: # %entry
352+
; AVXVNNI-AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
353+
; AVXVNNI-AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
354+
; AVXVNNI-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
355+
; AVXVNNI-AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
356+
; AVXVNNI-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
357+
; AVXVNNI-AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
358+
; AVXVNNI-AVX512-NEXT: {vex} vpdpbusd %xmm1, %xmm0, %xmm2
359+
; AVXVNNI-AVX512-NEXT: vmovd %xmm2, %eax
360+
; AVXVNNI-AVX512-NEXT: addl %edx, %eax
361+
; AVXVNNI-AVX512-NEXT: retq
326362
;
327363
; AVX512VNNI-LABEL: vpdpbusd_128:
328364
; AVX512VNNI: # %bb.0: # %entry

0 commit comments

Comments
 (0)