11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX512BW-VNNI
3- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX-VNNI
2+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
3+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,AVX-VNNI
44; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512-VNNI
55; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
66
@@ -14,31 +14,11 @@ define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
1414}
1515
1616define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> %a2) {
17- ; ZNVER-LABEL: vpdpwssd_v16i32_accumulate:
18- ; ZNVER: # %bb.0:
19- ; ZNVER-NEXT: vpdpwssd %zmm1, %zmm0, %zmm2
20- ; ZNVER-NEXT: vmovdqa64 %zmm2, %zmm0
21- ; ZNVER-NEXT: retq
22- ;
23- ; AVX512-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
24- ; AVX512-VNNI: # %bb.0:
25- ; AVX512-VNNI-NEXT: vextracti64x4 $1, %zmm1, %ymm3
26- ; AVX512-VNNI-NEXT: vextracti64x4 $1, %zmm0, %ymm4
27- ; AVX512-VNNI-NEXT: vpmaddwd %ymm3, %ymm4, %ymm3
28- ; AVX512-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
29- ; AVX512-VNNI-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
30- ; AVX512-VNNI-NEXT: vpaddd %zmm2, %zmm0, %zmm0
31- ; AVX512-VNNI-NEXT: retq
32- ;
33- ; AVX512VL-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
34- ; AVX512VL-VNNI: # %bb.0:
35- ; AVX512VL-VNNI-NEXT: vextracti64x4 $1, %zmm1, %ymm3
36- ; AVX512VL-VNNI-NEXT: vextracti64x4 $1, %zmm0, %ymm4
37- ; AVX512VL-VNNI-NEXT: vpmaddwd %ymm3, %ymm4, %ymm3
38- ; AVX512VL-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
39- ; AVX512VL-VNNI-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
40- ; AVX512VL-VNNI-NEXT: vpaddd %zmm2, %zmm0, %zmm0
41- ; AVX512VL-VNNI-NEXT: retq
17+ ; CHECK-LABEL: vpdpwssd_v16i32_accumulate:
18+ ; CHECK: # %bb.0:
19+ ; CHECK-NEXT: vpdpwssd %zmm1, %zmm0, %zmm2
20+ ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
21+ ; CHECK-NEXT: retq
4222 %x0 = sext <32 x i16> %a0 to <32 x i32>
4323 %x1 = sext <32 x i16> %a1 to <32 x i32>
4424 %m = mul nsw <32 x i32> %x0, %x1
@@ -50,11 +30,11 @@ define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <1
5030}
5131
5232define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x i32> %a2) {
53- ; AVX512BW-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
54- ; AVX512BW-VNNI: # %bb.0:
55- ; AVX512BW-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
56- ; AVX512BW-VNNI-NEXT: vmovdqa %ymm2, %ymm0
57- ; AVX512BW-VNNI-NEXT: retq
33+ ; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
34+ ; AVX512VL-VNNI: # %bb.0:
35+ ; AVX512VL-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
36+ ; AVX512VL-VNNI-NEXT: vmovdqa %ymm2, %ymm0
37+ ; AVX512VL-VNNI-NEXT: retq
5838;
5939; AVX-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
6040; AVX-VNNI: # %bb.0:
@@ -67,12 +47,6 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
6747; AVX512-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
6848; AVX512-VNNI-NEXT: vpaddd %ymm2, %ymm0, %ymm0
6949; AVX512-VNNI-NEXT: retq
70- ;
71- ; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
72- ; AVX512VL-VNNI: # %bb.0:
73- ; AVX512VL-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
74- ; AVX512VL-VNNI-NEXT: vmovdqa %ymm2, %ymm0
75- ; AVX512VL-VNNI-NEXT: retq
7650 %x0 = sext <16 x i16> %a0 to <16 x i32>
7751 %x1 = sext <16 x i16> %a1 to <16 x i32>
7852 %m = mul nsw <16 x i32> %x0, %x1
@@ -84,11 +58,11 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
8458}
8559
8660define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
87- ; AVX512BW-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
88- ; AVX512BW-VNNI: # %bb.0:
89- ; AVX512BW-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
90- ; AVX512BW-VNNI-NEXT: vmovdqa %xmm2, %xmm0
91- ; AVX512BW-VNNI-NEXT: retq
61+ ; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
62+ ; AVX512VL-VNNI: # %bb.0:
63+ ; AVX512VL-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
64+ ; AVX512VL-VNNI-NEXT: vmovdqa %xmm2, %xmm0
65+ ; AVX512VL-VNNI-NEXT: retq
9266;
9367; AVX-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
9468; AVX-VNNI: # %bb.0:
@@ -101,12 +75,6 @@ define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i
10175; AVX512-VNNI-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
10276; AVX512-VNNI-NEXT: vpaddd %xmm2, %xmm0, %xmm0
10377; AVX512-VNNI-NEXT: retq
104- ;
105- ; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
106- ; AVX512VL-VNNI: # %bb.0:
107- ; AVX512VL-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
108- ; AVX512VL-VNNI-NEXT: vmovdqa %xmm2, %xmm0
109- ; AVX512VL-VNNI-NEXT: retq
11078 %x0 = sext <8 x i16> %a0 to <8 x i32>
11179 %x1 = sext <8 x i16> %a1 to <8 x i32>
11280 %m = mul nsw <8 x i32> %x0, %x1
0 commit comments