Skip to content

Commit 3735e20

Browse files
committed
[x86][AVX-VNNI] Fix VPDPWXXD Argument Types
Fixed the argument types of the following intrinsics to match the ISA:
- vpdpwssd_128, vpdpwssd_256, vpdpwssd_512
- vpdpwssds_128, vpdpwssds_256, vpdpwssds_512
- more to come
1 parent 2f7a5f7 commit 3735e20

21 files changed

+426
-283
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,27 +1132,27 @@ let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<5
11321132
}
11331133

11341134
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1135-
def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
1135+
def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
11361136
}
11371137

11381138
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1139-
def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
1139+
def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
11401140
}
11411141

11421142
let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1143-
def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
1143+
def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
11441144
}
11451145

11461146
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1147-
def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
1147+
def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
11481148
}
11491149

11501150
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1151-
def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
1151+
def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
11521152
}
11531153

11541154
let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1155-
def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
1155+
def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
11561156
}
11571157

11581158
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {

clang/lib/Headers/avx512vlvnniintrin.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@
8080
/// ENDFOR
8181
/// DST[MAX:256] := 0
8282
/// \endcode
83-
#define _mm256_dpwssd_epi32(S, A, B) \
84-
((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
83+
#define _mm256_dpwssd_epi32(S, A, B) \
84+
((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v16hi)(A), (__v16hi)(B)))
8585

8686
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
8787
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -98,8 +98,9 @@
9898
/// ENDFOR
9999
/// DST[MAX:256] := 0
100100
/// \endcode
101-
#define _mm256_dpwssds_epi32(S, A, B) \
102-
((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
101+
#define _mm256_dpwssds_epi32(S, A, B) \
102+
((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v16hi)(A), \
103+
(__v16hi)(B)))
103104

104105
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
105106
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -157,8 +158,8 @@
157158
/// ENDFOR
158159
/// DST[MAX:128] := 0
159160
/// \endcode
160-
#define _mm_dpwssd_epi32(S, A, B) \
161-
((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
161+
#define _mm_dpwssd_epi32(S, A, B) \
162+
((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
162163

163164
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
164165
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -175,8 +176,8 @@
175176
/// ENDFOR
176177
/// DST[MAX:128] := 0
177178
/// \endcode
178-
#define _mm_dpwssds_epi32(S, A, B) \
179-
((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
179+
#define _mm_dpwssds_epi32(S, A, B) \
180+
((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
180181

181182
static __inline__ __m256i __DEFAULT_FN_ATTRS256
182183
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)

clang/lib/Headers/avx512vnniintrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
6868
static __inline__ __m512i __DEFAULT_FN_ATTRS
6969
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
7070
{
71-
return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
72-
(__v16si)__B);
71+
return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A,
72+
(__v32hi)__B);
7373
}
7474

7575
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -91,8 +91,8 @@ _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
9191
static __inline__ __m512i __DEFAULT_FN_ATTRS
9292
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
9393
{
94-
return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
95-
(__v16si)__B);
94+
return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A,
95+
(__v32hi)__B);
9696
}
9797

9898
static __inline__ __m512i __DEFAULT_FN_ATTRS

clang/lib/Headers/avxvnniintrin.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
109109
static __inline__ __m256i __DEFAULT_FN_ATTRS256
110110
_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
111111
{
112-
return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B);
112+
return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A,
113+
(__v16hi)__B);
113114
}
114115

115116
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
@@ -130,7 +131,8 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
130131
static __inline__ __m256i __DEFAULT_FN_ATTRS256
131132
_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
132133
{
133-
return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B);
134+
return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A,
135+
(__v16hi)__B);
134136
}
135137

136138
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
@@ -199,7 +201,8 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
199201
static __inline__ __m128i __DEFAULT_FN_ATTRS128
200202
_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
201203
{
202-
return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B);
204+
return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A,
205+
(__v8hi)__B);
203206
}
204207

205208
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
@@ -220,7 +223,8 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
220223
static __inline__ __m128i __DEFAULT_FN_ATTRS128
221224
_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
222225
{
223-
return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B);
226+
return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A,
227+
(__v8hi)__B);
224228
}
225229

226230
#undef __DEFAULT_FN_ATTRS128

clang/test/CodeGen/X86/avx512vlvnni-builtins.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,41 +47,41 @@ __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
4747

4848
__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
4949
// CHECK-LABEL: test_mm256_mask_dpwssd_epi32
50-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
50+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
5151
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
5252
return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B);
5353
}
5454

5555
__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
5656
// CHECK-LABEL: test_mm256_maskz_dpwssd_epi32
57-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
57+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
5858
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
5959
return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B);
6060
}
6161

6262
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
6363
// CHECK-LABEL: test_mm256_dpwssd_epi32
64-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
64+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
6565
return _mm256_dpwssd_epi32(__S, __A, __B);
6666
}
6767

6868
__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
6969
// CHECK-LABEL: test_mm256_mask_dpwssds_epi32
70-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
70+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
7171
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
7272
return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B);
7373
}
7474

7575
__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
7676
// CHECK-LABEL: test_mm256_maskz_dpwssds_epi32
77-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
77+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
7878
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
7979
return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B);
8080
}
8181

8282
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
8383
// CHECK-LABEL: test_mm256_dpwssds_epi32
84-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
84+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
8585
return _mm256_dpwssds_epi32(__S, __A, __B);
8686
}
8787

@@ -127,41 +127,41 @@ __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
127127

128128
__m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
129129
// CHECK-LABEL: test_mm_mask_dpwssd_epi32
130-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
130+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
131131
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
132132
return _mm_mask_dpwssd_epi32(__S, __U, __A, __B);
133133
}
134134

135135
__m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
136136
// CHECK-LABEL: test_mm_maskz_dpwssd_epi32
137-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
137+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
138138
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
139139
return _mm_maskz_dpwssd_epi32(__U, __S, __A, __B);
140140
}
141141

142142
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
143143
// CHECK-LABEL: test_mm_dpwssd_epi32
144-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
144+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
145145
return _mm_dpwssd_epi32(__S, __A, __B);
146146
}
147147

148148
__m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
149149
// CHECK-LABEL: test_mm_mask_dpwssds_epi32
150-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
150+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
151151
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
152152
return _mm_mask_dpwssds_epi32(__S, __U, __A, __B);
153153
}
154154

155155
__m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
156156
// CHECK-LABEL: test_mm_maskz_dpwssds_epi32
157-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
157+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
158158
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
159159
return _mm_maskz_dpwssds_epi32(__U, __S, __A, __B);
160160
}
161161

162162
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
163163
// CHECK-LABEL: test_mm_dpwssds_epi32
164-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
164+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
165165
return _mm_dpwssds_epi32(__S, __A, __B);
166166
}
167167

clang/test/CodeGen/X86/avx512vnni-builtins.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,41 +47,41 @@ __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
4747

4848
__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
4949
// CHECK-LABEL: test_mm512_mask_dpwssd_epi32
50-
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
50+
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
5151
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
5252
return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B);
5353
}
5454

5555
__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
5656
// CHECK-LABEL: test_mm512_maskz_dpwssd_epi32
57-
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
57+
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
5858
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
5959
return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B);
6060
}
6161

6262
__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) {
6363
// CHECK-LABEL: test_mm512_dpwssd_epi32
64-
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
64+
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
6565
return _mm512_dpwssd_epi32(__S, __A, __B);
6666
}
6767

6868
__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
6969
// CHECK-LABEL: test_mm512_mask_dpwssds_epi32
70-
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
70+
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
7171
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
7272
return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B);
7373
}
7474

7575
__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
7676
// CHECK-LABEL: test_mm512_maskz_dpwssds_epi32
77-
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
77+
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
7878
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
7979
return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B);
8080
}
8181

8282
__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) {
8383
// CHECK-LABEL: test_mm512_dpwssds_epi32
84-
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
84+
// CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}})
8585
return _mm512_dpwssds_epi32(__S, __A, __B);
8686
}
8787

clang/test/CodeGen/X86/avxvnni-builtins.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
1919

2020
__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) {
2121
// CHECK-LABEL: test_mm256_dpwssd_epi32
22-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
22+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
2323
return _mm256_dpwssd_epi32(__S, __A, __B);
2424
}
2525

2626
__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {
2727
// CHECK-LABEL: test_mm256_dpwssds_epi32
28-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
28+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
2929
return _mm256_dpwssds_epi32(__S, __A, __B);
3030
}
3131

@@ -43,13 +43,13 @@ __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
4343

4444
__m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) {
4545
// CHECK-LABEL: test_mm_dpwssd_epi32
46-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
46+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
4747
return _mm_dpwssd_epi32(__S, __A, __B);
4848
}
4949

5050
__m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {
5151
// CHECK-LABEL: test_mm_dpwssds_epi32
52-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
52+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
5353
return _mm_dpwssds_epi32(__S, __A, __B);
5454
}
5555

@@ -67,13 +67,13 @@ __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
6767

6868
__m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
6969
// CHECK-LABEL: test_mm256_dpwssd_avx_epi32
70-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
70+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
7171
return _mm256_dpwssd_avx_epi32(__S, __A, __B);
7272
}
7373

7474
__m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
7575
// CHECK-LABEL: test_mm256_dpwssds_avx_epi32
76-
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
76+
// CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}})
7777
return _mm256_dpwssds_avx_epi32(__S, __A, __B);
7878
}
7979

@@ -91,12 +91,12 @@ __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
9191

9292
__m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
9393
// CHECK-LABEL: test_mm_dpwssd_avx_epi32
94-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
94+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
9595
return _mm_dpwssd_avx_epi32(__S, __A, __B);
9696
}
9797

9898
__m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
9999
// CHECK-LABEL: test_mm_dpwssds_avx_epi32
100-
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
100+
// CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
101101
return _mm_dpwssds_avx_epi32(__S, __A, __B);
102102
}

0 commit comments

Comments
 (0)