Skip to content
12 changes: 6 additions & 6 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1057,27 +1057,27 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
}

// VNNI byte dot-product builtins (vpdpbusd / vpdpbusds), one def per vector
// width. In every prototype the result and first operand are int vectors,
// while the second and third operands are byte vectors of the same total
// width: `unsigned char` elements for the second, `char` for the third.
// Each builtin name is defined exactly once.
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
  def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
}

let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
  def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
}

let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
  def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
}

let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
  def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">;
}

let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
  def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">;
}

let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
  def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">;
}

let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down
18 changes: 10 additions & 8 deletions clang/lib/Headers/avx512vlvnniintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
/// ENDFOR
/// DST[MAX:256] := 0
/// \endcode
///
/// \note \a S is passed to the builtin as an 8 x int vector, \a A as a 32-byte
///    vector (__v32qu) and \a B as a 32-byte vector (__v32qi), matching the
///    byte-typed second and third operands of __builtin_ia32_vpdpbusd256.
#define _mm256_dpbusd_epi32(S, A, B) \
  ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v32qu)(A), (__v32qi)(B)))

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
Expand All @@ -61,8 +61,9 @@
/// ENDFOR
/// DST[MAX:256] := 0
/// \endcode
///
/// \note \a S is passed to the builtin as an 8 x int vector, \a A as a 32-byte
///    vector (__v32qu) and \a B as a 32-byte vector (__v32qi), matching the
///    byte-typed second and third operands of __builtin_ia32_vpdpbusds256.
#define _mm256_dpbusds_epi32(S, A, B) \
  ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v32qu)(A), \
                                        (__v32qi)(B)))

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
Expand Down Expand Up @@ -117,8 +118,8 @@
/// ENDFOR
/// DST[MAX:128] := 0
/// \endcode
///
/// \note \a S is passed to the builtin as a 4 x int vector, \a A as a 16-byte
///    vector (__v16qu) and \a B as a 16-byte vector (__v16qi), matching the
///    byte-typed second and third operands of __builtin_ia32_vpdpbusd128.
#define _mm_dpbusd_epi32(S, A, B) \
  ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v16qu)(A), (__v16qi)(B)))

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
Expand All @@ -137,8 +138,9 @@
/// ENDFOR
/// DST[MAX:128] := 0
/// \endcode
///
/// \note \a S is passed to the builtin as a 4 x int vector, \a A as a 16-byte
///    vector (__v16qu) and \a B as a 16-byte vector (__v16qi), matching the
///    byte-typed second and third operands of __builtin_ia32_vpdpbusds128.
#define _mm_dpbusds_epi32(S, A, B) \
  ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v16qu)(A), \
                                        (__v16qi)(B)))

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/avx512vnniintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  // Single forwarding call: __S goes in as a 16 x int vector, while __A and
  // __B are reinterpreted as 64-byte vectors (__v64qu / __v64qi) because the
  // builtin's second and third operands are byte-typed.
  return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A,
                                             (__v64qi)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand All @@ -45,8 +45,8 @@ _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  // Single forwarding call: __S goes in as a 16 x int vector, while __A and
  // __B are reinterpreted as 64-byte vectors (__v64qu / __v64qi) because the
  // builtin's second and third operands are byte-typed.
  return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A,
                                              (__v64qi)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand Down
12 changes: 8 additions & 4 deletions clang/lib/Headers/avxvnniintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  // Forward to the 256-bit builtin: __S as an 8 x int vector, __A and __B
  // reinterpreted as 32-byte vectors (__v32qu / __v32qi) to match the
  // builtin's byte-typed second and third operands.
  return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A,
                                             (__v32qi)__B);
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
Expand All @@ -86,7 +87,8 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  // Forward to the 256-bit builtin: __S as an 8 x int vector, __A and __B
  // reinterpreted as 32-byte vectors (__v32qu / __v32qi) to match the
  // builtin's byte-typed second and third operands.
  return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A,
                                              (__v32qi)__B);
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
Expand Down Expand Up @@ -151,7 +153,8 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  // Forward to the 128-bit builtin: __S as a 4 x int vector, __A and __B
  // reinterpreted as 16-byte vectors (__v16qu / __v16qi) to match the
  // builtin's byte-typed second and third operands.
  return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A,
                                             (__v16qi)__B);
}

/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
Expand All @@ -174,7 +177,8 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  // Forward to the 128-bit builtin: __S as a 4 x int vector, __A and __B
  // reinterpreted as 16-byte vectors (__v16qu / __v16qi) to match the
  // builtin's byte-typed second and third operands.
  return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A,
                                              (__v16qi)__B);
}

/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with
Expand Down
24 changes: 12 additions & 12 deletions clang/test/CodeGen/X86/avx512vlvnni-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,41 @@

// Masked 256-bit vpdpbusd: expects one intrinsic call whose second and third
// operands are <32 x i8>, followed by a select over an <8 x i1> mask.
__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_mask_dpbusd_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B);
}

// maskz 256-bit vpdpbusd: expects one intrinsic call whose second and third
// operands are <32 x i8>, followed by a select over an <8 x i1> mask.
__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B);
}

// Unmasked 256-bit vpdpbusd: expects a single intrinsic call whose second and
// third operands are <32 x i8>.
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusd_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusd_epi32(__S, __A, __B);
}

// Masked 256-bit vpdpbusds: expects one intrinsic call whose second and third
// operands are <32 x i8>, followed by a select over an <8 x i1> mask.
__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_mask_dpbusds_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B);
}

// maskz 256-bit vpdpbusds: expects one intrinsic call whose second and third
// operands are <32 x i8>, followed by a select over an <8 x i1> mask.
__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_maskz_dpbusds_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B);
}

// Unmasked 256-bit vpdpbusds: expects a single intrinsic call whose second and
// third operands are <32 x i8>.
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusds_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusds_epi32(__S, __A, __B);
}

Expand Down Expand Up @@ -87,41 +87,41 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {

// Masked 128-bit vpdpbusd: expects one intrinsic call whose second and third
// operands are <16 x i8>, followed by a select over a <4 x i1> mask.
__m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_mask_dpbusd_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_mask_dpbusd_epi32(__S, __U, __A, __B);
}

// maskz 128-bit vpdpbusd: expects one intrinsic call whose second and third
// operands are <16 x i8>, followed by a select over a <4 x i1> mask.
__m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_maskz_dpbusd_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B);
}

// Unmasked 128-bit vpdpbusd: expects a single intrinsic call whose second and
// third operands are <16 x i8>.
__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_dpbusd_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_dpbusd_epi32(__S, __A, __B);
}

// Masked 128-bit vpdpbusds: expects one intrinsic call whose second and third
// operands are <16 x i8>, followed by a select over a <4 x i1> mask.
__m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_mask_dpbusds_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_mask_dpbusds_epi32(__S, __U, __A, __B);
}

// maskz 128-bit vpdpbusds: expects one intrinsic call whose second and third
// operands are <16 x i8>, followed by a select over a <4 x i1> mask.
__m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_maskz_dpbusds_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B);
}

// Unmasked 128-bit vpdpbusds: expects a single intrinsic call whose second and
// third operands are <16 x i8>.
__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_dpbusds_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_dpbusds_epi32(__S, __A, __B);
}

Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGen/X86/avx512vnni-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,41 @@

// Masked 512-bit vpdpbusd: expects one intrinsic call whose second and third
// operands are <64 x i8>, followed by a select over a <16 x i1> mask.
__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_mask_dpbusd_epi32
  // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B);
}

// maskz 512-bit vpdpbusd: expects one intrinsic call whose second and third
// operands are <64 x i8>, followed by a select over a <16 x i1> mask.
__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_maskz_dpbusd_epi32
  // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B);
}

// Unmasked 512-bit vpdpbusd: expects a single intrinsic call whose second and
// third operands are <64 x i8>.
__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_dpbusd_epi32
  // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
  return _mm512_dpbusd_epi32(__S, __A, __B);
}

// Masked 512-bit vpdpbusds: expects one intrinsic call whose second and third
// operands are <64 x i8>, followed by a select over a <16 x i1> mask.
__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_mask_dpbusds_epi32
  // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B);
}

// maskz 512-bit vpdpbusds: expects one intrinsic call whose second and third
// operands are <64 x i8>, followed by a select over a <16 x i1> mask.
__m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_maskz_dpbusds_epi32
  // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B);
}

// Unmasked 512-bit vpdpbusds: expects a single intrinsic call whose second and
// third operands are <64 x i8>.
__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_dpbusds_epi32
  // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}})
  return _mm512_dpbusds_epi32(__S, __A, __B);
}

Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/X86/avxvnni-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@

// 256-bit vpdpbusd via _mm256_dpbusd_epi32: expects a single intrinsic call
// whose second and third operands are <32 x i8>.
__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusd_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusd_epi32(__S, __A, __B);
}

// 256-bit vpdpbusds via _mm256_dpbusds_epi32: expects a single intrinsic call
// whose second and third operands are <32 x i8>.
__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusds_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusds_epi32(__S, __A, __B);
}

Expand All @@ -31,13 +31,13 @@ __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) {

// 128-bit vpdpbusd via _mm_dpbusd_epi32: expects a single intrinsic call whose
// second and third operands are <16 x i8>.
__m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_dpbusd_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_dpbusd_epi32(__S, __A, __B);
}

// 128-bit vpdpbusds via _mm_dpbusds_epi32: expects a single intrinsic call
// whose second and third operands are <16 x i8>.
__m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_dpbusds_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_dpbusds_epi32(__S, __A, __B);
}

Expand All @@ -55,13 +55,13 @@ __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) {

// 256-bit vpdpbusd via the _avx_ spelling: expects a single intrinsic call
// whose second and third operands are <32 x i8>.
__m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusd_avx_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusd_avx_epi32(__S, __A, __B);
}

// 256-bit vpdpbusds via the _avx_ spelling: expects a single intrinsic call
// whose second and third operands are <32 x i8>.
__m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_dpbusds_avx_epi32
  // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
  return _mm256_dpbusds_avx_epi32(__S, __A, __B);
}

Expand All @@ -79,13 +79,13 @@ __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) {

// 128-bit vpdpbusd via the _avx_ spelling: expects a single intrinsic call
// whose second and third operands are <16 x i8>.
__m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_dpbusd_avx_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_dpbusd_avx_epi32(__S, __A, __B);
}

// 128-bit vpdpbusds via the _avx_ spelling: expects a single intrinsic call
// whose second and third operands are <16 x i8>.
__m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_dpbusds_avx_epi32
  // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  return _mm_dpbusds_avx_epi32(__S, __A, __B);
}

Expand Down
Loading