Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
}
return maskVec;
}
/// Emits the intrinsic call used by the AVX-512 masked compress and expand
/// builtins.
///
/// \param builder     CIR builder used to create the operations.
/// \param loc         Location attached to the emitted operations.
/// \param source      First operand forwarded to the intrinsic.
/// \param mask        Second operand forwarded to the intrinsic; its vector
///                    type is also used as the result type of the call.
/// \param inputVector Value handed to getMaskVecValue, together with the
///                    element count of the result type, to build the third
///                    intrinsic operand.
/// \param id          Intrinsic name, forwarded unchanged to
///                    emitIntrinsicCallOp.
/// \returns The value produced by emitIntrinsicCallOp.
///
/// NOTE(review): the parameter names do not match their use here -- `mask`
/// supplies the result type while `inputVector` is what gets converted into
/// the mask vector. Consider renaming once callers are confirmed.
/// NOTE(review): `id` is passed through verbatim; confirm callers spell the
/// intrinsic name in the form emitIntrinsicCallOp expects.
static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder,
                                         mlir::Location loc, mlir::Value source,
                                         mlir::Value mask,
                                         mlir::Value inputVector,
                                         const std::string &id) {
  // The call result has the same vector type as the second operand.
  auto resultTy = cast<cir::VectorType>(mask.getType());
  // resultTy is already a cir::VectorType, so no further cast is needed to
  // query its element count.
  mlir::Value maskVec =
      getMaskVecValue(builder, loc, inputVector, resultTy.getSize());
  return emitIntrinsicCallOp(builder, loc, id, resultTy, source, mask, maskVec);
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
Expand Down Expand Up @@ -421,6 +426,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_compressstoreqi128_mask:
case X86::BI__builtin_ia32_compressstoreqi256_mask:
case X86::BI__builtin_ia32_compressstoreqi512_mask:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_expanddf128_mask:
case X86::BI__builtin_ia32_expanddf256_mask:
case X86::BI__builtin_ia32_expanddf512_mask:
Expand All @@ -438,7 +447,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_expandhi512_mask:
case X86::BI__builtin_ia32_expandqi128_mask:
case X86::BI__builtin_ia32_expandqi256_mask:
case X86::BI__builtin_ia32_expandqi512_mask:
case X86::BI__builtin_ia32_expandqi512_mask:{
mlir::Location loc = getLoc(expr->getExprLoc());
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], "x86_avx512_mask_expand");
}
case X86::BI__builtin_ia32_compressdf128_mask:
case X86::BI__builtin_ia32_compressdf256_mask:
case X86::BI__builtin_ia32_compressdf512_mask:
Expand All @@ -456,7 +468,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_compresshi512_mask:
case X86::BI__builtin_ia32_compressqi128_mask:
case X86::BI__builtin_ia32_compressqi256_mask:
case X86::BI__builtin_ia32_compressqi512_mask:
case X86::BI__builtin_ia32_compressqi512_mask:{
mlir::Location loc = getLoc(expr->getExprLoc());
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], "x86_avx512_mask_compress");
}
case X86::BI__builtin_ia32_gather3div2df:
case X86::BI__builtin_ia32_gather3div2di:
case X86::BI__builtin_ia32_gather3div4df:
Expand Down Expand Up @@ -781,16 +796,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
case X86::BI__builtin_ia32_sqrtps:
case X86::BI__builtin_ia32_sqrtph256:
case X86::BI__builtin_ia32_sqrtph:
case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_vsqrtbf16256:
case X86::BI__builtin_ia32_vsqrtbf16:
case X86::BI__builtin_ia32_vsqrtbf16512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512:
case X86::BI__builtin_ia32_pmuludq128:
Expand Down Expand Up @@ -943,7 +949,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
case X86::BI__builtin_ia32_cvtsbf162ss_32:
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
case X86::BI__cpuid:
Expand Down
158 changes: 158 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

#include <immintrin.h>


// Exercises _mm_mask_expand_pd; the directives below pin the emitted function
// and the intrinsic it is expected to call.
__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
  // CIR-LABEL: test_mm_mask_expand_pd
  // LLVM-LABEL: test_mm_mask_expand_pd
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_mask_expand_pd(__W, __U, __A);
}
// Exercises _mm_maskz_expand_pd; the directives below pin the emitted function
// and the intrinsic it is expected to call.
__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
  // CIR-LABEL: test_mm_maskz_expand_pd
  // LLVM-LABEL: test_mm_maskz_expand_pd
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_maskz_expand_pd(__U, __A);
}
// Exercises _mm256_mask_expand_pd; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) {
  // CIR-LABEL: test_mm256_mask_expand_pd
  // LLVM-LABEL: test_mm256_mask_expand_pd
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_mask_expand_pd(__W, __U, __A);
}
// Exercises _mm256_maskz_expand_pd; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) {
  // CIR-LABEL: test_mm256_maskz_expand_pd
  // LLVM-LABEL: test_mm256_maskz_expand_pd
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_maskz_expand_pd(__U, __A);
}
// Exercises _mm_mask_expand_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_mask_expand_epi64
  // LLVM-LABEL: test_mm_mask_expand_epi64
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_mask_expand_epi64(__W, __U, __A);
}
// Exercises _mm_maskz_expand_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_expand_epi64(__mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_maskz_expand_epi64
  // LLVM-LABEL: test_mm_maskz_expand_epi64
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_maskz_expand_epi64(__U, __A);
}
// Exercises _mm256_mask_expand_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_mask_expand_epi64
  // LLVM-LABEL: test_mm256_mask_expand_epi64
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_mask_expand_epi64(__W, __U, __A);
}
// Exercises _mm256_maskz_expand_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_maskz_expand_epi64
  // LLVM-LABEL: test_mm256_maskz_expand_epi64
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_maskz_expand_epi64(__U, __A);
}

// Exercises _mm_mask_expand_ps; the directives below pin the emitted function
// and the intrinsic it is expected to call.
__m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) {
  // CIR-LABEL: test_mm_mask_expand_ps
  // LLVM-LABEL: test_mm_mask_expand_ps
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_mask_expand_ps(__W, __U, __A);
}
// Exercises _mm_maskz_expand_ps; the directives below pin the emitted function
// and the intrinsic it is expected to call.
__m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) {
  // CIR-LABEL: test_mm_maskz_expand_ps
  // LLVM-LABEL: test_mm_maskz_expand_ps
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_maskz_expand_ps(__U, __A);
}
// Exercises _mm256_mask_expand_ps; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) {
  // CIR-LABEL: test_mm256_mask_expand_ps
  // LLVM-LABEL: test_mm256_mask_expand_ps
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_mask_expand_ps(__W, __U, __A);
}
// Exercises _mm256_maskz_expand_ps; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) {
  // CIR-LABEL: test_mm256_maskz_expand_ps
  // LLVM-LABEL: test_mm256_maskz_expand_ps
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_maskz_expand_ps(__U, __A);
}
// Exercises _mm_mask_expand_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_mask_expand_epi32
  // LLVM-LABEL: test_mm_mask_expand_epi32
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_mask_expand_epi32(__W, __U, __A);
}
// Exercises _mm_maskz_expand_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_maskz_expand_epi32
  // LLVM-LABEL: test_mm_maskz_expand_epi32
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_maskz_expand_epi32(__U, __A);
}
// Exercises _mm256_mask_expand_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_mask_expand_epi32
  // LLVM-LABEL: test_mm256_mask_expand_epi32
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_mask_expand_epi32(__W, __U, __A);
}
// Exercises _mm256_maskz_expand_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_maskz_expand_epi32
  // LLVM-LABEL: test_mm256_maskz_expand_epi32
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm256_maskz_expand_epi32(__U, __A);
}

// Exercises _mm_mask_compress_pd; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
  // CIR-LABEL: test_mm_mask_compress_pd
  // LLVM-LABEL: test_mm_mask_compress_pd
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_mask_compress_pd(__W, __U, __A);
}

// Exercises _mm_maskz_compress_pd; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128d test_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) {
  // CIR-LABEL: test_mm_maskz_compress_pd
  // LLVM-LABEL: test_mm_maskz_compress_pd
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_maskz_compress_pd(__U, __A);
}

// Exercises _mm256_mask_compress_pd; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256d test_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) {
  // CIR-LABEL: test_mm256_mask_compress_pd
  // LLVM-LABEL: test_mm256_mask_compress_pd
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_mask_compress_pd(__W, __U, __A);
}

// Exercises _mm256_maskz_compress_pd; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256d test_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) {
  // CIR-LABEL: test_mm256_maskz_compress_pd
  // LLVM-LABEL: test_mm256_maskz_compress_pd
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_maskz_compress_pd(__U, __A);
}

// Exercises _mm_mask_compress_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_mask_compress_epi64
  // LLVM-LABEL: test_mm_mask_compress_epi64
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_mask_compress_epi64(__W, __U, __A);
}

// Exercises _mm_maskz_compress_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_maskz_compress_epi64
  // LLVM-LABEL: test_mm_maskz_compress_epi64
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_maskz_compress_epi64(__U, __A);
}

// Exercises _mm256_mask_compress_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_mask_compress_epi64
  // LLVM-LABEL: test_mm256_mask_compress_epi64
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_mask_compress_epi64(__W, __U, __A);
}

// Exercises _mm256_maskz_compress_epi64; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_maskz_compress_epi64
  // LLVM-LABEL: test_mm256_maskz_compress_epi64
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_maskz_compress_epi64(__U, __A);
}

// Exercises _mm_mask_compress_ps; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128 test_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) {
  // CIR-LABEL: test_mm_mask_compress_ps
  // LLVM-LABEL: test_mm_mask_compress_ps
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_mask_compress_ps(__W, __U, __A);
}

// Exercises _mm_maskz_compress_ps; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128 test_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) {
  // CIR-LABEL: test_mm_maskz_compress_ps
  // LLVM-LABEL: test_mm_maskz_compress_ps
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_maskz_compress_ps(__U, __A);
}

// Exercises _mm256_mask_compress_ps; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256 test_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) {
  // CIR-LABEL: test_mm256_mask_compress_ps
  // LLVM-LABEL: test_mm256_mask_compress_ps
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_mask_compress_ps(__W, __U, __A);
}

// Exercises _mm256_maskz_compress_ps; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256 test_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) {
  // CIR-LABEL: test_mm256_maskz_compress_ps
  // LLVM-LABEL: test_mm256_maskz_compress_ps
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_maskz_compress_ps(__U, __A);
}

// Exercises _mm_mask_compress_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_mask_compress_epi32
  // LLVM-LABEL: test_mm_mask_compress_epi32
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_mask_compress_epi32(__W, __U, __A);
}

// Exercises _mm_maskz_compress_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) {
  // CIR-LABEL: test_mm_maskz_compress_epi32
  // LLVM-LABEL: test_mm_maskz_compress_epi32
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_maskz_compress_epi32(__U, __A);
}

// Exercises _mm256_mask_compress_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_mask_compress_epi32
  // LLVM-LABEL: test_mm256_mask_compress_epi32
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_mask_compress_epi32(__W, __U, __A);
}

// Exercises _mm256_maskz_compress_epi32; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) {
  // CIR-LABEL: test_mm256_maskz_compress_epi32
  // LLVM-LABEL: test_mm256_maskz_compress_epi32
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm256_maskz_compress_epi32(__U, __A);
}
53 changes: 53 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512vbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512vbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512vbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512vbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

#include <immintrin.h>


// Exercises _mm_mask_compress_epi16; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
  // CIR-LABEL: test_mm_mask_compress_epi16
  // LLVM-LABEL: test_mm_mask_compress_epi16
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_mask_compress_epi16(__S, __U, __D);
}

// Exercises _mm_maskz_compress_epi16; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
  // CIR-LABEL: test_mm_maskz_compress_epi16
  // LLVM-LABEL: test_mm_maskz_compress_epi16
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_maskz_compress_epi16(__U, __D);
}

// Exercises _mm_mask_compress_epi8; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
  // CIR-LABEL: test_mm_mask_compress_epi8
  // LLVM-LABEL: test_mm_mask_compress_epi8
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_mask_compress_epi8(__S, __U, __D);
}

// Exercises _mm_maskz_compress_epi8; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
  // CIR-LABEL: test_mm_maskz_compress_epi8
  // LLVM-LABEL: test_mm_maskz_compress_epi8
  // LLVM: @llvm.x86.avx512.mask.compress
  return _mm_maskz_compress_epi8(__U, __D);
}

// Exercises _mm_mask_expand_epi16; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
  // CIR-LABEL: test_mm_mask_expand_epi16
  // LLVM-LABEL: test_mm_mask_expand_epi16
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_mask_expand_epi16(__S, __U, __D);
}

// Exercises _mm_maskz_expand_epi16; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
  // CIR-LABEL: test_mm_maskz_expand_epi16
  // LLVM-LABEL: test_mm_maskz_expand_epi16
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_maskz_expand_epi16(__U, __D);
}

// Exercises _mm_mask_expand_epi8; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
  // CIR-LABEL: test_mm_mask_expand_epi8
  // LLVM-LABEL: test_mm_mask_expand_epi8
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_mask_expand_epi8(__S, __U, __D);
}

// Exercises _mm_maskz_expand_epi8; the directives below pin the emitted
// function and the intrinsic it is expected to call.
__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
  // CIR-LABEL: test_mm_maskz_expand_epi8
  // LLVM-LABEL: test_mm_maskz_expand_epi8
  // LLVM: @llvm.x86.avx512.mask.expand
  return _mm_maskz_expand_epi8(__U, __D);
}