Skip to content

Commit 2944b9e

Browse files
committed
[CIR][CIRGen][Builtin][X86] Masked compress Intrinsics
This pr is related to the issue #167765 Added the support Masked compress builtin in CIR codeGen
1 parent 6bc66e5 commit 2944b9e

File tree

3 files changed

+225
-19
lines changed

3 files changed

+225
-19
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,10 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
8484
}
8585
return maskVec;
8686
}
87-
static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr *expr,ArrayRef<mlir::Value> ops, bool IsCompress, const std::string &ID){
88-
auto ResultTy = cast<cir::VectorType>(ops[1].getType());
89-
mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2], cast<cir::VectorType>(ResultTy).getSize());
90-
llvm::SmallVector<mlir::Value, 4> op{ops[0], ops[1], MaskValue};
91-
92-
return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op);
93-
87+
static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder, mlir::Location loc, mlir::Value source, mlir::Value mask, mlir::Value inputVector, const std::string &id){
88+
auto ResultTy = cast<cir::VectorType>(mask.getType());
89+
mlir::Value MaskValue = getMaskVecValue(builder, loc, inputVector, cast<cir::VectorType>(ResultTy).getSize());
90+
return emitIntrinsicCallOp(builder, loc, id, ResultTy, source, mask, MaskValue);
9491
}
9592

9693
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
@@ -429,6 +426,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
429426
case X86::BI__builtin_ia32_compressstoreqi128_mask:
430427
case X86::BI__builtin_ia32_compressstoreqi256_mask:
431428
case X86::BI__builtin_ia32_compressstoreqi512_mask:
429+
cgm.errorNYI(expr->getSourceRange(),
430+
std::string("unimplemented X86 builtin call: ") +
431+
getContext().BuiltinInfo.getName(builtinID));
432+
return {};
432433
case X86::BI__builtin_ia32_expanddf128_mask:
433434
case X86::BI__builtin_ia32_expanddf256_mask:
434435
case X86::BI__builtin_ia32_expanddf512_mask:
@@ -446,7 +447,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
446447
case X86::BI__builtin_ia32_expandhi512_mask:
447448
case X86::BI__builtin_ia32_expandqi128_mask:
448449
case X86::BI__builtin_ia32_expandqi256_mask:
449-
case X86::BI__builtin_ia32_expandqi512_mask:
450+
case X86::BI__builtin_ia32_expandqi512_mask:{
451+
mlir::Location loc = getLoc(expr->getExprLoc());
452+
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], "x86_avx512_mask_expand");
453+
}
450454
case X86::BI__builtin_ia32_compressdf128_mask:
451455
case X86::BI__builtin_ia32_compressdf256_mask:
452456
case X86::BI__builtin_ia32_compressdf512_mask:
@@ -465,7 +469,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
465469
case X86::BI__builtin_ia32_compressqi128_mask:
466470
case X86::BI__builtin_ia32_compressqi256_mask:
467471
case X86::BI__builtin_ia32_compressqi512_mask:{
468-
return emitX86CompressExpand(*this, expr, ops, true, "x86_avx512_mask_compress");
472+
mlir::Location loc = getLoc(expr->getExprLoc());
473+
return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2], "x86_avx512_mask_compress");
469474
}
470475
case X86::BI__builtin_ia32_gather3div2df:
471476
case X86::BI__builtin_ia32_gather3div2di:
@@ -791,16 +796,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
791796
case X86::BI__builtin_ia32_sqrtsh_round_mask:
792797
case X86::BI__builtin_ia32_sqrtsd_round_mask:
793798
case X86::BI__builtin_ia32_sqrtss_round_mask:
794-
case X86::BI__builtin_ia32_sqrtpd256:
795-
case X86::BI__builtin_ia32_sqrtpd:
796-
case X86::BI__builtin_ia32_sqrtps256:
797-
case X86::BI__builtin_ia32_sqrtps:
798-
case X86::BI__builtin_ia32_sqrtph256:
799-
case X86::BI__builtin_ia32_sqrtph:
800799
case X86::BI__builtin_ia32_sqrtph512:
801-
case X86::BI__builtin_ia32_vsqrtbf16256:
802-
case X86::BI__builtin_ia32_vsqrtbf16:
803-
case X86::BI__builtin_ia32_vsqrtbf16512:
804800
case X86::BI__builtin_ia32_sqrtps512:
805801
case X86::BI__builtin_ia32_sqrtpd512:
806802
case X86::BI__builtin_ia32_pmuludq128:
@@ -953,7 +949,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
953949
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
954950
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
955951
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
956-
case X86::BI__builtin_ia32_cvtsbf162ss_32:
957952
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
958953
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
959954
case X86::BI__cpuid:
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
#include <immintrin.h>
12+
13+
14+
__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
15+
16+
return _mm_mask_expand_pd(__W,__U,__A);
17+
}
18+
__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
19+
20+
return _mm_maskz_expand_pd(__U,__A);
21+
}
22+
__m256d test_mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A) {
23+
24+
return _mm256_mask_expand_pd(__W,__U,__A);
25+
}
26+
__m256d test_mm256_maskz_expand_pd(__mmask8 __U, __m256d __A) {
27+
28+
return _mm256_maskz_expand_pd(__U,__A);
29+
}
30+
__m128i test_mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
31+
32+
return _mm_mask_expand_epi64(__W,__U,__A);
33+
}
34+
__m128i test_mm_maskz_expand_epi64(__mmask8 __U, __m128i __A) {
35+
36+
return _mm_maskz_expand_epi64(__U,__A);
37+
}
38+
__m256i test_mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
39+
40+
return _mm256_mask_expand_epi64(__W,__U,__A);
41+
}
42+
__m256i test_mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A) {
43+
44+
return _mm256_maskz_expand_epi64(__U,__A);
45+
}
46+
47+
__m128 test_mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A) {
48+
49+
return _mm_mask_expand_ps(__W,__U,__A);
50+
}
51+
__m128 test_mm_maskz_expand_ps(__mmask8 __U, __m128 __A) {
52+
53+
return _mm_maskz_expand_ps(__U,__A);
54+
}
55+
__m256 test_mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A) {
56+
57+
return _mm256_mask_expand_ps(__W,__U,__A);
58+
}
59+
__m256 test_mm256_maskz_expand_ps(__mmask8 __U, __m256 __A) {
60+
61+
return _mm256_maskz_expand_ps(__U,__A);
62+
}
63+
__m128i test_mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
64+
65+
return _mm_mask_expand_epi32(__W,__U,__A);
66+
}
67+
__m128i test_mm_maskz_expand_epi32(__mmask8 __U, __m128i __A) {
68+
69+
return _mm_maskz_expand_epi32(__U,__A);
70+
}
71+
__m256i test_mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
72+
73+
return _mm256_mask_expand_epi32(__W,__U,__A);
74+
}
75+
__m256i test_mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A) {
76+
77+
return _mm256_maskz_expand_epi32(__U,__A);
78+
}
79+
80+
__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
81+
82+
return _mm_mask_compress_pd(__W,__U,__A);
83+
}
84+
85+
__m128d test_mm_maskz_compress_pd(__mmask8 __U, __m128d __A) {
86+
87+
return _mm_maskz_compress_pd(__U,__A);
88+
}
89+
90+
__m256d test_mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A) {
91+
92+
return _mm256_mask_compress_pd(__W,__U,__A);
93+
}
94+
95+
__m256d test_mm256_maskz_compress_pd(__mmask8 __U, __m256d __A) {
96+
97+
return _mm256_maskz_compress_pd(__U,__A);
98+
}
99+
100+
__m128i test_mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
101+
102+
return _mm_mask_compress_epi64(__W,__U,__A);
103+
}
104+
105+
__m128i test_mm_maskz_compress_epi64(__mmask8 __U, __m128i __A) {
106+
107+
return _mm_maskz_compress_epi64(__U,__A);
108+
}
109+
110+
__m256i test_mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
111+
112+
return _mm256_mask_compress_epi64(__W,__U,__A);
113+
}
114+
115+
__m256i test_mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A) {
116+
117+
return _mm256_maskz_compress_epi64(__U,__A);
118+
}
119+
120+
__m128 test_mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A) {
121+
122+
return _mm_mask_compress_ps(__W,__U,__A);
123+
}
124+
125+
__m128 test_mm_maskz_compress_ps(__mmask8 __U, __m128 __A) {
126+
127+
return _mm_maskz_compress_ps(__U,__A);
128+
}
129+
130+
__m256 test_mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A) {
131+
132+
return _mm256_mask_compress_ps(__W,__U,__A);
133+
}
134+
135+
__m256 test_mm256_maskz_compress_ps(__mmask8 __U, __m256 __A) {
136+
137+
return _mm256_maskz_compress_ps(__U,__A);
138+
}
139+
140+
__m128i test_mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
141+
142+
return _mm_mask_compress_epi32(__W,__U,__A);
143+
}
144+
145+
__m128i test_mm_maskz_compress_epi32(__mmask8 __U, __m128i __A) {
146+
147+
return _mm_maskz_compress_epi32(__U,__A);
148+
}
149+
150+
__m256i test_mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
151+
152+
return _mm256_mask_compress_epi32(__W,__U,__A);
153+
}
154+
155+
__m256i test_mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A) {
156+
157+
return _mm256_maskz_compress_epi32(__U,__A);
158+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
2+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
3+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
4+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
5+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
6+
7+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
8+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
9+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
10+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
11+
12+
#include <immintrin.h>
13+
14+
15+
__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
16+
17+
return _mm_mask_compress_epi16(__S, __U, __D);
18+
}
19+
20+
__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
21+
22+
return _mm_maskz_compress_epi16(__U, __D);
23+
}
24+
25+
__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
26+
27+
return _mm_mask_compress_epi8(__S, __U, __D);
28+
}
29+
30+
__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
31+
32+
return _mm_maskz_compress_epi8(__U, __D);
33+
}
34+
35+
__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
36+
37+
return _mm_mask_expand_epi16(__S, __U, __D);
38+
}
39+
40+
__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
41+
42+
return _mm_maskz_expand_epi16(__U, __D);
43+
}
44+
45+
__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
46+
47+
return _mm_mask_expand_epi8(__S, __U, __D);
48+
}
49+
50+
__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
51+
52+
return _mm_maskz_expand_epi8(__U, __D);
53+
}

0 commit comments

Comments
 (0)