Skip to content

Commit 1012b16

Browse files
committed
[CIR][CIRGen][Builtin][X86] Masked compress Intrinsics
Add the masked compress builtins in CIR, lowering them to the x86_avx512_mask_compress intrinsic. Note: this is my first PR to LLVM.
1 parent eb5297e commit 1012b16

File tree

3 files changed

+101
-1
lines changed

3 files changed

+101
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ static mlir::Value getMaskVecValue(CIRGenFunction &cgf, const CallExpr *expr,
8989
}
9090
return maskVec;
9191
}
92+
/// Emit a call to an X86 masked compress/expand intrinsic.
///
/// The operands follow the LLVM intrinsic layout (data, passthru, mask):
/// \p ops[0] is the source data vector, \p ops[1] the pass-through vector
/// (which also fixes the result type), and \p ops[2] the integer mask,
/// which is bitcast to an <N x i1> vector matching the result's element
/// count via getMaskVecValue.
///
/// \p isCompress is currently unused: the intrinsic is selected by \p id.
/// It is kept so compress and expand builtins can share this helper once
/// expand support lands, mirroring classic CodeGen's EmitX86CompressExpand.
static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf,
                                         const CallExpr *expr,
                                         ArrayRef<mlir::Value> ops,
                                         [[maybe_unused]] bool isCompress,
                                         const std::string &id) {
  auto resultTy = cast<cir::VectorType>(ops[1].getType());
  mlir::Value maskValue =
      getMaskVecValue(cgf, expr, ops[2], resultTy.getSize());
  llvm::SmallVector<mlir::Value, 3> newOps{ops[0], ops[1], maskValue};
  return emitIntrinsicCallOp(cgf, expr, id, resultTy, newOps);
}
92100

93101
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
94102
const CallExpr *expr) {
@@ -454,7 +462,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
454462
case X86::BI__builtin_ia32_compresshi512_mask:
455463
case X86::BI__builtin_ia32_compressqi128_mask:
456464
case X86::BI__builtin_ia32_compressqi256_mask:
457-
case X86::BI__builtin_ia32_compressqi512_mask:
465+
case X86::BI__builtin_ia32_compressqi512_mask:{
466+
return emitX86CompressExpand(*this, expr, ops, true, "x86_avx512_mask_compress");
467+
}
458468
case X86::BI__builtin_ia32_gather3div2df:
459469
case X86::BI__builtin_ia32_gather3div2di:
460470
case X86::BI__builtin_ia32_gather3div4df:
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include <immintrin.h>
2+
// Exercises _mm_mask_compress_epi16, declared in clang/lib/Headers/avx512vlvbmi2intrin.h.
3+
4+
// Smoke test: the masked-compress builtin behind _mm_mask_compress_epi16
// should lower to the x86_avx512_mask_compress CIR intrinsic.
__m128i test_mm_mask_compress(__m128i __S, __mmask8 __U, __m128i __D) {
  // _mm_mask_compress_epi16 already returns __m128i; the original
  // (__m128i) cast was redundant.
  return _mm_mask_compress_epi16(__S, __U, __D);
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
!s16i = !cir.int<s, 16>
2+
!s64i = !cir.int<s, 64>
3+
!u8i = !cir.int<u, 8>
4+
#loc3 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:25)
5+
#loc4 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:33)
6+
#loc5 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:38)
7+
#loc6 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:47)
8+
#loc7 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:52)
9+
#loc8 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:60)
10+
#loc18 = loc("builtin_mask_compress.c":4:31)
11+
#loc19 = loc("builtin_mask_compress.c":4:39)
12+
#loc20 = loc("builtin_mask_compress.c":4:44)
13+
#loc21 = loc("builtin_mask_compress.c":4:53)
14+
#loc22 = loc("builtin_mask_compress.c":4:58)
15+
#loc23 = loc("builtin_mask_compress.c":4:66)
16+
#loc32 = loc(fused[#loc3, #loc4])
17+
#loc33 = loc(fused[#loc5, #loc6])
18+
#loc34 = loc(fused[#loc7, #loc8])
19+
#loc38 = loc(fused[#loc18, #loc19])
20+
#loc39 = loc(fused[#loc20, #loc21])
21+
#loc40 = loc(fused[#loc22, #loc23])
22+
// Expected CIR for builtin_mask_compress.c on x86_64-unknown-linux-gnu.
module @"/home/cs25resch11005/myFiles/off_contrib/llvm-project/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c" attributes {cir.lang = #cir.lang<c>, cir.module_asm = [], cir.triple = "x86_64-unknown-linux-gnu", dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array<i32: 8, 16, 32, 64>, "dlti.stack_alignment" = 128 : i64>} {
// Inline(always) header helper from avx512vlvbmi2intrin.h.
// Args: %arg0 = __S (pass-through), %arg1 = __U (mask), %arg2 = __D (data).
cir.func internal private dso_local @_mm_mask_compress_epi16(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !u8i loc(fused[#loc5, #loc6]), %arg2: !cir.vector<2 x !s64i> loc(fused[#loc7, #loc8])) -> !cir.vector<2 x !s64i> inline(always) {
// Stack slots for the three parameters and the return value.
%0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__S", init] {alignment = 16 : i64} loc(#loc32)
%1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc33)
%2 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__D", init] {alignment = 16 : i64} loc(#loc34)
%3 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__retval"] {alignment = 16 : i64} loc(#loc2)
cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc9)
cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc9)
cir.store %arg2, %2 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc9)
// Reload __D and __S and bitcast v2i64 -> v8i16 (epi16 element view).
%4 = cir.load align(16) %2 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc10)
%5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10)
%6 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc11)
%7 = cir.cast bitcast %6 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc11)
// Bitcast the u8 mask to <8 x i1> to match the 8 result elements.
%8 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc12)
%9 = cir.cast bitcast %8 : !u8i -> !cir.vector<8 x !cir.int<u, 1>> loc(#loc12)
// Intrinsic operands are (data, passthru, mask).
%10 = cir.call_llvm_intrinsic "x86_avx512_mask_compress" %5, %7, %9 : (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i> loc(#loc13)
// Bitcast back to the __m128i (v2i64) representation and return.
%11 = cir.cast bitcast %10 : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i> loc(#loc35)
cir.store %11, %3 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc36)
%12 = cir.load %3 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc36)
cir.return %12 : !cir.vector<2 x !s64i> loc(#loc36)
} loc(#loc31)
// Test entry point from builtin_mask_compress.c: forwards its arguments
// to the helper above and returns the result.
cir.func dso_local @test_mm_mask_compress(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !u8i loc(fused[#loc20, #loc21]), %arg2: !cir.vector<2 x !s64i> loc(fused[#loc22, #loc23])) -> !cir.vector<2 x !s64i> inline(never) {
%0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__S", init] {alignment = 16 : i64} loc(#loc38)
%1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc39)
%2 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__D", init] {alignment = 16 : i64} loc(#loc40)
%3 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__retval"] {alignment = 16 : i64} loc(#loc17)
cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc24)
cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc24)
cir.store %arg2, %2 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc24)
%4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc25)
%5 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc26)
%6 = cir.load align(16) %2 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc27)
%7 = cir.call @_mm_mask_compress_epi16(%4, %5, %6) : (!cir.vector<2 x !s64i>, !u8i, !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i> loc(#loc28)
cir.store %7, %3 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc41)
%8 = cir.load %3 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc41)
cir.return %8 : !cir.vector<2 x !s64i> loc(#loc41)
} loc(#loc37)
} loc(#loc)
60+
#loc = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c":0:0)
61+
#loc1 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":35:1)
62+
#loc2 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":41:1)
63+
#loc9 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":37:1)
64+
#loc10 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":38:64)
65+
#loc11 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":39:24)
66+
#loc12 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":40:15)
67+
#loc13 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":38:20)
68+
#loc14 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":40:18)
69+
#loc15 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":38:3)
70+
#loc16 = loc("builtin_mask_compress.c":4:1)
71+
#loc17 = loc("builtin_mask_compress.c":7:1)
72+
#loc24 = loc("builtin_mask_compress.c":4:70)
73+
#loc25 = loc("builtin_mask_compress.c":6:45)
74+
#loc26 = loc("builtin_mask_compress.c":6:50)
75+
#loc27 = loc("builtin_mask_compress.c":6:55)
76+
#loc28 = loc("builtin_mask_compress.c":6:21)
77+
#loc29 = loc("builtin_mask_compress.c":6:5)
78+
#loc30 = loc("builtin_mask_compress.c":6:58)
79+
#loc31 = loc(fused[#loc1, #loc2])
80+
#loc35 = loc(fused[#loc13, #loc14])
81+
#loc36 = loc(fused[#loc15, #loc14])
82+
#loc37 = loc(fused[#loc16, #loc17])
83+
#loc41 = loc(fused[#loc29, #loc30])

0 commit comments

Comments
 (0)