Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2f02de3
feat: Add support for kunpack builtins
ahmednoursphinx Nov 19, 2025
ad1525e
chore: format files
ahmednoursphinx Nov 19, 2025
2b75e07
refactor: rename property to include full name
ahmednoursphinx Nov 19, 2025
052c48e
refactor: move logic to helper function and modify function name
ahmednoursphinx Nov 20, 2025
dab0645
refactor: rearrange funcs
ahmednoursphinx Nov 20, 2025
faab7d2
refactor: optimize createVecShuffle
ahmednoursphinx Nov 20, 2025
bb801bc
refactor: move test functions
ahmednoursphinx Nov 20, 2025
212c5df
chore: Format files
ahmednoursphinx Nov 20, 2025
b7ceb04
Merge branch 'main' into issue_-167765_kunpck
ahmednoursphinx Nov 20, 2025
cf728a9
refactor: remove duplicate test
ahmednoursphinx Nov 20, 2025
107d4a1
Update CIRGenBuiltinX86.cpp
ahmednoursphinx Nov 20, 2025
bff4131
Update CIRGenBuiltinX86.cpp
ahmednoursphinx Nov 20, 2025
553aa74
refactor: remove redundant tests
ahmednoursphinx Nov 20, 2025
349fb13
Update avx512bw-builtins.c
ahmednoursphinx Nov 20, 2025
14d8eab
Merge branch 'main' into issue_-167765_kunpck
ahmednoursphinx Nov 25, 2025
19e365d
refactor: Use getMaskVecValue func
ahmednoursphinx Nov 25, 2025
0f28143
feat: add test
ahmednoursphinx Nov 25, 2025
2c68211
chore: fix test
ahmednoursphinx Nov 25, 2025
ce3956e
Merge branch 'main' into issue_-167765_kunpck
ahmednoursphinx Nov 30, 2025
f56a71f
chore: fix merge conflict by adding test to the correct location
ahmednoursphinx Nov 30, 2025
36cd79a
chore: update CIR label
ahmednoursphinx Nov 30, 2025
bef4953
feat: add Also a test for _mm512_kunpackw and _mm512_kunpackd.
ahmednoursphinx Nov 30, 2025
416e122
feat: refactor to use a function instead of inlining and give name to…
ahmednoursphinx Nov 30, 2025
e7a6386
chore: Format files
ahmednoursphinx Nov 30, 2025
8318cea
feat: use builder
ahmednoursphinx Nov 30, 2025
8521696
refactor: fix tests
ahmednoursphinx Nov 30, 2025
6cbd359
Merge branch 'main' into issue_-167765_kunpck
ahmednoursphinx Dec 2, 2025
02b22e6
Merge branch 'main' into issue_-167765_kunpck
ahmednoursphinx Dec 2, 2025
cf8238b
chore: fix merge conflict
ahmednoursphinx Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,44 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};

case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi: {
  // The mask operands are integers; the bit width of the first one is used
  // as the lane count of the mask vectors.
  mlir::Type maskTy = ops[0].getType();
  unsigned laneCount = cast<cir::IntType>(maskTy).getWidth();
  unsigned halfCount = laneCount / 2;

  // Turn both integer masks into mask vectors of laneCount elements.
  mlir::Value lhs = getMaskVecValue(*this, expr, ops[0], laneCount);
  mlir::Value rhs = getMaskVecValue(*this, expr, ops[1], laneCount);

  mlir::Location loc = getLoc(expr->getExprLoc());

  // Identity shuffle mask 0, 1, ..., laneCount - 1 as signed 32-bit integer
  // attributes. The full list drives the final concatenation; its first
  // halfCount entries drive the two half extractions.
  mlir::Type i32Ty = builder.getSInt32Ty();
  SmallVector<mlir::Attribute, 64> identityMask;
  for (unsigned idx = 0; idx != laneCount; ++idx)
    identityMask.push_back(cir::IntAttr::get(i32Ty, idx));
  SmallVector<mlir::Attribute, 32> lowHalfMask(
      identityMask.begin(), identityMask.begin() + halfCount);

  // Keep only the first half of each operand. Doing this as two separate
  // shuffles gives better codegen than folding everything into one shuffle.
  lhs = builder.createVecShuffle(loc, lhs, lhs, lowHalfMask);
  rhs = builder.createVecShuffle(loc, rhs, rhs, lowHalfMask);

  // Concatenate the two halves. NOTE: the operands are deliberately passed
  // as (rhs, lhs) to match the intrinsic definition: with the identity mask,
  // indices [0, halfCount) pick the rhs half and indices
  // [halfCount, laneCount) pick the lhs half.
  mlir::Value concat =
      builder.createVecShuffle(loc, rhs, lhs, identityMask);

  // Convert the mask vector back to the original integer mask type.
  return builder.createBitcast(concat, maskTy);
}

case X86::BI_mm_setcsr:
case X86::BI__builtin_ia32_ldmxcsr: {
mlir::Location loc = getLoc(expr->getExprLoc());
Expand Down Expand Up @@ -775,9 +813,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_kmovw:
case X86::BI__builtin_ia32_kmovd:
case X86::BI__builtin_ia32_kmovq:
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi:
case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,27 @@ __m512i test_mm512_undefined_epi32(void) {
// OGCG: ret <8 x i64> zeroinitializer
return _mm512_undefined_epi32();
}

// Exercises _mm512_kunpackb on two 16-bit masks. The first expectation only
// requires a call whose callee name contains "kunpackb". The two sets of IR
// expectations that follow are identical to each other: each mask is bitcast
// to <16 x i1>, a shuffle keeps lanes 0-7 of each, a final shuffle
// concatenates B's half (first operand) with A's half (second operand), and
// the result is bitcast back to i16 and returned.
__mmask16 test_mm512_kunpackb(__mmask16 A, __mmask16 B) {
// CIR-LABEL: test_mm512_kunpackb
// CIR: cir.call @{{.*}}kunpackb{{.*}}

// LLVM-LABEL: test_mm512_kunpackb
// LLVM: [[A_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// LLVM: [[B_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// LLVM: [[A_HALF:%.*]] = shufflevector <16 x i1> [[A_VEC]], <16 x i1> [[A_VEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// LLVM: [[B_HALF:%.*]] = shufflevector <16 x i1> [[B_VEC]], <16 x i1> [[B_VEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// LLVM: [[RES:%.*]] = shufflevector <8 x i1> [[B_HALF]], <8 x i1> [[A_HALF]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// LLVM: %{{.*}} = bitcast <16 x i1> [[RES]] to i16
// LLVM: ret i16 %{{.*}}

// OGCG-LABEL: test_mm512_kunpackb
// OGCG: [[A_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// OGCG: [[B_VEC:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
// OGCG: [[A_HALF:%.*]] = shufflevector <16 x i1> [[A_VEC]], <16 x i1> [[A_VEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// OGCG: [[B_HALF:%.*]] = shufflevector <16 x i1> [[B_VEC]], <16 x i1> [[B_VEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// OGCG: [[RES:%.*]] = shufflevector <8 x i1> [[B_HALF]], <8 x i1> [[A_HALF]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// OGCG: %{{.*}} = bitcast <16 x i1> [[RES]] to i16
// OGCG: ret i16 %{{.*}}
return _mm512_kunpackb(A, B);
}