Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 82 additions & 7 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,62 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
return bitCast;
}

//
static cir::VecShuffleOp emitPshufW(CIRGenFunction &cgf,
CIRGenBuilderTy &builder,
const mlir::Value vec,
const mlir::Value immediate,
const CallExpr *expr, const bool isLow) {
uint32_t imm = cgf.getZExtIntValueFromConstOp(immediate);

auto vecTy = cast<cir::VectorType>(vec.getType());
unsigned numElts = vecTy.getSize();

unsigned firstHalfStart = isLow ? 0 : 4;
unsigned secondHalfStart = 4 - firstHalfStart;

// Splat the 8-bits of immediate 4 times to help the loop wrap around.
imm = (imm & 0xff) * 0x01010101;

int64_t indices[32];
for (unsigned l = 0; l != numElts; l += 8) {
for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
indices[l + i] = l + (imm & 3) + firstHalfStart;
imm >>= 2;
}
for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
indices[l + i] = l + i;
}

return builder.createVecShuffle(cgf.getLoc(expr->getExprLoc()), vec,
ArrayRef(indices, numElts));
}

static llvm::SmallVector<int64_t, 16>
computeMaskPshufDOrShufP(CIRGenFunction &cgf, const mlir::Value vec,
uint32_t imm, const bool isShufP) {
auto vecTy = cast<cir::VectorType>(vec.getType());
unsigned numElts = vecTy.getSize();
unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
unsigned numLaneElts = numElts / numLanes;

// Splat the 8-bits of immediate 4 times to help the loop wrap around.
imm = (imm & 0xff) * 0x01010101;

llvm::SmallVector<int64_t, 16> indices(numElts);
for (unsigned l = 0; l != numElts; l += numLaneElts) {
for (unsigned i = 0; i != numLaneElts; ++i) {
uint32_t idx = imm % numLaneElts;
imm /= numLaneElts;
if (isShufP && i >= (numLaneElts / 2))
idx += numElts;
indices[l + i] = l + idx;
}
}

return indices;
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
Expand Down Expand Up @@ -163,9 +219,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vec_ext_v4di: {
unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();

uint64_t index =
ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();

uint64_t index = getZExtIntValueFromConstOp(ops[1]);
index &= numElts - 1;

cir::ConstantOp indexVal =
Expand Down Expand Up @@ -523,12 +577,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_pblendw256:
case X86::BI__builtin_ia32_pblendd128:
case X86::BI__builtin_ia32_pblendd256:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512:
case X86::BI__builtin_ia32_pshuflw512: {
return emitPshufW(*this, builder, ops[0], ops[1], expr, true);
}
case X86::BI__builtin_ia32_pshufhw:
case X86::BI__builtin_ia32_pshufhw256:
case X86::BI__builtin_ia32_pshufhw512:
case X86::BI__builtin_ia32_pshufhw512: {
return emitPshufW(*this, builder, ops[0], ops[1], expr, false);
}
case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512:
Expand All @@ -537,13 +599,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilps256:
case X86::BI__builtin_ia32_vpermilpd512:
case X86::BI__builtin_ia32_vpermilps512:
case X86::BI__builtin_ia32_vpermilps512: {
const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
const llvm::SmallVector<int64_t, 16> mask =
computeMaskPshufDOrShufP(*this, ops[0], imm, false);

return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
}
case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_shufpd256:
case X86::BI__builtin_ia32_shufpd512:
case X86::BI__builtin_ia32_shufps:
case X86::BI__builtin_ia32_shufps256:
case X86::BI__builtin_ia32_shufps512:
case X86::BI__builtin_ia32_shufps512: {
const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
const llvm::SmallVector<int64_t, 16> mask =
computeMaskPshufDOrShufP(*this, ops[0], imm, true);

return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
mask);
}
case X86::BI__builtin_ia32_permdi256:
case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi512:
Expand Down
18 changes: 17 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,22 @@ class CIRGenFunction : public CIRGenTypeCache {
return convertType(getContext().getTypeDeclType(t));
}

/// Get integer from a mlir::Value that is an int constant or a constant op.
static int64_t getSExtIntValueFromConstOp(mlir::Value val) {
auto constOp = val.getDefiningOp<cir::ConstantOp>();
assert(constOp && "getIntValueFromConstOp call with non ConstantOp");
return constOp.getIntValue().getSExtValue();
}

/// Get zero-extended integer from a mlir::Value that is an int constant or a
/// constant op.
static int64_t getZExtIntValueFromConstOp(mlir::Value val) {
auto constOp = val.getDefiningOp<cir::ConstantOp>();
assert(constOp &&
"getZeroExtendedIntValueFromConstOp call with non ConstantOp");
return constOp.getIntValue().getZExtValue();
}

/// Return the cir::TypeEvaluationKind of QualType \c type.
static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type);

Expand Down Expand Up @@ -1804,7 +1820,7 @@ class CIRGenFunction : public CIRGenTypeCache {

mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);

mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr);

/// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
/// nonnull, if 1\p LHS is marked _Nonnull.
Expand Down
26 changes: 25 additions & 1 deletion clang/test/CIR/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,28 @@ __m256i test_mm256_undefined_si256(void) {
// OGCG-LABEL: test_mm256_undefined_si256
// OGCG: ret <4 x i64> zeroinitializer
return _mm256_undefined_si256();
}
}

__m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
// CIR-LABEL: test_mm256_shuffle_pd
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !cir.double>

// CHECK-LABEL: test_mm256_shuffle_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>

// OGCG-LABEL: test_mm256_shuffle_pd
// OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
return _mm256_shuffle_pd(A, B, 0);
}

__m256 test_mm256_shuffle_ps(__m256 A, __m256 B) {
// CIR-LABEL: test_mm256_shuffle_ps
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i] : !cir.vector<8 x !cir.float>

// CHECK-LABEL: test_mm256_shuffle_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>

// OGCG-LABEL: test_mm256_shuffle_ps
// OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
return _mm256_shuffle_ps(A, B, 0);
}
53 changes: 53 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG

// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
// CIR shall be able to support fully.

#include <immintrin.h>

__m256i test_mm256_shufflelo_epi16(__m256i a) {
// CIR-LABEL: _mm256_shufflelo_epi16
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s16i>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s16i>

// LLVM-LABEL: test_mm256_shufflelo_epi16
// LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>

// OGCG-LABEL: test_mm256_shufflelo_epi16
// OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
return _mm256_shufflelo_epi16(a, 83);
}

__m256i test_mm256_shufflehi_epi16(__m256i a) {
// CIR-LABEL: _mm256_shufflehi_epi16
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<6> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<15> : !s32i, #cir.int<14> : !s32i, #cir.int<14> : !s32i, #cir.int<13> : !s32i] : !cir.vector<16 x !s16i>

// LLVM-LABEL: test_mm256_shufflehi_epi16
// LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>

// OGCG-LABEL: test_mm256_shufflehi_epi16
// OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
return _mm256_shufflehi_epi16(a, 107);
}
41 changes: 41 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512bw-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG

// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

__m512i test_mm512_shufflelo_epi16(__m512i __A) {
// CIR-LABEL: _mm512_shufflelo_epi16
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<32 x !s16i>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<17> : !s32i, #cir.int<17> : !s32i, #cir.int<16> : !s32i, #cir.int<16> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<25> : !s32i, #cir.int<25> : !s32i, #cir.int<24> : !s32i, #cir.int<24> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i] : !cir.vector<32 x !s16i>

// LLVM-LABEL: @test_mm512_shufflelo_epi16
// LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>

// OGCG-LABEL: @test_mm512_shufflelo_epi16
// OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
return _mm512_shufflelo_epi16(__A, 5);
}

__m512i test_mm512_shufflehi_epi16(__m512i __A) {
// CIR-LABEL: _mm512_shufflehi_epi16
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<32 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i, #cir.int<13> : !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<21> : !s32i, #cir.int<21> : !s32i, #cir.int<20> : !s32i, #cir.int<20> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<29> : !s32i, #cir.int<29> : !s32i, #cir.int<28> : !s32i, #cir.int<28> : !s32i] : !cir.vector<32 x !s16i>

// LLVM-LABEL: @test_mm512_shufflehi_epi16
// LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>

// OGCG-LABEL: @test_mm512_shufflehi_epi16
// OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
return _mm512_shufflehi_epi16(__A, 5);
}
24 changes: 24 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,27 @@ __m512i test_mm512_undefined_epi32(void) {
// OGCG: ret <8 x i64> zeroinitializer
return _mm512_undefined_epi32();
}

__m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) {
// CIR-LABEL: test_mm512_shuffle_pd
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<3> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !cir.double>

// LLVM-LABEL: test_mm512_shuffle_pd
// LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>

// OGCG-LABEL: test_mm512_shuffle_pd
// OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
return _mm512_shuffle_pd(__M, __V, 4);
}

__m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) {
// CIR-LABEL: test_mm512_shuffle_ps
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<16> : !s32i, #cir.int<16> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<20> : !s32i, #cir.int<20> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<24> : !s32i, #cir.int<24> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<28> : !s32i, #cir.int<28> : !s32i] : !cir.vector<16 x !cir.float>

// LLVM-LABEL: test_mm512_shuffle_ps
// LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>

// OGCG-LABEL: test_mm512_shuffle_ps
// OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
return _mm512_shuffle_ps(__M, __V, 4);
}
Loading