Skip to content

Commit 8e108fb

Browse files
Upstream CIR Codegen for shuffle X86 builtins
1 parent 1264620 commit 8e108fb

File tree

8 files changed

+308
-10
lines changed

8 files changed

+308
-10
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 90 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,35 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
6868
return bitCast;
6969
}
7070

71+
static cir::VecShuffleOp emitPshufW(CIRGenFunction &cgf,
72+
CIRGenBuilderTy &builder,
73+
llvm::SmallVector<mlir::Value> &ops,
74+
const CallExpr *expr, const bool isLow) {
75+
uint32_t imm = cgf.getZExtIntValueFromConstOp(ops[1]);
76+
77+
auto vecTy = cast<cir::VectorType>(ops[0].getType());
78+
unsigned numElts = vecTy.getSize();
79+
80+
unsigned firstHalfStart = isLow ? 0 : 4;
81+
unsigned secondHalfStart = 4 - firstHalfStart;
82+
83+
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
84+
imm = (imm & 0xff) * 0x01010101;
85+
86+
int64_t indices[32];
87+
for (unsigned l = 0; l != numElts; l += 8) {
88+
for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
89+
indices[l + i] = l + (imm & 3) + firstHalfStart;
90+
imm /= 4;
91+
}
92+
for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
93+
indices[l + i] = l + i;
94+
}
95+
96+
return builder.createVecShuffle(cgf.getLoc(expr->getExprLoc()), ops[0],
97+
ArrayRef(indices, numElts));
98+
}
99+
71100
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
72101
const CallExpr *expr) {
73102
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -163,9 +192,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
163192
case X86::BI__builtin_ia32_vec_ext_v4di: {
164193
unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
165194

166-
uint64_t index =
167-
ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
168-
195+
uint64_t index = getZExtIntValueFromConstOp(ops[1]);
169196
index &= numElts - 1;
170197

171198
cir::ConstantOp indexVal =
@@ -523,12 +550,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
523550
case X86::BI__builtin_ia32_pblendw256:
524551
case X86::BI__builtin_ia32_pblendd128:
525552
case X86::BI__builtin_ia32_pblendd256:
553+
cgm.errorNYI(expr->getSourceRange(),
554+
std::string("unimplemented X86 builtin call: ") +
555+
getContext().BuiltinInfo.getName(builtinID));
556+
return {};
526557
case X86::BI__builtin_ia32_pshuflw:
527558
case X86::BI__builtin_ia32_pshuflw256:
528-
case X86::BI__builtin_ia32_pshuflw512:
559+
case X86::BI__builtin_ia32_pshuflw512: {
560+
return emitPshufW(*this, builder, ops, expr, true);
561+
}
529562
case X86::BI__builtin_ia32_pshufhw:
530563
case X86::BI__builtin_ia32_pshufhw256:
531-
case X86::BI__builtin_ia32_pshufhw512:
564+
case X86::BI__builtin_ia32_pshufhw512: {
565+
return emitPshufW(*this, builder, ops, expr, false);
566+
}
532567
case X86::BI__builtin_ia32_pshufd:
533568
case X86::BI__builtin_ia32_pshufd256:
534569
case X86::BI__builtin_ia32_pshufd512:
@@ -537,13 +572,61 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
537572
case X86::BI__builtin_ia32_vpermilpd256:
538573
case X86::BI__builtin_ia32_vpermilps256:
539574
case X86::BI__builtin_ia32_vpermilpd512:
540-
case X86::BI__builtin_ia32_vpermilps512:
575+
case X86::BI__builtin_ia32_vpermilps512: {
576+
// TODO: Add tests for this branch.
577+
uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
578+
579+
auto vecTy = cast<cir::VectorType>(ops[0].getType());
580+
unsigned numElts = vecTy.getSize();
581+
auto eltTy = vecTy.getElementType();
582+
583+
unsigned eltBitWidth = getTypeSizeInBits(eltTy).getFixedValue();
584+
unsigned numLaneElts = 128 / eltBitWidth;
585+
586+
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
587+
imm = (imm & 0xff) * 0x01010101;
588+
589+
llvm::SmallVector<int64_t, 16> indices;
590+
for (unsigned l = 0; l != numElts; l += numLaneElts) {
591+
for (unsigned i = 0; i != numLaneElts; ++i) {
592+
indices.push_back((imm % numLaneElts) + l);
593+
imm /= numLaneElts;
594+
}
595+
}
596+
597+
return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0],
598+
indices);
599+
}
541600
case X86::BI__builtin_ia32_shufpd:
542601
case X86::BI__builtin_ia32_shufpd256:
543602
case X86::BI__builtin_ia32_shufpd512:
544603
case X86::BI__builtin_ia32_shufps:
545604
case X86::BI__builtin_ia32_shufps256:
546-
case X86::BI__builtin_ia32_shufps512:
605+
case X86::BI__builtin_ia32_shufps512: {
606+
uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
607+
608+
auto vecTy = cast<cir::VectorType>(ops[0].getType());
609+
unsigned numElts = vecTy.getSize();
610+
unsigned numLanes = cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
611+
unsigned numLaneElts = numElts / numLanes;
612+
613+
// Splat the 8-bits of immediate 4 times to help the loop wrap around.
614+
imm = (imm & 0xff) * 0x01010101;
615+
616+
int64_t indices[16];
617+
for (unsigned l = 0; l != numElts; l += numLaneElts) {
618+
for (unsigned i = 0; i != numLaneElts; ++i) {
619+
uint32_t idx = imm % numLaneElts;
620+
imm /= numLaneElts;
621+
if (i >= (numLaneElts / 2))
622+
idx += numElts;
623+
indices[l + i] = l + idx;
624+
}
625+
}
626+
627+
return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
628+
ArrayRef(indices, numElts));
629+
}
547630
case X86::BI__builtin_ia32_permdi256:
548631
case X86::BI__builtin_ia32_permdf256:
549632
case X86::BI__builtin_ia32_permdi512:

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1349,6 +1349,28 @@ class CIRGenFunction : public CIRGenTypeCache {
13491349
cir::IntType resType, mlir::Value emittedE,
13501350
bool isDynamic);
13511351

1352+
/// Get integer from a mlir::Value that is an int constant or a constant op.
1353+
static int64_t getSExtIntValueFromConstOp(mlir::Value val) {
1354+
auto constOp = val.getDefiningOp<cir::ConstantOp>();
1355+
assert(constOp && "getIntValueFromConstOp call with non ConstantOp");
1356+
return constOp.getIntValue().getSExtValue();
1357+
}
1358+
1359+
/// Get zero-extended integer from a mlir::Value that is an int constant or a
1360+
/// constant op.
1361+
static int64_t getZExtIntValueFromConstOp(mlir::Value val) {
1362+
auto constOp = val.getDefiningOp<cir::ConstantOp>();
1363+
assert(constOp &&
1364+
"getZeroExtendedIntValueFromConstOp call with non ConstantOp");
1365+
return constOp.getIntValue().getZExtValue();
1366+
}
1367+
1368+
/// Get size of type in bits using SizedTypeInterface
1369+
llvm::TypeSize getTypeSizeInBits(mlir::Type ty) const {
1370+
assert(cir::isSized(ty) && "Type must implement SizedTypeInterface");
1371+
return cgm.getDataLayout().getTypeSizeInBits(ty);
1372+
}
1373+
13521374
mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e,
13531375
unsigned type,
13541376
cir::IntType resType,
@@ -1804,7 +1826,7 @@ class CIRGenFunction : public CIRGenTypeCache {
18041826

18051827
mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);
18061828

1807-
mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
1829+
mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr);
18081830

18091831
/// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
18101832
/// nonnull, if 1\p LHS is marked _Nonnull.

clang/test/CIR/CodeGen/X86/avx-builtins.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,28 @@ __m256i test_mm256_undefined_si256(void) {
7373
// OGCG-LABEL: test_mm256_undefined_si256
7474
// OGCG: ret <4 x i64> zeroinitializer
7575
return _mm256_undefined_si256();
76+
}
77+
78+
__m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
79+
// CIR-LABEL: test_mm256_shuffle_pd
80+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x !cir.double>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !cir.double>
81+
82+
// CHECK-LABEL: test_mm256_shuffle_pd
83+
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
84+
85+
// OGCG-LABEL: test_mm256_shuffle_pd
86+
// OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
87+
return _mm256_shuffle_pd(A, B, 0);
88+
}
89+
90+
__m256 test_mm256_shuffle_ps(__m256 A, __m256 B) {
91+
// CIR-LABEL: test_mm256_shuffle_ps
92+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i] : !cir.vector<8 x !cir.float>
93+
94+
// CHECK-LABEL: test_mm256_shuffle_ps
95+
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
96+
97+
// OGCG-LABEL: test_mm256_shuffle_ps
98+
// OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
99+
return _mm256_shuffle_ps(A, B, 0);
76100
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
7+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
12+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
13+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
14+
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
15+
16+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
17+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
18+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
19+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
20+
21+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
22+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
23+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
24+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
25+
26+
// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
27+
// CIR shall be able to support fully.
28+
29+
#include <immintrin.h>
30+
31+
__m256i test_mm256_shufflelo_epi16(__m256i a) {
32+
// CIR-LABEL: _mm256_shufflelo_epi16
33+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s16i>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s16i>
34+
35+
// LLVM-LABEL: test_mm256_shufflelo_epi16
36+
// LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
37+
38+
// OGCG-LABEL: test_mm256_shufflelo_epi16
39+
// OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
40+
return _mm256_shufflelo_epi16(a, 83);
41+
}
42+
43+
__m256i test_mm256_shufflehi_epi16(__m256i a) {
44+
// CIR-LABEL: _mm256_shufflehi_epi16
45+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<6> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<15> : !s32i, #cir.int<14> : !s32i, #cir.int<14> : !s32i, #cir.int<13> : !s32i] : !cir.vector<16 x !s16i>
46+
47+
// LLVM-LABEL: test_mm256_shufflehi_epi16
48+
// LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
49+
50+
// OGCG-LABEL: test_mm256_shufflehi_epi16
51+
// OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
52+
return _mm256_shufflehi_epi16(a, 107);
53+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
8+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
12+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
13+
14+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
15+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
16+
17+
#include <immintrin.h>
18+
19+
__m512i test_mm512_shufflelo_epi16(__m512i __A) {
20+
// CIR-LABEL: _mm512_shufflelo_epi16
21+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<32 x !s16i>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<17> : !s32i, #cir.int<17> : !s32i, #cir.int<16> : !s32i, #cir.int<16> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<25> : !s32i, #cir.int<25> : !s32i, #cir.int<24> : !s32i, #cir.int<24> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i] : !cir.vector<32 x !s16i>
22+
23+
// LLVM-LABEL: @test_mm512_shufflelo_epi16
24+
// LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
25+
26+
// OGCG-LABEL: @test_mm512_shufflelo_epi16
27+
// OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
28+
return _mm512_shufflelo_epi16(__A, 5);
29+
}
30+
31+
__m512i test_mm512_shufflehi_epi16(__m512i __A) {
32+
// CIR-LABEL: _mm512_shufflehi_epi16
33+
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<32 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i, #cir.int<13> : !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<21> : !s32i, #cir.int<21> : !s32i, #cir.int<20> : !s32i, #cir.int<20> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<29> : !s32i, #cir.int<29> : !s32i, #cir.int<28> : !s32i, #cir.int<28> : !s32i] : !cir.vector<32 x !s16i>
34+
35+
// LLVM-LABEL: @test_mm512_shufflehi_epi16
36+
// LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
37+
38+
// OGCG-LABEL: @test_mm512_shufflehi_epi16
39+
// OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
40+
return _mm512_shufflehi_epi16(__A, 5);
41+
}

0 commit comments

Comments
 (0)