Skip to content

Commit efdac64

Browse files
RiverDavelanza
authored andcommitted
[CIR][CIRGen][X86] lower Masked Store related intrinsics (llvm#1734)
Not too much to add. Added a method to call a masked store intrinsic from the builder. Haven't touched that class to much, so let me know if that's the right call. Also: Unfortunately there were a lot of test cases for the intrinsics in this PR, hope that's not a big hustle for review 😊
1 parent 67673d0 commit efdac64

File tree

9 files changed

+317
-1
lines changed

9 files changed

+317
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,31 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
894894
return CIRBaseBuilderTy::createStore(loc, flag, dst);
895895
}
896896

897+
/// Create a call to a masked store intrinsic.
898+
/// \p loc - expression location
899+
/// \p val - data to be stored
900+
/// \p ptr - base pointer for the store
901+
/// \p alignment - alignment of the destination location
902+
/// \p mask - vector of booleans which indicates what vector lanes should
903+
/// be accessed in memory
904+
mlir::Value createMaskedStore(mlir::Location loc, mlir::Value val,
905+
mlir::Value ptr, llvm::Align alignment,
906+
mlir::Value mask) {
907+
mlir::Type dataTy = val.getType();
908+
909+
assert(mlir::isa<cir::VectorType>(dataTy) && "val should be a vector");
910+
assert(mask && "mask should not be all-ones (null)");
911+
912+
auto alignmentValue = create<cir::ConstantOp>(
913+
loc, cir::IntAttr::get(getUInt32Ty(), alignment.value()));
914+
915+
mlir::Value ops[] = {val, ptr, alignmentValue, mask};
916+
917+
return create<cir::LLVMIntrinsicCallOp>(loc, getStringAttr("masked.store"),
918+
getVoidTy(), ops)
919+
.getResult();
920+
}
921+
897922
cir::VecShuffleOp
898923
createVecShuffle(mlir::Location loc, mlir::Value vec1, mlir::Value vec2,
899924
llvm::ArrayRef<mlir::Attribute> maskAttrs) {

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,40 @@ static int64_t getIntValueFromConstOp(mlir::Value val) {
7474
.getSExtValue();
7575
}
7676

77+
// Convert the mask from an integer type to a vector of i1.
78+
static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask,
79+
unsigned numElts, mlir::Location loc) {
80+
cir::VectorType maskTy =
81+
cir::VectorType::get(cgf.getBuilder().getSIntNTy(1),
82+
cast<cir::IntType>(mask.getType()).getWidth());
83+
84+
mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy);
85+
86+
// If we have less than 8 elements, then the starting mask was an i8 and
87+
// we need to extract down to the right number of elements.
88+
if (numElts < 8) {
89+
llvm::SmallVector<int64_t, 4> indices;
90+
for (unsigned i = 0; i != numElts; ++i)
91+
indices.push_back(i);
92+
maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, indices);
93+
}
94+
95+
return maskVec;
96+
}
97+
98+
static mlir::Value emitX86MaskedStore(CIRGenFunction &cgf,
99+
ArrayRef<mlir::Value> ops,
100+
llvm::Align alignment,
101+
mlir::Location loc) {
102+
mlir::Value ptr = ops[0];
103+
104+
mlir::Value maskVec = getMaskVecValue(
105+
cgf, ops[2], cast<cir::VectorType>(ops[1].getType()).getSize(), loc);
106+
107+
return cgf.getBuilder().createMaskedStore(loc, ops[1], ptr, alignment,
108+
maskVec);
109+
}
110+
77111
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
78112
const CallExpr *E) {
79113
if (BuiltinID == Builtin::BI__builtin_cpu_is)
@@ -368,5 +402,31 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
368402
builder.getStringAttr("x86.xgetbv"),
369403
builder.getUInt64Ty(), Ops)
370404
.getResult();
405+
case X86::BI__builtin_ia32_storedqudi128_mask:
406+
case X86::BI__builtin_ia32_storedqusi128_mask:
407+
case X86::BI__builtin_ia32_storedquhi128_mask:
408+
case X86::BI__builtin_ia32_storedquqi128_mask:
409+
case X86::BI__builtin_ia32_storeupd128_mask:
410+
case X86::BI__builtin_ia32_storeups128_mask:
411+
case X86::BI__builtin_ia32_storedqudi256_mask:
412+
case X86::BI__builtin_ia32_storedqusi256_mask:
413+
case X86::BI__builtin_ia32_storedquhi256_mask:
414+
case X86::BI__builtin_ia32_storedquqi256_mask:
415+
case X86::BI__builtin_ia32_storeupd256_mask:
416+
case X86::BI__builtin_ia32_storeups256_mask:
417+
case X86::BI__builtin_ia32_storedqudi512_mask:
418+
case X86::BI__builtin_ia32_storedqusi512_mask:
419+
case X86::BI__builtin_ia32_storedquhi512_mask:
420+
case X86::BI__builtin_ia32_storedquqi512_mask:
421+
case X86::BI__builtin_ia32_storeupd512_mask:
422+
case X86::BI__builtin_ia32_storeups512_mask:
423+
return emitX86MaskedStore(*this, Ops, llvm::Align(1),
424+
getLoc(E->getExprLoc()));
425+
case X86::BI__builtin_ia32_storesbf16128_mask:
426+
case X86::BI__builtin_ia32_storesh128_mask:
427+
case X86::BI__builtin_ia32_storess128_mask:
428+
case X86::BI__builtin_ia32_storesd128_mask:
429+
return emitX86MaskedStore(*this, Ops, llvm::Align(1),
430+
getLoc(E->getExprLoc()));
371431
}
372432
}

clang/lib/CIR/CodeGen/CIRGenFunction.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1416,7 +1416,7 @@ void CIRGenFunction::StartFunction(GlobalDecl gd, QualType retTy,
14161416
if (CurFuncDecl)
14171417
if ([[maybe_unused]] const auto *vecWidth =
14181418
CurFuncDecl->getAttr<MinVectorWidthAttr>())
1419-
llvm_unreachable("NYI");
1419+
LargestVectorWidth = vecWidth->getVectorWidth();
14201420

14211421
if (CGM.shouldEmitConvergenceTokens())
14221422
llvm_unreachable("NYI");
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-256 -fclangir -emit-cir -o %t.cir -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-256 -fclangir -emit-llvm -o %t.ll -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
#include <immintrin.h>
7+
8+
void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) {
9+
// CIR-LABEL: _mm_mask_store_sbh
10+
// CIR: cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.bf16 x 8>, !cir.ptr<!cir.vector<!cir.bf16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
11+
12+
// LLVM-LABEL: @test_mm_mask_store_sbh
13+
// LLVM: call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
14+
_mm_mask_store_sbh(__P, __U, __A);
15+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
#include <immintrin.h>
12+
13+
void test_mm512_mask_storeu_epi16(void *__P, __mmask32 __U, __m512i __A) {
14+
// CIR-LABEL: _mm512_mask_storeu_epi16
15+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s16i x 32>, !cir.ptr<!cir.vector<!s16i x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>) -> !void
16+
17+
// LLVM-LABEL: @test_mm512_mask_storeu_epi16
18+
// LLVM: @llvm.masked.store.v32i16.p0(<32 x i16> %{{.*}}, ptr %{{.*}}, i32 1, <32 x i1> %{{.*}})
19+
return _mm512_mask_storeu_epi16(__P, __U, __A);
20+
}
21+
22+
void test_mm512_mask_storeu_epi8(void *__P, __mmask64 __U, __m512i __A) {
23+
// CIR-LABEL: _mm512_mask_storeu_epi8
24+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<{{!s8i|!u8i}} x 64>, !cir.ptr<!cir.vector<{{!s8i|!u8i}} x 64>>, !u32i, !cir.vector<!cir.int<s, 1> x 64>) -> !void
25+
26+
// LLVM-LABEL: @test_mm512_mask_storeu_epi8
27+
// LLVM: @llvm.masked.store.v64i8.p0(<64 x i8> %{{.*}}, ptr %{{.*}}, i32 1, <64 x i1> %{{.*}})
28+
return _mm512_mask_storeu_epi8(__P, __U, __A);
29+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
#include <immintrin.h>
12+
13+
void test_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) {
14+
// CIR-LABEL: _mm512_mask_storeu_epi64
15+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 8>, !cir.ptr<!s64i>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
16+
17+
// LLVM-LABEL: test_mm512_mask_storeu_epi64
18+
// LLVM: @llvm.masked.store.v8i64.p0(<8 x i64> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
19+
return _mm512_mask_storeu_epi64(__P, __U, __A);
20+
}
21+
22+
void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) {
23+
// CIR-LABEL: _mm512_mask_storeu_epi32
24+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 16>, !cir.ptr<!s32i>, !u32i, !cir.vector<!cir.int<s, 1> x 16>) -> !void
25+
26+
// LLVM-LABEL: test_mm512_mask_storeu_epi32
27+
// LLVM: @llvm.masked.store.v16i32.p0(<16 x i32> %{{.*}}, ptr %{{.*}}, i32 1, <16 x i1> %{{.*}})
28+
return _mm512_mask_storeu_epi32(__P, __U, __A);
29+
}
30+
31+
void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A){
32+
// CIR-LABEL: _mm_mask_store_ss
33+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
34+
35+
// LLVM-LABEL: test_mm_mask_store_ss
36+
// LLVM: call void @llvm.masked.store.v4f32.p0(<4 x float> %{{.*}}, ptr %{{.*}}, i32 1, <4 x i1> %{{.*}})
37+
38+
_mm_mask_store_ss(__P, __U, __A);
39+
}
40+
41+
void test_mm_mask_store_sd(double * __P, __mmask8 __U, __m128d __A){
42+
// CIR-LABEL: _mm_mask_store_sd
43+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>) -> !void
44+
45+
// LLVM-LABEL: test_mm_mask_store_sd
46+
// LLVM: call void @llvm.masked.store.v2f64.p0(<2 x double> %{{.*}}, ptr %{{.*}}, i32 1, <2 x i1> %{{.*}})
47+
_mm_mask_store_sd(__P, __U, __A);
48+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll -Wall -Werror
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
7+
#include <immintrin.h>
8+
9+
void test_mm_mask_store_sh(void *__P, __mmask8 __U, __m128h __A) {
10+
// CIR-LABEL: _mm_mask_store_sh
11+
// CIR: cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.f16 x 8>, !cir.ptr<!cir.vector<!cir.f16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
12+
13+
// LLVM-LABEL: @test_mm_mask_store_sh
14+
// LLVM: call void @llvm.masked.store.v8f16.p0(<8 x half> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
15+
_mm_mask_store_sh(__P, __U, __A);
16+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
7+
#include <immintrin.h>
8+
9+
void test_mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A) {
10+
// CIR-LABEL: _mm_mask_storeu_epi64
11+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>)
12+
13+
// LLVM-LABEL: @test_mm_mask_storeu_epi64
14+
// LLVM: @llvm.masked.store.v2i64.p0(<2 x i64> %{{.*}}, ptr %{{.*}}, i32 1, <2 x i1> %{{.*}})
15+
return _mm_mask_storeu_epi64(__P, __U, __A);
16+
}
17+
18+
void test_mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A) {
19+
// CIR-LABEL: _mm_mask_storeu_epi32
20+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>)
21+
22+
// LLVM-LABEL: @test_mm_mask_storeu_epi32
23+
// LLVM: @llvm.masked.store.v4i32.p0(<4 x i32> %{{.*}}, ptr %{{.*}}, i32 1, <4 x i1> %{{.*}})
24+
return _mm_mask_storeu_epi32(__P, __U, __A);
25+
}
26+
27+
void test_mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A) {
28+
// CIR-LABEL: _mm_mask_storeu_pd
29+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>)
30+
31+
// LLVM-LABEL: @test_mm_mask_storeu_pd
32+
// LLVM: @llvm.masked.store.v2f64.p0(<2 x double> %{{.*}}, ptr %{{.*}}, i32 1, <2 x i1> %{{.*}})
33+
return _mm_mask_storeu_pd(__P, __U, __A);
34+
}
35+
36+
void test_mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A) {
37+
// CIR-LABEL: _mm_mask_storeu_ps
38+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>)
39+
40+
// LLVM-LABEL: @test_mm_mask_storeu_ps
41+
// LLVM: @llvm.masked.store.v4f32.p0(<4 x float> %{{.*}}, ptr %{{.*}}, i32 1, <4 x i1> %{{.*}})
42+
return _mm_mask_storeu_ps(__P, __U, __A);
43+
}
44+
45+
void test_mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A) {
46+
// CIR-LABEL: _mm256_mask_storeu_epi32
47+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 8>, !cir.ptr<!cir.vector<!s32i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>)
48+
49+
// LLVM-LABEL: @test_mm256_mask_storeu_epi32
50+
// LLVM: @llvm.masked.store.v8i32.p0(<8 x i32> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
51+
return _mm256_mask_storeu_epi32(__P, __U, __A);
52+
}
53+
54+
void test_mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A) {
55+
// CIR-LABEL: _mm256_mask_storeu_epi64
56+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 4>, !cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>)
57+
58+
// LLVM-LABEL: @test_mm256_mask_storeu_epi64
59+
// LLVM: @llvm.masked.store.v4i64.p0(<4 x i64> %{{.*}}, ptr %{{.*}}, i32 1, <4 x i1> %{{.*}})
60+
return _mm256_mask_storeu_epi64(__P, __U, __A);
61+
}
62+
63+
void test_mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A) {
64+
// CIR-LABEL: _mm256_mask_storeu_ps
65+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 8>, !cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
66+
67+
// LLVM-LABEL: @test_mm256_mask_storeu_ps
68+
// LLVM: @llvm.masked.store.v8f32.p0(<8 x float> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
69+
return _mm256_mask_storeu_ps(__P, __U, __A);
70+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
12+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
13+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
14+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
15+
16+
17+
#include <immintrin.h>
18+
19+
void test_mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A) {
20+
// CIR-LABEL: _mm_mask_storeu_epi16
21+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>)
22+
23+
// LLVM-LABEL: @test_mm_mask_storeu_epi16
24+
// LLVM: @llvm.masked.store.v8i16.p0(<8 x i16> %{{.*}}, ptr %{{.*}}, i32 1, <8 x i1> %{{.*}})
25+
return _mm_mask_storeu_epi16(__P, __U, __A);
26+
}
27+
28+
void test_mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A) {
29+
// CIR-LABEL: _mm_mask_storeu_epi8
30+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<{{!s8i|!u8i}} x 16>, !cir.ptr<!cir.vector<{{!s8i|!u8i}} x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>)
31+
32+
// LLVM-LABEL: @test_mm_mask_storeu_epi8
33+
// LLVM: @llvm.masked.store.v16i8.p0(<16 x i8> %{{.*}}, ptr %{{.*}}, i32 1, <16 x i1> %{{.*}})
34+
return _mm_mask_storeu_epi8(__P, __U, __A);
35+
}
36+
37+
void test_mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A) {
38+
// CIR-LABEL: _mm256_mask_storeu_epi8
39+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<{{!s8i|!u8i}} x 32>, !cir.ptr<!cir.vector<{{!s8i|!u8i}} x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>) -> !void
40+
41+
// LLVM-LABEL: @test_mm256_mask_storeu_epi8
42+
// LLVM: @llvm.masked.store.v32i8.p0(<32 x i8> %{{.*}}, ptr %{{.*}}, i32 1, <32 x i1> %{{.*}})
43+
return _mm256_mask_storeu_epi8(__P, __U, __A);
44+
}
45+
46+
void test_mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A) {
47+
// CIR-LABEL: _mm256_mask_storeu_pd
48+
// CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 4>, !cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
49+
50+
// LLVM-LABEL: @test_mm256_mask_storeu_pd
51+
// LLVM: @llvm.masked.store.v4f64.p0(<4 x double> %{{.*}}, ptr %{{.*}}, i32 1, <4 x i1> %{{.*}})
52+
return _mm256_mask_storeu_pd(__P, __U, __A);
53+
}

0 commit comments

Comments
 (0)