Skip to content

Commit 17cc1dc

Browse files
authored
[Clang] Update __builtin_masked_load to accept passthrough argument (#155652)
Summary: It's important to be able to define the result of the masked-off lanes, so this change adds that as an optional argument to the builtin.
1 parent 6f0253b commit 17cc1dc

File tree

5 files changed

+44
-4
lines changed

5 files changed

+44
-4
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,9 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
946946

947947
Each builtin accesses memory according to a provided boolean mask. These are
948948
provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first
949-
argument is always boolean mask vector.
949+
argument is always a boolean mask vector. The ``__builtin_masked_load`` builtin
950+
takes an optional third vector argument that will be used for the result of the
951+
masked-off lanes. These builtins assume the memory is always aligned.
950952

951953
Example:
952954

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4281,6 +4281,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
42814281
llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
42824282

42834283
llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
4284+
if (E->getNumArgs() > 2)
4285+
PassThru = EmitScalarExpr(E->getArg(2));
42844286

42854287
Function *F =
42864288
CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy});

clang/lib/Sema/SemaChecking.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2282,7 +2282,7 @@ static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
22822282
}
22832283

22842284
static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) {
2285-
if (S.checkArgCount(TheCall, 2))
2285+
if (S.checkArgCountRange(TheCall, 2, 3))
22862286
return ExprError();
22872287

22882288
Expr *MaskArg = TheCall->getArg(0);
@@ -2295,6 +2295,15 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) {
22952295
QualType PointeeTy = PtrTy->getPointeeType();
22962296
const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
22972297
const VectorType *DataVecTy = PointeeTy->getAs<VectorType>();
2298+
2299+
if (TheCall->getNumArgs() == 3) {
2300+
Expr *PassThruArg = TheCall->getArg(2);
2301+
QualType PassThruTy = PassThruArg->getType();
2302+
if (!S.Context.hasSameType(PassThruTy, PointeeTy))
2303+
return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
2304+
<< /* third argument */ 3 << PointeeTy;
2305+
}
2306+
22982307
if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
22992308
return ExprError(
23002309
S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)

clang/test/CodeGen/builtin-masked.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,32 @@ v8i test_load(v8b m, v8i *p) {
2626
return __builtin_masked_load(m, p);
2727
}
2828

29+
// CHECK-LABEL: define dso_local <8 x i32> @test_load_passthru(
30+
// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]]) #[[ATTR0]] {
31+
// CHECK-NEXT: [[ENTRY:.*:]]
32+
// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1
33+
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1
34+
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
35+
// CHECK-NEXT: [[T_ADDR:%.*]] = alloca <8 x i32>, align 32
36+
// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1
37+
// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1
38+
// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
39+
// CHECK-NEXT: [[T:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
40+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8
41+
// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1
42+
// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
43+
// CHECK-NEXT: store <8 x i32> [[T]], ptr [[T_ADDR]], align 32
44+
// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
45+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
46+
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
47+
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32
48+
// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 32, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]])
49+
// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]]
50+
//
51+
v8i test_load_passthru(v8b m, v8i *p, v8i t) {
52+
return __builtin_masked_load(m, p, t);
53+
}
54+
2955
// CHECK-LABEL: define dso_local void @test_store(
3056
// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR2:[0-9]+]] {
3157
// CHECK-NEXT: [[ENTRY:.*:]]

clang/test/Sema/builtin-masked.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ typedef _Bool v8b __attribute__((ext_vector_type(8)));
55
typedef _Bool v2b __attribute__((ext_vector_type(2)));
66
typedef float v8f __attribute__((ext_vector_type(8)));
77

8-
void test_masked_load(v8i *pf, v8b mask, v2b mask2) {
8+
void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
99
(void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
10-
(void)__builtin_masked_load(mask, pf, pf); // expected-error {{too many arguments to function call, expected 2, have 3}}
10+
(void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}}
1111
(void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}}
1212
(void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
1313
(void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}}
14+
(void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}}
1415
(void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}}
1516
}
1617

0 commit comments

Comments
 (0)