Skip to content

Commit 24937ea

Browse files
[SYCL] Allow __builtin_assume_aligned to be called from device code. (#6705)
Prior to this PR, a direct call to `__builtin_assume_aligned` from within device code produced the following error: `error: SYCL kernel cannot call a variadic function`. This PR allows `__builtin_assume_aligned` to be invoked from within device code. The motivation is that when, for example, a `float*` pointer is loaded from a data structure, the compiler has no way of knowing the underlying alignment of the data the pointer points to. It therefore cannot combine the loads/stores, and one ends up with many unnecessary load/store instructions, lots of unnecessary pointer arithmetic, and increased register pressure. We see this all over the place in our kernels. With `__builtin_assume_aligned` we can tell the compiler that it is safe to assume a certain alignment, thereby implicitly enabling coalescing of the memory accesses.
1 parent 2086035 commit 24937ea

File tree

4 files changed

+31
-4
lines changed

4 files changed

+31
-4
lines changed

clang/include/clang/Sema/Sema.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13982,7 +13982,7 @@ class Sema final {
1398213982
KernelConstStaticVariable
1398313983
};
1398413984

13985-
bool isKnownGoodSYCLDecl(const Decl *D);
13985+
bool isDeclAllowedInSYCLDeviceCode(const Decl *D);
1398613986
void checkSYCLDeviceVarDecl(VarDecl *Var);
1398713987
void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj);
1398813988
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);

clang/lib/Sema/SemaChecking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5967,8 +5967,8 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto,
59675967
CheckSYCLKernelCall(FD, Range, Args);
59685968

59695969
// Diagnose variadic calls in SYCL.
5970-
if (FD && FD ->isVariadic() && getLangOpts().SYCLIsDevice &&
5971-
!isUnevaluatedContext() && !isKnownGoodSYCLDecl(FD))
5970+
if (FD && FD->isVariadic() && getLangOpts().SYCLIsDevice &&
5971+
!isUnevaluatedContext() && !isDeclAllowedInSYCLDeviceCode(FD))
59725972
SYCLDiagIfDeviceCode(Loc, diag::err_sycl_restrict)
59735973
<< Sema::KernelCallVariadicFunction;
59745974
}

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,9 +364,15 @@ static bool IsSyclMathFunc(unsigned BuiltinID) {
364364
return true;
365365
}
366366

367-
bool Sema::isKnownGoodSYCLDecl(const Decl *D) {
367+
bool Sema::isDeclAllowedInSYCLDeviceCode(const Decl *D) {
368368
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
369369
const IdentifierInfo *II = FD->getIdentifier();
370+
371+
// Allow __builtin_assume_aligned to be called from within device code.
372+
if (FD->getBuiltinID() &&
373+
FD->getBuiltinID() == Builtin::BI__builtin_assume_aligned)
374+
return true;
375+
370376
// Allow to use `::printf` only for CUDA.
371377
if (Context.getTargetInfo().getTriple().isNVPTX()) {
372378
if (FD->getBuiltinID() == Builtin::BIprintf)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -verify -fsyntax-only %s
2+
// This test checks that __builtin_assume_aligned does not produce an error when
3+
// called from within device code.
4+
5+
#include "sycl.hpp"
6+
7+
using namespace sycl;
8+
queue q;
9+
10+
int main() {
11+
int *Ptr[2];
12+
// expected-no-diagnostics
13+
q.submit([&](handler &h) {
14+
h.single_task<class kernelA>([=]() {
15+
int *APtr = (int *)__builtin_assume_aligned(Ptr, 32);
16+
*APtr = 42;
17+
});
18+
});
19+
return 0;
20+
}
21+

0 commit comments

Comments
 (0)