Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ Bug Fixes to Attribute Support
- Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
- Fix a crash when the function name is empty in the ``swift_name`` attribute. (#GH157075)
- Fix crashes or missing diagnostics with the ``device_kernel`` attribute. (#GH161905)

Bug Fixes to C++ Support
^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1606,7 +1606,7 @@ def SYCLKernel : InheritableAttr {
let Documentation = [SYCLKernelDocs];
}

def DeviceKernel : DeclOrTypeAttr {
def DeviceKernel : InheritableAttr {
let Spellings = [Clang<"device_kernel">,
Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -4126,6 +4126,9 @@ def warn_missing_sdksettings_for_availability_checking : Warning<
"%0 availability is ignored without a valid 'SDKSettings.json' in the SDK">,
InGroup<DiagGroup<"ignored-availability-without-sdk-settings">>;

def err_hidden_device_kernel
: Error<"%0 is specified as a device kernel but it is not externally visible">;

// Thread Safety Attributes
def warn_thread_attribute_ignored : Warning<
"ignoring %0 attribute because its argument is invalid">,
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/AST/TypePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
}
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
case attr::DeviceKernel:
OS << T->getAttr()->getSpelling();
break;
case attr::IntelOclBicc:
OS << "inteloclbicc";
break;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
// a host function.
if (HostTarget)
return HostTarget->checkCallingConvention(CC);
return CCCR_Warning;
return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning;
}

bool hasBitIntType() const override { return true; }
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
return;

const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD)
if (FD) {
setFunctionDeclAttributes(FD, F, M);

if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
F->setCallingConv(getDeviceKernelCallingConv());
}
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
Expand Down Expand Up @@ -634,7 +636,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
// kernel address (only the kernel descriptor).
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
&Mod);
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
F->setCallingConv(getDeviceKernelCallingConv());

llvm::AttrBuilder KernelAttrs(C);
// FIXME: The invoke isn't applying the right attributes either
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
if (FD->hasAttr<CUDAGlobalAttr>()) {
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
F->setCallingConv(getDeviceKernelCallingConv());

for (auto IV : llvm::enumerate(FD->parameters()))
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
Expand All @@ -278,7 +278,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
// Attach kernel metadata directly if compiling for NVPTX.
if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
F->setCallingConv(getDeviceKernelCallingConv());
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
Expand Down
36 changes: 29 additions & 7 deletions clang/lib/CodeGen/Targets/SPIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T,
QualType QT) const override;
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
Expand Down Expand Up @@ -266,6 +268,22 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::ConstantPointerNull::get(NPT), PT);
}

void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (M.getLangOpts().OpenCL || GV->isDeclaration())
return;

const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
if (!FD)
return;

llvm::Function *F = dyn_cast<llvm::Function>(GV);
assert(F && "Expected GlobalValue to be a Function");

if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(getDeviceKernelCallingConv());
}

LangAS
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
Expand All @@ -290,19 +308,23 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,

void SPIRVTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (!M.getLangOpts().HIP ||
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
return;
if (GV->isDeclaration())
return;

auto F = dyn_cast<llvm::Function>(GV);
if (!F)
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;

auto FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
llvm::Function *F = dyn_cast<llvm::Function>(GV);
assert(F && "Expected GlobalValue to be a Function");

if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(getDeviceKernelCallingConv());

if (!M.getLangOpts().HIP ||
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
return;

if (!FD->hasAttr<CUDAGlobalAttr>())
return;

Expand Down
24 changes: 22 additions & 2 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5204,16 +5204,36 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple();
const LangOptions &LangOpts = S.getLangOpts();
// OpenCL has its own error messages.
if (!LangOpts.OpenCL && FD && !FD->isExternallyVisible()) {
S.Diag(AL.getLoc(), diag::err_hidden_device_kernel) << FD;
AL.setInvalid();
return;
}
if (Triple.isNVPTX()) {
handleGlobalAttr(S, D, AL);
} else {
// OpenCL C++ will throw a more specific error.
if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
<< AL << AL.isRegularKeywordAttribute() << "functions";
AL.setInvalid();
return;
}
handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
}
// TODO: isGPU() should probably return true for SPIR.
bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() ||
LangOpts.isTargetDevice() || LangOpts.OpenCL;
if (!TargetDeviceEnvironment) {
S.Diag(AL.getLoc(), diag::warn_cconv_unsupported)
<< AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget;
AL.setInvalid();
return;
}

// Make sure we validate the CC with the target
// and warn/error if necessary.
handleCallConvAttr(S, D, AL);
Expand Down
18 changes: 2 additions & 16 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_VectorCall: \
Copy link
Member Author

@sarnex sarnex Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think clang-format is wrong so I'm planning on ignoring it (see above GH comment from the bot, not this code).
Let me know if you disagree and I'm happy to fix it in this PR or separately.

case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
case ParsedAttr::AT_DeviceKernel: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
Expand Down Expand Up @@ -3780,7 +3779,8 @@ static CallingConv getCCForDeclaratorChunk(
}
}
}
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
D.getDeclSpec().getAttributes(), D.getAttributes())) {
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
CC = CC_DeviceKernel;
break;
Expand Down Expand Up @@ -7563,8 +7563,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_ArmStreaming:
return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
case ParsedAttr::AT_DeviceKernel:
return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
Expand Down Expand Up @@ -8803,16 +8801,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
}
}

/// Returns whether \p Attr may be processed as a type attribute.
///
/// Every attribute kind other than DeviceKernel is allowed. DeviceKernel is
/// shared between many targets and is only allowed as a type attribute when
/// written with the AMDGPU spelling.
static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
  const bool IsDeviceKernel = Attr.getKind() == ParsedAttr::AT_DeviceKernel;
  return !IsDeviceKernel || DeviceKernelAttr::isAMDGPUSpelling(Attr);
}

static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs,
Expand Down Expand Up @@ -9066,8 +9054,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
break;
[[fallthrough]];
FUNCTION_TYPE_ATTRS_CASELIST:
if (!isMultiSubjectAttrAllowedOnType(attr))
break;

attr.setUsedAsTypeAttr();

Expand Down
16 changes: 16 additions & 0 deletions clang/test/Sema/callingconv-devicekernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple spir64 -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple spirv64 -fsyntax-only -verify %s

// Externally visible function at global scope: the attribute is accepted and
// no diagnostic is produced.
[[clang::device_kernel]] void kernel1() {}

// A function in an anonymous namespace is not externally visible, so marking
// it as a device kernel is an error.
namespace {
[[clang::device_kernel]] void kernel2() {} // expected-error {{'kernel2' is specified as a device kernel but it is not externally visible}}
}

// A named namespace does not hide the function: the attribute is accepted and
// no diagnostic is produced.
namespace ns {
[[clang::device_kernel]] void kernel3() {}
}

// A static function is not externally visible, so marking it as a device
// kernel is an error.
[[clang::device_kernel]] static void kernel4() {} // expected-error {{'kernel4' is specified as a device kernel but it is not externally visible}}
4 changes: 4 additions & 0 deletions clang/test/Sema/callingconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aar
int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}}

int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}}
int __attribute__((device_kernel)) device_kernel(void) { // expected-warning {{'device_kernel' calling convention is not supported for this target}}
}
int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning {{'sycl_kernel' attribute ignored}}
}

// PR6361
void ctest3();
Expand Down
Loading