Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ Bug Fixes to Attribute Support
- Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
- Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075)
- Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905)

Bug Fixes to C++ Support
^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1599,7 +1599,7 @@ def CUDAShared : InheritableAttr {
}
def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;

def DeviceKernel : DeclOrTypeAttr {
def DeviceKernel : InheritableAttr {
let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/AST/TypePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
}
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
case attr::DeviceKernel:
OS << T->getAttr()->getSpelling();
break;
case attr::IntelOclBicc:
OS << "inteloclbicc";
break;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
// a host function.
if (HostTarget)
return HostTarget->checkCallingConvention(CC);
return CCCR_Warning;
return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning;
}

bool hasBitIntType() const override { return true; }
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
return;

const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD)
if (FD) {
setFunctionDeclAttributes(FD, F, M);

if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
}
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
Expand Down
33 changes: 30 additions & 3 deletions clang/lib/CodeGen/Targets/SPIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
QualType SampledType, CodeGenModule &CGM) const;
void
setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
Expand Down Expand Up @@ -240,6 +242,26 @@ void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
FT, FT->getExtInfo().withCallingConv(CC_SpirFunction));
}

void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (M.getLangOpts().OpenCL)
return;

if (GV->isDeclaration())
return;

llvm::Function *F = dyn_cast<llvm::Function>(GV);
if (!F)
return;

const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
if (!FD)
return;

if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(getDeviceKernelCallingConv());
}

LangAS
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
Expand All @@ -264,9 +286,6 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,

void SPIRVTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (!M.getLangOpts().HIP ||
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
return;
if (GV->isDeclaration())
return;

Expand All @@ -277,6 +296,14 @@ void SPIRVTargetCodeGenInfo::setTargetAttributes(
auto FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;

if (FD->hasAttr<DeviceKernelAttr>())
F->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the attribute called sycl_kernel? I thought that was a little strange since the others use the architecture name, not the language.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if I understand the question but there is no SYCL-specific calling convention, ex1, ex2. There is another alias (aka spelling) of the clang attribute but we have no need to check which specific spelling it's using here.


if (!M.getLangOpts().HIP ||
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
return;

if (!FD->hasAttr<CUDAGlobalAttr>())
return;

Expand Down
36 changes: 33 additions & 3 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5204,25 +5204,55 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
if (S.getLangOpts().SYCLIsDevice) {
llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple();
const LangOptions &LangOpts = S.getLangOpts();

if (LangOpts.SYCLIsDevice) {
if (!IsFunctionTemplate) {
S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
<< AL << AL.isRegularKeywordAttribute() << "function templates";
AL.setInvalid();
return;
} else {
S.SYCL().handleKernelAttr(D, AL);
}
} else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
} else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
AL.setInvalid();

return;
} else if (Triple.isNVPTX()) {
handleGlobalAttr(S, D, AL);
} else {
// OpenCL C++ will throw a more specific error.
if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
<< AL << AL.isRegularKeywordAttribute() << "functions";
AL.setInvalid();
return;
}
handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
}
// TODO: isGPU() should probably return true for SPIR.
bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() ||
LangOpts.isTargetDevice() || LangOpts.OpenCL;
bool IsAMDGPUMismatch =
DeviceKernelAttr::isAMDGPUSpelling(AL) && !Triple.isAMDGPU();
bool IsNVPTXMismatch =
DeviceKernelAttr::isNVPTXSpelling(AL) && !Triple.isNVPTX();
if (IsAMDGPUMismatch || IsNVPTXMismatch || !TargetDeviceEnvironment) {
Comment on lines +5234 to +5241
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly I'm wondering what we should do here. Realistically it would make sense for these 'named' attributes to just be legacy aliases to device_kernel. I don't see any real value in keeping separate names unless there are special semantics that I'm unaware of. Ever since PTX moved over, this attribute is more plainly "set the kernel calling convention on this function."

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would definitely be simpler to have them as aliases. I'm happy to implement whatever the consensus is, so others feel free to drop in.

// While both are just different spellings of the same underlying
// attribute, it makes more sense to the user if amdgpu_kernel can only
// be used on AMDGPU and the equivalent for NVPTX, so warn and ignore
// the attribute if there's a mismatch.
// Also warn if this is not an environment where a device kernel makes
// sense.
S.Diag(AL.getLoc(), diag::warn_cconv_unsupported)
<< AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget;
AL.setInvalid();
return;
}

// Make sure we validate the CC with the target
// and warn/error if necessary.
handleCallConvAttr(S, D, AL);
Expand Down
18 changes: 2 additions & 16 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_VectorCall: \
Copy link
Member Author

@sarnex sarnex Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think clang-format is wrong so I'm planning on ignoring it (see above GH comment from the bot, not this code).
Let me know if you disagree and I'm happy to fix it in this PR or separately.

case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
case ParsedAttr::AT_DeviceKernel: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
Expand Down Expand Up @@ -3781,7 +3780,8 @@ static CallingConv getCCForDeclaratorChunk(
}
}
if (!S.getLangOpts().isSYCL()) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
D.getDeclSpec().getAttributes(), D.getAttributes())) {
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
CC = CC_DeviceKernel;
break;
Expand Down Expand Up @@ -7565,8 +7565,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_ArmStreaming:
return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
case ParsedAttr::AT_DeviceKernel:
return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
Expand Down Expand Up @@ -8805,16 +8803,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
}
}

static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
// The DeviceKernel attribute is shared for many targets, and
// it is only allowed to be a type attribute with the AMDGPU
// spelling, so skip processing the attr as a type attr
// unless it has that spelling.
if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
return true;
return DeviceKernelAttr::isAMDGPUSpelling(Attr);
}

static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs,
Expand Down Expand Up @@ -9068,8 +9056,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
break;
[[fallthrough]];
FUNCTION_TYPE_ATTRS_CASELIST:
if (!isMultiSubjectAttrAllowedOnType(attr))
break;

attr.setUsedAsTypeAttr();

Expand Down
24 changes: 24 additions & 0 deletions clang/test/Sema/callingconv-devicekernel.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-AMDGPU %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-NVPTX %s
// RUN: %clang_cc1 -triple spir64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s
// RUN: %clang_cc1 -triple spirv64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s

// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel1()
// CHECK-NVPTX-DAG: ptx_kernel void @kernel1()
// CHECK-SPIR-DAG: spir_kernel void @kernel1()
[[clang::device_kernel]] void kernel1() {}

// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel2()
// CHECK-NVPTX-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target
// CHECK-SPIR-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target
[[clang::amdgpu_kernel]] void kernel2() {}

// CHECK-AMDGPU-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target
// CHECK-NVPTX-DAG: ptx_kernel void @kernel3()
// CHECK-SPIR-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target
[[clang::nvptx_kernel]] void kernel3() {}

// CHECK-AMDGPU-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
// CHECK-NVPTX-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
// CHECK-SPIR-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
[[clang::sycl_kernel]] void kernel4() {}
4 changes: 4 additions & 0 deletions clang/test/Sema/callingconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aar
int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}}

int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}}
int __attribute__((device_kernel)) device_kernel(void) { // expected-warning {{'device_kernel' calling convention is not supported for this target}}
}
int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning {{'sycl_kernel' attribute ignored}}
}

// PR6361
void ctest3();
Expand Down
Loading