Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/AST/GlobalDecl.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class GlobalDecl {
}

static KernelReferenceKind getDefaultKernelReference(const FunctionDecl *D) {
return (D->hasAttr<OpenCLKernelAttr>() || D->getLangOpts().CUDAIsDevice)
return (D->hasAttr<DeviceKernelAttr>() || D->getLangOpts().CUDAIsDevice)
? KernelReferenceKind::Kernel
: KernelReferenceKind::Stub;
}
Expand Down
66 changes: 40 additions & 26 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,9 @@ def FunctionPointer : SubsetSubject<DeclBase,
"functions pointers">;

def OpenCLKernelFunction
: SubsetSubject<Function, [{S->hasAttr<OpenCLKernelAttr>()}],
"kernel functions">;
: SubsetSubject<Function, [{S->getASTContext().getLangOpts().OpenCL &&
S->hasAttr<DeviceKernelAttr>()}],
"kernel functions">;

// HasFunctionProto is a more strict version of FunctionLike, so it should
// never be specified in a Subjects list along with FunctionLike (due to the
Expand Down Expand Up @@ -1498,12 +1499,6 @@ def CUDAGridConstant : InheritableAttr {
let Documentation = [CUDAGridConstantAttrDocs];
}

def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
let Spellings = [Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
let Documentation = [Undocumented];
}

def HIPManaged : InheritableAttr {
let Spellings = [GNU<"managed">, Declspec<"__managed__">];
let Subjects = SubjectList<[Var]>;
Expand Down Expand Up @@ -1538,11 +1533,44 @@ def CUDAShared : InheritableAttr {
}
def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;

def SYCLKernel : InheritableAttr {
let Spellings = [Clang<"sycl_kernel">];
let Subjects = SubjectList<[FunctionTmpl]>;
let LangOpts = [SYCLDevice];
def DeviceKernel : DeclOrTypeAttr {
let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
let LangOpts = [];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can drop this entirely.

let Documentation = [SYCLKernelDocs];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect we'll need to update the documentation because it's now going to list all of the various kernel attributes under SYCL kernel docs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, will do, thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made an attempt in the latest commit I just pushed here. SYCL was the only documented one previously and I didn't want to be stuck having to document all the others, so hopefully the way I did it makes sense.

let AdditionalMembers =
[{
inline bool isAMDGPUSpelling() const {
return isAMDGPUSpelling(*this);
}
template<typename T>
static inline bool isAMDGPUSpelling(const T& Attr) {
return Attr.getAttrName()->getName() == "amdgpu_kernel";
}
inline bool isNVPTXSpelling() const {
return isNVPTXSpelling(*this);
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's an easier way to do this, I think. We have:

  let Accessors = [Accessor<"isAMDGPU", [Clang<"amdgpu_kernel">]>,
                   Accessor<"isNVPTX", [Clang<"nvptx_kernel">]>];

which generates an accessor method for you which returns true/false based on the spelling.

Copy link
Member Author

@sarnex sarnex May 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah cool, thanks! Will do! I think I still need some of the methods to deal with the fact that ParsedAttr isn't a subclass of Attr but also has a getAttrName() function, but maybe we can get rid of half of them.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this, but we actually need to check the spelling mostly for ParsedAttr, so the accessors here won't help. I implemented Erich's idea to use the enum instead of the string. Let me know if you have some tablegen magic that we can use!

template<typename T>
static inline bool isNVPTXSpelling(const T& Attr) {
return Attr.getAttrName()->getName() == "nvptx_kernel";
}
inline bool isOpenCLSpelling() const {
return isOpenCLSpelling(*this);
}
template<typename T>
static inline bool isOpenCLSpelling(const T& Attr) {
return Attr.getAttrName()->getName() == "kernel" ||
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even without the Accessors thing that Aaron suggests, you can check the spelling-id vs the generated enums which is better than string compares.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will try it if I hit an issue with Aaron's suggestion, thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up doing it this way using the enums.

Attr.getAttrName()->getName() == "__kernel";
}
inline bool isSYCLSpelling() const {
return isSYCLSpelling(*this);
}
template<typename T>
static inline bool isSYCLSpelling(const T& Attr) {
return Attr.getAttrName()->getName() == "sycl_kernel";
}
}];
}

def SYCLKernelEntryPoint : InheritableAttr {
Expand Down Expand Up @@ -1608,15 +1636,6 @@ def Allocating : TypeAttr {
let Documentation = [AllocatingDocs];
}

// Similar to CUDA, OpenCL attributes do not receive a [[]] spelling because
// the specification does not expose them with one currently.
def OpenCLKernel : InheritableAttr {
let Spellings = [CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [Undocumented];
let SimpleHandler = 1;
}

def OpenCLUnrollHint : StmtAttr {
let Spellings = [GNU<"opencl_unroll_hint">];
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
Expand Down Expand Up @@ -2351,11 +2370,6 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}

def AMDGPUKernelCall : DeclOrTypeAttr {
let Spellings = [Clang<"amdgpu_kernel">];
let Documentation = [Undocumented];
}

def BPFPreserveAccessIndex : InheritableAttr,
TargetSpecificAttr<TargetBPF> {
let Spellings = [Clang<"preserve_access_index">];
Expand Down
5 changes: 2 additions & 3 deletions clang/include/clang/Basic/Specifiers.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,14 +289,13 @@ namespace clang {
CC_AAPCS_VFP, // __attribute__((pcs("aapcs-vfp")))
CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
CC_SpirFunction, // default for OpenCL functions on SPIR target
CC_OpenCLKernel, // inferred for OpenCL kernels
CC_DeviceKernel, // __attribute__((device_kernel))
CC_Swift, // __attribute__((swiftcall))
CC_SwiftAsync, // __attribute__((swiftasynccall))
CC_PreserveMost, // __attribute__((preserve_most))
CC_PreserveAll, // __attribute__((preserve_all))
CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
CC_M68kRTD, // __attribute__((m68k_rtd))
CC_PreserveNone, // __attribute__((preserve_none))
CC_RISCVVectorCall, // __attribute__((riscv_vector_cc))
Expand Down Expand Up @@ -326,7 +325,7 @@ namespace clang {
case CC_X86Pascal:
case CC_X86VectorCall:
case CC_SpirFunction:
case CC_OpenCLKernel:
case CC_DeviceKernel:
case CC_Swift:
case CC_SwiftAsync:
case CC_M68kRTD:
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/AST/Decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3541,7 +3541,7 @@ bool FunctionDecl::isExternC() const {
}

bool FunctionDecl::isInExternCContext() const {
if (hasAttr<OpenCLKernelAttr>())
if (hasAttr<DeviceKernelAttr>() && getASTContext().getLangOpts().OpenCL)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really the same thing? Do we prevent enabling multiple kernel-defining languages at the same time? Should this instead check spelling?

Copy link
Member Author

@sarnex sarnex Jun 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review. In an ideal world we would just be able to use the fact that it's been specified as a device kernel and do the checks, but right now it's not that simple; one case is multiple languages with kernels, like you mentioned. I tried OpenCL + SYCL and that worked, so there is some ambiguity there.

Let me update these checks to use the spelling just to simplify this change. Thanks again.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the delay, I just pushed a commit updating all the sites I previously had checking the language to now check for the OpenCL spelling. Should be less risky. Please take a look when you have a sec, thanks.

return true;
return getLexicalDeclContext()->isExternCContext();
}
Expand Down Expand Up @@ -5512,7 +5512,7 @@ FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, GlobalDeclID ID) {
}

bool FunctionDecl::isReferenceableKernel() const {
return hasAttr<CUDAGlobalAttr>() || hasAttr<OpenCLKernelAttr>();
return hasAttr<CUDAGlobalAttr>() || hasAttr<DeviceKernelAttr>();
}

BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/AST/ItaniumMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,8 @@ void CXXNameMangler::mangleUnqualifiedName(
FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
bool IsOCLDeviceStub =
FD && FD->hasAttr<OpenCLKernelAttr>() &&
getASTContext().getLangOpts().OpenCL && FD &&
FD->hasAttr<DeviceKernelAttr>() &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question here.

GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleDeviceStubName(II);
Expand Down Expand Up @@ -3529,10 +3530,9 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
case CC_AAPCS_VFP:
case CC_AArch64VectorCall:
case CC_AArch64SVEPCS:
case CC_AMDGPUKernelCall:
case CC_IntelOclBicc:
case CC_SpirFunction:
case CC_OpenCLKernel:
case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_M68kRTD:
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/AST/MicrosoftMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,8 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
->hasAttr<CUDAGlobalAttr>())) &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
bool IsOCLDeviceStub =
ND && isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>() &&
getASTContext().getLangOpts().OpenCL && ND &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And here :)

isa<FunctionDecl>(ND) && ND->hasAttr<DeviceKernelAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
if (IsDeviceStub)
mangleSourceName(
Expand Down
8 changes: 3 additions & 5 deletions clang/lib/AST/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3594,14 +3594,12 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
return "aarch64_vector_pcs";
case CC_AArch64SVEPCS:
return "aarch64_sve_pcs";
case CC_AMDGPUKernelCall:
return "amdgpu_kernel";
case CC_IntelOclBicc:
return "intel_ocl_bicc";
case CC_SpirFunction:
return "spir_function";
case CC_OpenCLKernel:
return "opencl_kernel";
case CC_DeviceKernel:
return "device_kernel";
case CC_Swift:
return "swiftcall";
case CC_SwiftAsync:
Expand Down Expand Up @@ -4302,7 +4300,7 @@ bool AttributedType::isCallingConv() const {
case attr::VectorCall:
case attr::AArch64VectorPcs:
case attr::AArch64SVEPcs:
case attr::AMDGPUKernelCall:
case attr::DeviceKernel:
case attr::Pascal:
case attr::MSABI:
case attr::SysVABI:
Expand Down
9 changes: 5 additions & 4 deletions clang/lib/AST/TypePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1096,8 +1096,8 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
case CC_AArch64SVEPCS:
OS << "__attribute__((aarch64_sve_pcs))";
break;
case CC_AMDGPUKernelCall:
OS << "__attribute__((amdgpu_kernel))";
case CC_DeviceKernel:
OS << "__attribute__((device_kernel))";
break;
case CC_IntelOclBicc:
OS << " __attribute__((intel_ocl_bicc))";
Expand All @@ -1112,7 +1112,6 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
OS << " __attribute__((regcall))";
break;
case CC_SpirFunction:
case CC_OpenCLKernel:
// Do nothing. These CCs are not available as attributes.
break;
case CC_Swift:
Expand Down Expand Up @@ -2065,7 +2064,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
}
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break;
case attr::DeviceKernel:
OS << T->getAttr()->getSpelling();
break;
case attr::IntelOclBicc: OS << "inteloclbicc"; break;
case attr::PreserveMost:
OS << "preserve_most";
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,7 @@ AArch64TargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_PreserveMost:
case CC_PreserveAll:
case CC_PreserveNone:
case CC_OpenCLKernel:
case CC_DeviceKernel:
case CC_AArch64VectorCall:
case CC_AArch64SVEPCS:
case CC_Win64:
Expand Down Expand Up @@ -1699,7 +1699,7 @@ WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_X86FastCall:
return CCCR_Ignore;
case CC_C:
case CC_OpenCLKernel:
case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_PreserveNone:
Expand Down
3 changes: 1 addition & 2 deletions clang/lib/Basic/Targets/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
default:
return CCCR_Warning;
case CC_C:
case CC_OpenCLKernel:
case CC_AMDGPUKernelCall:
case CC_DeviceKernel:
return CCCR_OK;
}
}
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Basic/Targets/ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1405,7 +1405,7 @@ ARMTargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_AAPCS_VFP:
case CC_Swift:
case CC_SwiftAsync:
case CC_OpenCLKernel:
case CC_DeviceKernel:
return CCCR_OK;
default:
return CCCR_Warning;
Expand Down Expand Up @@ -1480,7 +1480,7 @@ WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_X86VectorCall:
return CCCR_Ignore;
case CC_C:
case CC_OpenCLKernel:
case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_Swift:
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/BPF.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
default:
return CCCR_Warning;
case CC_C:
case CC_OpenCLKernel:
case CC_DeviceKernel:
return CCCR_OK;
}
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/Mips.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ WindowsMipsTargetInfo::checkCallingConvention(CallingConv CC) const {
case CC_X86VectorCall:
return CCCR_Ignore;
case CC_C:
case CC_OpenCLKernel:
case CC_DeviceKernel:
case CC_PreserveMost:
case CC_PreserveAll:
case CC_Swift:
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/SPIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
}

CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK
return (CC == CC_SpirFunction || CC == CC_DeviceKernel) ? CCCR_OK
: CCCR_Warning;
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/SystemZ.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
switch (CC) {
case CC_C:
case CC_Swift:
case CC_OpenCLKernel:
case CC_DeviceKernel:
return CCCR_OK;
case CC_SwiftAsync:
return CCCR_Error;
Expand Down
23 changes: 19 additions & 4 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,10 +408,11 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
case CC_Swift:
case CC_X86Pascal:
case CC_IntelOclBicc:
case CC_OpenCLKernel:
return CCCR_OK;
case CC_SwiftAsync:
return CCCR_Error;
case CC_DeviceKernel:
return IsOpenCL ? CCCR_OK : CCCR_Warning;
default:
return CCCR_Warning;
}
Expand Down Expand Up @@ -439,7 +440,13 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
uint64_t getPointerAlignV(LangAS AddrSpace) const override {
return getPointerWidthV(AddrSpace);
}
void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
TargetInfo::adjust(Diags, Opts);
IsOpenCL = Opts.OpenCL;
}

private:
bool IsOpenCL = false;
};

// X86-32 generic target
Expand Down Expand Up @@ -785,8 +792,9 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
case CC_PreserveAll:
case CC_PreserveNone:
case CC_X86RegCall:
case CC_OpenCLKernel:
return CCCR_OK;
case CC_DeviceKernel:
return IsOpenCL ? CCCR_OK : CCCR_Warning;
default:
return CCCR_Warning;
}
Expand Down Expand Up @@ -817,7 +825,6 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
return X86TargetInfo::validateGlobalRegisterVariable(RegName, RegSize,
HasSizeMismatch);
}

void setMaxAtomicWidth() override {
if (hasFeature("cx16"))
MaxAtomicInlineWidth = 128;
Expand All @@ -829,6 +836,14 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
size_t getMaxBitIntWidth() const override {
return llvm::IntegerType::MAX_INT_BITS;
}

void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
TargetInfo::adjust(Diags, Opts);
IsOpenCL = Opts.OpenCL;
}

private:
bool IsOpenCL = false;
};

// x86-64 UEFI target
Expand Down Expand Up @@ -913,7 +928,7 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_64TargetInfo
case CC_Swift:
case CC_SwiftAsync:
case CC_X86RegCall:
case CC_OpenCLKernel:
case CC_DeviceKernel:
return CCCR_OK;
default:
return CCCR_Warning;
Expand Down
Loading
Loading