Skip to content

Commit c836a13

Browse files
sarnexgithub-actions[bot]
authored andcommitted
Automerge: [clang] Fix inconsistencies with the device_kernel attr on different targets (#161905)
The original [change](llvm/llvm-project#137882) unifying the device kernel attributes had some inexplicable behavior, such as `amdgpu_kernel` resulting in a function ending up with the `spir_kernel` CC but `nvptx_kernel` not doing the same, both cases compiling for SPIR. There was also a [crash](llvm/llvm-project#161077). `sycl_kernel` is now separated out from `device_kernel`, but still there was some weird behavior for the remaining spellings. For the target-specific spellings (`nvptx_kernel` and `amdgpu_kernel`), while not technically required, we warn and ignore the attribute if the spelling doesn't match the target because it's weird from the user's point of view to allow it. Also we make sure that any valid usage actually applies the CC to the generated `llvm:Function`. This worked for `NVPTX` already but was missing for `SPIR/SPIR-V` and `AMDGPU`, it needs to be explicitly done in `TargetInfo`. This allows us to remove the `amdgpu_kernel` specific handing we had. That special handling was previously required because it was the only variation that was allowed on a type, and thus had a separate way to propagate the CC. These issues were reported [here](llvm/llvm-project#161077) and [here](llvm/llvm-project#161349). Closes: llvm/llvm-project#161077 --------- Signed-off-by: Sarnie, Nick <[email protected]>
2 parents c6d8937 + f6ba213 commit c836a13

File tree

12 files changed

+86
-35
lines changed

12 files changed

+86
-35
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ Bug Fixes to Attribute Support
447447
- Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
448448
``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
449449
- Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075)
450+
- Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GH161905)
450451

451452
Bug Fixes to C++ Support
452453
^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/Attr.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1623,7 +1623,7 @@ def SYCLKernel : InheritableAttr {
16231623
let Documentation = [SYCLKernelDocs];
16241624
}
16251625

1626-
def DeviceKernel : DeclOrTypeAttr {
1626+
def DeviceKernel : InheritableAttr {
16271627
let Spellings = [Clang<"device_kernel">,
16281628
Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
16291629
CustomKeyword<"__kernel">, CustomKeyword<"kernel">];

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4126,6 +4126,9 @@ def warn_missing_sdksettings_for_availability_checking : Warning<
41264126
"%0 availability is ignored without a valid 'SDKSettings.json' in the SDK">,
41274127
InGroup<DiagGroup<"ignored-availability-without-sdk-settings">>;
41284128

4129+
def err_hidden_device_kernel
4130+
: Error<"%0 is specified as a device kernel but it is not externally visible">;
4131+
41294132
// Thread Safety Attributes
41304133
def warn_thread_attribute_ignored : Warning<
41314134
"ignoring %0 attribute because its argument is invalid">,

clang/lib/AST/TypePrinter.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
21472147
}
21482148
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
21492149
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
2150-
case attr::DeviceKernel:
2151-
OS << T->getAttr()->getSpelling();
2152-
break;
21532150
case attr::IntelOclBicc:
21542151
OS << "inteloclbicc";
21552152
break;

clang/lib/Basic/Targets/NVPTX.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
200200
// a host function.
201201
if (HostTarget)
202202
return HostTarget->checkCallingConvention(CC);
203-
return CCCR_Warning;
203+
return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning;
204204
}
205205

206206
bool hasBitIntType() const override { return true; }

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
439439
return;
440440

441441
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
442-
if (FD)
442+
if (FD) {
443443
setFunctionDeclAttributes(FD, F, M);
444-
444+
if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
445+
F->setCallingConv(getDeviceKernelCallingConv());
446+
}
445447
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
446448
F->addFnAttr("amdgpu-ieee", "false");
447449
}
@@ -658,7 +660,7 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
658660
// kernel address (only the kernel descriptor).
659661
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
660662
&Mod);
661-
F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
663+
F->setCallingConv(getDeviceKernelCallingConv());
662664

663665
llvm::AttrBuilder KernelAttrs(C);
664666
// FIXME: The invoke isn't applying the right attributes either

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
264264
// And kernel functions are not subject to inlining
265265
F->addFnAttr(llvm::Attribute::NoInline);
266266
if (FD->hasAttr<CUDAGlobalAttr>()) {
267-
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
267+
F->setCallingConv(getDeviceKernelCallingConv());
268268

269269
for (auto IV : llvm::enumerate(FD->parameters()))
270270
if (IV.value()->hasAttr<CUDAGridConstantAttr>())
@@ -278,7 +278,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
278278
}
279279
// Attach kernel metadata directly if compiling for NVPTX.
280280
if (FD->hasAttr<DeviceKernelAttr>())
281-
F->setCallingConv(llvm::CallingConv::PTX_Kernel);
281+
F->setCallingConv(getDeviceKernelCallingConv());
282282
}
283283

284284
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
6464
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
6565
llvm::PointerType *T,
6666
QualType QT) const override;
67+
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
68+
CodeGen::CodeGenModule &M) const override;
6769
};
6870
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
6971
public:
@@ -268,6 +270,22 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
268270
llvm::ConstantPointerNull::get(NPT), PT);
269271
}
270272

273+
void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
274+
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
275+
if (M.getLangOpts().OpenCL || GV->isDeclaration())
276+
return;
277+
278+
const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
279+
if (!FD)
280+
return;
281+
282+
llvm::Function *F = dyn_cast<llvm::Function>(GV);
283+
assert(F && "Expected GlobalValue to be a Function");
284+
285+
if (FD->hasAttr<DeviceKernelAttr>())
286+
F->setCallingConv(getDeviceKernelCallingConv());
287+
}
288+
271289
LangAS
272290
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
273291
const VarDecl *D) const {
@@ -292,19 +310,23 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
292310

293311
void SPIRVTargetCodeGenInfo::setTargetAttributes(
294312
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
295-
if (!M.getLangOpts().HIP ||
296-
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
297-
return;
298313
if (GV->isDeclaration())
299314
return;
300315

301-
auto F = dyn_cast<llvm::Function>(GV);
302-
if (!F)
316+
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
317+
if (!FD)
303318
return;
304319

305-
auto FD = dyn_cast_or_null<FunctionDecl>(D);
306-
if (!FD)
320+
llvm::Function *F = dyn_cast<llvm::Function>(GV);
321+
assert(F && "Expected GlobalValue to be a Function");
322+
323+
if (FD->hasAttr<DeviceKernelAttr>())
324+
F->setCallingConv(getDeviceKernelCallingConv());
325+
326+
if (!M.getLangOpts().HIP ||
327+
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
307328
return;
329+
308330
if (!FD->hasAttr<CUDAGlobalAttr>())
309331
return;
310332

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5206,16 +5206,36 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
52065206
static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
52075207
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
52085208
bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
5209-
if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
5209+
llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple();
5210+
const LangOptions &LangOpts = S.getLangOpts();
5211+
// OpenCL has its own error messages.
5212+
if (!LangOpts.OpenCL && FD && !FD->isExternallyVisible()) {
5213+
S.Diag(AL.getLoc(), diag::err_hidden_device_kernel) << FD;
5214+
AL.setInvalid();
5215+
return;
5216+
}
5217+
if (Triple.isNVPTX()) {
52105218
handleGlobalAttr(S, D, AL);
52115219
} else {
52125220
// OpenCL C++ will throw a more specific error.
5213-
if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
5221+
if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
52145222
S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
52155223
<< AL << AL.isRegularKeywordAttribute() << "functions";
5224+
AL.setInvalid();
5225+
return;
52165226
}
52175227
handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
52185228
}
5229+
// TODO: isGPU() should probably return true for SPIR.
5230+
bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() ||
5231+
LangOpts.isTargetDevice() || LangOpts.OpenCL;
5232+
if (!TargetDeviceEnvironment) {
5233+
S.Diag(AL.getLoc(), diag::warn_cconv_unsupported)
5234+
<< AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget;
5235+
AL.setInvalid();
5236+
return;
5237+
}
5238+
52195239
// Make sure we validate the CC with the target
52205240
// and warn/error if necessary.
52215241
handleCallConvAttr(S, D, AL);

clang/lib/Sema/SemaType.cpp

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
134134
case ParsedAttr::AT_VectorCall: \
135135
case ParsedAttr::AT_AArch64VectorPcs: \
136136
case ParsedAttr::AT_AArch64SVEPcs: \
137-
case ParsedAttr::AT_DeviceKernel: \
138137
case ParsedAttr::AT_MSABI: \
139138
case ParsedAttr::AT_SysVABI: \
140139
case ParsedAttr::AT_Pcs: \
@@ -3786,7 +3785,8 @@ static CallingConv getCCForDeclaratorChunk(
37863785
}
37873786
}
37883787
}
3789-
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
3788+
for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
3789+
D.getDeclSpec().getAttributes(), D.getAttributes())) {
37903790
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
37913791
CC = CC_DeviceKernel;
37923792
break;
@@ -7569,8 +7569,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
75697569
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
75707570
case ParsedAttr::AT_ArmStreaming:
75717571
return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
7572-
case ParsedAttr::AT_DeviceKernel:
7573-
return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
75747572
case ParsedAttr::AT_Pcs: {
75757573
// The attribute may have had a fixit applied where we treated an
75767574
// identifier as a string literal. The contents of the string are valid,
@@ -8809,16 +8807,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
88098807
}
88108808
}
88118809

8812-
static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
8813-
// The DeviceKernel attribute is shared for many targets, and
8814-
// it is only allowed to be a type attribute with the AMDGPU
8815-
// spelling, so skip processing the attr as a type attr
8816-
// unless it has that spelling.
8817-
if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
8818-
return true;
8819-
return DeviceKernelAttr::isAMDGPUSpelling(Attr);
8820-
}
8821-
88228810
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
88238811
TypeAttrLocation TAL,
88248812
const ParsedAttributesView &attrs,
@@ -9072,8 +9060,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
90729060
break;
90739061
[[fallthrough]];
90749062
FUNCTION_TYPE_ATTRS_CASELIST:
9075-
if (!isMultiSubjectAttrAllowedOnType(attr))
9076-
break;
90779063

90789064
attr.setUsedAsTypeAttr();
90799065

0 commit comments

Comments
 (0)