Skip to content

Commit e169cc1

Browse files
authored
[Clang] Fix sema checks thinking kernels aren't kernels (#104460)
Summary: Currently we have some sema checks to make sure users don't apply kernel-only attributes to non-kernel functions. However, these checks did not correctly recognize bare NVPTX / AMDGPU kernel attributes, making it impossible to use them at all without CUDA enabled. This patch fixes that by checking for the calling convention / attributes directly.
1 parent 085b04b commit e169cc1

File tree

2 files changed

+31
-18
lines changed

2 files changed

+31
-18
lines changed

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7123,6 +7123,13 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
71237123
}
71247124
}
71257125

7126+
/// Returns true if \p D is treated as a kernel for the purpose of the
/// kernel-only attribute checks: it is an OpenCL kernel, its function type
/// uses the AMDGPU kernel calling convention, or it carries the CUDA global
/// or NVPTX kernel attribute.
static bool isKernelDecl(Decl *D) {
  // The function type is null-checked before use; a declaration without one
  // can still qualify through one of the attribute checks below.
  const FunctionType *FnTy = D->getFunctionType();
  return D->hasAttr<OpenCLKernelAttr>() ||
         (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) ||
         D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<NVPTXKernelAttr>();
}
7132+
71267133
void Sema::ProcessDeclAttributeList(
71277134
Scope *S, Decl *D, const ParsedAttributesView &AttrList,
71287135
const ProcessDeclAttributeOptions &Options) {
@@ -7163,24 +7170,25 @@ void Sema::ProcessDeclAttributeList(
71637170
} else if (const auto *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
71647171
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
71657172
D->setInvalidDecl();
7166-
} else if (!D->hasAttr<CUDAGlobalAttr>()) {
7167-
if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
7168-
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7169-
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7170-
D->setInvalidDecl();
7171-
} else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
7172-
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7173-
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7174-
D->setInvalidDecl();
7175-
} else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
7176-
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7177-
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7178-
D->setInvalidDecl();
7179-
} else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
7180-
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7181-
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7182-
D->setInvalidDecl();
7183-
}
7173+
}
7174+
}
7175+
if (!isKernelDecl(D)) {
7176+
if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
7177+
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7178+
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7179+
D->setInvalidDecl();
7180+
} else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
7181+
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7182+
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7183+
D->setInvalidDecl();
7184+
} else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
7185+
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7186+
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7187+
D->setInvalidDecl();
7188+
} else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
7189+
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
7190+
<< A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
7191+
D->setInvalidDecl();
71847192
}
71857193
}
71867194

clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
// The original test passes the result through opt O2, but that seems to introduce invalid
77
// addrspace casts which are not being fixed as part of the present change.
88

9+
// COMMON: define{{.*}} amdgpu_kernel void @_Z6kernelv() #[[ATTR:[0-9]+]]
10+
// Bare amdgpu_kernel function that also carries amdgpu_flat_work_group_size,
// an attribute Sema diagnoses when it appears on a non-kernel declaration.
__attribute__((amdgpu_kernel, amdgpu_flat_work_group_size(1, 256))) void
11+
kernel() {}
12+
913
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(ptr {{.*}} %x)
1014
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to ptr
1115
__attribute__((amdgpu_kernel)) void kernel1(int *x) {
@@ -81,3 +85,4 @@ __attribute__((amdgpu_kernel)) void kernel8(struct SS a) {
8185
*a.x += 3.f;
8286
}
8387

88+
// COMMON: attributes #[[ATTR]] = { {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}} }

0 commit comments

Comments
 (0)