Skip to content

Commit 4b0a975

Browse files
authored
[OpenCL][NVPTX] Don't set calling convention for OpenCL kernel (#170170)
Fixes #154772 We previously set `ptx_kernel` for all kernels. But it's incorrect to add `ptx_kernel` to the stub version of kernel introduced in #115821. This patch copies the workaround of AMDGPU.
1 parent 6638d59 commit 4b0a975

File tree

6 files changed

+28
-33
lines changed

6 files changed

+28
-33
lines changed

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -439,11 +439,8 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
439439
return;
440440

441441
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
442-
if (FD) {
442+
if (FD)
443443
setFunctionDeclAttributes(FD, F, M);
444-
if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
445-
F->setCallingConv(getDeviceKernelCallingConv());
446-
}
447444
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
448445
F->addFnAttr("amdgpu-ieee", "false");
449446
}

clang/lib/CodeGen/Targets/NVPTX.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,6 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
276276
M.handleCUDALaunchBoundsAttr(F, Attr);
277277
}
278278
}
279-
// Attach kernel metadata directly if compiling for NVPTX.
280-
if (FD->hasAttr<DeviceKernelAttr>())
281-
F->setCallingConv(getDeviceKernelCallingConv());
282279
}
283280

284281
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
7777
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
7878
llvm::PointerType *T,
7979
QualType QT) const override;
80-
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
81-
CodeGen::CodeGenModule &M) const override;
8280
};
8381
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
8482
public:
@@ -292,22 +290,6 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
292290
llvm::ConstantPointerNull::get(NPT), PT);
293291
}
294292

295-
void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
296-
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
297-
if (M.getLangOpts().OpenCL || GV->isDeclaration())
298-
return;
299-
300-
const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
301-
if (!FD)
302-
return;
303-
304-
llvm::Function *F = dyn_cast<llvm::Function>(GV);
305-
assert(F && "Expected GlobalValue to be a Function");
306-
307-
if (FD->hasAttr<DeviceKernelAttr>())
308-
F->setCallingConv(getDeviceKernelCallingConv());
309-
}
310-
311293
LangAS
312294
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
313295
const VarDecl *D) const {
@@ -342,9 +324,6 @@ void SPIRVTargetCodeGenInfo::setTargetAttributes(
342324
llvm::Function *F = dyn_cast<llvm::Function>(GV);
343325
assert(F && "Expected GlobalValue to be a Function");
344326

345-
if (FD->hasAttr<DeviceKernelAttr>())
346-
F->setCallingConv(getDeviceKernelCallingConv());
347-
348327
if (!M.getLangOpts().HIP ||
349328
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
350329
return;

clang/lib/Sema/SemaType.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3798,8 +3798,10 @@ static CallingConv getCCForDeclaratorChunk(
37983798
}
37993799
}
38003800
}
3801+
38013802
for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
3802-
D.getDeclSpec().getAttributes(), D.getAttributes())) {
3803+
D.getDeclSpec().getAttributes(), D.getAttributes(),
3804+
D.getDeclarationAttributes())) {
38033805
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
38043806
CC = CC_DeviceKernel;
38053807
break;
Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,31 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6
12
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
23

34
void device_function() {
45
}
5-
// CHECK-LABEL: define{{.*}} void @device_function()
66

77
__kernel void kernel_function() {
88
device_function();
99
}
10-
// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
11-
// CHECK: call void @device_function()
10+
// CHECK-LABEL: define dso_local void @device_function(
11+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
12+
// CHECK-NEXT: [[ENTRY:.*:]]
13+
// CHECK-NEXT: ret void
14+
//
15+
//
16+
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
17+
// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META3]] !kernel_arg_base_type [[META3]] !kernel_arg_type_qual [[META3]] {
18+
// CHECK-NEXT: [[ENTRY:.*:]]
19+
// CHECK-NEXT: call void @__clang_ocl_kern_imp_kernel_function() #[[ATTR2:[0-9]+]]
20+
// CHECK-NEXT: ret void
21+
//
22+
//
23+
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_kernel_function(
24+
// CHECK-SAME: ) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META3]] !kernel_arg_base_type [[META3]] !kernel_arg_type_qual [[META3]] {
25+
// CHECK-NEXT: [[ENTRY:.*:]]
26+
// CHECK-NEXT: call void @device_function() #[[ATTR2]]
27+
// CHECK-NEXT: ret void
28+
//
29+
//.
30+
// CHECK: [[META3]] = !{}
31+
//.

clang/test/CodeGenOpenCL/reflect.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ __kernel void kernel_function(__global int *i) {
2626
// CHECK-NEXT: ret void
2727
//
2828
//
29-
// CHECK-LABEL: define dso_local ptx_kernel void @__clang_ocl_kern_imp_kernel_function(
29+
// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_kernel_function(
3030
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
3131
// CHECK-NEXT: entry:
3232
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4

0 commit comments

Comments
 (0)