Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions clang/lib/CodeGen/Targets/SPIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
Expand Down Expand Up @@ -245,6 +247,38 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
return DefaultGlobalAS;
}

void SPIRVTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (!M.getLangOpts().HIP ||
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
return;
Comment on lines +252 to +254
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the vendor check. The language check is also suspect, this is interpretation of a target attribute

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not super suspect in context as at the moment this is only for AMDGCN flavoured SPIR-V, which we only support in HIP, and the mapping from flat to dim X only makes sense there.

if (GV->isDeclaration())
return;

auto F = dyn_cast<llvm::Function>(GV);
if (!F)
return;
Comment on lines +258 to +260
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this fail if the FunctionDecl below fails?


auto FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
if (!FD->hasAttr<CUDAGlobalAttr>())
return;

unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock;
if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue();

auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext());
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))};

F->setMetadata("reqd_work_group_size",
llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs));
}

llvm::SyncScope::ID
SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
llvm::AtomicOrdering,
Expand Down
36 changes: 20 additions & 16 deletions clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META5:![0-9]+]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
Expand All @@ -58,7 +58,7 @@
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META5:![0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
Expand Down Expand Up @@ -102,7 +102,7 @@ __global__ void kernel1(int *x) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
Expand All @@ -126,7 +126,7 @@ __global__ void kernel1(int *x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
Expand Down Expand Up @@ -171,7 +171,7 @@ __global__ void kernel2(int &x) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
Expand All @@ -195,7 +195,7 @@ __global__ void kernel2(int &x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
Expand Down Expand Up @@ -302,7 +302,7 @@ struct S {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
Expand Down Expand Up @@ -343,7 +343,7 @@ struct S {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
Expand Down Expand Up @@ -406,7 +406,7 @@ __global__ void kernel4(struct S s) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
Expand Down Expand Up @@ -446,7 +446,7 @@ __global__ void kernel4(struct S s) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
Expand Down Expand Up @@ -511,7 +511,7 @@ struct T {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
Expand Down Expand Up @@ -551,7 +551,7 @@ struct T {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
Expand Down Expand Up @@ -606,7 +606,7 @@ __global__ void kernel6(struct T t) {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
Expand All @@ -631,7 +631,7 @@ __global__ void kernel6(struct T t) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
Expand Down Expand Up @@ -677,7 +677,7 @@ struct SS {
// CHECK-NEXT: ret void
//
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
Expand All @@ -700,7 +700,7 @@ struct SS {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
Expand All @@ -727,5 +727,9 @@ __global__ void kernel8(struct SS a) {
*a.x += 3.f;
}
//.
// CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
//.
// OPT: [[META4]] = !{}
//.
// OPT-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
//.
7 changes: 7 additions & 0 deletions clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=CHECK,MAX1024 %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa --gpu-max-threads-per-block=1024 \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefixes=CHECK-SPIRV,MAX1024-SPIRV %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
// RUN: -check-prefix=NAMD
Expand All @@ -21,12 +24,14 @@

__global__ void flat_work_group_size_default() {
// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z28flat_work_group_size_defaultv(){{.*}} !reqd_work_group_size [[REQD_WORK_GROUP_SIZE_DEFAULT:![0-9]+]]
// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
}

__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
__global__ void flat_work_group_size_32_64() {
// CHECK: define{{.*}} amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z26flat_work_group_size_32_64v(){{.*}} !reqd_work_group_size [[REQD_WORK_GROUP_SIZE_64:![0-9]+]]
}
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
__global__ void waves_per_eu_2() {
Expand Down Expand Up @@ -82,7 +87,9 @@ template __global__ void template_32_4_a_max_num_work_groups<2>();

// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
// MAX1024-SPIRV-DAG: [[REQD_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1}
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
// CHECK-SPIRV-DAG: [[REQD_WORK_GROUP_SIZE_64]] = !{i32 64, i32 1, i32 1}
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"
Expand Down
Loading