Skip to content

Commit 66acb26

Browse files
authored
[clang][CodeGen][SPIRV] Translate amdgpu_flat_work_group_size into max_work_group_size. (#116820)
HIPAMD relies on the `amdgpu_flat_work_group_size` attribute to implement key functionality such as the `__launch_bounds__` `__global__` function annotation. This attribute is not available / directly translatable to SPIR-V, hence as it is AMDGCN flavoured SPIR-V suffers from information loss. This patch addresses that limitation by converting the unsupported attribute into the `max_work_group_size` attribute which maps to [`MaxWorkgroupSizeINTEL`](https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/INTEL/SPV_INTEL_kernel_attributes.asciidoc), which is available in / handled by SPIR-V. When reverse translating from SPIR-V to AMDGCN LLVMIR we invert the map and add the original AMDGPU attribute.
1 parent 7ce15f3 commit 66acb26

File tree

3 files changed

+65
-17
lines changed

3 files changed

+65
-17
lines changed

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
6464
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
6565
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
6666
const VarDecl *D) const override;
67+
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
68+
CodeGen::CodeGenModule &M) const override;
6769
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
6870
SyncScope Scope,
6971
llvm::AtomicOrdering Ordering,
@@ -245,6 +247,41 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
245247
return DefaultGlobalAS;
246248
}
247249

250+
void SPIRVTargetCodeGenInfo::setTargetAttributes(
251+
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
252+
if (!M.getLangOpts().HIP ||
253+
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
254+
return;
255+
if (GV->isDeclaration())
256+
return;
257+
258+
auto F = dyn_cast<llvm::Function>(GV);
259+
if (!F)
260+
return;
261+
262+
auto FD = dyn_cast_or_null<FunctionDecl>(D);
263+
if (!FD)
264+
return;
265+
if (!FD->hasAttr<CUDAGlobalAttr>())
266+
return;
267+
268+
unsigned N = M.getLangOpts().GPUMaxThreadsPerBlock;
269+
if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
270+
N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue();
271+
272+
// We encode the maximum flat WG size in the first component of the 3D
273+
// max_work_group_size attribute, which will get reverse translated into the
274+
// original AMDGPU attribute when targeting AMDGPU.
275+
auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext());
276+
llvm::Metadata *AttrMDArgs[] = {
277+
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)),
278+
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)),
279+
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))};
280+
281+
F->setMetadata("max_work_group_size",
282+
llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs));
283+
}
284+
248285
llvm::SyncScope::ID
249286
SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
250287
llvm::AtomicOrdering,

clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
// CHECK-NEXT: ret void
3434
//
3535
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
36-
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
36+
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] {
3737
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
3838
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
3939
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -58,7 +58,7 @@
5858
// OPT-NEXT: ret void
5959
//
6060
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
61-
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
61+
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] {
6262
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
6363
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
6464
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -102,7 +102,7 @@ __global__ void kernel1(int *x) {
102102
// CHECK-NEXT: ret void
103103
//
104104
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
105-
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
105+
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
106106
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
107107
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
108108
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -126,7 +126,7 @@ __global__ void kernel1(int *x) {
126126
// OPT-NEXT: ret void
127127
//
128128
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
129-
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
129+
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
130130
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
131131
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
132132
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -171,7 +171,7 @@ __global__ void kernel2(int &x) {
171171
// CHECK-NEXT: ret void
172172
//
173173
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
174-
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] {
174+
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
175175
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
176176
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
177177
// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
@@ -195,7 +195,7 @@ __global__ void kernel2(int &x) {
195195
// OPT-NEXT: ret void
196196
//
197197
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
198-
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
198+
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] !max_work_group_size [[META5]] {
199199
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
200200
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
201201
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
@@ -302,7 +302,7 @@ struct S {
302302
// CHECK-NEXT: ret void
303303
//
304304
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
305-
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
305+
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
306306
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
307307
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
308308
// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
@@ -343,7 +343,7 @@ struct S {
343343
// OPT-NEXT: ret void
344344
//
345345
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
346-
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
346+
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
347347
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
348348
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
349349
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
@@ -406,7 +406,7 @@ __global__ void kernel4(struct S s) {
406406
// CHECK-NEXT: ret void
407407
//
408408
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
409-
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
409+
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
410410
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
411411
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
412412
// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -432,7 +432,7 @@ __global__ void kernel4(struct S s) {
432432
// CHECK-SPIRV-NEXT: ret void
433433
//
434434
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
435-
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
435+
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
436436
// OPT-NEXT: [[ENTRY:.*:]]
437437
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
438438
// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -446,7 +446,7 @@ __global__ void kernel4(struct S s) {
446446
// OPT-NEXT: ret void
447447
//
448448
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
449-
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
449+
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
450450
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
451451
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
452452
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -511,7 +511,7 @@ struct T {
511511
// CHECK-NEXT: ret void
512512
//
513513
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
514-
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
514+
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
515515
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
516516
// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
517517
// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
@@ -551,7 +551,7 @@ struct T {
551551
// OPT-NEXT: ret void
552552
//
553553
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
554-
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
554+
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
555555
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
556556
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
557557
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
@@ -606,7 +606,7 @@ __global__ void kernel6(struct T t) {
606606
// CHECK-NEXT: ret void
607607
//
608608
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
609-
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
609+
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
610610
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
611611
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
612612
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -631,7 +631,7 @@ __global__ void kernel6(struct T t) {
631631
// OPT-NEXT: ret void
632632
//
633633
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
634-
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
634+
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
635635
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
636636
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
637637
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -677,7 +677,7 @@ struct SS {
677677
// CHECK-NEXT: ret void
678678
//
679679
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
680-
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
680+
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
681681
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
682682
// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
683683
// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
@@ -700,7 +700,7 @@ struct SS {
700700
// OPT-NEXT: ret void
701701
//
702702
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
703-
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
703+
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
704704
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
705705
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
706706
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
@@ -727,5 +727,9 @@ __global__ void kernel8(struct SS a) {
727727
*a.x += 3.f;
728728
}
729729
//.
730+
// CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
731+
//.
730732
// OPT: [[META4]] = !{}
731733
//.
734+
// OPT-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1}
735+
//.

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \
55
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
66
// RUN: | FileCheck -check-prefixes=CHECK,MAX1024 %s
7+
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa --gpu-max-threads-per-block=1024 \
8+
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
9+
// RUN: | FileCheck -check-prefixes=CHECK-SPIRV,MAX1024-SPIRV %s
710
// RUN: %clang_cc1 -triple nvptx \
811
// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
912
// RUN: -check-prefix=NAMD
@@ -21,12 +24,14 @@
2124

2225
__global__ void flat_work_group_size_default() {
2326
// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
27+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z28flat_work_group_size_defaultv(){{.*}} !max_work_group_size [[MAX_WORK_GROUP_SIZE_DEFAULT:![0-9]+]]
2428
// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
2529
}
2630

2731
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
2832
__global__ void flat_work_group_size_32_64() {
2933
// CHECK: define{{.*}} amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
34+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z26flat_work_group_size_32_64v(){{.*}} !max_work_group_size [[MAX_WORK_GROUP_SIZE_64:![0-9]+]]
3035
}
3136
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
3237
__global__ void waves_per_eu_2() {
@@ -82,7 +87,9 @@ template __global__ void template_32_4_a_max_num_work_groups<2>();
8287

8388
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
8489
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
90+
// MAX1024-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1}
8591
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
92+
// CHECK-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_64]] = !{i32 64, i32 1, i32 1}
8693
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
8794
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
8895
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"

0 commit comments

Comments
 (0)