Skip to content

Commit 958d1cd

Browse files
committed
Switch to using max_workgroup_size which models expected semantics better and doesn't clash with OCL uses.
1 parent 2dd4456 commit 958d1cd

File tree

3 files changed

+25
-22
lines changed

3 files changed

+25
-22
lines changed

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,13 +269,16 @@ void SPIRVTargetCodeGenInfo::setTargetAttributes(
269269
if (auto FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>())
270270
N = FlatWGS->getMax()->EvaluateKnownConstInt(M.getContext()).getExtValue();
271271

272+
// We encode the maximum flat WG size in the first component of the 3D
273+
// max_work_group_size attribute, which will get reverse translated into the
274+
// original AMDGPU attribute when targeting AMDGPU.
272275
auto Int32Ty = llvm::IntegerType::getInt32Ty(M.getLLVMContext());
273276
llvm::Metadata *AttrMDArgs[] = {
274277
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, N)),
275278
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1)),
276279
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 1))};
277280

278-
F->setMetadata("reqd_work_group_size",
281+
F->setMetadata("max_work_group_size",
279282
llvm::MDNode::get(M.getLLVMContext(), AttrMDArgs));
280283
}
281284

clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
// CHECK-NEXT: ret void
3434
//
3535
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
36-
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META5:![0-9]+]] {
36+
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] {
3737
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
3838
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
3939
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -58,7 +58,7 @@
5858
// OPT-NEXT: ret void
5959
//
6060
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
61-
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !reqd_work_group_size [[META5:![0-9]+]] {
61+
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] {
6262
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
6363
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
6464
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -102,7 +102,7 @@ __global__ void kernel1(int *x) {
102102
// CHECK-NEXT: ret void
103103
//
104104
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
105-
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
105+
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
106106
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
107107
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
108108
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -126,7 +126,7 @@ __global__ void kernel1(int *x) {
126126
// OPT-NEXT: ret void
127127
//
128128
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
129-
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
129+
// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
130130
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
131131
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
132132
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -171,7 +171,7 @@ __global__ void kernel2(int &x) {
171171
// CHECK-NEXT: ret void
172172
//
173173
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
174-
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
174+
// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
175175
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
176176
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
177177
// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
@@ -195,7 +195,7 @@ __global__ void kernel2(int &x) {
195195
// OPT-NEXT: ret void
196196
//
197197
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
198-
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
198+
// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] !max_work_group_size [[META5]] {
199199
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
200200
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
201201
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
@@ -302,7 +302,7 @@ struct S {
302302
// CHECK-NEXT: ret void
303303
//
304304
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
305-
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
305+
// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
306306
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
307307
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
308308
// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
@@ -343,7 +343,7 @@ struct S {
343343
// OPT-NEXT: ret void
344344
//
345345
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
346-
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
346+
// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
347347
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
348348
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
349349
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
@@ -406,7 +406,7 @@ __global__ void kernel4(struct S s) {
406406
// CHECK-NEXT: ret void
407407
//
408408
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
409-
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
409+
// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
410410
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
411411
// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
412412
// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -432,7 +432,7 @@ __global__ void kernel4(struct S s) {
432432
// CHECK-SPIRV-NEXT: ret void
433433
//
434434
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
435-
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
435+
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
436436
// OPT-NEXT: [[ENTRY:.*:]]
437437
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
438438
// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -446,7 +446,7 @@ __global__ void kernel4(struct S s) {
446446
// OPT-NEXT: ret void
447447
//
448448
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
449-
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
449+
// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
450450
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
451451
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
452452
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -511,7 +511,7 @@ struct T {
511511
// CHECK-NEXT: ret void
512512
//
513513
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
514-
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
514+
// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
515515
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
516516
// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
517517
// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
@@ -551,7 +551,7 @@ struct T {
551551
// OPT-NEXT: ret void
552552
//
553553
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
554-
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
554+
// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
555555
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
556556
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
557557
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
@@ -606,7 +606,7 @@ __global__ void kernel6(struct T t) {
606606
// CHECK-NEXT: ret void
607607
//
608608
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
609-
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
609+
// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
610610
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
611611
// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
612612
// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
@@ -631,7 +631,7 @@ __global__ void kernel6(struct T t) {
631631
// OPT-NEXT: ret void
632632
//
633633
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
634-
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
634+
// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
635635
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
636636
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
637637
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
@@ -677,7 +677,7 @@ struct SS {
677677
// CHECK-NEXT: ret void
678678
//
679679
// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
680-
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
680+
// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
681681
// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
682682
// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
683683
// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
@@ -700,7 +700,7 @@ struct SS {
700700
// OPT-NEXT: ret void
701701
//
702702
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
703-
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !reqd_work_group_size [[META5]] {
703+
// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] {
704704
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
705705
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
706706
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424

2525
__global__ void flat_work_group_size_default() {
2626
// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
27-
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z28flat_work_group_size_defaultv(){{.*}} !reqd_work_group_size [[REQD_WORK_GROUP_SIZE_DEFAULT:![0-9]+]]
27+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z28flat_work_group_size_defaultv(){{.*}} !max_work_group_size [[MAX_WORK_GROUP_SIZE_DEFAULT:![0-9]+]]
2828
// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
2929
}
3030

3131
__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
3232
__global__ void flat_work_group_size_32_64() {
3333
// CHECK: define{{.*}} amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
34-
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z26flat_work_group_size_32_64v(){{.*}} !reqd_work_group_size [[REQD_WORK_GROUP_SIZE_64:![0-9]+]]
34+
// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z26flat_work_group_size_32_64v(){{.*}} !max_work_group_size [[MAX_WORK_GROUP_SIZE_64:![0-9]+]]
3535
}
3636
__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
3737
__global__ void waves_per_eu_2() {
@@ -87,9 +87,9 @@ template __global__ void template_32_4_a_max_num_work_groups<2>();
8787

8888
// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"{{.*}}"uniform-work-group-size"="true"
8989
// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
90-
// MAX1024-SPIRV-DAG: [[REQD_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1}
90+
// MAX1024-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_DEFAULT]] = !{i32 1024, i32 1, i32 1}
9191
// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
92-
// CHECK-SPIRV-DAG: [[REQD_WORK_GROUP_SIZE_64]] = !{i32 64, i32 1, i32 1}
92+
// CHECK-SPIRV-DAG: [[MAX_WORK_GROUP_SIZE_64]] = !{i32 64, i32 1, i32 1}
9393
// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
9494
// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
9595
// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"

0 commit comments

Comments
 (0)