Skip to content

Commit e37eff5

Browse files
authored
[AMDGPU] Add an option to completely disable kernel argument preload (#153975)
Setting the existing `amdgpu-kernarg-preload-count` option to 0 does not turn kernel argument preloading off, so it cannot be used as an on/off switch. This PR adds a separate option that disables preloading completely. Fixes SWDEV-550147.
1 parent f38c83c commit e37eff5

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ static cl::opt<unsigned> KernargPreloadCount(
3737
"amdgpu-kernarg-preload-count",
3838
cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
3939

40+
static cl::opt<bool>
41+
EnableKernargPreload("amdgpu-kernarg-preload",
42+
cl::desc("Enable preload kernel arguments to SGPRs"),
43+
cl::init(true));
44+
4045
namespace {
4146

4247
class AMDGPUPreloadKernelArgumentsLegacy : public ModulePass {
@@ -275,6 +280,9 @@ AMDGPUPreloadKernelArgumentsLegacy::AMDGPUPreloadKernelArgumentsLegacy(
275280
: ModulePass(ID), TM(TM) {}
276281

277282
static bool markKernelArgsAsInreg(Module &M, const TargetMachine &TM) {
283+
if (!EnableKernargPreload)
284+
return false;
285+
278286
SmallVector<Function *, 4> FunctionsToErase;
279287
bool Changed = false;
280288
for (auto &F : M) {
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=amdgpu-preload-kernel-arguments -amdgpu-kernarg-preload=0 %s -o - | FileCheck -check-prefix=NO-PRELOAD %s
3+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=amdgpu-preload-kernel-arguments %s -o - | FileCheck -check-prefix=DEFAULT-PRELOAD %s
4+
5+
@g1 = protected addrspace(1) externally_initialized global i16 0, align 2
6+
7+
define amdgpu_kernel void @test_kernel_with_zero_kernel_arg() {
8+
; NO-PRELOAD-LABEL: define amdgpu_kernel void @test_kernel_with_zero_kernel_arg(
9+
; NO-PRELOAD-SAME: ) #[[ATTR0:[0-9]+]] {
10+
; NO-PRELOAD-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
11+
; NO-PRELOAD-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
12+
; NO-PRELOAD-NEXT: [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP]], align 2
13+
; NO-PRELOAD-NEXT: store i16 [[GROUP_SIZE_X]], ptr addrspace(1) @g1, align 2
14+
; NO-PRELOAD-NEXT: ret void
15+
;
16+
; DEFAULT-PRELOAD-LABEL: define amdgpu_kernel void @test_kernel_with_zero_kernel_arg(
17+
; DEFAULT-PRELOAD-SAME: i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_X:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_Y:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_Z:%.*]], i16 inreg "amdgpu-hidden-argument" [[_HIDDEN_GROUP_SIZE_X:%.*]]) #[[ATTR0:[0-9]+]] {
18+
; DEFAULT-PRELOAD-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
19+
; DEFAULT-PRELOAD-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
20+
; DEFAULT-PRELOAD-NEXT: [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP]], align 2
21+
; DEFAULT-PRELOAD-NEXT: store i16 [[_HIDDEN_GROUP_SIZE_X]], ptr addrspace(1) @g1, align 2
22+
; DEFAULT-PRELOAD-NEXT: ret void
23+
;
24+
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
25+
%gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12
26+
%group_size_x = load i16, ptr addrspace(4) %gep
27+
store i16 %group_size_x, ptr addrspace(1) @g1
28+
ret void
29+
}

0 commit comments

Comments
 (0)