diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp index 984c1ee89309e..a386fe621a553 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp @@ -37,6 +37,11 @@ static cl::opt KernargPreloadCount( "amdgpu-kernarg-preload-count", cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0)); +static cl::opt + EnableKernargPreload("amdgpu-kernarg-preload", + cl::desc("Enable preload kernel arguments to SGPRs"), + cl::init(true)); + namespace { class AMDGPUPreloadKernelArgumentsLegacy : public ModulePass { @@ -275,6 +280,9 @@ AMDGPUPreloadKernelArgumentsLegacy::AMDGPUPreloadKernelArgumentsLegacy( : ModulePass(ID), TM(TM) {} static bool markKernelArgsAsInreg(Module &M, const TargetMachine &TM) { + if (!EnableKernargPreload) + return false; + SmallVector FunctionsToErase; bool Changed = false; for (auto &F : M) { diff --git a/llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll new file mode 100644 index 0000000000000..75aaec6f1fa70 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/disable-preload-kernargs.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=amdgpu-preload-kernel-arguments -amdgpu-kernarg-preload=0 %s -o - | FileCheck -check-prefix=NO-PRELOAD %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=amdgpu-preload-kernel-arguments %s -o - | FileCheck -check-prefix=DEFAULT-PRELOAD %s + +@g1 = protected addrspace(1) externally_initialized global i16 0, align 2 + +define amdgpu_kernel void @test_kernel_with_zero_kernel_arg() { +; NO-PRELOAD-LABEL: define amdgpu_kernel void @test_kernel_with_zero_kernel_arg( +; NO-PRELOAD-SAME: ) #[[ATTR0:[0-9]+]] { +; NO-PRELOAD-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; NO-PRELOAD-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12 +; NO-PRELOAD-NEXT: [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP]], align 2 +; NO-PRELOAD-NEXT: store i16 [[GROUP_SIZE_X]], ptr addrspace(1) @g1, align 2 +; NO-PRELOAD-NEXT: ret void +; +; DEFAULT-PRELOAD-LABEL: define amdgpu_kernel void @test_kernel_with_zero_kernel_arg( +; DEFAULT-PRELOAD-SAME: i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_X:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_Y:%.*]], i32 inreg "amdgpu-hidden-argument" [[_HIDDEN_BLOCK_COUNT_Z:%.*]], i16 inreg "amdgpu-hidden-argument" [[_HIDDEN_GROUP_SIZE_X:%.*]]) #[[ATTR0:[0-9]+]] { +; DEFAULT-PRELOAD-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; DEFAULT-PRELOAD-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12 +; DEFAULT-PRELOAD-NEXT: [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP]], align 2 +; DEFAULT-PRELOAD-NEXT: store i16 [[_HIDDEN_GROUP_SIZE_X]], ptr addrspace(1) @g1, align 2 +; DEFAULT-PRELOAD-NEXT: ret void +; + %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12 + %group_size_x = load i16, ptr addrspace(4) %gep + store i16 %group_size_x, ptr addrspace(1) @g1 + ret void +}