From ad744e94e1b59c35b90c8f16679702c5bd3f4cac Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Mon, 12 May 2025 21:28:55 +0000 Subject: [PATCH] [AMDGPU] Make readonly noilas kernel arguments constant memory If a function has a kernel calling convention - that is, if it will be called externally and isn't going to be called from code that might have modified memory at some other point in the program, the local assertions guaranteed by marking a pointer argument "readonly noalias" (that is, that the memory isn't modified within the function) can be streingthened to the assumption that the memory is constant - that is, that it will remain unmodified througout the execution of the program. If this strengthening wasn't possible, it would mean that some function that the kernel calls would be monifying memory despite the only pointer to that memory being readonly, which violates the semantics of readonly. The main purpose of this is to allow setting the `invariant` metadata on loads from readonly kernel arguments during IR translation. (However, this currently doesn't work for the `amdgpu-kernel` calling convention because that calling convention does a lowering to loads from the kernel argument descriptor before translation to MIR) Fixes internal SWDEV-532314 --- .../lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 10 +++++ .../irtranslator-kernel-invariant-loads.ll | 40 +++++++++++++++++++ .../AMDGPU/aa-points-to-constant-memory.ll | 16 ++++++++ 3 files changed, 66 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 7bcc128cb114f..dbb488aedb81b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -11,6 +11,7 @@ #include "AMDGPUAliasAnalysis.h" #include "AMDGPU.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instructions.h" @@ -112,5 +113,14 @@ ModRefInfo AMDGPUAAResult::getModRefInfoMask(const MemoryLocation &Loc, AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) return ModRefInfo::NoModRef; + // A `readonly noalias` function argument normally only gets a `Ref` mask. + // However,, if the calling convention of the function is one intended for + // program entry points, we know that such an argument will be invariant + // over the life of the program. + if (auto* Arg = dyn_cast(Base)) { + const Function *F = Arg->getParent(); + if (AMDGPU::isKernelCC(F) && Arg->hasNoAliasAttr() && Arg->onlyReadsMemory()) + return ModRefInfo::NoModRef; + } return ModRefInfo::ModRef; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll new file mode 100644 index 0000000000000..cfc64857040db --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-kernel-invariant-loads.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s + +define amdgpu_cs void @load_global_is_invariant(ptr addrspace(1) readonly noalias %x, ptr addrspace(1) writeonly noalias %y) { + ; CHECK-LABEL: name: load_global_is_invariant + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (invariant load (s32) from %ir.x, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV1]](p1) :: (store (s32) into %ir.y, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %v = load float, ptr addrspace(1) %x + store float %v, ptr addrspace(1) %y + ret void +} + +define void @load_global_isnt_invariant_non_kernel(ptr addrspace(1) readonly noalias %x, ptr addrspace(1) writeonly noalias %y) { + ; CHECK-LABEL: name: load_global_isnt_invariant_non_kernel + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.x, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV1]](p1) :: (store (s32) into %ir.y, addrspace 1) + ; CHECK-NEXT: SI_RETURN + %v = load float, ptr addrspace(1) %x + store float %v, ptr addrspace(1) %y + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll b/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll index 4e945951dab62..6a3da62c6610c 100644 --- a/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll @@ -110,3 +110,19 @@ define amdgpu_kernel void @nonconst_gv_constant_as() { store i32 0, ptr addrspace(4) @global_nonconstant_constant_as ret void } + +define amdgpu_kernel void @constant_kernel_args(ptr addrspace(1) readonly noalias inreg %x) { +; CHECK-LABEL: @constant_kernel_args( +; CHECK-NEXT: ret void +; + store i32 0, ptr addrspace(1) %x + ret void +} + +define amdgpu_cs void @constant_cs_args(ptr addrspace(1) readonly noalias %x) { +; CHECK-LABEL: @constant_cs_args( +; CHECK-NEXT: ret void +; + store i32 0, ptr addrspace(1) %x + ret void +}