Skip to content

Commit ad744e9

Browse files
committed
[AMDGPU] Make readonly noalias kernel arguments constant memory
If a function has a kernel calling convention - that is, if it will be called externally and isn't going to be called from code that might have modified memory at some other point in the program, the local assertions guaranteed by marking a pointer argument "readonly noalias" (that is, that the memory isn't modified within the function) can be strengthened to the assumption that the memory is constant - that is, that it will remain unmodified throughout the execution of the program. If this strengthening wasn't possible, it would mean that some function that the kernel calls would be modifying memory despite the only pointer to that memory being readonly, which violates the semantics of readonly. The main purpose of this is to allow setting the `invariant` metadata on loads from readonly kernel arguments during IR translation. (However, this currently doesn't work for the `amdgpu_kernel` calling convention because that calling convention does a lowering to loads from the kernel argument descriptor before translation to MIR.) Fixes internal SWDEV-532314
1 parent fc3b67a commit ad744e9

File tree

3 files changed

+66
-0
lines changed

3 files changed

+66
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "AMDGPUAliasAnalysis.h"
1313
#include "AMDGPU.h"
14+
#include "Utils/AMDGPUBaseInfo.h"
1415
#include "llvm/Analysis/ValueTracking.h"
1516
#include "llvm/IR/Instructions.h"
1617

@@ -112,5 +113,14 @@ ModRefInfo AMDGPUAAResult::getModRefInfoMask(const MemoryLocation &Loc,
112113
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
113114
return ModRefInfo::NoModRef;
114115

116+
// A `readonly noalias` function argument normally only gets a `Ref` mask.
117+
// However, if the calling convention of the function is one intended for
118+
// program entry points, we know that such an argument will be invariant
119+
// over the life of the program.
120+
if (auto* Arg = dyn_cast<Argument>(Base)) {
121+
const Function *F = Arg->getParent();
122+
if (AMDGPU::isKernelCC(F) && Arg->hasNoAliasAttr() && Arg->onlyReadsMemory())
123+
return ModRefInfo::NoModRef;
124+
}
115125
return ModRefInfo::ModRef;
116126
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel -mtriple=amdgcn -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
define amdgpu_cs void @load_global_is_invariant(ptr addrspace(1) readonly noalias %x, ptr addrspace(1) writeonly noalias %y) {
5+
; CHECK-LABEL: name: load_global_is_invariant
6+
; CHECK: bb.1 (%ir-block.0):
7+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
10+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
11+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
12+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
13+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
14+
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
15+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (invariant load (s32) from %ir.x, addrspace 1)
16+
; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV1]](p1) :: (store (s32) into %ir.y, addrspace 1)
17+
; CHECK-NEXT: S_ENDPGM 0
18+
%v = load float, ptr addrspace(1) %x
19+
store float %v, ptr addrspace(1) %y
20+
ret void
21+
}
22+
23+
define void @load_global_isnt_invariant_non_kernel(ptr addrspace(1) readonly noalias %x, ptr addrspace(1) writeonly noalias %y) {
24+
; CHECK-LABEL: name: load_global_isnt_invariant_non_kernel
25+
; CHECK: bb.1 (%ir-block.0):
26+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
27+
; CHECK-NEXT: {{ $}}
28+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
29+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
30+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
31+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
32+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
33+
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
34+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.x, addrspace 1)
35+
; CHECK-NEXT: G_STORE [[LOAD]](s32), [[MV1]](p1) :: (store (s32) into %ir.y, addrspace 1)
36+
; CHECK-NEXT: SI_RETURN
37+
%v = load float, ptr addrspace(1) %x
38+
store float %v, ptr addrspace(1) %y
39+
ret void
40+
}

llvm/test/CodeGen/AMDGPU/aa-points-to-constant-memory.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,19 @@ define amdgpu_kernel void @nonconst_gv_constant_as() {
110110
store i32 0, ptr addrspace(4) @global_nonconstant_constant_as
111111
ret void
112112
}
113+
114+
define amdgpu_kernel void @constant_kernel_args(ptr addrspace(1) readonly noalias inreg %x) {
115+
; CHECK-LABEL: @constant_kernel_args(
116+
; CHECK-NEXT: ret void
117+
;
118+
store i32 0, ptr addrspace(1) %x
119+
ret void
120+
}
121+
122+
define amdgpu_cs void @constant_cs_args(ptr addrspace(1) readonly noalias %x) {
123+
; CHECK-LABEL: @constant_cs_args(
124+
; CHECK-NEXT: ret void
125+
;
126+
store i32 0, ptr addrspace(1) %x
127+
ret void
128+
}

0 commit comments

Comments
 (0)