Skip to content

Commit 1d30f71

Browse files
choikwa and arsenm authored
[AMDGPU] Make ds/global load intrinsics IntrArgMemOnly (#152792)
Together with IntrReadMem, this means that the intrinsic only reads memory through the given pointer argument and pointers derived from it. This allows passes such as the inliner to attach !alias.scope metadata to the call instruction, because they can see that no other memory is accessed. Discovered via SWDEV-543741. --------- Co-authored-by: Matt Arsenault <[email protected]>
1 parent 5d099c2 commit 1d30f71

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3087,7 +3087,7 @@ class AMDGPULoadIntrinsic<LLVMType ptr_ty>:
30873087
Intrinsic<
30883088
[llvm_any_ty],
30893089
[ptr_ty],
3090-
[IntrReadMem, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
3090+
[IntrReadMem, IntrArgMemOnly, IntrWillReturn, IntrConvergent, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree],
30913091
"",
30923092
[SDNPMemOperand]
30933093
>;
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
2+
; RUN: opt -mtriple=amdgcn --passes=inline --enable-noalias-to-md-conversion -S %s | FileCheck --check-prefix=OPT %s
3+
4+
; This test checks that the load intrinsic gets the correct memory(argmem: read) attribute and
5+
; the call instruction is assigned the correct !alias.scope metadata after inlining.
6+
7+
; @caller passes both of its pointer arguments straight to @callee. The
; autogenerated checks below describe the expected output once the inline
; pass has run: two noalias scope declarations, followed by the callee's
; getelementptr, intrinsic load and store, with !alias.scope/!noalias
; metadata attached to the intrinsic call and to the store.
define void @caller(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f) {
8+
; OPT-LABEL: define void @caller(
9+
; OPT-SAME: ptr addrspace(3) [[ADDR_F:%.*]], ptr addrspace(1) [[USE_F:%.*]]) {
10+
; OPT-NEXT: [[ENTRY:.*:]]
11+
; OPT-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
12+
; OPT-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
13+
; OPT-NEXT: [[GEP_I:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR_F]], i32 4
14+
; OPT-NEXT: [[VAL_I:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP_I]]), !alias.scope [[META0]], !noalias [[META3]]
15+
; OPT-NEXT: store <2 x i32> [[VAL_I]], ptr addrspace(1) [[USE_F]], align 8, !alias.scope [[META3]], !noalias [[META0]]
16+
; OPT-NEXT: ret void
17+
;
18+
entry:
19+
call void @callee(ptr addrspace(3) %addr_f, ptr addrspace(1) %use_f)
20+
ret void
21+
}
22+
23+
; @callee takes two noalias pointer arguments. It loads a <2 x i32> through the
; ds.read.tr4.b64 intrinsic from %addr advanced by four i64 elements and stores
; the loaded value to %use. The checks expect this body to carry no alias
; metadata in the callee itself.
; NOTE(review): the call below spells the intrinsic with a `.p3` suffix, while
; the declaration and the checks use the unsuffixed name -- presumably the name
; is normalized when the IR is parsed; confirm.
define void @callee(ptr addrspace(3) noalias %addr, ptr addrspace(1) noalias %use) {
24+
; OPT-LABEL: define void @callee(
25+
; OPT-SAME: ptr addrspace(3) noalias [[ADDR:%.*]], ptr addrspace(1) noalias [[USE:%.*]]) {
26+
; OPT-NEXT: [[ENTRY:.*:]]
27+
; OPT-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(3) [[ADDR]], i32 4
28+
; OPT-NEXT: [[VAL:%.*]] = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) [[GEP]])
29+
; OPT-NEXT: store <2 x i32> [[VAL]], ptr addrspace(1) [[USE]], align 8
30+
; OPT-NEXT: ret void
31+
;
32+
entry:
33+
%gep = getelementptr i64, ptr addrspace(3) %addr, i32 4
34+
%val = call <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32.p3(ptr addrspace(3) %gep)
35+
store <2 x i32> %val, ptr addrspace(1) %use
36+
ret void
37+
}
38+
;.
39+
; Check Function Attribute on decl
40+
; OPT: declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3) captures(none)) #[[ATTR0:[0-9]+]]
41+
declare <2 x i32> @llvm.amdgcn.ds.read.tr4.b64.v2i32(ptr addrspace(3))
42+
; OPT: attributes #[[ATTR0]] = { convergent nocallback nofree nounwind willreturn memory(argmem: read) }
43+
; OPT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
44+
;.
45+
; OPT: [[META0]] = !{[[META1:![0-9]+]]}
46+
; OPT: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee: %addr"}
47+
; OPT: [[META2]] = distinct !{[[META2]], !"callee"}
48+
; OPT: [[META3]] = !{[[META4:![0-9]+]]}
49+
; OPT: [[META4]] = distinct !{[[META4]], [[META2]], !"callee: %use"}
50+
;.

0 commit comments

Comments
 (0)