Skip to content

Commit efd85c7

Browse files
author
Salinas, David
authored
[AMDGPU] Correctly merge noalias scopes during lowering of LDS data. … (llvm#2122)
2 parents e97f023 + dfbbc6a commit efd85c7

File tree

8 files changed

+203
-14
lines changed

8 files changed

+203
-14
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ class ScopedNoAliasAAResult : public AAResultBase {
4343
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
4444
AAQueryInfo &AAQI);
4545

46+
void collectScopedDomains(const MDNode *NoAlias,
47+
SmallPtrSetImpl<const MDNode *> &Domains) const;
48+
4649
private:
4750
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
4851
};

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,17 +114,26 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
114114
Nodes.insert(MD);
115115
}
116116

117+
/// Collect the set of scoped domains relevant to the noalias scopes.
118+
void ScopedNoAliasAAResult::collectScopedDomains(
119+
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
120+
if (!NoAlias)
121+
return;
122+
assert(Domains.empty() && "Domains should be empty");
123+
for (const MDOperand &MDOp : NoAlias->operands())
124+
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
125+
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
126+
Domains.insert(Domain);
127+
}
128+
117129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
118130
const MDNode *NoAlias) const {
119131
if (!Scopes || !NoAlias)
120132
return true;
121133

122134
// Collect the set of scope domains relevant to the noalias scopes.
123135
SmallPtrSet<const MDNode *, 16> Domains;
124-
for (const MDOperand &MDOp : NoAlias->operands())
125-
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
126-
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
127-
Domains.insert(Domain);
136+
collectScopedDomains(NoAlias, Domains);
128137

129138
// We alias unless, for some domain, the set of noalias scopes in that domain
130139
// is a superset of the set of alias scopes in that domain.

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@
186186
#include "llvm/ADT/STLExtras.h"
187187
#include "llvm/ADT/SetOperations.h"
188188
#include "llvm/Analysis/CallGraph.h"
189+
#include "llvm/Analysis/ScopedNoAliasAA.h"
189190
#include "llvm/CodeGen/TargetPassConfig.h"
190191
#include "llvm/IR/Constants.h"
191192
#include "llvm/IR/DIBuilder.h"
@@ -1544,6 +1545,8 @@ class AMDGPULowerModuleLDS {
15441545
if (!MaxDepth || (A == 1 && !AliasScope))
15451546
return;
15461547

1548+
ScopedNoAliasAAResult ScopedNoAlias;
1549+
15471550
for (User *U : Ptr->users()) {
15481551
if (auto *I = dyn_cast<Instruction>(U)) {
15491552
if (AliasScope && I->mayReadOrWriteMemory()) {
@@ -1553,7 +1556,34 @@ class AMDGPULowerModuleLDS {
15531556
I->setMetadata(LLVMContext::MD_alias_scope, AS);
15541557

15551558
MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
1556-
NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
1559+
1560+
// Scoped aliases can originate from two different domains.
1561+
// First domain would be from LDS domain (created by this pass).
1562+
// All entries (LDS vars) into LDS struct will have same domain.
1563+
1564+
// Second domain could be existing scoped aliases that are the
1565+
// results of noalias params and subsequent optimizations that
1566+
// may alter these sets.
1567+
1568+
// We need to be careful how we create new alias sets, and
1569+
// have right scopes and domains for loads/stores of these new
1570+
// LDS variables. We intersect NoAlias set if alias sets belong
1571+
// to the same domain. This is the case if we have memcpy using
1572+
// LDS variables. Both src and dst of memcpy would belong to
1573+
// LDS struct, they do not alias.
1574+
// On the other hand, if one of the domains is LDS and other is
1575+
// existing domain prior to LDS, we need to have a union of all
1576+
// these alias sets to preserve existing aliasing information.
1577+
1578+
SmallPtrSet<const MDNode *, 16> ExistingDomains, LDSDomains;
1579+
ScopedNoAlias.collectScopedDomains(NA, ExistingDomains);
1580+
ScopedNoAlias.collectScopedDomains(NoAlias, LDSDomains);
1581+
auto Intersection = set_intersection(ExistingDomains, LDSDomains);
1582+
if (Intersection.empty()) {
1583+
NA = NA ? MDNode::concatenate(NA, NoAlias) : NoAlias;
1584+
} else {
1585+
NA = NA ? MDNode::intersect(NA, NoAlias) : NoAlias;
1586+
}
15571587
I->setMetadata(LLVMContext::MD_noalias, NA);
15581588
}
15591589
}

llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ define amdgpu_kernel void @calls_f0() {
8484
define void @f0() {
8585
; CHECK-LABEL: define void @f0()
8686
; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
87-
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
87+
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29
8888
; CHECK-NEXT: ret void
8989
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
9090

llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs
1212
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6
1313
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]]
1414
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6
15-
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6
15+
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11
1616
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]]
17-
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6
17+
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11
1818
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
1919
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
2020
; CHECK-NEXT: ret void
@@ -48,4 +48,11 @@ bb:
4848
; CHECK:!3 = !{!"int", !4, i64 0}
4949
; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
5050
; CHECK:!5 = !{!"Simple C++ TBAA"}
51-
; CHECK:!6 = !{}
51+
; CHECK:!6 = !{!7, !9}
52+
; CHECK:!7 = distinct !{!7, !8}
53+
; CHECK:!8 = distinct !{!8}
54+
; CHECK:!9 = distinct !{!9, !10}
55+
; CHECK:!10 = distinct !{!10}
56+
; CHECK:!11 = !{!12, !13}
57+
; CHECK:!12 = distinct !{!12, !8}
58+
; CHECK:!13 = distinct !{!13, !10}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
3+
4+
@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
5+
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
6+
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
7+
8+
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
9+
; GCN-LABEL: ds_load_stores_aainfo:
10+
; GCN: ; %bb.0: ; %bb
11+
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
12+
; GCN-NEXT: v_mov_b32_e32 v0, 1
13+
; GCN-NEXT: v_mov_b32_e32 v1, 0
14+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
15+
; GCN-NEXT: s_lshl_b32 s0, s0, 2
16+
; GCN-NEXT: v_mov_b32_e32 v4, s0
17+
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
18+
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
19+
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
20+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
21+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
22+
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
23+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
24+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
25+
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
26+
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
27+
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
28+
; GCN-NEXT: s_endpgm
29+
bb:
30+
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
31+
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
32+
33+
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5
34+
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5
35+
36+
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
37+
38+
%val = add i64 %val.a, %val.b
39+
store i64 %val, ptr addrspace(1) %arg, align 4
40+
41+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
42+
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
43+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
44+
ret void
45+
}
46+
47+
!0 = !{!"omnipotent char", !1, i64 0}
48+
!1 = !{!1}
49+
!2 = !{!3}
50+
!3 = distinct !{!3, !4}
51+
!4 = distinct !{!4}
52+
!5 = !{!3}
53+
!6 = !{!7}
54+
!7 = !{!7, !4}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
3+
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
4+
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
5+
6+
@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
7+
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
8+
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
9+
10+
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
11+
; GCN-LABEL: ds_load_stores_aainfo:
12+
; GCN: ; %bb.0: ; %bb
13+
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
14+
; GCN-NEXT: v_mov_b32_e32 v0, 1
15+
; GCN-NEXT: v_mov_b32_e32 v1, 0
16+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
17+
; GCN-NEXT: s_lshl_b32 s0, s0, 2
18+
; GCN-NEXT: v_mov_b32_e32 v4, s0
19+
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
20+
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
21+
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
22+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
23+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
24+
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
25+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
26+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
27+
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
28+
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
29+
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
30+
; GCN-NEXT: s_endpgm
31+
; CHECK-LABEL: define amdgpu_kernel void @ds_load_stores_aainfo(
32+
; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
33+
; CHECK-NEXT: [[BB:.*:]]
34+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 [[I]]
35+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 [[I]]
36+
; CHECK-NEXT: [[VAL_A:%.*]] = load i64, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1:![0-9]+]], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
37+
; CHECK-NEXT: [[VAL_B:%.*]] = load i64, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
38+
; CHECK-NEXT: store i64 1, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa [[TBAA1]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
39+
; CHECK-NEXT: [[VAL:%.*]] = add i64 [[VAL_A]], [[VAL_B]]
40+
; CHECK-NEXT: store i64 [[VAL]], ptr addrspace(1) [[ARG]], align 4
41+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
42+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
43+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
44+
; CHECK-NEXT: ret void
45+
;
46+
bb:
47+
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
48+
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
49+
50+
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
51+
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
52+
53+
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
54+
55+
%val = add i64 %val.a, %val.b
56+
store i64 %val, ptr addrspace(1) %arg, align 4
57+
58+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
59+
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
60+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
61+
ret void
62+
}
63+
64+
!0 = !{!"omnipotent char", !1, i64 0}
65+
!1 = !{!1}
66+
!2 = !{!3}
67+
!3 = distinct !{!3, !4}
68+
!4 = distinct !{!4}
69+
!5 = !{!3}
70+
;.
71+
; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0, i64 0}
72+
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
73+
; CHECK: [[META3]] = distinct !{[[META3]]}
74+
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
75+
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
76+
; CHECK: [[META6]] = distinct !{[[META6]]}
77+
; CHECK: [[META7]] = !{[[META8:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
78+
; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]}
79+
; CHECK: [[META9]] = distinct !{[[META9]]}
80+
; CHECK: [[META10]] = distinct !{[[META10]], [[META6]]}
81+
; CHECK: [[META11]] = distinct !{[[META11]], [[META6]]}
82+
; CHECK: [[META12]] = !{[[META10]]}
83+
; CHECK: [[META13]] = !{[[META8]], [[META5]], [[META11]]}
84+
; CHECK: [[META14]] = !{[[META11]]}
85+
; CHECK: [[META15]] = !{[[META8]], [[META5]], [[META10]]}
86+
;.

llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ define void @f0() {
6060

6161
define amdgpu_kernel void @k_f0() {
6262
; MODULE-LABEL: @k_f0(
63-
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
63+
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]]
6464
; MODULE-NEXT: call void @f0()
6565
; MODULE-NEXT: ret void
6666
;
@@ -83,9 +83,9 @@ define amdgpu_kernel void @k_f0() {
8383
@both.lds = addrspace(3) global i32 undef
8484
define void @f_both() {
8585
; MODULE-LABEL: @f_both(
86-
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
86+
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]]
8787
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
88-
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
88+
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]]
8989
; MODULE-NEXT: ret void
9090
;
9191
; TABLE-LABEL: @f_both(
@@ -116,9 +116,9 @@ define void @f_both() {
116116
define amdgpu_kernel void @k0_both() {
117117
; MODULE-LABEL: @k0_both(
118118
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
119-
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
119+
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
120120
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
121-
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
121+
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
122122
; MODULE-NEXT: call void @f_both()
123123
; MODULE-NEXT: ret void
124124
;

0 commit comments

Comments
 (0)