diff --git a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h index f6ade7c83a61a..96afe3ce6ecdf 100644 --- a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h +++ b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h @@ -43,6 +43,9 @@ class ScopedNoAliasAAResult : public AAResultBase { ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, AAQueryInfo &AAQI); + void collectScopedDomains(const MDNode *NoAlias, + SmallPtrSetImpl<const MDNode *> &Domains) const; + private: bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; }; diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 3815bdf49d59c..59e1179119160 100644 --- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -114,6 +114,18 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain, Nodes.insert(MD); } +/// Collect the set of scoped domains relevant to the noalias scopes. +void ScopedNoAliasAAResult::collectScopedDomains( + const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const { + if (!NoAlias) + return; + assert(Domains.empty() && "Domains should be empty"); + for (const MDOperand &MDOp : NoAlias->operands()) + if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp)) + if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain()) + Domains.insert(Domain); +} + bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const { if (!Scopes || !NoAlias) @@ -121,10 +133,7 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, // Collect the set of scope domains relevant to the noalias scopes. 
SmallPtrSet<const MDNode *, 16> Domains; - for (const MDOperand &MDOp : NoAlias->operands()) - if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp)) - if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain()) - Domains.insert(Domain); + collectScopedDomains(NoAlias, Domains); // We alias unless, for some domain, the set of noalias scopes in that domain // is a superset of the set of alias scopes in that domain. diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index f9f2d43a5b041..88acfe13357dc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -186,6 +186,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -1441,6 +1442,8 @@ class AMDGPULowerModuleLDS { if (!MaxDepth || (A == 1 && !AliasScope)) return; + ScopedNoAliasAAResult ScopedNoAlias; + for (User *U : Ptr->users()) { if (auto *I = dyn_cast<Instruction>(U)) { if (AliasScope && I->mayReadOrWriteMemory()) { @@ -1450,7 +1453,34 @@ class AMDGPULowerModuleLDS { I->setMetadata(LLVMContext::MD_alias_scope, AS); MDNode *NA = I->getMetadata(LLVMContext::MD_noalias); - NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias); + + // Scoped aliases can originate from two different domains. + // First domain would be from LDS domain (created by this pass). + // All entries (LDS vars) into LDS struct will have same domain. + + // Second domain could be existing scoped aliases that are the + // results of noalias params and subsequent optimizations that + // may alter these sets. + + // We need to be careful how we create new alias sets, and + // have right scopes and domains for loads/stores of these new + // LDS variables. We intersect NoAlias set if alias sets belong + // to the same domain. 
This is the case if we have memcpy using + // LDS variables. Both src and dst of memcpy would belong to + // LDS struct, they do not alias. + // On the other hand, if one of the domains is LDS and other is + // existing domain prior to LDS, we need to have a union of all + // these alias sets to preserve existing aliasing information. + + SmallPtrSet<const MDNode *, 8> ExistingDomains, LDSDomains; + ScopedNoAlias.collectScopedDomains(NA, ExistingDomains); + ScopedNoAlias.collectScopedDomains(NoAlias, LDSDomains); + auto Intersection = set_intersection(ExistingDomains, LDSDomains); + if (Intersection.empty()) { + NA = NA ? MDNode::concatenate(NA, NoAlias) : NoAlias; + } else { + NA = NA ? MDNode::intersect(NA, NoAlias) : NoAlias; + } I->setMetadata(LLVMContext::MD_noalias, NA); } } diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll index eefa0b23d0c08..92d0a05f35732 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll @@ -84,7 +84,7 @@ define amdgpu_kernel void @calls_f0() { define void @f0() { ; CHECK-LABEL: define void @f0() ; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24 -; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24 +; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll index bb09d3a670bc9..154c798a44f93 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll @@ -12,9 +12,9 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs ; 
CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]] ; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6 -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6 +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11 ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 ; CHECK-NEXT: ret void @@ -48,4 +48,11 @@ bb: ; CHECK:!3 = !{!"int", !4, i64 0} ; CHECK:!4 = !{!"omnipotent char", !5, i64 0} ; CHECK:!5 = !{!"Simple C++ TBAA"} -; CHECK:!6 = !{} +; CHECK:!6 = !{!7, !9} +; CHECK:!7 = distinct !{!7, !8} +; CHECK:!8 = distinct !{!8} +; CHECK:!9 = distinct !{!9, !10} +; CHECK:!10 = distinct !{!10} +; CHECK:!11 = !{!12, 
!13} +; CHECK:!12 = distinct !{!12, !8} +; CHECK:!13 = distinct !{!13, !10} diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll new file mode 100644 index 0000000000000..d8d7fc1d7a3bd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-with-alias-scope.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s + +@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4 +@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4 +@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4 + +define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) { +; GCN-LABEL: ds_load_stores_aainfo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshl_b32 s0, s0, 2 +; GCN-NEXT: v_mov_b32_e32 v4, s0 +; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1 +; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512 +; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0) +; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0) +; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0) +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] +; GCN-NEXT: s_endpgm +bb: + %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i + %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i + + %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5 
+ %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5 + + store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2 + + %val = add i64 %val.a, %val.b + store i64 %val, ptr addrspace(1) %arg, align 4 + + tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0) + tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0) + tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0) + ret void +} + + !0 = !{!"omnipotent char", !1, i64 0} + !1 = !{!1} + !2 = !{!3} + !3 = distinct !{!3, !4} + !4 = distinct !{!4} + !5 = !{!3} + !6 = !{!7} + !7 = !{!7, !4} diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll new file mode 100644 index 0000000000000..0d0daeaae547d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-with-noalias.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s + +@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4 +@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4 +@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4 + +define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) { +; GCN-LABEL: ds_load_stores_aainfo: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshl_b32 s0, s0, 2 +; GCN-NEXT: v_mov_b32_e32 v4, s0 +; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1 
+; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512 +; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0) +; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0) +; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0) +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1] +; GCN-NEXT: s_endpgm +; CHECK-LABEL: define amdgpu_kernel void @ds_load_stores_aainfo( +; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 [[I]] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i64, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1:![0-9]+]], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i64, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]] +; CHECK-NEXT: store i64 1, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa [[TBAA1]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]] +; CHECK-NEXT: [[VAL:%.*]] = add i64 [[VAL_A]], [[VAL_B]] +; CHECK-NEXT: store i64 [[VAL]], ptr addrspace(1) [[ARG]], align 4 +; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0) +; CHECK-NEXT: tail call void 
@llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0) +; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0) +; CHECK-NEXT: ret void +; +bb: + %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i + %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i + + %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5 + %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5 + + store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2 + + %val = add i64 %val.a, %val.b + store i64 %val, ptr addrspace(1) %arg, align 4 + + tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0) + tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0) + tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0) + ret void +} + + !0 = !{!"omnipotent char", !1, i64 0} + !1 = !{!1} + !2 = !{!3} + !3 = distinct !{!3, !4} + !4 = distinct !{!4} + !5 = !{!3} +;. +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0, i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]} +; CHECK: [[META3]] = distinct !{[[META3]]} +; CHECK: [[META4]] = !{[[META5:![0-9]+]]} +; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]} +; CHECK: [[META6]] = distinct !{[[META6]]} +; CHECK: [[META7]] = !{[[META8:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]} +; CHECK: [[META9]] = distinct !{[[META9]]} +; CHECK: [[META10]] = distinct !{[[META10]], [[META6]]} +; CHECK: [[META11]] = distinct !{[[META11]], [[META6]]} +; CHECK: [[META12]] = !{[[META10]]} +; CHECK: [[META13]] = !{[[META8]], [[META5]], [[META11]]} +; CHECK: [[META14]] = !{[[META11]]} +; CHECK: [[META15]] = !{[[META8]], [[META5]], [[META10]]} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll index 96e8099ed59e1..e7f78b4c6897a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll @@ -60,7 +60,7 @@ define void @f0() { define amdgpu_kernel void @k_f0() { ; MODULE-LABEL: @k_f0( -; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]] +; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]] ; MODULE-NEXT: call void @f0() ; MODULE-NEXT: ret void ; @@ -83,9 +83,9 @@ define amdgpu_kernel void @k_f0() { @both.lds = addrspace(3) global i32 poison define void @f_both() { ; MODULE-LABEL: @f_both( -; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]] +; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]] ; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4 -; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]] +; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]] ; MODULE-NEXT: ret void ; ; TABLE-LABEL: @f_both( @@ -116,9 +116,9 @@ define void @f_both() { define amdgpu_kernel void @k0_both() { ; MODULE-LABEL: @k0_both( ; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] -; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]] +; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) 
@llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]] ; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5 -; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]] +; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]] ; MODULE-NEXT: call void @f_both() ; MODULE-NEXT: ret void ;