Skip to content

Commit efd85c7

Browse files
author
Salinas, David
authored
[AMDGPU] Correctly merge noalias scopes during lowering of LDS data. … (llvm#2122)
2 parents e97f023 + dfbbc6a commit efd85c7

File tree

8 files changed

+203
-14
lines changed

8 files changed

+203
-14
lines changed

llvm/include/llvm/Analysis/ScopedNoAliasAA.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ class ScopedNoAliasAAResult : public AAResultBase {
4343
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
4444
AAQueryInfo &AAQI);
4545

46+
void collectScopedDomains(const MDNode *NoAlias,
47+
SmallPtrSetImpl<const MDNode *> &Domains) const;
48+
4649
private:
4750
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
4851
};

llvm/lib/Analysis/ScopedNoAliasAA.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,17 +114,26 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
114114
Nodes.insert(MD);
115115
}
116116

117+
/// Collect the set of scoped domains relevant to the noalias scopes.
118+
void ScopedNoAliasAAResult::collectScopedDomains(
119+
const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
120+
if (!NoAlias)
121+
return;
122+
assert(Domains.empty() && "Domains should be empty");
123+
for (const MDOperand &MDOp : NoAlias->operands())
124+
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
125+
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
126+
Domains.insert(Domain);
127+
}
128+
117129
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
118130
const MDNode *NoAlias) const {
119131
if (!Scopes || !NoAlias)
120132
return true;
121133

122134
// Collect the set of scope domains relevant to the noalias scopes.
123135
SmallPtrSet<const MDNode *, 16> Domains;
124-
for (const MDOperand &MDOp : NoAlias->operands())
125-
if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
126-
if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
127-
Domains.insert(Domain);
136+
collectScopedDomains(NoAlias, Domains);
128137

129138
// We alias unless, for some domain, the set of noalias scopes in that domain
130139
// is a superset of the set of alias scopes in that domain.

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@
186186
#include "llvm/ADT/STLExtras.h"
187187
#include "llvm/ADT/SetOperations.h"
188188
#include "llvm/Analysis/CallGraph.h"
189+
#include "llvm/Analysis/ScopedNoAliasAA.h"
189190
#include "llvm/CodeGen/TargetPassConfig.h"
190191
#include "llvm/IR/Constants.h"
191192
#include "llvm/IR/DIBuilder.h"
@@ -1544,6 +1545,8 @@ class AMDGPULowerModuleLDS {
15441545
if (!MaxDepth || (A == 1 && !AliasScope))
15451546
return;
15461547

1548+
ScopedNoAliasAAResult ScopedNoAlias;
1549+
15471550
for (User *U : Ptr->users()) {
15481551
if (auto *I = dyn_cast<Instruction>(U)) {
15491552
if (AliasScope && I->mayReadOrWriteMemory()) {
@@ -1553,7 +1556,34 @@ class AMDGPULowerModuleLDS {
15531556
I->setMetadata(LLVMContext::MD_alias_scope, AS);
15541557

15551558
MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
1556-
NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
1559+
1560+
// Scoped aliases can originate from two different domains.
1561+
// First domain would be from LDS domain (created by this pass).
1562+
// All entries (LDS vars) into LDS struct will have same domain.
1563+
1564+
// Second domain could be existing scoped aliases that are the
1565+
// results of noalias params and subsequent optimizations that
1566+
// may alter these sets.
1567+
1568+
// We need to be careful how we create new alias sets, and
1569+
// have right scopes and domains for loads/stores of these new
1570+
// LDS variables. We intersect NoAlias set if alias sets belong
1571+
// to the same domain. This is the case if we have memcpy using
1572+
// LDS variables. Both src and dst of memcpy would belong to
1573+
// LDS struct, they do not alias.
1574+
// On the other hand, if one of the domains is LDS and other is
1575+
// existing domain prior to LDS, we need to have a union of all
1576+
// these alias sets to preserve existing aliasing information.
1577+
1578+
SmallPtrSet<const MDNode *, 16> ExistingDomains, LDSDomains;
1579+
ScopedNoAlias.collectScopedDomains(NA, ExistingDomains);
1580+
ScopedNoAlias.collectScopedDomains(NoAlias, LDSDomains);
1581+
auto Intersection = set_intersection(ExistingDomains, LDSDomains);
1582+
if (Intersection.empty()) {
1583+
NA = NA ? MDNode::concatenate(NA, NoAlias) : NoAlias;
1584+
} else {
1585+
NA = NA ? MDNode::intersect(NA, NoAlias) : NoAlias;
1586+
}
15571587
I->setMetadata(LLVMContext::MD_noalias, NA);
15581588
}
15591589
}

llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ define amdgpu_kernel void @calls_f0() {
8484
define void @f0() {
8585
; CHECK-LABEL: define void @f0()
8686
; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24
87-
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !24
87+
; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29
8888
; CHECK-NEXT: ret void
8989
store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1
9090

llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs
1212
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6
1313
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]]
1414
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6
15-
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6
15+
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11
1616
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]]
17-
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6
17+
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11
1818
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
1919
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
2020
; CHECK-NEXT: ret void
@@ -48,4 +48,11 @@ bb:
4848
; CHECK:!3 = !{!"int", !4, i64 0}
4949
; CHECK:!4 = !{!"omnipotent char", !5, i64 0}
5050
; CHECK:!5 = !{!"Simple C++ TBAA"}
51-
; CHECK:!6 = !{}
51+
; CHECK:!6 = !{!7, !9}
52+
; CHECK:!7 = distinct !{!7, !8}
53+
; CHECK:!8 = distinct !{!8}
54+
; CHECK:!9 = distinct !{!9, !10}
55+
; CHECK:!10 = distinct !{!10}
56+
; CHECK:!11 = !{!12, !13}
57+
; CHECK:!12 = distinct !{!12, !8}
58+
; CHECK:!13 = distinct !{!13, !10}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 < %s | FileCheck -check-prefix=GCN %s
3+
4+
@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
5+
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
6+
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
7+
8+
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
9+
; GCN-LABEL: ds_load_stores_aainfo:
10+
; GCN: ; %bb.0: ; %bb
11+
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
12+
; GCN-NEXT: v_mov_b32_e32 v0, 1
13+
; GCN-NEXT: v_mov_b32_e32 v1, 0
14+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
15+
; GCN-NEXT: s_lshl_b32 s0, s0, 2
16+
; GCN-NEXT: v_mov_b32_e32 v4, s0
17+
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
18+
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
19+
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
20+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
21+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
22+
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
23+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
24+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
25+
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
26+
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
27+
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
28+
; GCN-NEXT: s_endpgm
29+
bb:
30+
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
31+
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
32+
33+
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !alias.scope !6, !noalias !5
34+
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !alias.scope !6, !noalias !5
35+
36+
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
37+
38+
%val = add i64 %val.a, %val.b
39+
store i64 %val, ptr addrspace(1) %arg, align 4
40+
41+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
42+
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
43+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
44+
ret void
45+
}
46+
47+
!0 = !{!"omnipotent char", !1, i64 0}
48+
!1 = !{!1}
49+
!2 = !{!3}
50+
!3 = distinct !{!3, !4}
51+
!4 = distinct !{!4}
52+
!5 = !{!3}
53+
!6 = !{!7}
54+
!7 = !{!7, !4}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -O3 --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
3+
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
4+
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
5+
6+
@a = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
7+
@b = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
8+
@c = internal unnamed_addr addrspace(3) global [64 x i32] poison, align 4
9+
10+
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
11+
; GCN-LABEL: ds_load_stores_aainfo:
12+
; GCN: ; %bb.0: ; %bb
13+
; GCN-NEXT: s_load_dword s0, s[4:5], 0x2c
14+
; GCN-NEXT: v_mov_b32_e32 v0, 1
15+
; GCN-NEXT: v_mov_b32_e32 v1, 0
16+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
17+
; GCN-NEXT: s_lshl_b32 s0, s0, 2
18+
; GCN-NEXT: v_mov_b32_e32 v4, s0
19+
; GCN-NEXT: ds_read2_b32 v[2:3], v4 offset1:1
20+
; GCN-NEXT: ds_write_b64 v1, v[0:1] offset:512
21+
; GCN-NEXT: ds_read2_b32 v[4:5], v4 offset0:64 offset1:65
22+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
23+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
24+
; GCN-NEXT: ; sched_group_barrier mask(0x00000200) size(1) SyncID(0)
25+
; GCN-NEXT: ; sched_group_barrier mask(0x00000100) size(1) SyncID(0)
26+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
27+
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
28+
; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
29+
; GCN-NEXT: global_store_dwordx2 v1, v[2:3], s[0:1]
30+
; GCN-NEXT: s_endpgm
31+
; CHECK-LABEL: define amdgpu_kernel void @ds_load_stores_aainfo(
32+
; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
33+
; CHECK-NEXT: [[BB:.*:]]
34+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 [[I]]
35+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 [[I]]
36+
; CHECK-NEXT: [[VAL_A:%.*]] = load i64, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1:![0-9]+]], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
37+
; CHECK-NEXT: [[VAL_B:%.*]] = load i64, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
38+
; CHECK-NEXT: store i64 1, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_DS_LOAD_STORES_AAINFO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa [[TBAA1]], !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
39+
; CHECK-NEXT: [[VAL:%.*]] = add i64 [[VAL_A]], [[VAL_B]]
40+
; CHECK-NEXT: store i64 [[VAL]], ptr addrspace(1) [[ARG]], align 4
41+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
42+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
43+
; CHECK-NEXT: tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
44+
; CHECK-NEXT: ret void
45+
;
46+
bb:
47+
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
48+
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
49+
50+
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
51+
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
52+
53+
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
54+
55+
%val = add i64 %val.a, %val.b
56+
store i64 %val, ptr addrspace(1) %arg, align 4
57+
58+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
59+
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
60+
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
61+
ret void
62+
}
63+
64+
!0 = !{!"omnipotent char", !1, i64 0}
65+
!1 = !{!1}
66+
!2 = !{!3}
67+
!3 = distinct !{!3, !4}
68+
!4 = distinct !{!4}
69+
!5 = !{!3}
70+
;.
71+
; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0, i64 0}
72+
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
73+
; CHECK: [[META3]] = distinct !{[[META3]]}
74+
; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
75+
; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
76+
; CHECK: [[META6]] = distinct !{[[META6]]}
77+
; CHECK: [[META7]] = !{[[META8:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
78+
; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]}
79+
; CHECK: [[META9]] = distinct !{[[META9]]}
80+
; CHECK: [[META10]] = distinct !{[[META10]], [[META6]]}
81+
; CHECK: [[META11]] = distinct !{[[META11]], [[META6]]}
82+
; CHECK: [[META12]] = !{[[META10]]}
83+
; CHECK: [[META13]] = !{[[META8]], [[META5]], [[META11]]}
84+
; CHECK: [[META14]] = !{[[META11]]}
85+
; CHECK: [[META15]] = !{[[META8]], [[META5]], [[META10]]}
86+
;.

llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-unambiguous.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ define void @f0() {
6060

6161
define amdgpu_kernel void @k_f0() {
6262
; MODULE-LABEL: @k_f0(
63-
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
63+
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META1]]
6464
; MODULE-NEXT: call void @f0()
6565
; MODULE-NEXT: ret void
6666
;
@@ -83,9 +83,9 @@ define amdgpu_kernel void @k_f0() {
8383
@both.lds = addrspace(3) global i32 undef
8484
define void @f_both() {
8585
; MODULE-LABEL: @f_both(
86-
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
86+
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11:![0-9]+]]
8787
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
88-
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
88+
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META11]]
8989
; MODULE-NEXT: ret void
9090
;
9191
; TABLE-LABEL: @f_both(
@@ -116,9 +116,9 @@ define void @f_both() {
116116
define amdgpu_kernel void @k0_both() {
117117
; MODULE-LABEL: @k0_both(
118118
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
119-
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
119+
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
120120
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
121-
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
121+
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META10]], !noalias [[META1]]
122122
; MODULE-NEXT: call void @f_both()
123123
; MODULE-NEXT: ret void
124124
;

0 commit comments

Comments
 (0)