Skip to content

Commit cb8caef

Browse files
authored
[SWDEV-533402] amdgpu-sw-lower-lds PRs (llvm#2270)
2 parents 1eb3c45 + ac25e41 commit cb8caef

14 files changed

+1197
-306
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,7 @@ class AMDGPUSwLowerLDS {
193193
void getLDSMemoryInstructions(Function *Func,
194194
SetVector<Instruction *> &LDSInstructions);
195195
void replaceKernelLDSAccesses(Function *Func);
196-
Value *getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
197-
Value *LDSPtr);
196+
Value *getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr, Value *LDSPtr);
198197
void translateLDSMemoryOperationsToGlobalMemory(
199198
Function *Func, Value *LoadMallocPtr,
200199
SetVector<Instruction *> &LDSInstructions);
@@ -300,8 +299,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
300299
for (User *V : GV->users()) {
301300
if (auto *I = dyn_cast<Instruction>(V)) {
302301
Function *F = I->getFunction();
303-
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
304-
!F->isDeclaration())
302+
if (!isKernelLDS(F) && !F->isDeclaration())
305303
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
306304
}
307305
}
@@ -660,20 +658,30 @@ void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
660658
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(&Inst)) {
661659
if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
662660
LDSInstructions.insert(&Inst);
661+
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&Inst)) {
662+
if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
663+
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS)
664+
LDSInstructions.insert(&Inst);
663665
} else
664666
continue;
665667
}
666668
}
667669
}
668670

669-
Value *
670-
AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
671+
Value *AMDGPUSwLowerLDS::getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr,
671672
Value *LDSPtr) {
672673
assert(LDSPtr && "Invalid LDS pointer operand");
673-
Value *PtrToInt = IRB.CreatePtrToInt(LDSPtr, IRB.getInt32Ty());
674-
Value *GEP =
675-
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt});
676-
return GEP;
674+
Type *LDSPtrType = LDSPtr->getType();
675+
LLVMContext &Ctx = M.getContext();
676+
const DataLayout &DL = M.getDataLayout();
677+
Type *IntTy = DL.getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
678+
if (auto *VecPtrTy = dyn_cast<VectorType>(LDSPtrType)) {
679+
// Handle vector of pointers
680+
ElementCount NumElements = VecPtrTy->getElementCount();
681+
IntTy = VectorType::get(IntTy, NumElements);
682+
}
683+
Value *GepIndex = IRB.CreatePtrToInt(LDSPtr, IntTy);
684+
return IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {GepIndex});
677685
}
678686

679687
void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
@@ -686,7 +694,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
686694
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
687695
Value *LIOperand = LI->getPointerOperand();
688696
Value *Replacement =
689-
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, LIOperand);
697+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, LIOperand);
690698
LoadInst *NewLI = IRB.CreateAlignedLoad(LI->getType(), Replacement,
691699
LI->getAlign(), LI->isVolatile());
692700
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@@ -696,7 +704,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
696704
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
697705
Value *SIOperand = SI->getPointerOperand();
698706
Value *Replacement =
699-
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, SIOperand);
707+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, SIOperand);
700708
StoreInst *NewSI = IRB.CreateAlignedStore(
701709
SI->getValueOperand(), Replacement, SI->getAlign(), SI->isVolatile());
702710
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
@@ -706,8 +714,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
706714
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
707715
Value *RMWPtrOperand = RMW->getPointerOperand();
708716
Value *RMWValOperand = RMW->getValOperand();
709-
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
710-
LoadMallocPtr, RMWPtrOperand);
717+
Value *Replacement =
718+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, RMWPtrOperand);
711719
AtomicRMWInst *NewRMW = IRB.CreateAtomicRMW(
712720
RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(),
713721
RMW->getOrdering(), RMW->getSyncScopeID());
@@ -717,8 +725,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
717725
RMW->eraseFromParent();
718726
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(Inst)) {
719727
Value *XCHGPtrOperand = XCHG->getPointerOperand();
720-
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
721-
LoadMallocPtr, XCHGPtrOperand);
728+
Value *Replacement =
729+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, XCHGPtrOperand);
722730
AtomicCmpXchgInst *NewXCHG = IRB.CreateAtomicCmpXchg(
723731
Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(),
724732
XCHG->getAlign(), XCHG->getSuccessOrdering(),
@@ -727,6 +735,16 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
727735
AsanInfo.Instructions.insert(NewXCHG);
728736
XCHG->replaceAllUsesWith(NewXCHG);
729737
XCHG->eraseFromParent();
738+
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Inst)) {
739+
Value *AIOperand = ASC->getPointerOperand();
740+
Value *Replacement =
741+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, AIOperand);
742+
Value *NewAI = IRB.CreateAddrSpaceCast(Replacement, ASC->getType());
743+
// Note: No need to add the instruction to AsanInfo instructions to be
744+
// instrumented list. FLAT_ADDRESS ptr would have been already
745+
// instrumented by asan pass prior to this pass.
746+
ASC->replaceAllUsesWith(NewAI);
747+
ASC->eraseFromParent();
730748
} else
731749
report_fatal_error("Unimplemented LDS lowering instruction");
732750
}
@@ -1123,6 +1141,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
11231141
return;
11241142
}
11251143

1144+
static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
1145+
for (auto &K : LDSAccesses) {
1146+
Function *F = K.first;
1147+
if (!F)
1148+
continue;
1149+
if (F->hasFnAttribute(Attribute::SanitizeAddress))
1150+
return true;
1151+
}
1152+
return false;
1153+
}
1154+
11261155
bool AMDGPUSwLowerLDS::run() {
11271156
bool Changed = false;
11281157

@@ -1133,6 +1162,14 @@ bool AMDGPUSwLowerLDS::run() {
11331162
// Get all the direct and indirect access of LDS for all the kernels.
11341163
LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
11351164

1165+
// Flag to decide whether to lower all the LDS accesses
1166+
// based on sanitize_address attribute.
1167+
bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
1168+
hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
1169+
1170+
if (!LowerAllLDS)
1171+
return Changed;
1172+
11361173
// Utility to group LDS access into direct, indirect, static and dynamic.
11371174
auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
11381175
bool DirectAccess) {
@@ -1142,8 +1179,6 @@ bool AMDGPUSwLowerLDS::run() {
11421179
continue;
11431180

11441181
assert(isKernelLDS(F));
1145-
if (!F->hasFnAttribute(Attribute::SanitizeAddress))
1146-
continue;
11471182

11481183
// Only inserts if key isn't already in the map.
11491184
FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1184,10 +1219,13 @@ bool AMDGPUSwLowerLDS::run() {
11841219
LDSParams.IndirectAccess.DynamicLDSGlobals.empty()) {
11851220
Changed = false;
11861221
} else {
1187-
removeFnAttrFromReachable(CG, Func,
1188-
{"amdgpu-no-workitem-id-x",
1189-
"amdgpu-no-workitem-id-y",
1190-
"amdgpu-no-workitem-id-z"});
1222+
removeFnAttrFromReachable(
1223+
CG, Func,
1224+
{"amdgpu-no-workitem-id-x", "amdgpu-no-workitem-id-y",
1225+
"amdgpu-no-workitem-id-z", "amdgpu-no-heap-ptr"});
1226+
if (!LDSParams.IndirectAccess.StaticLDSGlobals.empty() ||
1227+
!LDSParams.IndirectAccess.DynamicLDSGlobals.empty())
1228+
removeFnAttrFromReachable(CG, Func, {"amdgpu-no-lds-kernel-id"});
11911229
reorderStaticDynamicIndirectLDSSet(LDSParams);
11921230
buildSwLDSGlobal(Func);
11931231
buildSwDynLDSGlobal(Func);
@@ -1207,6 +1245,7 @@ bool AMDGPUSwLowerLDS::run() {
12071245
// Get non-kernels with LDS ptr as argument and called by kernels.
12081246
getNonKernelsWithLDSArguments(CG);
12091247

1248+
// Lower LDS accesses in non-kernels.
12101249
if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
12111250
!FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
12121251
NonKernelLDSParameters NKLDSParams;
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check that static LDS accesses in kernels without the sanitize_address attribute are lowered if
5+
; other kernels in the module have the sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
define amdgpu_kernel void @k0() sanitize_address {
10+
; CHECK-LABEL: define amdgpu_kernel void @k0(
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[WID:.*]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
14+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
15+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
16+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
17+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
18+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
19+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
20+
; CHECK: [[MALLOC]]:
21+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
22+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
23+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
24+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
25+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
26+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
27+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
28+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
29+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
30+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
31+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
32+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
33+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
34+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
35+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
36+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
37+
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
38+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
39+
; CHECK-NEXT: br label %[[BB20]]
40+
; CHECK: [[BB20]]:
41+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
42+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
43+
; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
44+
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
45+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
46+
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
47+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
48+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
49+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
50+
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
51+
; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
52+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
53+
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
54+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
55+
; CHECK: [[CONDFREE]]:
56+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
57+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
58+
; CHECK: [[FREE]]:
59+
; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
60+
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
61+
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
62+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
63+
; CHECK-NEXT: br label %[[END]]
64+
; CHECK: [[END]]:
65+
; CHECK-NEXT: ret void
66+
;
67+
store i8 7, ptr addrspace(3) @lds_1, align 4
68+
store i32 8, ptr addrspace(3) @lds_2, align 2
69+
ret void
70+
}
71+
72+
define amdgpu_kernel void @k1() {
73+
; CHECK-LABEL: define amdgpu_kernel void @k1(
74+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
75+
; CHECK-NEXT: [[WID:.*]]:
76+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
77+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
78+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
79+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
80+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
81+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
82+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
83+
; CHECK: [[MALLOC]]:
84+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
85+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
86+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
87+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
88+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
89+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
90+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
91+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
92+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
93+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
94+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
95+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
96+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
97+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
98+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
99+
; CHECK-NEXT: br label %[[BB18]]
100+
; CHECK: [[BB18]]:
101+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
102+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
103+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
104+
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
105+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
106+
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
107+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
108+
; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
109+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
110+
; CHECK: [[CONDFREE]]:
111+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
112+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
113+
; CHECK: [[FREE]]:
114+
; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
115+
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
116+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
117+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
118+
; CHECK-NEXT: br label %[[END]]
119+
; CHECK: [[END]]:
120+
; CHECK-NEXT: ret void
121+
;
122+
store i32 9, ptr addrspace(3) @lds_2, align 2
123+
ret void
124+
}
125+
126+
!llvm.module.flags = !{!0}
127+
!0 = !{i32 4, !"nosanitize_address", i32 1}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check that LDS accesses in kernels without the sanitize_address attribute are not lowered
5+
; when no other kernel in the module has the sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
;.
10+
; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
11+
; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
12+
;.
13+
define amdgpu_kernel void @k0() {
14+
; CHECK-LABEL: define amdgpu_kernel void @k0() {
15+
; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
16+
; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
17+
; CHECK-NEXT: ret void
18+
;
19+
store i8 7, ptr addrspace(3) @lds_1, align 4
20+
store i32 8, ptr addrspace(3) @lds_2, align 2
21+
ret void
22+
}
23+
24+
define amdgpu_kernel void @k1() {
25+
; CHECK-LABEL: define amdgpu_kernel void @k1() {
26+
; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
27+
; CHECK-NEXT: ret void
28+
;
29+
store i32 9, ptr addrspace(3) @lds_2, align 2
30+
ret void
31+
}
32+
33+
!llvm.module.flags = !{!0}
34+
!0 = !{i32 4, !"nosanitize_address", i32 1}
35+
;.
36+
; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
37+
;.

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@ define void @non_kernel_function() sanitize_address {
2020
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
2121
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
2222
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
23-
; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
24-
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
23+
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
24+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
25+
; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
26+
; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
27+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP12]]
28+
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(1) [[TMP13]] to ptr
2529
; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
2630
; CHECK-NEXT: ret void
2731
;

0 commit comments

Comments
 (0)