Skip to content

Commit e56c3d4

Browse files
authored
[SWDEV-520916] amdgpu-sw-lower-lds amd-mainline PRs (llvm#2516)
[AMDGPU] Handle lowering addrspace casts from LDS to FLAT address in amdgpu-sw-lower-lds. [AMDGPU] Lower LDS in functions without sanitize_address in amdgpu-sw-lower-lds.
2 parents 64e29ec + 0c5187d commit e56c3d4

13 files changed

+902
-304
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,7 @@ class AMDGPUSwLowerLDS {
192192
void getLDSMemoryInstructions(Function *Func,
193193
SetVector<Instruction *> &LDSInstructions);
194194
void replaceKernelLDSAccesses(Function *Func);
195-
Value *getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
196-
Value *LDSPtr);
195+
Value *getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr, Value *LDSPtr);
197196
void translateLDSMemoryOperationsToGlobalMemory(
198197
Function *Func, Value *LoadMallocPtr,
199198
SetVector<Instruction *> &LDSInstructions);
@@ -299,8 +298,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
299298
for (User *V : GV->users()) {
300299
if (auto *I = dyn_cast<Instruction>(V)) {
301300
Function *F = I->getFunction();
302-
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
303-
!F->isDeclaration())
301+
if (!isKernelLDS(F) && !F->isDeclaration())
304302
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
305303
}
306304
}
@@ -655,20 +653,30 @@ void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
655653
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(&Inst)) {
656654
if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
657655
LDSInstructions.insert(&Inst);
656+
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&Inst)) {
657+
if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
658+
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS)
659+
LDSInstructions.insert(&Inst);
658660
} else
659661
continue;
660662
}
661663
}
662664
}
663665

664-
Value *
665-
AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
666+
Value *AMDGPUSwLowerLDS::getTranslatedGlobalMemoryPtrOfLDS(Value *LoadMallocPtr,
666667
Value *LDSPtr) {
667668
assert(LDSPtr && "Invalid LDS pointer operand");
668-
Value *PtrToInt = IRB.CreatePtrToInt(LDSPtr, IRB.getInt32Ty());
669-
Value *GEP =
670-
IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt});
671-
return GEP;
669+
Type *LDSPtrType = LDSPtr->getType();
670+
LLVMContext &Ctx = M.getContext();
671+
const DataLayout &DL = M.getDataLayout();
672+
Type *IntTy = DL.getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
673+
if (auto *VecPtrTy = dyn_cast<VectorType>(LDSPtrType)) {
674+
// Handle vector of pointers
675+
ElementCount NumElements = VecPtrTy->getElementCount();
676+
IntTy = VectorType::get(IntTy, NumElements);
677+
}
678+
Value *GepIndex = IRB.CreatePtrToInt(LDSPtr, IntTy);
679+
return IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {GepIndex});
672680
}
673681

674682
void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
@@ -681,7 +689,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
681689
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
682690
Value *LIOperand = LI->getPointerOperand();
683691
Value *Replacement =
684-
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, LIOperand);
692+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, LIOperand);
685693
LoadInst *NewLI = IRB.CreateAlignedLoad(LI->getType(), Replacement,
686694
LI->getAlign(), LI->isVolatile());
687695
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@@ -691,7 +699,7 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
691699
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
692700
Value *SIOperand = SI->getPointerOperand();
693701
Value *Replacement =
694-
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, SIOperand);
702+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, SIOperand);
695703
StoreInst *NewSI = IRB.CreateAlignedStore(
696704
SI->getValueOperand(), Replacement, SI->getAlign(), SI->isVolatile());
697705
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
@@ -701,8 +709,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
701709
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
702710
Value *RMWPtrOperand = RMW->getPointerOperand();
703711
Value *RMWValOperand = RMW->getValOperand();
704-
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
705-
LoadMallocPtr, RMWPtrOperand);
712+
Value *Replacement =
713+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, RMWPtrOperand);
706714
AtomicRMWInst *NewRMW = IRB.CreateAtomicRMW(
707715
RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(),
708716
RMW->getOrdering(), RMW->getSyncScopeID());
@@ -712,8 +720,8 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
712720
RMW->eraseFromParent();
713721
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(Inst)) {
714722
Value *XCHGPtrOperand = XCHG->getPointerOperand();
715-
Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
716-
LoadMallocPtr, XCHGPtrOperand);
723+
Value *Replacement =
724+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, XCHGPtrOperand);
717725
AtomicCmpXchgInst *NewXCHG = IRB.CreateAtomicCmpXchg(
718726
Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(),
719727
XCHG->getAlign(), XCHG->getSuccessOrdering(),
@@ -722,6 +730,16 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
722730
AsanInfo.Instructions.insert(NewXCHG);
723731
XCHG->replaceAllUsesWith(NewXCHG);
724732
XCHG->eraseFromParent();
733+
} else if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Inst)) {
734+
Value *AIOperand = ASC->getPointerOperand();
735+
Value *Replacement =
736+
getTranslatedGlobalMemoryPtrOfLDS(LoadMallocPtr, AIOperand);
737+
Value *NewAI = IRB.CreateAddrSpaceCast(Replacement, ASC->getType());
738+
// Note: There is no need to add the new instruction to the AsanInfo list
739+
// of instructions to be instrumented. The FLAT_ADDRESS ptr would have
740+
// already been instrumented by the asan pass prior to this pass.
741+
ASC->replaceAllUsesWith(NewAI);
742+
ASC->eraseFromParent();
725743
} else
726744
report_fatal_error("Unimplemented LDS lowering instruction");
727745
}
@@ -1115,6 +1133,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
11151133
AsanInfo.Offset = Offset;
11161134
}
11171135

1136+
/// Returns true if at least one function used as a key in \p LDSAccesses
/// carries the sanitize_address attribute, and false otherwise (including
/// when the map is empty).
static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
1137+
for (auto &K : LDSAccesses) {
1138+
Function *F = K.first;
1139+
// Null function keys are skipped rather than dereferenced.
if (!F)
1140+
continue;
1141+
// A single sanitized function is enough; stop scanning at the first hit.
if (F->hasFnAttribute(Attribute::SanitizeAddress))
1142+
return true;
1143+
}
1144+
return false;
1145+
}
1146+
11181147
bool AMDGPUSwLowerLDS::run() {
11191148
bool Changed = false;
11201149

@@ -1125,6 +1154,14 @@ bool AMDGPUSwLowerLDS::run() {
11251154
// Get all the direct and indirect access of LDS for all the kernels.
11261155
LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
11271156

1157+
// Flag to decide whether to lower all the LDS accesses
1158+
// based on sanitize_address attribute.
1159+
bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
1160+
hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
1161+
1162+
if (!LowerAllLDS)
1163+
return Changed;
1164+
11281165
// Utility to group LDS access into direct, indirect, static and dynamic.
11291166
auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
11301167
bool DirectAccess) {
@@ -1134,8 +1171,6 @@ bool AMDGPUSwLowerLDS::run() {
11341171
continue;
11351172

11361173
assert(isKernelLDS(F));
1137-
if (!F->hasFnAttribute(Attribute::SanitizeAddress))
1138-
continue;
11391174

11401175
// Only inserts if key isn't already in the map.
11411176
FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1202,6 +1237,7 @@ bool AMDGPUSwLowerLDS::run() {
12021237
// Get non-kernels with LDS ptr as argument and called by kernels.
12031238
getNonKernelsWithLDSArguments(CG);
12041239

1240+
// Lower LDS accesses in non-kernels.
12051241
if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
12061242
!FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
12071243
NonKernelLDSParameters NKLDSParams;
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
5+
; other kernels in module have sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
define amdgpu_kernel void @k0() sanitize_address {
10+
; CHECK-LABEL: define amdgpu_kernel void @k0(
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[WID:.*]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
14+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
15+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
16+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
17+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
18+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
19+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
20+
; CHECK: [[MALLOC]]:
21+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
22+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
23+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
24+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
25+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
26+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
27+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
28+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
29+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
30+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
31+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
32+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
33+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
34+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
35+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
36+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
37+
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
38+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
39+
; CHECK-NEXT: br label %[[BB20]]
40+
; CHECK: [[BB20]]:
41+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
42+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
43+
; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
44+
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
45+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
46+
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
47+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
48+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
49+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
50+
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
51+
; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
52+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
53+
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
54+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
55+
; CHECK: [[CONDFREE]]:
56+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
57+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
58+
; CHECK: [[FREE]]:
59+
; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
60+
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
61+
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
62+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
63+
; CHECK-NEXT: br label %[[END]]
64+
; CHECK: [[END]]:
65+
; CHECK-NEXT: ret void
66+
;
67+
store i8 7, ptr addrspace(3) @lds_1, align 4
68+
store i32 8, ptr addrspace(3) @lds_2, align 2
69+
ret void
70+
}
71+
72+
define amdgpu_kernel void @k1() {
73+
; CHECK-LABEL: define amdgpu_kernel void @k1(
74+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
75+
; CHECK-NEXT: [[WID:.*]]:
76+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
77+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
78+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
79+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
80+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
81+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
82+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
83+
; CHECK: [[MALLOC]]:
84+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
85+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
86+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
87+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
88+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
89+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
90+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
91+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
92+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
93+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
94+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
95+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
96+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
97+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
98+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
99+
; CHECK-NEXT: br label %[[BB18]]
100+
; CHECK: [[BB18]]:
101+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
102+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
103+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
104+
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
105+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
106+
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
107+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
108+
; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
109+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
110+
; CHECK: [[CONDFREE]]:
111+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
112+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
113+
; CHECK: [[FREE]]:
114+
; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
115+
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
116+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
117+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
118+
; CHECK-NEXT: br label %[[END]]
119+
; CHECK: [[END]]:
120+
; CHECK-NEXT: ret void
121+
;
122+
store i32 9, ptr addrspace(3) @lds_2, align 2
123+
ret void
124+
}
125+
126+
!llvm.module.flags = !{!0}
127+
!0 = !{i32 4, !"nosanitize_address", i32 1}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
5+
; if no other kernel in the module has the sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
;.
10+
; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
11+
; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
12+
;.
13+
define amdgpu_kernel void @k0() {
14+
; CHECK-LABEL: define amdgpu_kernel void @k0() {
15+
; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
16+
; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
17+
; CHECK-NEXT: ret void
18+
;
19+
store i8 7, ptr addrspace(3) @lds_1, align 4
20+
store i32 8, ptr addrspace(3) @lds_2, align 2
21+
ret void
22+
}
23+
24+
define amdgpu_kernel void @k1() {
25+
; CHECK-LABEL: define amdgpu_kernel void @k1() {
26+
; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
27+
; CHECK-NEXT: ret void
28+
;
29+
store i32 9, ptr addrspace(3) @lds_2, align 2
30+
ret void
31+
}
32+
33+
!llvm.module.flags = !{!0}
34+
!0 = !{i32 4, !"nosanitize_address", i32 1}
35+
;.
36+
; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
37+
;.

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@ define void @non_kernel_function() sanitize_address {
2020
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
2121
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
2222
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
23-
; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
24-
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
23+
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
24+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
25+
; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
26+
; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
27+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP12]]
28+
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(1) [[TMP13]] to ptr
2529
; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
2630
; CHECK-NEXT: ret void
2731
;

0 commit comments

Comments
 (0)