
Commit 0c1a58d

[AMDGPU] Lower LDS in functions without sanitize_address in amdgpu-sw-lower-lds. (llvm#131147)
Background: The "amdgpu-sw-lower-lds" pass lowers LDS accesses based on whether the "sanitize_address" attribute is attached to the kernel or non-kernel function. Ideally, the pass should either lower all LDS accesses or lower none, depending on whether ASan is enabled.

Issue: There have been cases where instrumented and non-instrumented bitcode are linked together, so some LDS globals are lowered while others are not. This typically triggers the following error in a subsequent pass: "Module cannot mix absolute and non-absolute LDS GVs".

Fix: This patch checks whether any kernel in the module is tagged with the "sanitize_address" attribute; if so, it lowers the LDS accesses in all other kernels and non-kernels as well, even though they do not carry the "sanitize_address" attribute.
1 parent: 35d51c7 · commit: 0c1a58d
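For context, the mixed-module shape that triggers the problem, and that the new tests below exercise, looks roughly like the following sketch. It is a minimal illustration, not taken from the patch: the kernel names @k_asan and @k_plain and the global @lds are hypothetical. Before this change only the instrumented kernel's LDS access was lowered, leaving the module with a mix of absolute and non-absolute LDS globals; with this change, the presence of one sanitize_address kernel causes both accesses to be lowered.

; Minimal sketch of a module mixing instrumented and non-instrumented kernels.
; @k_asan, @k_plain and @lds are illustrative names only.
@lds = internal addrspace(3) global [4 x i32] poison, align 4

define amdgpu_kernel void @k_asan() sanitize_address {
  store i32 1, ptr addrspace(3) @lds, align 4   ; always lowered by amdgpu-sw-lower-lds
  ret void
}

define amdgpu_kernel void @k_plain() {
  store i32 2, ptr addrspace(3) @lds, align 4   ; previously skipped; now lowered as well
  ret void
}

Running opt -passes=amdgpu-sw-lower-lds -mtriple=amdgcn-amd-amdhsa on such a module (as the new tests do) should now rewrite the LDS accesses in both kernels.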

File tree

5 files changed: +363 additions, −4 deletions


llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 21 additions & 4 deletions
@@ -299,8 +299,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
     for (User *V : GV->users()) {
       if (auto *I = dyn_cast<Instruction>(V)) {
         Function *F = I->getFunction();
-        if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
-            !F->isDeclaration())
+        if (!isKernelLDS(F) && !F->isDeclaration())
           FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
       }
     }
@@ -1142,6 +1141,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
   return;
 }

+static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
+  for (auto &K : LDSAccesses) {
+    Function *F = K.first;
+    if (!F)
+      continue;
+    if (F->hasFnAttribute(Attribute::SanitizeAddress))
+      return true;
+  }
+  return false;
+}
+
 bool AMDGPUSwLowerLDS::run() {
   bool Changed = false;

@@ -1152,6 +1162,14 @@ bool AMDGPUSwLowerLDS::run() {
   // Get all the direct and indirect access of LDS for all the kernels.
   LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);

+  // Flag to decide whether to lower all the LDS accesses
+  // based on sanitize_address attribute.
+  bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
+                     hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
+
+  if (!LowerAllLDS)
+    return Changed;
+
   // Utility to group LDS access into direct, indirect, static and dynamic.
   auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
                                             bool DirectAccess) {
@@ -1161,8 +1179,6 @@ bool AMDGPUSwLowerLDS::run() {
         continue;

       assert(isKernelLDS(F));
-      if (!F->hasFnAttribute(Attribute::SanitizeAddress))
-        continue;

       // Only inserts if key isn't already in the map.
       FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1229,6 +1245,7 @@ bool AMDGPUSwLowerLDS::run() {
   // Get non-kernels with LDS ptr as argument and called by kernels.
   getNonKernelsWithLDSArguments(CG);

+  // Lower LDS accesses in non-kernels.
   if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
       !FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
     NonKernelLDSParameters NKLDSParams;
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
+; other kernels in module have sanitize_address attribute.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+define amdgpu_kernel void @k0() sanitize_address {
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
+; CHECK-NEXT: br label %[[BB20]]
+; CHECK: [[BB20]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
+; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
+; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
+; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
+; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+  store i8 7, ptr addrspace(3) @lds_1, align 4
+  store i32 8, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+define amdgpu_kernel void @k1() {
+; CHECK-LABEL: define amdgpu_kernel void @k1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[WID:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
+; CHECK: [[MALLOC]]:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
+; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
+; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
+; CHECK-NEXT: br label %[[BB18]]
+; CHECK: [[BB18]]:
+; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
+; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
+; CHECK-NEXT: br label %[[CONDFREE:.*]]
+; CHECK: [[CONDFREE]]:
+; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
+; CHECK: [[FREE]]:
+; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
+; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
+; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
+; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
+; CHECK-NEXT: br label %[[END]]
+; CHECK: [[END]]:
+; CHECK-NEXT: ret void
+;
+  store i32 9, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
+; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
+
+; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
+; if all other kernels don't have sanitize_address attribute.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
+;.
+define amdgpu_kernel void @k0() {
+; CHECK-LABEL: define amdgpu_kernel void @k0() {
+; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
+; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT: ret void
+;
+  store i8 7, ptr addrspace(3) @lds_1, align 4
+  store i32 8, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+define amdgpu_kernel void @k1() {
+; CHECK-LABEL: define amdgpu_kernel void @k1() {
+; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
+; CHECK-NEXT: ret void
+;
+  store i32 9, ptr addrspace(3) @lds_2, align 2
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nosanitize_address", i32 1}
+;.
+; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
+;.
