Skip to content

Commit 0c5187d

Browse files
committed
[AMDGPU] Lower LDS in functions without sanitize_address in amdgpu-sw-lower-lds. (#131147)
Background: The "amdgpu-sw-lower-lds" pass lowers LDS accesses based on whether the "sanitize_address" attribute is attached to a kernel or non-kernel function. Ideally, the pass should either lower all LDS accesses or lower none of them, depending on whether ASan is enabled. Issue: There have been cases where instrumented and non-instrumented bitcode files are linked together, which results in some LDS accesses being lowered while others are not. This typically leads to the following error in the subsequent pass: "Module cannot mix absolute and non-absolute LDS GVs". Fix: This patch fixes the issue by checking whether any kernel in the module is tagged with the "sanitize_address" attribute and, if so, lowering the LDS accesses in all other kernels and non-kernels as well, even if they do not have the "sanitize_address" attribute.
1 parent b4f6f2b commit 0c5187d

File tree

5 files changed

+363
-4
lines changed

5 files changed

+363
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
298298
for (User *V : GV->users()) {
299299
if (auto *I = dyn_cast<Instruction>(V)) {
300300
Function *F = I->getFunction();
301-
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
302-
!F->isDeclaration())
301+
if (!isKernelLDS(F) && !F->isDeclaration())
303302
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
304303
}
305304
}
@@ -1134,6 +1133,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
11341133
AsanInfo.Offset = Offset;
11351134
}
11361135

1136+
// Returns true if at least one function used as a key in LDSAccesses carries
// the sanitize_address function attribute, and false otherwise. Entries whose
// function key is null are skipped. The scan stops at the first match.
static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
1137+
for (auto &K : LDSAccesses) {
1138+
Function *F = K.first;
1139+
if (!F)
1140+
continue;
1141+
if (F->hasFnAttribute(Attribute::SanitizeAddress))
1142+
return true;
1143+
}
1144+
return false;
1145+
}
1146+
11371147
bool AMDGPUSwLowerLDS::run() {
11381148
bool Changed = false;
11391149

@@ -1144,6 +1154,14 @@ bool AMDGPUSwLowerLDS::run() {
11441154
// Get all the direct and indirect access of LDS for all the kernels.
11451155
LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
11461156

1157+
// Flag to decide whether to lower all the LDS accesses
1158+
// based on sanitize_address attribute.
1159+
bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
1160+
hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
1161+
1162+
if (!LowerAllLDS)
1163+
return Changed;
1164+
11471165
// Utility to group LDS access into direct, indirect, static and dynamic.
11481166
auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
11491167
bool DirectAccess) {
@@ -1153,8 +1171,6 @@ bool AMDGPUSwLowerLDS::run() {
11531171
continue;
11541172

11551173
assert(isKernelLDS(F));
1156-
if (!F->hasFnAttribute(Attribute::SanitizeAddress))
1157-
continue;
11581174

11591175
// Only inserts if key isn't already in the map.
11601176
FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1221,6 +1237,7 @@ bool AMDGPUSwLowerLDS::run() {
12211237
// Get non-kernels with LDS ptr as argument and called by kernels.
12221238
getNonKernelsWithLDSArguments(CG);
12231239

1240+
// Lower LDS accesses in non-kernels.
12241241
if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
12251242
!FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
12261243
NonKernelLDSParameters NKLDSParams;
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
5+
; other kernels in module have sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
define amdgpu_kernel void @k0() sanitize_address {
10+
; CHECK-LABEL: define amdgpu_kernel void @k0(
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[WID:.*]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
14+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
15+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
16+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
17+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
18+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
19+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
20+
; CHECK: [[MALLOC]]:
21+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
22+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
23+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
24+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
25+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
26+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
27+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
28+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
29+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
30+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
31+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
32+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
33+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
34+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
35+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
36+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
37+
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
38+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
39+
; CHECK-NEXT: br label %[[BB20]]
40+
; CHECK: [[BB20]]:
41+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
42+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
43+
; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
44+
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
45+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
46+
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
47+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
48+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
49+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
50+
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
51+
; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
52+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
53+
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
54+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
55+
; CHECK: [[CONDFREE]]:
56+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
57+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
58+
; CHECK: [[FREE]]:
59+
; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
60+
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
61+
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
62+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
63+
; CHECK-NEXT: br label %[[END]]
64+
; CHECK: [[END]]:
65+
; CHECK-NEXT: ret void
66+
;
67+
store i8 7, ptr addrspace(3) @lds_1, align 4
68+
store i32 8, ptr addrspace(3) @lds_2, align 2
69+
ret void
70+
}
71+
72+
define amdgpu_kernel void @k1() {
73+
; CHECK-LABEL: define amdgpu_kernel void @k1(
74+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
75+
; CHECK-NEXT: [[WID:.*]]:
76+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
77+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
78+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
79+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
80+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
81+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
82+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
83+
; CHECK: [[MALLOC]]:
84+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
85+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
86+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
87+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
88+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
89+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
90+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
91+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
92+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
93+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
94+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
95+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
96+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
97+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
98+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
99+
; CHECK-NEXT: br label %[[BB18]]
100+
; CHECK: [[BB18]]:
101+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
102+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
103+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
104+
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
105+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
106+
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
107+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
108+
; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
109+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
110+
; CHECK: [[CONDFREE]]:
111+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
112+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
113+
; CHECK: [[FREE]]:
114+
; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
115+
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
116+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
117+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
118+
; CHECK-NEXT: br label %[[END]]
119+
; CHECK: [[END]]:
120+
; CHECK-NEXT: ret void
121+
;
122+
store i32 9, ptr addrspace(3) @lds_2, align 2
123+
ret void
124+
}
125+
126+
!llvm.module.flags = !{!0}
127+
!0 = !{i32 4, !"nosanitize_address", i32 1}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
5+
; if all other kernels don't have sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
;.
10+
; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
11+
; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
12+
;.
13+
define amdgpu_kernel void @k0() {
14+
; CHECK-LABEL: define amdgpu_kernel void @k0() {
15+
; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
16+
; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
17+
; CHECK-NEXT: ret void
18+
;
19+
store i8 7, ptr addrspace(3) @lds_1, align 4
20+
store i32 8, ptr addrspace(3) @lds_2, align 2
21+
ret void
22+
}
23+
24+
define amdgpu_kernel void @k1() {
25+
; CHECK-LABEL: define amdgpu_kernel void @k1() {
26+
; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
27+
; CHECK-NEXT: ret void
28+
;
29+
store i32 9, ptr addrspace(3) @lds_2, align 2
30+
ret void
31+
}
32+
33+
!llvm.module.flags = !{!0}
34+
!0 = !{i32 4, !"nosanitize_address", i32 1}
35+
;.
36+
; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
37+
;.

0 commit comments

Comments
 (0)