Skip to content

Commit 575fad2

Browse files
authored
[AMDGPU] Upstream the Support for array of named barriers (#154604)
1 parent d20a74e commit 575fad2

File tree

3 files changed

+46
-29
lines changed

3 files changed

+46
-29
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,7 @@ class AMDGPULowerModuleLDS {
955955
Module &M, LDSUsesInfoTy &LDSUsesInfo,
956956
VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
957957
bool Changed = false;
958+
const DataLayout &DL = M.getDataLayout();
958959
// The 1st round: give module-absolute assignments
959960
int NumAbsolutes = 0;
960961
std::vector<GlobalVariable *> OrderedGVs;
@@ -976,8 +977,11 @@ class AMDGPULowerModuleLDS {
976977
}
977978
OrderedGVs = sortByName(std::move(OrderedGVs));
978979
for (GlobalVariable *GV : OrderedGVs) {
979-
int BarId = ++NumAbsolutes;
980980
unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
981+
unsigned BarId = NumAbsolutes + 1;
982+
unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
983+
NumAbsolutes += BarCnt;
984+
981985
// 4 bits for alignment, 5 bits for the barrier num,
982986
// 3 bits for the barrier scope
983987
unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
@@ -1015,12 +1019,11 @@ class AMDGPULowerModuleLDS {
10151019
// create a new GV used only by this kernel and its function.
10161020
auto NewGV = uniquifyGVPerKernel(M, GV, F);
10171021
Changed |= (NewGV != GV);
1018-
int BarId = (NumAbsolutes + 1);
1019-
if (Kernel2BarId.contains(F)) {
1020-
BarId = (Kernel2BarId[F] + 1);
1021-
}
1022-
Kernel2BarId[F] = BarId;
10231022
unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
1023+
unsigned BarId = Kernel2BarId[F];
1024+
BarId += NumAbsolutes + 1;
1025+
unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
1026+
Kernel2BarId[F] += BarCnt;
10241027
unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
10251028
recordLDSAbsoluteAddress(&M, NewGV, Offset);
10261029
}

llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,28 +31,40 @@ Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
3131
GV->getValueType());
3232
}
3333

34-
TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
35-
// TODO: Allow arrays and structs, if all members are barriers
36-
// in the same scope.
37-
// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
38-
// - Structs containing barriers in different scope.
39-
// - Structs containing a mixture of barriers and other data.
40-
// - Globals in other address spaces.
41-
// - Allocas.
34+
// Returns the target extension type underlying a global variable's value type.
35+
// The value type must be a TargetExtType, an array of it, a single-element
36+
// struct of it, or any nesting of those; returns nullptr otherwise.
37+
// TODO: Allow structs with multiple TargetExtType elements of the same type.
38+
// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
39+
// - Structs containing barriers in different scopes/ranks.
40+
// - Structs containing a mixture of barriers and other data.
41+
// - Globals in other address spaces.
42+
// - Allocas.
43+
static TargetExtType *getTargetExtType(const GlobalVariable &GV) {
4244
Type *Ty = GV.getValueType();
4345
while (true) {
4446
if (auto *TTy = dyn_cast<TargetExtType>(Ty))
45-
return TTy->getName() == "amdgcn.named.barrier" ? TTy : nullptr;
47+
return TTy;
4648
if (auto *STy = dyn_cast<StructType>(Ty)) {
47-
if (STy->getNumElements() == 0)
49+
if (STy->getNumElements() != 1)
4850
return nullptr;
4951
Ty = STy->getElementType(0);
5052
continue;
5153
}
54+
if (auto *ATy = dyn_cast<ArrayType>(Ty)) {
55+
Ty = ATy->getElementType();
56+
continue;
57+
}
5258
return nullptr;
5359
}
5460
}
5561

62+
TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
63+
if (TargetExtType *Ty = getTargetExtType(GV))
64+
return Ty->getName() == "amdgcn.named.barrier" ? Ty : nullptr;
65+
return nullptr;
66+
}
67+
5668
bool isDynamicLDS(const GlobalVariable &GV) {
5769
// external zero size addrspace(3) without initializer is dynlds.
5870
const Module *M = GV.getParent();

llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,35 @@
11
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=SOUT %s
33

4-
@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
4+
%class.ExpAmdWorkgroupWaveBarrier = type { target("amdgcn.named.barrier", 0) }
5+
6+
@bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison
57
@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
6-
@bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
8+
@bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison
79

8-
; CHECK: @bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !0
10+
; CHECK: @bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison, !absolute_symbol !0
911
; CHECK-NEXT: @bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !1
10-
; CHECK-NEXT: @bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
11-
; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
12+
; CHECK-NEXT: @bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
13+
; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
1214

13-
; SOUT: .set func1.num_named_barrier, 3
15+
; SOUT: .set func1.num_named_barrier, 7
1416
define void @func1() {
1517
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
1618
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
1719
call void @llvm.amdgcn.s.barrier.wait(i16 1)
1820
ret void
1921
}
2022

21-
; SOUT: .set func2.num_named_barrier, 1
23+
; SOUT: .set func2.num_named_barrier, 2
2224
define void @func2() {
2325
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
2426
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
2527
call void @llvm.amdgcn.s.barrier.wait(i16 1)
2628
ret void
2729
}
2830

29-
; SOUT: .amdhsa_named_barrier_count 1
30-
; SOUT: .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier)
31+
; SOUT: .amdhsa_named_barrier_count 2
32+
; SOUT: .set kernel1.num_named_barrier, max(6, func1.num_named_barrier, func2.num_named_barrier)
3133
define amdgpu_kernel void @kernel1() #0 {
3234
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
3335
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 11)
@@ -40,8 +42,8 @@ define amdgpu_kernel void @kernel1() #0 {
4042
ret void
4143
}
4244

43-
; SOUT: .amdhsa_named_barrier_count 1
44-
; SOUT: .set kernel2.num_named_barrier, max(2, func2.num_named_barrier)
45+
; SOUT: .amdhsa_named_barrier_count 2
46+
; SOUT: .set kernel2.num_named_barrier, max(6, func2.num_named_barrier)
4547
define amdgpu_kernel void @kernel2() #0 {
4648
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
4749
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
@@ -68,5 +70,5 @@ attributes #1 = { convergent nounwind }
6870
attributes #2 = { nounwind readnone }
6971

7072
; CHECK: !0 = !{i32 8396816, i32 8396817}
71-
; CHECK-NEXT: !1 = !{i32 8396848, i32 8396849}
72-
; CHECK-NEXT: !2 = !{i32 8396832, i32 8396833}
73+
; CHECK-NEXT: !1 = !{i32 8396912, i32 8396913}
74+
; CHECK-NEXT: !2 = !{i32 8396848, i32 8396849}

0 commit comments

Comments
 (0)