Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,7 @@ class AMDGPULowerModuleLDS {
Module &M, LDSUsesInfoTy &LDSUsesInfo,
VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
bool Changed = false;
const DataLayout &DL = M.getDataLayout();
// The 1st round: give module-absolute assignments
int NumAbsolutes = 0;
std::vector<GlobalVariable *> OrderedGVs;
Expand All @@ -976,8 +977,11 @@ class AMDGPULowerModuleLDS {
}
OrderedGVs = sortByName(std::move(OrderedGVs));
for (GlobalVariable *GV : OrderedGVs) {
int BarId = ++NumAbsolutes;
unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
unsigned BarId = NumAbsolutes + 1;
unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
NumAbsolutes += BarCnt;

// 4 bits for alignment, 5 bits for the barrier num,
// 3 bits for the barrier scope
unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
Expand Down Expand Up @@ -1015,12 +1019,11 @@ class AMDGPULowerModuleLDS {
// create a new GV used only by this kernel and its function.
auto NewGV = uniquifyGVPerKernel(M, GV, F);
Changed |= (NewGV != GV);
int BarId = (NumAbsolutes + 1);
if (Kernel2BarId.contains(F)) {
BarId = (Kernel2BarId[F] + 1);
}
Kernel2BarId[F] = BarId;
unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
unsigned BarId = Kernel2BarId[F];
BarId += NumAbsolutes + 1;
unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
Kernel2BarId[F] += BarCnt;
unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
recordLDSAbsoluteAddress(&M, NewGV, Offset);
}
Expand Down
32 changes: 22 additions & 10 deletions llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,40 @@ Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
GV->getValueType());
}

TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
// TODO: Allow arrays and structs, if all members are barriers
// in the same scope.
// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
// - Structs containing barriers in different scope.
// - Structs containing a mixture of barriers and other data.
// - Globals in other address spaces.
// - Allocas.
// Returns the target extension type underlying a global variable's value type.
// The value type must be a TargetExtType, an array or single-element struct
// of one, or any nesting of those; otherwise nullptr is returned.
// TODO: allow struct of multiple TargetExtType elements of the same type.
// TODO: Disallow other uses of target("amdgcn.named.barrier") including:
// - Structs containing barriers in different scope/rank
// - Structs containing a mixture of barriers and other data.
// - Globals in other address spaces.
// - Allocas.
static TargetExtType *getTargetExtType(const GlobalVariable &GV) {
Type *Ty = GV.getValueType();
while (true) {
if (auto *TTy = dyn_cast<TargetExtType>(Ty))
return TTy->getName() == "amdgcn.named.barrier" ? TTy : nullptr;
return TTy;
if (auto *STy = dyn_cast<StructType>(Ty)) {
if (STy->getNumElements() == 0)
if (STy->getNumElements() != 1)
return nullptr;
Ty = STy->getElementType(0);
continue;
}
if (auto *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
continue;
}
return nullptr;
}
}

// Returns the target extension type found by getTargetExtType for GV when
// that type is named "amdgcn.named.barrier"; returns nullptr in every other
// case, including when no target extension type is found.
TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
  TargetExtType *BarrierTy = getTargetExtType(GV);
  if (!BarrierTy || BarrierTy->getName() != "amdgcn.named.barrier")
    return nullptr;
  return BarrierTy;
}

bool isDynamicLDS(const GlobalVariable &GV) {
// external zero size addrspace(3) without initializer is dynlds.
const Module *M = GV.getParent();
Expand Down
28 changes: 15 additions & 13 deletions llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
Original file line number Diff line number Diff line change
@@ -1,33 +1,35 @@
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs -o - %s | FileCheck -check-prefixes=SOUT %s

@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
%class.ExpAmdWorkgroupWaveBarrier = type { target("amdgcn.named.barrier", 0) }

@bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison
@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison

; CHECK: @bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !0
; CHECK: @bar2 = internal addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] poison, !absolute_symbol !0
; CHECK-NEXT: @bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !1
; CHECK-NEXT: @bar1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison, !absolute_symbol !2
; CHECK-NEXT: @bar1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2
; CHECK-NEXT: @bar1.kernel1 = internal addrspace(3) global [4 x %class.ExpAmdWorkgroupWaveBarrier] poison, !absolute_symbol !2

; SOUT: .set func1.num_named_barrier, 3
; SOUT: .set func1.num_named_barrier, 7
; Non-kernel helper whose barrier intrinsics reference only @bar3: it calls
; s.barrier.signal.var with i32 7, then s.barrier.join, then s.barrier.wait
; with i16 1.
define void @func1() {
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
ret void
}

; SOUT: .set func2.num_named_barrier, 1
; SOUT: .set func2.num_named_barrier, 2
; Non-kernel helper whose barrier intrinsics reference only @bar2: it calls
; s.barrier.signal.var with i32 7, then s.barrier.join, then s.barrier.wait
; with i16 1.
define void @func2() {
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
ret void
}

; SOUT: .amdhsa_named_barrier_count 1
; SOUT: .set kernel1.num_named_barrier, max(2, func1.num_named_barrier, func2.num_named_barrier)
; SOUT: .amdhsa_named_barrier_count 2
; SOUT: .set kernel1.num_named_barrier, max(6, func1.num_named_barrier, func2.num_named_barrier)
define amdgpu_kernel void @kernel1() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 11)
Expand All @@ -40,8 +42,8 @@ define amdgpu_kernel void @kernel1() #0 {
ret void
}

; SOUT: .amdhsa_named_barrier_count 1
; SOUT: .set kernel2.num_named_barrier, max(2, func2.num_named_barrier)
; SOUT: .amdhsa_named_barrier_count 2
; SOUT: .set kernel2.num_named_barrier, max(6, func2.num_named_barrier)
define amdgpu_kernel void @kernel2() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
Expand All @@ -68,5 +70,5 @@ attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }

; CHECK: !0 = !{i32 8396816, i32 8396817}
; CHECK-NEXT: !1 = !{i32 8396848, i32 8396849}
; CHECK-NEXT: !2 = !{i32 8396832, i32 8396833}
; CHECK-NEXT: !1 = !{i32 8396912, i32 8396913}
; CHECK-NEXT: !2 = !{i32 8396848, i32 8396849}