Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1843,8 +1843,9 @@ bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
}
}

// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
if (STI.hasSplitBarriers()) {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
Expand Down Expand Up @@ -2161,6 +2162,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_init_whole_wave:
return selectInitWholeWave(I);
case Intrinsic::amdgcn_s_barrier:
case Intrinsic::amdgcn_s_barrier_signal:
case Intrinsic::amdgcn_s_barrier_wait:
return selectSBarrier(I);
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9605,7 +9605,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned Opc = Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
case Intrinsic::amdgcn_s_barrier:
case Intrinsic::amdgcn_s_barrier_signal:
case Intrinsic::amdgcn_s_barrier_wait: {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (getTargetMachine().getOptLevel() > CodeGenOptLevel::None) {
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
Expand All @@ -9615,8 +9617,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
0);
}

// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
if (ST.hasSplitBarriers()) {
if (ST.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
SDValue K =
DAG.getTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32);
SDValue BarSignal =
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AMDGPU/barrier-elimination-gfx12.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s -global-isel | FileCheck %s

; CHECK-LABEL: {{^}}signal_unknown_wgs:
; CHECK: s_barrier_signal
; Kernel that calls llvm.amdgcn.s.barrier.signal with immediate -1 and has no
; attribute group of its own, so no "amdgpu-flat-work-group-size" is attached.
; The FileCheck directives above expect an s_barrier_signal instruction to
; appear in the llc output for this kernel.
define amdgpu_kernel void @signal_unknown_wgs() {
tail call void @llvm.amdgcn.s.barrier.signal(i32 -1) #0
ret void
}

; CHECK-LABEL: {{^}}signal_flat_wgs_attr_32_128:
; CHECK: s_barrier_signal
; Kernel that calls llvm.amdgcn.s.barrier.signal with immediate -1 under
; attribute group #1 ("amdgpu-flat-work-group-size"="32,128").
; The FileCheck directives above expect an s_barrier_signal instruction to
; appear in the llc output for this kernel.
define amdgpu_kernel void @signal_flat_wgs_attr_32_128() #1 {
tail call void @llvm.amdgcn.s.barrier.signal(i32 -1) #0
ret void
}

; CHECK-LABEL: {{^}}signal_flat_wgs_attr_32_64:
; CHECK: :
; CHECK-NEXT: ; wave barrier
; CHECK-NEXT: s_endpgm
; Kernel that calls llvm.amdgcn.s.barrier.signal with immediate -1 under
; attribute group #2 ("amdgpu-flat-work-group-size"="16,32").
; The FileCheck directives above expect the kernel label to be followed
; directly by a "; wave barrier" comment and then s_endpgm.
; NOTE(review): the function name says 32_64 but attribute group #2 specifies
; "16,32" -- presumably chosen to fit within one wave on gfx1200; confirm and
; consider renaming.
define amdgpu_kernel void @signal_flat_wgs_attr_32_64() #2 {
tail call void @llvm.amdgcn.s.barrier.signal(i32 -1) #0
ret void
}


; CHECK-LABEL: {{^}}wait_unknown_wgs:
; CHECK: s_barrier_wait
; Kernel that calls llvm.amdgcn.s.barrier.wait with immediate -1 and has no
; attribute group of its own, so no "amdgpu-flat-work-group-size" is attached.
; The FileCheck directives above expect an s_barrier_wait instruction to
; appear in the llc output for this kernel.
define amdgpu_kernel void @wait_unknown_wgs() {
tail call void @llvm.amdgcn.s.barrier.wait(i16 -1) #0
ret void
}

; CHECK-LABEL: {{^}}wait_flat_wgs_attr_32_128:
; CHECK: s_barrier_wait
; Kernel that calls llvm.amdgcn.s.barrier.wait with immediate -1 under
; attribute group #1 ("amdgpu-flat-work-group-size"="32,128").
; The FileCheck directives above expect an s_barrier_wait instruction to
; appear in the llc output for this kernel.
define amdgpu_kernel void @wait_flat_wgs_attr_32_128() #1 {
tail call void @llvm.amdgcn.s.barrier.wait(i16 -1) #0
ret void
}

; CHECK-LABEL: {{^}}wait_flat_wgs_attr_32_64:
; CHECK: :
; CHECK-NEXT: ; wave barrier
; CHECK-NEXT: s_endpgm
; Kernel that calls llvm.amdgcn.s.barrier.wait with immediate -1 under
; attribute group #2 ("amdgpu-flat-work-group-size"="16,32").
; The FileCheck directives above expect the kernel label to be followed
; directly by a "; wave barrier" comment and then s_endpgm.
; NOTE(review): the function name says 32_64 but attribute group #2 specifies
; "16,32" -- presumably chosen to fit within one wave on gfx1200; confirm and
; consider renaming.
define amdgpu_kernel void @wait_flat_wgs_attr_32_64() #2 {
tail call void @llvm.amdgcn.s.barrier.wait(i16 -1) #0
ret void
}

; Intrinsics exercised by the kernels in this file. Each takes a single
; immediate (immarg) operand: i32 for signal, i16 for wait.
declare void @llvm.amdgcn.s.barrier.signal(i32 immarg) #0
declare void @llvm.amdgcn.s.barrier.wait(i16 immarg) #0

; #0 is applied to the intrinsic declarations and to every call site above.
attributes #0 = { convergent nounwind }
; #1 is used by the *_flat_wgs_attr_32_128 kernels.
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
; #2 is used by the *_flat_wgs_attr_32_64 kernels.
; NOTE(review): the value here is "16,32", which does not match the 32_64 in
; those kernel names -- confirm which one is intended.
attributes #2 = { nounwind "amdgpu-flat-work-group-size"="16,32" }
Loading