Skip to content

Commit f800d64

Browse files
committed
[AMDGPU] Optimize out s_barrier_signal/_wait
Extend the optimization that converts s_barrier to wave_barrier (a nop) when the number of work items is not larger than the wave size. This handles the "split barrier" form of s_barrier, where the barrier is represented by separate intrinsics (s_barrier_signal/s_barrier_wait). Note: when s_barrier itself is used on gfx12 (where it is later split into signal and wait), the optimization already applies; however, some front-ends may prefer to emit the split intrinsics directly, and this patch covers that case.
1 parent 3955c2b commit f800d64

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1843,8 +1843,9 @@ bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
18431843
}
18441844
}
18451845

1846-
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
1847-
if (STI.hasSplitBarriers()) {
1846+
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1847+
if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
1848+
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
18481849
MachineBasicBlock *MBB = MI.getParent();
18491850
const DebugLoc &DL = MI.getDebugLoc();
18501851
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
@@ -2161,6 +2162,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
21612162
case Intrinsic::amdgcn_init_whole_wave:
21622163
return selectInitWholeWave(I);
21632164
case Intrinsic::amdgcn_s_barrier:
2165+
case Intrinsic::amdgcn_s_barrier_signal:
2166+
case Intrinsic::amdgcn_s_barrier_wait:
21642167
return selectSBarrier(I);
21652168
case Intrinsic::amdgcn_raw_buffer_load_lds:
21662169
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9605,7 +9605,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
96059605
unsigned Opc = Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
96069606
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
96079607
}
9608-
case Intrinsic::amdgcn_s_barrier: {
9608+
case Intrinsic::amdgcn_s_barrier:
9609+
case Intrinsic::amdgcn_s_barrier_signal:
9610+
case Intrinsic::amdgcn_s_barrier_wait: {
96099611
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
96109612
if (getTargetMachine().getOptLevel() > CodeGenOptLevel::None) {
96119613
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
@@ -9615,8 +9617,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
96159617
0);
96169618
}
96179619

9618-
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
9619-
if (ST.hasSplitBarriers()) {
9620+
if (ST.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
9621+
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
96209622
SDValue K =
96219623
DAG.getTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32);
96229624
SDValue BarSignal =

llvm/test/CodeGen/AMDGPU/barrier-elimination-gfx12.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ define amdgpu_kernel void @signal_flat_wgs_attr_32_128() #1 {
1616
}
1717

1818
; CHECK-LABEL: {{^}}signal_flat_wgs_attr_32_64:
19-
; CHECK: s_barrier_signal
19+
; CHECK: :
20+
; CHECK-NEXT: ; wave barrier
21+
; CHECK-NEXT: s_endpgm
2022
define amdgpu_kernel void @signal_flat_wgs_attr_32_64() #2 {
2123
tail call void @llvm.amdgcn.s.barrier.signal(i32 -1) #0
2224
ret void
@@ -39,7 +41,8 @@ define amdgpu_kernel void @wait_flat_wgs_attr_32_128() #1 {
3941

4042
; CHECK-LABEL: {{^}}wait_flat_wgs_attr_32_64:
4143
; CHECK: :
42-
; CHECK-NEXT: s_barrier_wait
44+
; CHECK-NEXT: ; wave barrier
45+
; CHECK-NEXT: s_endpgm
4346
define amdgpu_kernel void @wait_flat_wgs_attr_32_64() #2 {
4447
tail call void @llvm.amdgcn.s.barrier.wait(i16 -1) #0
4548
ret void

0 commit comments

Comments
 (0)