Skip to content

Commit dc157f7

Browse files
committed
Do not generate wave_barrier for s_barrier_signal
1 parent 7174202 commit dc157f7

File tree

3 files changed

+19
-9
lines changed

3 files changed

+19
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1832,18 +1832,23 @@ bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
18321832
}
18331833

18341834
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
1835+
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
18351836
if (TM.getOptLevel() > CodeGenOptLevel::None) {
18361837
unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
18371838
if (WGSize <= STI.getWavefrontSize()) {
1838-
MachineBasicBlock *MBB = MI.getParent();
1839-
const DebugLoc &DL = MI.getDebugLoc();
1840-
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
1839+
// If the workgroup fits in a wave, remove s_barrier_signal and lower
1840+
// s_barrier/s_barrier_wait to wave_barrier.
1841+
if (IntrinsicID == Intrinsic::amdgcn_s_barrier ||
1842+
IntrinsicID == Intrinsic::amdgcn_s_barrier_wait) {
1843+
MachineBasicBlock *MBB = MI.getParent();
1844+
const DebugLoc &DL = MI.getDebugLoc();
1845+
BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
1846+
}
18411847
MI.eraseFromParent();
18421848
return true;
18431849
}
18441850
}
18451851

1846-
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
18471852
if (STI.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {
18481853
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
18491854
MachineBasicBlock *MBB = MI.getParent();

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9611,10 +9611,16 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
96119611
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
96129612
if (getTargetMachine().getOptLevel() > CodeGenOptLevel::None) {
96139613
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
9614-
if (WGSize <= ST.getWavefrontSize())
9615-
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
9616-
Op.getOperand(0)),
9617-
0);
9614+
if (WGSize <= ST.getWavefrontSize()) {
9615+
// If the workgroup fits in a wave, remove s_barrier_signal and lower
9616+
// s_barrier/s_barrier_wait to wave_barrier.
9617+
if (IntrinsicID == Intrinsic::amdgcn_s_barrier_signal)
9618+
return Op.getOperand(0);
9619+
else
9620+
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL,
9621+
MVT::Other, Op.getOperand(0)),
9622+
0);
9623+
}
96189624
}
96199625

96209626
if (ST.hasSplitBarriers() && IntrinsicID == Intrinsic::amdgcn_s_barrier) {

llvm/test/CodeGen/AMDGPU/barrier-elimination-gfx12.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ define amdgpu_kernel void @signal_flat_wgs_attr_32_128() #1 {
1717

1818
; CHECK-LABEL: {{^}}signal_flat_wgs_attr_16_32:
1919
; CHECK: :
20-
; CHECK-NEXT: ; wave barrier
2120
; CHECK-NEXT: s_endpgm
2221
define amdgpu_kernel void @signal_flat_wgs_attr_16_32() #2 {
2322
tail call void @llvm.amdgcn.s.barrier.signal(i32 -1)

0 commit comments

Comments
 (0)