Skip to content

Commit 28ea046

Browse files
committed
AMDGPU/GFX12: Fix s_barrier_signal_isfirst for single-wave workgroups
Barrier instructions are no-ops in single-wave workgroups. This includes s_barrier_signal_isfirst, which in that case leaves SCC unmodified. Model this correctly (via an implicit use of SCC) and ensure SCC==1 before the barrier instruction (if the wave is the only one in the workgroup, then it is necessarily the first).
1 parent 2676f40 commit 28ea046

File tree

5 files changed

+23
-3
lines changed

5 files changed

+23
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5918,6 +5918,9 @@ bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
59185918
const DebugLoc &DL = I.getDebugLoc();
59195919
Register CCReg = I.getOperand(0).getReg();
59205920

5921+
// Set SCC to true, in case the barrier instruction gets converted to a NOP.
5922+
BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_CMP_EQ_U32)).addImm(0).addImm(0);
5923+
59215924
BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
59225925
.addImm(I.getOperand(2).getImm());
59235926

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5411,6 +5411,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
54115411
MI.eraseFromParent();
54125412
return BB;
54135413
}
5414+
case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM: {
5415+
// Set SCC to true, in case the barrier instruction gets converted to a NOP.
5416+
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
5417+
TII->get(AMDGPU::S_CMP_EQ_U32))
5418+
.addImm(0)
5419+
.addImm(0);
5420+
return BB;
5421+
}
54145422
case AMDGPU::GET_GROUPSTATICSIZE: {
54155423
assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
54165424
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ def S_BARRIER_SIGNAL_M0 : SOP1_Pseudo <"s_barrier_signal m0", (outs), (ins),
472472
// Pseudo for `s_barrier_signal_isfirst m0`: takes no explicit operands and
// reports its result through SCC.
def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (outs), (ins),
473473
"", []>{
474474
let Defs = [SCC];
475+
// SCC is listed as a use as well as a def: per the commit description, the
// instruction is a no-op in a single-wave workgroup and then leaves SCC
// unmodified, so the incoming SCC value is what a later reader observes.
let Uses = [M0, SCC];
475476
let SchedRW = [WriteBarrier];
476477
let isConvergent = 1;
477478
}
@@ -487,6 +488,8 @@ def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
487488
// Pseudo for `s_barrier_signal_isfirst` with an immediate barrier operand.
// Its pattern selects int_amdgcn_s_barrier_signal_isfirst and produces the
// result in SCC.
def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (outs),
488489
(ins SplitBarrier:$src0), "$src0", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst timm:$src0))]>{
489490
let Defs = [SCC];
491+
// SCC is also a use: per the commit description, the instruction is a no-op
// in a single-wave workgroup and then leaves SCC unmodified.
let Uses = [SCC];
492+
// The custom inserter (the S_BARRIER_SIGNAL_ISFIRST_IMM case in
// SITargetLowering::EmitInstrWithCustomInserter) emits `S_CMP_EQ_U32 0, 0`
// in front of this instruction so that SCC is already true if the barrier
// ends up being a no-op.
let usesCustomInserter = 1;
490493
let SchedRW = [WriteBarrier];
491494
let isConvergent = 1;
492495
}

llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,8 @@ body: |
374374
; CHECK-NEXT: successors: %bb.2(0x80000000)
375375
; CHECK-NEXT: {{ $}}
376376
; CHECK-NEXT: V_NOP_e32 implicit $exec
377-
; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
377+
; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
378+
; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit $scc
378379
; CHECK-NEXT: {{ $}}
379380
; CHECK-NEXT: bb.2:
380381
; CHECK-NEXT: S_ENDPGM 0
@@ -385,7 +386,8 @@ body: |
385386
bb.1:
386387
successors: %bb.2
387388
V_NOP_e32 implicit $exec
388-
S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
389+
S_CMP_EQ_U32 0, 0, implicit-def $scc
390+
S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc, implicit $scc
389391
390392
bb.2:
391393
S_ENDPGM 0
@@ -437,6 +439,7 @@ body: |
437439
; CHECK-NEXT: {{ $}}
438440
; CHECK-NEXT: V_NOP_e32 implicit $exec
439441
; CHECK-NEXT: $m0 = S_MOV_B32 -1
442+
; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
440443
; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc, implicit $scc
441444
; CHECK-NEXT: {{ $}}
442445
; CHECK-NEXT: bb.2:
@@ -449,7 +452,8 @@ body: |
449452
successors: %bb.2
450453
V_NOP_e32 implicit $exec
451454
$m0 = S_MOV_B32 -1
452-
S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
455+
S_CMP_EQ_U32 0, 0, implicit-def $scc
456+
S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc, implicit $scc
453457
454458
bb.2:
455459
S_ENDPGM 0

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.signal.isfirst.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ define i1 @func1() {
1010
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
1111
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
1212
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
13+
; GFX12-SDAG-NEXT: s_cmp_eq_u32 0, 0
1314
; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
1415
; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
1516
; GFX12-SDAG-NEXT: s_cselect_b32 s0, -1, 0
@@ -25,6 +26,7 @@ define i1 @func1() {
2526
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
2627
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
2728
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
29+
; GFX12-GISEL-NEXT: s_cmp_eq_u32 0, 0
2830
; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
2931
; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
3032
; GFX12-GISEL-NEXT: s_cselect_b32 s0, 1, 0

0 commit comments

Comments
 (0)