Skip to content

Commit 709a74d

Browse files
AMDGPU: Fix s_barrier_leave to write to scc (#161221)
s_barrier_leave implicitly defines $scc and does not use the immediate that represents the barrier type; the isel pattern therefore ignores the immediate operand of the LLVM intrinsic. Add a test that checks whether SIInsertWaitcnts tracks this SCC write.
1 parent 2f7252a commit 709a74d

File tree

9 files changed

+47
-11
lines changed

9 files changed

+47
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
include "AMDGPU.td"
1414
include "AMDGPUCombine.td"
1515

16+
// GlobalISel counterpart of the Ignore complex pattern: operands matched by
// Ignore are handled by the C++ matcher named "selectIgnore".
def gi_ignore :
17+
GIComplexOperandMatcher<s32, "selectIgnore">,
18+
GIComplexPatternEquiv<Ignore>;
19+
1620
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
1721
def gi_vsrc0 :
1822
GIComplexOperandMatcher<s32, "selectVSRC0">,

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4312,6 +4312,8 @@ bool AMDGPUDAGToDAGISel::SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1,
43124312
return true;
43134313
}
43144314

4315+
/// Matcher for an operand that should be ignored: returns true for any \p In
/// and produces no output operands.
bool AMDGPUDAGToDAGISel::SelectIgnore(SDValue In) const { return true; }
4316+
43154317
SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
43164318
if (In.isUndef())
43174319
return CurDAG->getUNDEF(MVT::i32);

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
305305
void SelectWAVE_ADDRESS(SDNode *N);
306306
void SelectSTACKRESTORE(SDNode *N);
307307

308+
bool SelectIgnore(SDValue In) const;
309+
308310
protected:
309311
// Include the pieces autogenerated from the target description.
310312
#include "AMDGPUGenDAGISel.inc"

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4266,6 +4266,11 @@ Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
42664266
return Src;
42674267
}
42684268

4269+
/// Complex-operand matcher for an operand that should be ignored.
/// Returns `{{}}` unconditionally, without inspecting \p Root.
/// NOTE(review): presumably this signals a successful match that renders no
/// operands -- confirm against the definition of ComplexRendererFns.
InstructionSelector::ComplexRendererFns
4270+
AMDGPUInstructionSelector::selectIgnore(MachineOperand &Root) const {
4271+
return {{}};
4272+
}
4273+
42694274
///
42704275
/// This will select either an SGPR or VGPR operand and will save us from
42714276
/// having to write an extra tablegen pattern.

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
166166
MachineOperand Root, MachineInstr *InsertPt,
167167
bool ForceVGPR = false) const;
168168

169+
InstructionSelector::ComplexRendererFns
170+
selectIgnore(MachineOperand &Root) const;
171+
169172
InstructionSelector::ComplexRendererFns
170173
selectVCSRC(MachineOperand &Root) const;
171174

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,9 +1006,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
10061006
Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
10071007
Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
10081008
Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
1009-
Opcode == AMDGPU::S_BARRIER_LEAVE ||
1010-
Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
1011-
Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
1009+
Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
1010+
Opcode == AMDGPU::DS_GWS_BARRIER;
10121011
}
10131012

10141013
static bool isF16PseudoScalarTrans(unsigned Opcode) {

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,8 @@ def VOP3PMadMixBF16Mods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16Mods"
17101710
def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
17111711
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
17121712

1713+
// Complex pattern with zero result operands, matched by the C++ function
// "SelectIgnore"; used where a pattern operand is to be matched but not used.
def Ignore : ComplexPattern<untyped, 0, "SelectIgnore">;
1714+
17131715
//===----------------------------------------------------------------------===//
17141716
// SI assembler operands
17151717
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,17 +1616,15 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm
16161616
let isConvergent = 1;
16171617
}
16181618

1619-
def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
1619+
// s_barrier_leave has no explicit input operands (empty ins, empty asm operand
// string). The selection pattern matches int_amdgcn_s_barrier_leave and uses
// Ignore for the intrinsic's operand, so that operand is not used.
def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave",
1620+
(ins), "", [(int_amdgcn_s_barrier_leave (Ignore))] > {
16201621
let SchedRW = [WriteBarrier];
16211622
let simm16 = 0;
16221623
let fixed_imm = 1;
16231624
let isConvergent = 1;
16241625
let Defs = [SCC]; // the instruction implicitly writes SCC
16251626
}
16261627

1627-
def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave",
1628-
(ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>;
1629-
16301628
def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
16311629
let SubtargetPredicate = isGFX8Plus;
16321630
let simm16 = 0;

llvm/test/CodeGen/AMDGPU/s-barrier.ll

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
44

55
@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
66
@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@@ -102,6 +102,7 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
102102
; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
103103
; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
104104
; GFX12-SDAG-NEXT: s_barrier_wait 1
105+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
105106
; GFX12-SDAG-NEXT: s_barrier_leave
106107
; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
107108
; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
@@ -155,10 +156,11 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in)
155156
; GFX12-GISEL-NEXT: s_barrier_signal -1
156157
; GFX12-GISEL-NEXT: s_barrier_join m0
157158
; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
158-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
159-
; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
160159
; GFX12-GISEL-NEXT: s_barrier_wait 1
160+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
161161
; GFX12-GISEL-NEXT: s_barrier_leave
162+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
163+
; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
162164
; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
163165
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
164166
; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
@@ -256,6 +258,25 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in)
256258
ret void
257259
}
258260

261+
define amdgpu_ps void @test_barrier_leave_write_to_scc(i32 inreg %val, ptr addrspace(1) %out) {
262+
; GFX12-LABEL: test_barrier_leave_write_to_scc:
263+
; GFX12: ; %bb.0:
264+
; GFX12-NEXT: s_barrier_leave
265+
; GFX12-NEXT: s_wait_kmcnt 0x0
266+
; GFX12-NEXT: s_cmp_lg_u32 s0, 0
267+
; GFX12-NEXT: s_movk_i32 s0, 0x7b
268+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
269+
; GFX12-NEXT: s_cselect_b32 s0, s0, 0x1c8
270+
; GFX12-NEXT: v_mov_b32_e32 v2, s0
271+
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
272+
; GFX12-NEXT: s_endpgm
273+
call void @llvm.amdgcn.s.barrier.leave(i16 1)
274+
%cmp = icmp ne i32 %val, 0
275+
%ret = select i1 %cmp, i32 123, i32 456
276+
store i32 %ret, ptr addrspace(1) %out
277+
ret void
278+
}
279+
259280
declare void @llvm.amdgcn.s.barrier() #1
260281
declare void @llvm.amdgcn.s.barrier.wait(i16) #1
261282
declare void @llvm.amdgcn.s.barrier.signal(i32) #1

0 commit comments

Comments
 (0)